From 4f4aeab54ba17f9cc4680e82dafc968b07295b0a Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 26 May 2026 16:55:12 -0600 Subject: [PATCH 01/16] Draft code to standardize frame of motifs --- scripts/check-loci.py | 120 +++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 26 deletions(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index cec4b52e..7466a48c 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -104,6 +104,45 @@ def circular_permuted(x): modified_sequences.extend([x[i:] + x[:i] for i in range(n)]) return modified_sequences +standard_motif_arrangements = [ + "CAG", + "CCG", + "CGG", + "CTG", + "GCN", + "TTTCA", + "AAATG", +] + +def standardise_reference_motif(motif): + """ + Args: + motif (str) + Returns: + str: motif rewritten to the preferred standard arrangement if possible + >>> standardise_reference_motif('GCC') + 'CCG' + >>> standardise_reference_motif('CGC') + 'CCG' + >>> standardise_reference_motif('CAG') + 'CAG' + >>> standardise_reference_motif('XYZ') + 'XYZ' + """ + if motif is None or len(motif) == 0: + return motif + motif = motif.upper() + for standard_motif in standard_motif_arrangements: + standard_motif = standard_motif.upper() + + if len(motif) != len(standard_motif): + continue + + if standard_motif in circular_permuted(motif): + return standard_motif + + return motif + def normalise_str(in_dna): """ Args: @@ -127,67 +166,96 @@ def normalise_str(in_dna): return min(all_possible) -def get_new_motif(motif, gene_strand): +def get_new_motif(reference_motif, gene_strand): """ Args: - motif (string) + reference_motif (string) gene_strand: either + or - Returns: - the normalized output of the string from ref to gene orientation - Get the new normalized motif for each row. - If gene_strand is +, reference orientation = gene orientation - If gene_strand is -, reverse_complement ref_ori for gene_ori - >>> get_new_motif('GAG', '+') - 'AGG' - >>> get_new_motif('GAG', '-') - 'CCT' - >>> get_new_motif('TCATC', '-') - 'AGATG' + motif in gene orientation + + If gene_strand is +, gene orientation copies reference orientation. + If gene_strand is -, gene orientation is the reverse complement of reference orientation. + + >>> get_new_motif('CCG', '+') + 'CCG' + >>> get_new_motif('CCG', '-') + 'CGG' + >>> get_new_motif('CAG', '-') + 'CTG' >>> get_new_motif('TAG', 'plus') Traceback (most recent call last): ... AssertionError: Gene strand plus is not +/- """ if gene_strand == "+": - normalized_motif = normalise_str(motif) + return reference_motif elif gene_strand == "-": - seq = Seq(motif) - reverse_comp = str(seq.reverse_complement()) - normalized_motif = normalise_str(reverse_comp) + seq = Seq(reference_motif) + return str(seq.reverse_complement()) else: raise AssertionError(f'Gene strand {gene_strand} is not +/-') - return normalized_motif def check_motif_orientation(record): """ Args: record (dict): a dictionary containing a single locus from the STRchive json Returns: - record (dict): the record with any motif fields with incorrect orientation updated + record (dict): the record with motif reference orientations standardized + and gene orientations recalculated from the standardized reference motifs """ + field_pairs = [ ('pathogenic_motif_reference_orientation', 'pathogenic_motif_gene_orientation'), ('benign_motif_reference_orientation', 'benign_motif_gene_orientation'), ('unknown_motif_reference_orientation', 'unknown_motif_gene_orientation'), ('interruption_reference_orientation', 'interruption_gene_orientation') ] + for ref_field, gene_field in field_pairs: if record[ref_field] is None: continue - old = record[gene_field] - new = [get_new_motif(x, record['gene_strand']) for x in record[ref_field]] - if old != new: - for old_motif, new_motif in zip(old, new): + + # 1. Standardize reference orientation + old_ref_motifs = record[ref_field] + new_ref_motifs = [ + standardise_reference_motif(motif) + for motif in old_ref_motifs + ] + + if old_ref_motifs != new_ref_motifs: + for old_motif, new_motif in zip(old_ref_motifs, new_ref_motifs): + if old_motif != new_motif: + sys.stderr.write( + f"Updating {record['id']} {ref_field} from {old_motif} to {new_motif}\n" + ) + + record[ref_field] = new_ref_motifs + + # 2. Recompute gene orientation from the standardized reference orientation + old_gene_motifs = record[gene_field] + new_gene_motifs = [ + get_new_motif(motif, record['gene_strand']) + for motif in record[ref_field] + ] + + if old_gene_motifs != new_gene_motifs: + for old_motif, new_motif in zip(old_gene_motifs, new_gene_motifs): if old_motif != new_motif: - sys.stderr.write(f'Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n') - record[gene_field] = new + sys.stderr.write( + f"Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n" + ) + record[gene_field] = new_gene_motifs # Replace locus_structure with a string of the motifs in reference orientation - # example [ { "motif": "CAGG", "count": null, "type": "pathogenic_repeat" } ] if record['locus_structure'] is None: record['locus_structure'] = [] for motif in record['pathogenic_motif_reference_orientation']: - record['locus_structure'].append({"motif": motif, "count": None, "type": "pathogenic_repeat"}) + record['locus_structure'].append({ + "motif": motif, + "count": None, + "type": "pathogenic_repeat" + }) return record From 3e34c80755d2283b156c96f34fe7d5c1b345f858 Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Tue, 26 May 2026 23:05:53 +0000 Subject: [PATCH 02/16] Update data --- data/STRchive-loci.json | 214 +++++++++--------- .../STRchive-disease-loci.T2T-chm13.TRGT.bed | 58 ++--- ...STRchive-disease-loci.T2T-chm13.atarva.bed | 48 ++-- ...chive-disease-loci.T2T-chm13.atarva.bed.gz | Bin 1856 -> 1834 bytes ...e-disease-loci.T2T-chm13.atarva.bed.gz.tbi | Bin 4453 -> 4452 bytes ...TRchive-disease-loci.T2T-chm13.general.bed | 56 ++--- ...STRchive-disease-loci.T2T-chm13.longTR.bed | 58 ++--- ...TRchive-disease-loci.T2T-chm13.straglr.bed | 48 ++-- ...chive-disease-loci.T2T-chm13.stranger.json | 104 ++++----- .../STRchive-disease-loci.hg19.TRGT.bed | 58 ++--- .../STRchive-disease-loci.hg19.atarva.bed | 48 ++-- .../STRchive-disease-loci.hg19.atarva.bed.gz | Bin 1871 -> 1855 bytes ...Rchive-disease-loci.hg19.atarva.bed.gz.tbi | Bin 4460 -> 4460 bytes .../STRchive-disease-loci.hg19.general.bed | 56 ++--- .../STRchive-disease-loci.hg19.longTR.bed | 58 ++--- .../STRchive-disease-loci.hg19.straglr.bed | 48 ++-- .../STRchive-disease-loci.hg19.stranger.json | 104 ++++----- .../STRchive-disease-loci.hg38.TRGT.bed | 58 ++--- .../STRchive-disease-loci.hg38.atarva.bed | 48 ++-- .../STRchive-disease-loci.hg38.atarva.bed.gz | Bin 1859 -> 1840 bytes ...Rchive-disease-loci.hg38.atarva.bed.gz.tbi | Bin 4418 -> 4417 bytes .../STRchive-disease-loci.hg38.general.bed | 56 ++--- .../STRchive-disease-loci.hg38.longTR.bed | 58 ++--- .../STRchive-disease-loci.hg38.straglr.bed | 48 ++-- .../STRchive-disease-loci.hg38.stranger.json | 104 ++++----- data/ref-alleles/ref-alleles.T2T-chm13.txt | 130 +++++------ data/ref-alleles/ref-alleles.hg19.txt | 126 +++++------ data/ref-alleles/ref-alleles.hg38.txt | 126 +++++------ 28 files changed, 856 insertions(+), 856 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 30e17c29..6001e685 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -30,7 +30,7 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -96,7 +96,7 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -162,7 +162,7 @@ "location_in_gene": "Intron 3", "gene_strand": "-", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -228,11 +228,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -294,11 +294,11 @@ "location_in_gene": "Coding Exon 2, aa 110-115", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -360,11 +360,11 @@ "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -430,7 +430,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -496,10 +496,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["ATG", "TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ATC", "AAC"], + "interruption_gene_orientation": ["CAT", "CAA"], "locus_structure": [], "benign_min": 6, "benign_max": 35, @@ -565,7 +565,7 @@ "pathogenic_motif_gene_orientation": ["ATTCT"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "CTTTT", "ATTTTCT", "ATTCTCT", "CTGTTT", "CTCTT", "ATGCT"], + "interruption_gene_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "TTTCT", "ATTTTCT", "ATTCTCT", "GTTTCT", "CTTCT", "ATGCT"], "locus_structure": [], "benign_min": 10, "benign_max": 32, @@ -628,10 +628,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 14, "benign_max": 28, @@ -694,10 +694,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG", "AGG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC", "CCT"], + "interruption_gene_orientation": ["CAA", "CCT"], "locus_structure": [], "benign_min": 11, "benign_max": 44, @@ -760,7 +760,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -839,7 +839,7 @@ "pathogenic_motif_gene_orientation": ["CTG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CCG", "ACT", "CCT", "ACC", "CTT"], + "interruption_gene_orientation": ["CCG", "CTA", "CTC", "CCA", "CTT"], "locus_structure": [ { "motif": "CTA", @@ -912,9 +912,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AATGG", "AATAG"], + "pathogenic_motif_gene_orientation": ["TGGAA", "TAGAA"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["AAAAA", "AAAAC", "AAATG", "AAAAG", "AAGAT", "AAACT", "AACAT", "AATAC", "AAATC", "AATGC"], + "unknown_motif_gene_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], "interruption_gene_orientation": [], "locus_structure": [], "benign_min": null, @@ -978,7 +978,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CCGGGG"], + "pathogenic_motif_gene_orientation": ["GGGGCC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1044,7 +1044,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1176,7 +1176,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC"], + "pathogenic_motif_gene_orientation": ["GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1249,7 +1249,7 @@ "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCTG"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["CTGT"], + "unknown_motif_gene_orientation": ["TCTG"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -1328,7 +1328,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ACG"], + "pathogenic_motif_gene_orientation": ["GAC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1527,11 +1527,11 @@ "location_in_gene": "Intron 1 (most isoforms)", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["GAAAT"], + "pathogenic_motif_reference_orientation": ["AAATG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["CATTT"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -1603,7 +1603,7 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1673,7 +1673,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AAG"], + "pathogenic_motif_gene_orientation": ["GAA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1815,7 +1815,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ACAGCGAGGTCGGCAGCGGC"], + "pathogenic_motif_gene_orientation": ["TCGGCAGCGGCACAGCGAGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1877,7 +1877,7 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1944,13 +1944,13 @@ "gene_strand": "-", "reference_motif_reference_orientation": ["GAA"], "pathogenic_motif_reference_orientation": ["GAA"], - "benign_motif_reference_orientation": ["GGA", "GCA"], + "benign_motif_reference_orientation": ["GGA", "CAG"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GAG", "GAAGGA", "GAAGAAAGAA", "GAAAAGAAGAAGGAAGAAGGAA", "GAAAAGAAGAAGGAA", "GCAGAAGAAGAAGAA"], - "pathogenic_motif_gene_orientation": ["CTT"], - "benign_motif_gene_orientation": ["CCT", "CTG"], + "pathogenic_motif_gene_orientation": ["TTC"], + "benign_motif_gene_orientation": ["TCC", "CTG"], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CCT", "CCTTCT", "CTTCTTCTTT", "CCTTCTTCCTTCTTCTTTTCTT", "CCTTCTTCTTTTCTT", "CTGCTTCTTCTTCTT"], + "interruption_gene_orientation": ["CTC", "TCCTTC", "TTCTTTCTTC", "TTCCTTCTTCCTTCTTCTTTTC", "TTCCTTCTTCTTTTC", "TTCTTCTTCTTCTGC"], "locus_structure": [], "benign_min": 8, "benign_max": 179, @@ -2075,11 +2075,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2145,7 +2145,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AAG"], + "pathogenic_motif_gene_orientation": ["GAA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2283,11 +2283,11 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2420,11 +2420,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2486,11 +2486,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2552,11 +2552,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2622,7 +2622,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2688,10 +2688,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [ { "motif": "CAG", @@ -2831,7 +2831,7 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["CGC"], - "pathogenic_motif_reference_orientation": ["CGC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2901,9 +2901,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], + "unknown_motif_gene_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -2977,7 +2977,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AAAC"], + "pathogenic_motif_gene_orientation": ["CAAA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3043,8 +3043,8 @@ "benign_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], - "benign_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], + "pathogenic_motif_gene_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA"], + "benign_motif_gene_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], "locus_structure": [], @@ -3109,7 +3109,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CCGGG"], + "pathogenic_motif_gene_orientation": ["GGGCC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3171,14 +3171,14 @@ "location_in_gene": "Coding Exon 1/Intron 1 depending on transcript", "gene_strand": "+", "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GCA", "GCT", "GCC"], + "interruption_reference_orientation": ["CAG", "CTG", "CCG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGC", "CTG", "CCG"], + "interruption_gene_orientation": ["CAG", "CTG", "CCG"], "locus_structure": [], "benign_min": 6, "benign_max": 10, @@ -3241,10 +3241,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], - "pathogenic_motif_gene_orientation": ["CCTGGG"], + "pathogenic_motif_gene_orientation": ["GGCCTG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CTGGG", "CCCTGGG", "CCGGG", "CCTTGG"], + "interruption_gene_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], "locus_structure": [ { "motif": "GGCCTG", @@ -3313,14 +3313,14 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGA", "AGC"], + "interruption_reference_orientation": ["GGA", "CAG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGG", "AGC"], + "interruption_gene_orientation": ["GGA", "CAG"], "locus_structure": [], "benign_min": 7, "benign_max": 37, @@ -3379,7 +3379,7 @@ "location_in_gene": "Exon 1 of lncRNA (noncoding)", "gene_strand": "+", "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3449,7 +3449,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3515,7 +3515,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3581,7 +3581,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AAAGGAACCATCCAGACCGGCGTGGACACCAGTAAGACTGTCCTAACAGGTACCAAGGACACCGTCTGTAGTGGGGTGACTGGTGCCATGAATGTGGCC"], + "pathogenic_motif_gene_orientation": ["GTAAGACTGTCCTAACAGGTACCAAGGACACCGTCTGTAGTGGGGTGACTGGTGCCATGAATGTGGCCAAAGGAACCATCCAGACCGGCGTGGACACCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3643,11 +3643,11 @@ "location_in_gene": "Coding Exon 2", "gene_strand": "-", "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3724,11 +3724,11 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3790,7 +3790,7 @@ "location_in_gene": "Coding Exon 5", "gene_strand": "+", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3860,7 +3860,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGCCTCATGGTGGTGGCTGGGGGC"], + "pathogenic_motif_gene_orientation": ["CCTCATGGTGGTGGCTGGGGGCAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3936,10 +3936,10 @@ "benign_motif_reference_orientation": ["TTTTA"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GGGGT", "GGGAT"], - "pathogenic_motif_gene_orientation": ["ATTTC"], - "benign_motif_gene_orientation": ["ATTTT"], + "pathogenic_motif_gene_orientation": ["TTTCA"], + "benign_motif_gene_orientation": ["TTTTA"], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["GGGGT", "ATGGG"], + "interruption_gene_orientation": ["GGGGT", "GGGAT"], "locus_structure": [ { "motif": "TTTTA", @@ -4012,9 +4012,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "TTATG"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], + "unknown_motif_gene_orientation": ["TTTTT", "TTATG"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4088,9 +4088,9 @@ "benign_motif_reference_orientation": ["AAAAG", "AAAGGG"], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "AGGTG", "ACGGG", "AAAAAG", "AAGGC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "CCCTG"], + "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "GCCCT"], "benign_motif_gene_orientation": ["CTTTT", "CCCTTT"], - "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "CTTGT", "ACCTT", "CCCCT", "CTCTT", "CCTTTT", "CGTTT", "CTGTT", "ACCTC", "CCCGT", "CTTTTT", "CCTTG"], + "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "GTCTT", "ACCTT", "CCCCT", "CTCTT", "CCTTTT", "CGTTT", "CTGTT", "CACCT", "CCCGT", "CTTTTT", "GCCTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4160,14 +4160,14 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TGG", "CGT", "AGG"], "pathogenic_motif_gene_orientation": ["CCG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ACC", "ACG", "CCT"], + "interruption_gene_orientation": ["CCA", "ACG", "CCT"], "locus_structure": [], "benign_min": 6, "benign_max": 16, @@ -4230,7 +4230,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4292,13 +4292,13 @@ "location_in_gene": "Intron 4/4", "gene_strand": "-", "reference_motif_reference_orientation": ["TAAAA"], - "pathogenic_motif_reference_orientation": ["TGAAA"], + "pathogenic_motif_reference_orientation": ["AAATG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "TAAAC", "TAACA", "TACAA", "TACAC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["CATTT"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "AGTTT", "ATGTT", "ATTGT", "AGTGT"], + "unknown_motif_gene_orientation": ["TTTTT", "GTTTA", "TGTTA", "TTGTA", "GTGTA"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4368,11 +4368,11 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "-", "reference_motif_reference_orientation": ["NGC"], - "pathogenic_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["NGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4438,9 +4438,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "AACTC", "AACTG", "AATAC", "AATAG", "ATAAC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["CATTT"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "AGTTG", "AGTTC", "ATTGT", "ATTCT", "ATGTT"], + "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "GAGTT", "CAGTT", "GTATT", "CTATT", "GTTAT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4576,7 +4576,7 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -4646,10 +4646,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 25, "benign_max": 40, @@ -4712,7 +4712,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4844,10 +4844,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 20, "benign_max": 38, @@ -4910,7 +4910,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -5052,7 +5052,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGCGGCGCGG"], + "pathogenic_motif_gene_orientation": ["GGCGCGGAGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5114,7 +5114,7 @@ "location_in_gene": "5' promoter region. Note, it can also be annotated coding or introntic depending on the reference, due to missing sequences in some reference genomes.", "gene_strand": "-", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5199,9 +5199,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "TGTTA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], + "unknown_motif_gene_orientation": ["TTTTT", "TGTTA"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -5271,7 +5271,7 @@ "location_in_gene": "Coding, Last Exon (exon number is transcript dependent)", "gene_strand": "-", "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5341,7 +5341,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5407,7 +5407,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5469,7 +5469,7 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed index 41f2fcb5..7b23e014 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed @@ -1,58 +1,58 @@ chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= +chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= +chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= chr2 176581179 176581224 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= chr3 63956302 63956345 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= +chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= +chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= chr6 16200188 16200282 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45257567 45257618 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 171935458 171935569 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= -chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= +chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= +chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= +chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= +chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= +chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= +chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= chr11 119226662 119226696 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6947903 6947941 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= chr12 111575873 111575940 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= chr13 69361213 69361270 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99196358 99196404 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 17522488 17522519 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 86300519 86300603 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= chr15 32225152 32225178 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 73638636 73638724 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= chr16 93675723 93675776 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 42132054 42132091 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 20143615 20143660 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38781587 38781680 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46280059 46280134 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= +chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= chrX 69887153 69887230 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 135876774 135876804 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= chrX 146176677 146176769 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed index ca754b45..8114fb59 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed @@ -1,20 +1,20 @@ #chrom start stop motif motif_len id chr1 870158 870178 GGCGCGGAGC 10 HMNR7_VWA1 chr1 57245970 57245973 GAAAT 5 SCA37_DAB1 -chr1 94266544 94266567 GCC 3 OPDM5_ABCD3 -chr1 148519695 148519738 GGC 3 NIID_NOTCH2NLC +chr1 94266544 94266567 CCG 3 OPDM5_ABCD3 +chr1 148519695 148519738 CGG 3 NIID_NOTCH2NLC chr1 154328121 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 chr1 155728131 155728159 GGGCC 5 NME_NAXE chr2 96703674 96703677 AAATG 5 FAME2_STARD7 -chr2 100563685 100563738 GCC 3 FRA2A_AFF3 +chr2 100563685 100563738 CCG 3 FRA2A_AFF3 chr2 176581179 176581224 GCN 3 SD5_HOXD13 -chr2 191369982 191370024 GCA 3 GDPAG_GLS +chr2 191369982 191370024 CAG 3 GDPAG_GLS chr3 63956302 63956333 CAG 3 SCA7_ATXN7 chr3 63956333 63956345 CCG 3 SCA7_ATXN7_flank chr3 131917482 131917557 CAGG 4 DM2_CNBP chr3 131917557 131917597 CAGA 4 DM2_CNBP_flank chr3 131917597 131917635 CA 2 DM2_CNBP_flank -chr3 141687011 141687054 NGC 3 BPES_FOXL2 +chr3 141687011 141687054 GCN 3 BPES_FOXL2 chr3 186521702 186521706 TTTCA 5 FAME4_YEATS2 chr4 3073603 3073681 CAG 3 HD_HTT chr4 3073687 3073723 CCG 3 HD_HTT_flank @@ -22,36 +22,36 @@ chr4 39318132 39318136 AAGGG 5 CANVAS_RFC1 chr4 41719745 41719805 GCN 3 CCHS_PHOX2B chr4 162693388 162693405 TTTCA 5 FAME7_RAPGEF2 chr5 10295585 10295593 TTTCA 5 FAME3_MARCHF6 -chr5 147414733 147414780 GCT 3 SCA12_PPP2R2B -chr5 178096748 178096792 GCC 3 OPDM_FAM193B -chr6 13201716 13201843 GCC 3 OPDM_TBC1D7 +chr5 147414733 147414780 CTG 3 SCA12_PPP2R2B +chr5 178096748 178096792 CCG 3 OPDM_FAM193B +chr6 13201716 13201843 CCG 3 OPDM_TBC1D7 chr6 16200188 16200282 CTG 3 SCA1_ATXN1 chr6 45257567 45257618 GCN 3 CCD_RUNX2 chr6 171935458 171935569 CAG 3 SCA17_TBP -chr7 27335684 27335720 NGC 3 HFG_HOXA13-III -chr7 27335813 27335849 NGC 3 HFG_HOXA13-II -chr7 27335912 27335954 NGC 3 HFG_HOXA13-I -chr7 56047900 56047939 GCG 3 FRA7A_ZNF713 -chr8 105716409 105716441 CGC 3 OPDM1_LRP12 +chr7 27335684 27335720 GCN 3 HFG_HOXA13-III +chr7 27335813 27335849 GCN 3 HFG_HOXA13-II +chr7 27335912 27335954 GCN 3 HFG_HOXA13-I +chr7 56047900 56047939 CGG 3 FRA7A_ZNF713 +chr8 105716409 105716441 CCG 3 OPDM1_LRP12 chr8 119495347 119495353 TGAAA 5 FAME1_SAMD12 chr9 27584063 27584155 GGCCCC 6 FTDALS1_C9orf72 chr9 81210818 81210834 A 1 FRDA_FXN_flank chr9 81210834 81210861 GAA 3 FRDA_FXN -chr9 142886568 142886595 GCC 3 HSAN-VIII_PRDM12 +chr9 142886568 142886595 CCG 3 HSAN-VIII_PRDM12 chr9 145285333 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 80695718 80695748 GGC 3 OPML1_NUTM2B-AS1 +chr10 80695718 80695748 CGG 3 OPML1_NUTM2B-AS1 chr11 119226662 119226696 CGG 3 JBS_CBL chr12 6947903 6947941 CAG 3 DRPLA_ATN1 -chr12 50468095 50468118 GGC 3 FRA12A_DIP2B +chr12 50468095 50468118 CGG 3 FRA12A_DIP2B chr12 111575873 111575940 CTG 3 SCA2_ATXN2 -chr12 123532573 123532603 GGC 3 OPDM4_RILPL1 +chr12 123532573 123532603 CGG 3 OPDM4_RILPL1 chr13 69361213 69361243 CTA 3 SCA8_ATXN8OS_flank chr13 69361243 69361270 CTG 3 SCA8_ATXN8OS chr13 99196358 99196404 GCN 3 HPE5_ZIC2 chr13 101377549 101377792 GAA 3 SCA27B_FGF14 chr14 17522488 17522519 GCN 3 OPMD_PABPN1 chr14 86300519 86300603 CTG 3 SCA3_ATXN3 -chr15 20458510 20458536 GCG 3 ALS1_NIPA1 +chr15 20458510 20458536 CGG 3 ALS1_NIPA1 chr15 32225152 32225178 CT 2 aFTLD-U_GOLGA8A chr15 86324038 86324057 TTTG 4 CHNG3_MIR7-2 chr15 87088402 87088408 GCT 3 CPEO_POLG_flank @@ -61,7 +61,7 @@ chr16 17477909 17478002 GCC 3 DBQD2_XYLT1 chr16 24890416 24890430 TTTCA 5 FAME6_TNRC6A chr16 72284666 72284761 TGGAA 5 SCA31_BEAN1 chr16 73638636 73638724 CAG 3 SCA_THAP11 -chr16 78605502 78605569 GCC 3 SCA4_ZFHX3 +chr16 78605502 78605569 CCG 3 SCA4_ZFHX3 chr16 93675723 93675776 CTG 3 HDL2_JPH3 chr17 17755051 17755053 TTTCA 5 FAME8_RAI1 chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 @@ -80,13 +80,13 @@ chr21 42132054 42132091 CGCGGGGCGGGG 12 EPM1_CSTB chr22 20143615 20143660 GCN 3 TOF_TBX1 chr22 38781587 38781680 CCG 3 EPM_CSNK1E chr22 46280059 46280134 ATTCT 5 SCA10_ATXN10 -chrX 24597766 24597802 NGC 3 PRTS_ARX -chrX 24597886 24597934 NGC 3 EIEE1_ARX +chrX 24597766 24597802 GCN 3 PRTS_ARX +chrX 24597886 24597934 GCN 3 EIEE1_ARX chrX 30882677 30882743 TTC 3 DMD_DMD chrX 30882743 30882751 T 1 DMD_DMD_flank -chrX 65975147 65975250 GCA 3 SBMA_AR +chrX 65975147 65975250 CAG 3 SBMA_AR chrX 69887153 69887230 AGAGGG 6 XDP_TAF1 chrX 135876774 135876804 GCN 3 VACTERLX_ZIC3 -chrX 138816203 138816248 NGC 3 XLID_SOX3 +chrX 138816203 138816248 GCN 3 XLID_SOX3 chrX 146176677 146176769 CGG 3 FXS_FMR1 -chrX 146765190 146765342 GCC 3 FRAXE_AFF2 +chrX 146765190 146765342 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz index b416feff11da9634be3328f752d27aed9a7627ef..4d84dd9f68f14fc057d5f1d7e3d9792cebe3ae3a 100644 GIT binary patch delta 1825 zcmV++2j2L=4yq1+ABzYC000000RIL6LPG)o4F`pn+m7S75kQ}7|D^#|6|avpWyzkg zWJO|6Jo7><7FYyGkO2Gt4?&TX+npI8gF*AyKB9On7XSFiUkKhQG`<$j!!)HSO;el_ zpn}W1Y_5F!d&J-V5Z5@;COCBf!xUr8U|>&?T^Zu3n?aL*wMOd>)C^A?4U=^|e-i-^-u>`u*Sk{Pci6z`Zno3am-TGQ zk87gtp+a`f=+Mapl6Bep=1ygtM$}c-fX)zU?;98lz0&(S4`q#~wU4d_Z&@i3D%4Rp zTFSMhZ3P=3rHwbJI`A>4@wQ`Gu4~^t2AJfHhB`P8DFeapW4;Y|p4B z>#m*!blSO~LItnd#*|fi`rAO|@%@&uo1}eE`U$M0XDNb#G0taO^0FlxyX=U(wzh+_ z2zippZ6OEC=^U4%V#k)fM)1@$%p<2DZ*P z;ttOCHE=5>6#t<|Jb+U*?o#^-c0ql%5IK=vrg0e4S&8(-DW0$XTNClyj`1SZd2L zK?pE!?`!;4*4uSCT7t~X$hO0Cp!XDQNii)EB+tW)Z|fe@rKJ139)sacaE=duF!l($GnQdqzluf>tW#{AKEQ#TfK#7oDtnIG z;F^m{8Ozq2PW5a{Jwj6~aU8ju&Ki&CLMNgl*zJQUm-}PD!wkqa1mDOh^<3rCu*RuO zx4nuA$a+Q}YDui*XqPdyb>rBw+*e~?R8eYK53!%skDx#bv}13TZ6$ME!coqDNSS{5 zFDpkr#|g+m`ZAPtTh^cJV6LT8rqz*uRe^q1P#;CmgiOll4gsabRhK`21m|_elJ0jv7(=KEOk>49Bi?>rjs8d$ITe4W?1q<#5{=PqhXUMUkAAZw6q(K8`b z^?X1jGK1w{EGz0D!*zruS4H=K>hS^Ed_<-0eT*LiG6r;77D6RsoYX5PgzST~oYq%S ze(n^i7+=Jn>lLYyWdkV!{aV&&C3D1s%&~4y>v=EZ{c)pidTR)sq9@r;F;bWB=a~0u zYhdMAPTX4hBLTV$<&yJ@Arqw>OMq@e)r^l8YT&@=k?8;nQ5Gslr$R-gFdywjZO9w0t5vAhd)%&iLep+%Q=X#SzGOrR=KJv7mvpWdo(I&+q_eg` zyVS`Xty|tp+#WA~miOHXe9YwedOqWa#>#t=gSmp!vfXMTIuB)-hZjkHYhVyE;Guks z+XJi#&Y*S;$f#rSp5|EM)}`RRL&FW^B#l#zG8P`gT5=q*iO9w~>zwZ7pO>0%G3E1i zd&tFx57~R!gM5%aw%Fr#9m;-x9HO@9taQ1Nv(Kgb*vmK=Z}{|_H5gPUN%@}n$m1;e P2>te76zsFtlOP8mAD^8N delta 1847 zcmV-72gvxU4!{n7ABzYC000000RIL6LPG)oBL{_-&5q-^5rEIN@6rIPioZvivSiO# zvLdl3p1BZ<1r`AkB*4D^At;h^yLSiJ!Jzfo{zQ>fu~_`=Uw1hYHu*uU^+RjodAaAdL7Dg&*?(TEp^*yCXdvJl~*JUr5D8H9X{ z^6B_yN?F`gKgz!d79kG#>rm$RgvUdSBai$f6F}}wU|^ZgWr>e-H5GVegK_9P=%erV zb-rO)-t$ar48 zy{)&_1?eGMq$g;Glw)hv*H-;~FTeli*Z=c zkXd;4lxt601REfwjW?(|u!Sj!7;ih4<+}FOV}MDXXsCnpkTMYbI_BH(_Fks@ZHhj2 z*;^)mt=M-k4sy;DXh@f3;xb=1_eO{7l=oWE0UoB5fg!Euds*+d>E#ue52*nnO!@C@ z#14l3+;;UWpwrF;6)Jeu7N$h)=^q1?$M;*tZj$yv=_jy~k);R*#yFpC$;*~(?6M>B z+S(4zBIHS4Zc8~>PUpBB4Li2%Eq8FF`@-3OQ!M!sSA2@zDq|FNu%Y*LPf?d2o1fRKBd2<~cy;j8O&;;7>#HWs6trsz;@r1ak$$8fwl;dCHQhQvDl& z6l`hT%lSI$*dF7-lPpQO1Z7}~d73g{@fKe4S&8(~*ah$XT-Slyj`9 zSX#?5K?pE!?`!;4*4uSCdV)Gd*{)3*T8z!W9z)mCGE>sJafkr^xpV_8vu2br!TEx9VXSC0?Sra%0{`lyWQyx=mFxK6iU6|9t>~VSfcD-K9x()NuU(|-Y;To+%wcX=%)eD+-I-Bwo<@6<| z9fJ8jeD@`t>w)Kg0rfEHtZg8Ny7rFNE$=06j~C1PZUsKx$^olfQL-Zy*YD!XUzV?kD-pzkMjfg&gC-(Pxs`1Rt|v& zuNL+(add5{IO4d^aDK(ili3NY;>2Aisd!FWb9C}V9-aFC+peqZRF@0?9&Q>kd*iBm z$5vI%&svcOq$F0qDpq!8d+u`&Rk=vech$dgd3cz+wQvh-sJr diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed index 03c1911e..c79207eb 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 870158 870178 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 +chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 154328121 154330802 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 155728131 155728159 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100563685 100563738 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100563685 100563738 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176581179 176581224 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191369982 191370024 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 191369982 191370024 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency chr3 63956302 63956333 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 131917482 131917557 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 141687011 141687054 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 141687011 141687054 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 186521667 186521706 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3073603 3073687 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39318077 39318136 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41719745 41719805 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 162693303 162693405 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10295525 10295593 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 178096748 178096792 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 +chr5 178096748 178096792 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy +chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy chr6 16200188 16200282 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45257567 45257618 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 171935458 171935569 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27335684 27335720 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27335813 27335849 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27335912 27335954 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105716409 105716441 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 -chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 +chr7 27335684 27335720 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27335813 27335849 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27335912 27335954 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 +chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105716409 105716441 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27584063 27584155 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 81210834 81210861 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 145285333 145285861 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119226662 119226696 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6947903 6947941 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50468095 50468118 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50468095 50468118 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111575873 111575940 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 69361243 69361270 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99196358 99196404 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 101377549 101377792 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 17522488 17522519 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 86300519 86300603 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 20458510 20458536 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 20458510 20458536 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis chr15 32225152 32225178 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 86324038 86324057 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 87088411 87088452 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 87088411 87088452 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24890366 24890430 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 72284666 72284761 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 73638636 73638724 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 chr16 93675723 93675776 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17754961 17755053 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 81047404 81047534 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 42132054 42132091 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 20143615 20143660 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38781587 38781680 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46280059 46280134 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 24597766 24597802 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 24597886 24597934 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy +chrX 24597766 24597802 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome +chrX 24597886 24597934 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy chrX 30882677 30882743 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 65975147 65975250 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 65975147 65975250 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 69887153 69887230 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 135876774 135876804 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 138816203 138816248 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 138816203 138816248 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146176677 146176769 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 146765190 146765342 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 146765190 146765342 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed index c67fe4e0..66187d67 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed @@ -1,58 +1,58 @@ chr1 870159 870178 GGCGCGGAGC HMNR7_VWA1 -chr1 57245936 57245973 GAAAT,AAAAT SCA37_DAB1 -chr1 94266545 94266567 GCC OPDM5_ABCD3 -chr1 148519696 148519738 GGC NIID_NOTCH2NLC +chr1 57245936 57245973 AAATG,AAAAT SCA37_DAB1 +chr1 94266545 94266567 CCG,GCC OPDM5_ABCD3 +chr1 148519696 148519738 CGG,GGC NIID_NOTCH2NLC chr1 154328122 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 155728132 155728159 GGGCC NME_NAXE chr2 96703675 96703732 AAATG,AAAAT FAME2_STARD7 -chr2 100563686 100563738 GCC FRA2A_AFF3 +chr2 100563686 100563738 CCG,GCC FRA2A_AFF3 chr2 176581180 176581224 GCN SD5_HOXD13 -chr2 191369983 191370024 GCA GDPAG_GLS +chr2 191369983 191370024 CAG,GCA GDPAG_GLS chr3 63956303 63956333 CAG SCA7_ATXN7 chr3 131917483 131917557 CAGG DM2_CNBP -chr3 141687012 141687054 NGC BPES_FOXL2 +chr3 141687012 141687054 GCN,NGC BPES_FOXL2 chr3 186521668 186521706 TTTCA,TTTTA FAME4_YEATS2 chr4 3073604 3073687 CAG HD_HTT chr4 39318078 39318136 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41719746 41719805 GCN CCHS_PHOX2B chr4 162693304 162693405 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10295526 10295593 TTTCA,TTTTA FAME3_MARCHF6 -chr5 147414734 147414780 GCT SCA12_PPP2R2B -chr5 178096749 178096792 GCC OPDM_FAM193B -chr6 13201717 13201843 GCC OPDM_TBC1D7 +chr5 147414734 147414780 CTG,GCT SCA12_PPP2R2B +chr5 178096749 178096792 CCG,GCC OPDM_FAM193B +chr6 13201717 13201843 CCG,GCC OPDM_TBC1D7 chr6 16200189 16200282 CTG SCA1_ATXN1 chr6 45257568 45257618 GCN CCD_RUNX2 chr6 171935459 171935569 CAG SCA17_TBP -chr7 27335685 27335720 NGC HFG_HOXA13-III -chr7 27335814 27335849 NGC HFG_HOXA13-II -chr7 27335913 27335954 NGC HFG_HOXA13-I -chr7 56047901 56047939 GCG FRA7A_ZNF713 -chr8 105716410 105716441 CGC OPDM1_LRP12 -chr8 119495248 119495353 TGAAA,TAAAA FAME1_SAMD12 +chr7 27335685 27335720 GCN,NGC HFG_HOXA13-III +chr7 27335814 27335849 GCN,NGC HFG_HOXA13-II +chr7 27335913 27335954 GCN,NGC HFG_HOXA13-I +chr7 56047901 56047939 CGG,GCG FRA7A_ZNF713 +chr8 105716410 105716441 CCG,CGC OPDM1_LRP12 +chr8 119495248 119495353 AAATG,TAAAA FAME1_SAMD12 chr9 27584064 27584155 GGCCCC FTDALS1_C9orf72 chr9 81210835 81210861 GAA FRDA_FXN -chr9 142886569 142886595 GCC HSAN-VIII_PRDM12 +chr9 142886569 142886595 CCG,GCC HSAN-VIII_PRDM12 chr9 145285334 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 80695719 80695748 GGC OPML1_NUTM2B-AS1 +chr10 80695719 80695748 CGG,GGC OPML1_NUTM2B-AS1 chr11 119226663 119226696 CGG JBS_CBL chr12 6947904 6947941 CAG DRPLA_ATN1 -chr12 50468096 50468118 GGC FRA12A_DIP2B +chr12 50468096 50468118 CGG,GGC FRA12A_DIP2B chr12 111575874 111575940 CTG SCA2_ATXN2 -chr12 123532574 123532603 GGC OPDM4_RILPL1 +chr12 123532574 123532603 CGG,GGC OPDM4_RILPL1 chr13 69361244 69361270 CTG SCA8_ATXN8OS chr13 99196359 99196404 GCN HPE5_ZIC2 -chr13 101377550 101377792 GAA,GGA,GCA SCA27B_FGF14 +chr13 101377550 101377792 GAA,GGA,CAG SCA27B_FGF14 chr14 17522489 17522519 GCN OPMD_PABPN1 chr14 86300520 86300603 CTG SCA3_ATXN3 -chr15 20458511 20458536 GCG ALS1_NIPA1 +chr15 20458511 20458536 CGG,GCG ALS1_NIPA1 chr15 32225153 32225178 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 86324039 86324057 TTTG CHNG3_MIR7-2 -chr15 87088412 87088452 GCT CPEO_POLG -chr16 17477910 17478002 GCC DBQD2_XYLT1 +chr15 87088412 87088452 CTG,GCT CPEO_POLG +chr16 17477910 17478002 CCG,GCC DBQD2_XYLT1 chr16 24890367 24890430 TTTCA,TTTTA FAME6_TNRC6A chr16 72284667 72284761 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 73638637 73638724 CAG SCA_THAP11 -chr16 78605503 78605569 GCC SCA4_ZFHX3 +chr16 78605503 78605569 CCG,GCC SCA4_ZFHX3 chr16 93675724 93675776 CTG HDL2_JPH3 chr17 17754962 17755053 TTTCA,TTTTA FAME8_RAI1 chr17 81047405 81047534 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 42132055 42132091 CGCGGGGCGGGG EPM1_CSTB chr22 20143616 20143660 GCN TOF_TBX1 chr22 38781588 38781680 CCG EPM_CSNK1E chr22 46280060 46280134 ATTCT SCA10_ATXN10 -chrX 24597767 24597802 NGC PRTS_ARX -chrX 24597887 24597934 NGC EIEE1_ARX +chrX 24597767 24597802 GCN,NGC PRTS_ARX +chrX 24597887 24597934 GCN,NGC EIEE1_ARX chrX 30882678 30882743 TTC DMD_DMD -chrX 65975148 65975250 GCA SBMA_AR +chrX 65975148 65975250 CAG,GCA SBMA_AR chrX 69887154 69887230 AGAGGG XDP_TAF1 chrX 135876775 135876804 GCN VACTERLX_ZIC3 -chrX 138816204 138816248 NGC XLID_SOX3 +chrX 138816204 138816248 GCN,NGC XLID_SOX3 chrX 146176678 146176769 CGG FXS_FMR1 -chrX 146765191 146765342 GCC FRAXE_AFF2 +chrX 146765191 146765342 CCG,GCC FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed index 6eb055bf..268c026a 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed @@ -1,18 +1,18 @@ chr1 870158 870178 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 chr1 57245970 57245973 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94266544 94266567 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 148519695 148519738 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 94266544 94266567 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 148519695 148519738 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC chr1 155728131 155728159 GGGCC NME_NAXE NME_NAXE chr2 96703674 96703677 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100563685 100563738 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100563685 100563738 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176581179 176581224 GCN SD5_HOXD13 SD5_HOXD13 -chr2 191369982 191370024 GCA GDPAG_GLS GDPAG_GLS +chr2 191369982 191370024 CAG GDPAG_GLS GDPAG_GLS chr3 63956302 63956333 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63956333 63956345 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 131917482 131917557 CAGG DM2_CNBP DM2_CNBP chr3 131917557 131917597 CAGA DM2_CNBP DM2_CNBP_CAGA chr3 131917597 131917635 CA DM2_CNBP DM2_CNBP_CA -chr3 141687011 141687054 NGC BPES_FOXL2 BPES_FOXL2 +chr3 141687011 141687054 GCN BPES_FOXL2 BPES_FOXL2 chr3 186521702 186521706 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3073603 3073681 CAG HD_HTT HD_HTT chr4 3073687 3073723 CCG HD_HTT HD_HTT_CCG @@ -20,35 +20,35 @@ chr4 39318132 39318136 AAGGG CANVAS_RFC1 CANVAS_RFC1 chr4 41719745 41719805 GCN CCHS_PHOX2B CCHS_PHOX2B chr4 162693388 162693405 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10295585 10295593 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 147414733 147414780 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 178096748 178096792 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13201716 13201843 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 147414733 147414780 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 178096748 178096792 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13201716 13201843 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16200188 16200282 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45257567 45257618 GCN CCD_RUNX2 CCD_RUNX2 chr6 171935458 171935569 CAG SCA17_TBP SCA17_TBP -chr7 27335684 27335720 NGC HFG_HOXA13-III HFG_HOXA13-III -chr7 27335813 27335849 NGC HFG_HOXA13-II HFG_HOXA13-II -chr7 27335912 27335954 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 56047900 56047939 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 105716409 105716441 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 27335684 27335720 GCN HFG_HOXA13-III HFG_HOXA13-III +chr7 27335813 27335849 GCN HFG_HOXA13-II HFG_HOXA13-II +chr7 27335912 27335954 GCN HFG_HOXA13-I HFG_HOXA13-I +chr7 56047900 56047939 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 105716409 105716441 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 119495347 119495353 TGAAA FAME1_SAMD12 FAME1_SAMD12 chr9 27584063 27584155 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 81210818 81210834 A FRDA_FXN FRDA_FXN_A chr9 81210834 81210861 GAA FRDA_FXN FRDA_FXN -chr9 142886568 142886595 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 80695718 80695748 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 142886568 142886595 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 80695718 80695748 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119226662 119226696 CGG JBS_CBL JBS_CBL chr12 6947903 6947941 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50468095 50468118 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50468095 50468118 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 111575873 111575940 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 123532573 123532603 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 123532573 123532603 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 69361213 69361243 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 69361243 69361270 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 99196358 99196404 GCN HPE5_ZIC2 HPE5_ZIC2 chr13 101377549 101377792 GAA SCA27B_FGF14 SCA27B_FGF14 chr14 17522488 17522519 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 86300519 86300603 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 20458510 20458536 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 20458510 20458536 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 32225152 32225178 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A chr15 86324038 86324057 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 87088402 87088408 GCT CPEO_POLG CPEO_POLG_GCT @@ -58,7 +58,7 @@ chr16 17477909 17478002 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24890416 24890430 TTTCA FAME6_TNRC6A FAME6_TNRC6A chr16 72284666 72284761 TGGAA SCA31_BEAN1 SCA31_BEAN1 chr16 73638636 73638724 CAG SCA_THAP11 SCA_THAP11 -chr16 78605502 78605569 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 78605502 78605569 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 93675723 93675776 CTG HDL2_JPH3 HDL2_JPH3 chr17 17755051 17755053 TTTCA FAME8_RAI1 FAME8_RAI1 chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 @@ -77,13 +77,13 @@ chr21 42132054 42132091 CGCGGGGCGGGG EPM1_CSTB EPM1_CSTB chr22 20143615 20143660 GCN TOF_TBX1 TOF_TBX1 chr22 38781587 38781680 CCG EPM_CSNK1E EPM_CSNK1E chr22 46280059 46280134 ATTCT SCA10_ATXN10 SCA10_ATXN10 -chrX 24597766 24597802 NGC PRTS_ARX PRTS_ARX -chrX 24597886 24597934 NGC EIEE1_ARX EIEE1_ARX +chrX 24597766 24597802 GCN PRTS_ARX PRTS_ARX +chrX 24597886 24597934 GCN EIEE1_ARX EIEE1_ARX chrX 30882677 30882743 TTC DMD_DMD DMD_DMD chrX 30882743 30882751 T DMD_DMD DMD_DMD_T -chrX 65975147 65975250 GCA SBMA_AR SBMA_AR +chrX 65975147 65975250 CAG SBMA_AR SBMA_AR chrX 69887153 69887230 AGAGGG XDP_TAF1 XDP_TAF1 chrX 135876774 135876804 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 -chrX 138816203 138816248 NGC XLID_SOX3 XLID_SOX3 +chrX 138816203 138816248 GCN XLID_SOX3 XLID_SOX3 chrX 146176677 146176769 CGG FXS_FMR1 FXS_FMR1 -chrX 146765190 146765342 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 146765190 146765342 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json index cd667006..4e980e5c 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57245970-57245973", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "AAATG", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94266544-94266567", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:148519695-148519738", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100563685-100563738", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:191369982-191370024", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -153,11 +153,11 @@ { "LocusId": "BPES_FOXL2", "ReferenceRegion": "chr3:141687011-141687054", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD", "AR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "BPES", "NormalMax": 14, "PathologicMin": 15, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:147414733-147414780", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:178096748-178096792", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13201716-13201843", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -332,11 +332,11 @@ { "LocusId": "HFG_HOXA13-III", "ReferenceRegion": "chr7:27335684-27335720", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-III", "NormalMax": 18, "PathologicMin": 22, @@ -345,11 +345,11 @@ { "LocusId": "HFG_HOXA13-II", "ReferenceRegion": "chr7:27335813-27335849", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-II", "NormalMax": 12, "PathologicMin": 18, @@ -358,11 +358,11 @@ { "LocusId": "HFG_HOXA13-I", "ReferenceRegion": "chr7:27335912-27335954", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-I", "NormalMax": 14, "PathologicMin": 22, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:56047900-56047939", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:105716409-105716441", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:119495347-119495353", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGAAA", + "DisplayRU": "AAATG", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:142886568-142886595", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:80695718-80695748", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50468095-50468118", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:123532573-123532603", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:20458510-20458536", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:87088411-87088452", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17477909-17478002", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:78605502-78605569", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -939,11 +939,11 @@ { "LocusId": "PRTS_ARX", "ReferenceRegion": "chrX:24597766-24597802", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20, @@ -952,11 +952,11 @@ { "LocusId": "EIEE1_ARX", "ReferenceRegion": "chrX:24597886-24597934", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "EIEE1", "NormalMax": 16, "PathologicMin": 17, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:65975147-65975250", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1019,11 +1019,11 @@ { "LocusId": "XLID_SOX3", "ReferenceRegion": "chrX:138816203-138816248", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "XLID, PHPX", "NormalMax": 15, "PathologicMin": 22, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:146765190-146765342", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed index 04b2193a..6340ba42 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed @@ -1,58 +1,58 @@ chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= +chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= +chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= chr2 176957786 176957831 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= chr3 63898360 63898403 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 128891419 128891577 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3076603 3076696 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= +chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= +chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= chr6 16327864 16327955 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45390487 45390538 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170870994 170871105 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= -chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= +chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= +chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= +chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= +chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= +chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= +chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= chr11 119076999 119077033 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 7045879 7045938 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= chr12 112036753 112036823 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= chr13 70713485 70713561 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 100637702 100637748 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 23790681 23790712 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92537354 92537396 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= chr15 34711626 34711652 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 67876765 67876853 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= chr16 87637888 87637935 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 45196323 45196360 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19754285 19754330 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38713287 38713380 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46191234 46191304 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= +chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= chrX 70672904 70672981 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 136648985 136649015 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= chrX 146993567 146993629 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed index f6abb39f..376471bb 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed @@ -1,20 +1,20 @@ #chrom start stop motif motif_len id chr1 1371178 1371198 GGCGCGGAGC 10 HMNR7_VWA1 chr1 57832750 57832793 GAAAT 5 SCA37_DAB1 -chr1 94883977 94884000 GCC 3 OPDM5_ABCD3 -chr1 145209323 145209354 GGC 3 NIID_NOTCH2NLC +chr1 94883977 94884000 CCG 3 OPDM5_ABCD3 +chr1 145209323 145209354 CGG 3 NIID_NOTCH2NLC chr1 155160981 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 chr1 156561557 156561575 GGGCC 5 NME_NAXE chr2 96862804 96862807 AAATG 5 FAME2_STARD7 -chr2 100721260 100721286 GCC 3 FRA2A_AFF3 +chr2 100721260 100721286 CCG 3 FRA2A_AFF3 chr2 176957786 176957831 GCN 3 SD5_HOXD13 -chr2 191745598 191745646 GCA 3 GDPAG_GLS +chr2 191745598 191745646 CAG 3 GDPAG_GLS chr3 63898360 63898391 CAG 3 SCA7_ATXN7 chr3 63898391 63898403 CCG 3 SCA7_ATXN7_flank chr3 128891419 128891499 CAGG 4 DM2_CNBP chr3 128891499 128891539 CAGA 4 DM2_CNBP_flank chr3 128891539 128891577 CA 2 DM2_CNBP_flank -chr3 138664861 138664904 NGC 3 BPES_FOXL2 +chr3 138664861 138664904 GCN 3 BPES_FOXL2 chr3 183430010 183430014 TTTCA 5 FAME4_YEATS2 chr4 3076603 3076654 CAG 3 HD_HTT chr4 3076660 3076696 CCG 3 HD_HTT_flank @@ -22,36 +22,36 @@ chr4 39350099 39350103 AAGGG 5 CANVAS_RFC1 chr4 41747989 41748049 GCN 3 CCHS_PHOX2B chr4 160263763 160263770 TTTCA 5 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA 5 FAME3_MARCHF6 -chr5 146258290 146258322 GCT 3 SCA12_PPP2R2B -chr5 176981490 176981532 GCC 3 OPDM_FAM193B -chr6 13328708 13328835 GCC 3 OPDM_TBC1D7 +chr5 146258290 146258322 CTG 3 SCA12_PPP2R2B +chr5 176981490 176981532 CCG 3 OPDM_FAM193B +chr6 13328708 13328835 CCG 3 OPDM_TBC1D7 chr6 16327864 16327955 CTG 3 SCA1_ATXN1 chr6 45390487 45390538 GCN 3 CCD_RUNX2 chr6 170870994 170871105 CAG 3 SCA17_TBP -chr7 27239297 27239351 NGC 3 HFG_HOXA13-III -chr7 27239444 27239480 NGC 3 HFG_HOXA13-II -chr7 27239543 27239585 NGC 3 HFG_HOXA13-I -chr7 55955293 55955332 GCG 3 FRA7A_ZNF713 -chr8 105601198 105601227 CGC 3 OPDM1_LRP12 +chr7 27239297 27239351 GCN 3 HFG_HOXA13-III +chr7 27239444 27239480 GCN 3 HFG_HOXA13-II +chr7 27239543 27239585 GCN 3 HFG_HOXA13-I +chr7 55955293 55955332 CGG 3 FRA7A_ZNF713 +chr8 105601198 105601227 CCG 3 OPDM1_LRP12 chr8 119379151 119379157 TGAAA 5 FAME1_SAMD12 chr9 27573482 27573544 GGCCCC 6 FTDALS1_C9orf72 chr9 71652186 71652202 A 1 FRDA_FXN_flank chr9 71652202 71652220 GAA 3 FRDA_FXN -chr9 133556992 133557028 GCC 3 HSAN-VIII_PRDM12 +chr9 133556992 133557028 CCG 3 HSAN-VIII_PRDM12 chr9 135946564 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 81586139 81586160 GGC 3 OPML1_NUTM2B-AS1 +chr10 81586139 81586160 CGG 3 OPML1_NUTM2B-AS1 chr11 119076999 119077033 CGG 3 JBS_CBL chr12 7045879 7045938 CAG 3 DRPLA_ATN1 -chr12 50898784 50898807 GGC 3 FRA12A_DIP2B +chr12 50898784 50898807 CGG 3 FRA12A_DIP2B chr12 112036753 112036823 CTG 3 SCA2_ATXN2 -chr12 124018267 124018297 GGC 3 OPDM4_RILPL1 +chr12 124018267 124018297 CGG 3 OPDM4_RILPL1 chr13 70713485 70713515 CTA 3 SCA8_ATXN8OS_flank chr13 70713515 70713561 CTG 3 SCA8_ATXN8OS chr13 100637702 100637748 GCN 3 HPE5_ZIC2 chr13 102813924 102814076 GAA 3 SCA27B_FGF14 chr14 23790681 23790712 GCN 3 OPMD_PABPN1 chr14 92537354 92537396 CTG 3 SCA3_ATXN3 -chr15 23086363 23086389 GCG 3 ALS1_NIPA1 +chr15 23086363 23086389 CGG 3 ALS1_NIPA1 chr15 34711626 34711652 CT 2 aFTLD-U_GOLGA8A chr15 89112664 89112683 TTTG 4 CHNG3_MIR7-2 chr15 89876810 89876816 GCT 3 CPEO_POLG_flank @@ -61,7 +61,7 @@ chr16 17564764 17564779 GCC 3 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA 5 FAME6_TNRC6A chr16 66524299 66524369 TGGAA 5 SCA31_BEAN1 chr16 67876765 67876853 CAG 3 SCA_THAP11 -chr16 72821593 72821657 GCC 3 SCA4_ZFHX3 +chr16 72821593 72821657 CCG 3 SCA4_ZFHX3 chr16 87637888 87637935 CTG 3 HDL2_JPH3 chr17 17711762 17711774 TTTCA 5 FAME8_RAI1 chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 @@ -80,13 +80,13 @@ chr21 45196323 45196360 CGCGGGGCGGGG 12 EPM1_CSTB chr22 19754285 19754330 GCN 3 TOF_TBX1 chr22 38713287 38713380 CCG 3 EPM_CSNK1E chr22 46191234 46191304 ATTCT 5 SCA10_ATXN10 -chrX 25031646 25031682 NGC 3 PRTS_ARX -chrX 25031766 25031814 NGC 3 EIEE1_ARX +chrX 25031646 25031682 GCN 3 PRTS_ARX +chrX 25031766 25031814 GCN 3 EIEE1_ARX chrX 31302674 31302722 TTC 3 DMD_DMD chrX 31302722 31302730 T 1 DMD_DMD_flank -chrX 66765158 66765261 GCA 3 SBMA_AR +chrX 66765158 66765261 CAG 3 SBMA_AR chrX 70672904 70672981 AGAGGG 6 XDP_TAF1 chrX 136648985 136649015 GCN 3 VACTERLX_ZIC3 -chrX 139586481 139586526 NGC 3 XLID_SOX3 +chrX 139586481 139586526 GCN 3 XLID_SOX3 chrX 146993567 146993629 CGG 3 FXS_FMR1 -chrX 147582124 147582273 GCC 3 FRAXE_AFF2 +chrX 147582124 147582273 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz index eb912884d4fd9f01f468b3e24b28a69bef849e74..e428e60fa43cabfb99c8f246a3e5adc599018639 100644 GIT binary patch delta 1846 zcmV-62g&%)4!;h6ABzYC000000RIL6LPG)oA_s++(XQht5Y+eplN`U=n9KM20D)RKUTO6H6V^shiP%hQLu|2WyOhoF`y-EW_+- zI^}_zh}MybOni{DkSA5|*XvO4kCHERzvXT_hu*}{i3}TnEMUCp0=7#x7{NUV_lHAmyrYHq|UG|w^UQP zW7nJsF-Witf{|X?xF)1jhwDP8t|F3QFOE9}V#r11Ehnm}JnwX-DzWS`!$=2(k^XfkOwmq*iO#YYmUB{_(Hh{{7E?_Z!q};PKUrj~lv*#&{MnbdU`& zOEf-U13~JQK9+f?OFAumd@V$B zEE;sl=8`k@E>GQ_{TS4^XJ2-+|fZ(^W+=fqI>h!ox zDWoochloht+YW)Dlrn*Ze7R3t=j$q;=)(=-gGOWn4^u9{keBnLE|1&v_7dDn>JVWn z|JRNky!qd{ch3Wg^H#j=AW^5L#_j2E1J&oZTfuIU^<;yI9o*itoWQ}D=CiNmzGk1g z?7bsPF*aZyp-lR6ThhULI;ZeUtu_v}ekDe%-UKUM?x8>Bz)c z<)Zm`Dkbg7SZgaOLyR!5UzYT>F1PFbXbCc7@InoAT7FD6UvtX$gy~SNH~Y(N5aexf zp*mfcp2;I%2+;>?J80+@4axH`RT zV-FC}HE`;0n(9{4I!JTT7$GEY4hp`vbzBprn#Pg4DLy)p;5!Azo|m@4)yv~K;9*7- z>s$<-y!E}E)3BtePPc6r4UlJM343V`>*B6s>i3D`>;17f|E8+4mxPdiqP~)U0vWJA z7=yl*miG`&x=QNn*ME&1{hlTuf&A-Gm-W8oR=AE-pXmeLx)YArQjOyzH!juEswF5(-XVKL69j(0}jW=<-E)_Z}-o5 z?xOV=k=Qn1V?n1y-wUDH*9SCzDl_Pk5VVmEBEHg+?4rDTI=>b4eSBo4?!Cs30V#xt zR>Qnej9Ra*5IPU?{j~fb#zSZO!7!<$ydV_r4m)j|t@9N8HYV*@k{ zNdpxeJ-@|u^Q_}&Y>)E)s)pK!zXz!nh8pj?epR9>kZ>fK1;a zA8xad**PC0H4;PNm|0u8fBJg8UhA?B^U+>p7p-?1EJ}iOv{+AEZ9~(}XH%Jyp1*WN z3FgP}E!2G8k33I*AZHkLC2J6lP5H8xt)}()W{K}sz$4Y1ujezqG)6Y$#WoNkDEq2E z%tIaK;Z4%t8nA%^cqp%Nd+vKJhb}ZA|B<{;bE;`=k{Bh^dZqV+Md(=*#no{l5P k_}uC^ugBA^xIp@j8j*pFa_h9isE?8azx>n$k4I zDFG_D%**D=w|__cr{-M}l?pFVC8DVE{7(y6P6q}V#cojft5EK&Cqsl@zP=uk;3^km&Z!F{~d zFzd2iZ|ki!fx#(pYY-p4Q&K z8Uh)1l8_C8eYTZrTU!nrAXeV8$>Ry(UZxX|bPaQfFku@nC12u-hv>)?n!pXLalNg3 zin{z5FAwgbcb1*^zCmEqbKo-EsC-#3&2xZG3aO<%9%g)7_n0mv-RJe_3`>r+r3QxdQFv!8!@PbK zjX>g%SNZ@4vIb6lq^ayVZi8zsD&+{ts)M}ki8gMLVu|C(-E=k>=e+IY8GBvY2U9Nh z$AE_!&}n0W?_{;alwvmxYn;k-+pDO6EHgV_uUfHGap_ZACyp)4eKqz)6{TK(2_gMN zeHH~$psiO5ZM$l&DI8^$l;M|O$sG9{Cm;vu%TU&BS^pkGbG^zUT4Us&1<)S>>a8gG z8m#OZ8c2f6qo?);g$Bnw_S+i28KIn6e)aCm*=Dpe)SUGDIx2rj3;$y9+Evzn=JYh4E!4n) zjZz^rK*bOxQNcip%eT0fZEW>IgGQ%a@S%cNI`kY#>9Uvga(}e_lm@hlEIZ2wIrZsX zT-o9}oyYnz22ioFvgv6+pVvDsbKKHpv|ZMgjHjSFF;QP|C9$Zi>q=W|h^fWGqZYZ6 zVS3qVmod&5xVlyfD|x4Xd;`zs)y$`at!i07%jCS(4T3SxQj9IHo2Ly2rM+ul*nf1_ zP`3BwK|P#P-l-6DBSdAM`H!yTmZuKL5eM0BR$zlYlP=>V6)+;dU9Z;{S8LtDtDm_q zs?prRo1du7G$t5Bfg5n`%c&J>7t4=jbgc+`KxM3|C#FUuHSS}7lZ@rItw>$Pz%HP{ z4|0&@+|-93%O2;U*#XEI$J$c^qw8rXr34nnJk$+k3V;0hyX(G_T_k$jfD}OlgmD{U z86V4Y9)zdRfK=ZiA8xad*%%uFHFAo4Pm{|Ym$z@%>$R-gFdywjHbGk>$s**Cjuy*_ zt8Hl7`E1Hll=GK=oOcN3`|#bDbgoC92ar*WvXV9M)W>Mu@?PThc(I)AmdB&ioUi9I zJ~c*GWSwr{h#=Qhf0&0d%)^T$|7*YsGT@;+`|VNpS_X}8KoGh)qP6av5YD5()}UpEciN%IEF&kb@09A966>YB}y!GGzN3KW^8d z?DxkZY8wI@IkQ!=6#Bt(eC%bMx8v#B8Ba1sjmYtHKk_)seuRGc3!UHwuah4KAjmbZ AdH?_b diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi index 20fb52cd89f22d592fad0884d672c6a765d5d5d6..d21d5dd5daa75f2beea117fe78e484b33001ea51 100644 GIT binary patch delta 189 zcmaE(^hRmJ1Ab19j(ZOdeA9DinEaAonK`{%X>uc*O8rUaZ+$BNJJheft>nFWdC{t} z?p0O$)-Al%7y8vK>}t`fg;D*XTP0V&N{e(h?Y*k=Z^Np@Y5xtwu0CGXc60x-$E$u% zx0-b}_;TUZO#A!4LwPsN)X} hxX91l*^^XumYajr{p|n4a@H<%!DK^5k;xwgssX`QV5I;6 delta 189 zcmaE(^hRmJ1Aa~^fxQPCbHx>=On%9)%>4Yb(&R=qmHLy(-}+SkcPzj9)|L0_-cH}0Cj&1B24t5-kmR`Z;sf{JYBRD*=v-(Tm|_nfT6 iFCl#Y?gW*S&AU~B>Q2@_Q7@MW)SGO`C^Gq@Ks5lp30unm diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index 97bbd950..9b3d34f5 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 +chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 156561557 156561575 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100721260 100721286 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100721260 100721286 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176957786 176957831 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191745598 191745646 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 191745598 191745646 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency chr3 63898360 63898391 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 128891419 128891499 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138664861 138664904 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138664861 138664904 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3076603 3076660 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 176981490 176981532 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 +chr5 176981490 176981532 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy +chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy chr6 16327864 16327955 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45390487 45390538 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170870994 170871105 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27239297 27239351 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27239444 27239480 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27239543 27239585 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105601198 105601227 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 -chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 +chr7 27239297 27239351 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27239444 27239480 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27239543 27239585 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 +chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105601198 105601227 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 71652202 71652220 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119076999 119077033 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 7045879 7045938 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50898784 50898807 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50898784 50898807 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 112036753 112036823 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70713515 70713561 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 100637702 100637748 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 23790681 23790712 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92537354 92537396 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 23086363 23086389 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 23086363 23086389 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis chr15 34711626 34711652 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89876819 89876860 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 89876819 89876860 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 67876765 67876853 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 chr16 87637888 87637935 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 19754285 19754330 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38713287 38713380 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46191234 46191304 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25031646 25031682 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 25031766 25031814 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy +chrX 25031646 25031682 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome +chrX 25031766 25031814 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy chrX 31302674 31302722 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 66765158 66765261 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 66765158 66765261 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 70672904 70672981 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 136648985 136649015 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 139586481 139586526 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 139586481 139586526 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146993567 146993629 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 147582124 147582273 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 147582124 147582273 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed index 67839406..f9112ba3 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed @@ -1,58 +1,58 @@ chr1 1371179 1371198 GGCGCGGAGC HMNR7_VWA1 -chr1 57832716 57832793 GAAAT,AAAAT SCA37_DAB1 -chr1 94883978 94884000 GCC OPDM5_ABCD3 -chr1 145209324 145209354 GGC NIID_NOTCH2NLC +chr1 57832716 57832793 AAATG,AAAAT SCA37_DAB1 +chr1 94883978 94884000 CCG,GCC OPDM5_ABCD3 +chr1 145209324 145209354 CGG,GGC NIID_NOTCH2NLC chr1 155160982 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156561558 156561575 GGGCC NME_NAXE chr2 96862805 96862862 AAATG,AAAAT FAME2_STARD7 -chr2 100721261 100721286 GCC FRA2A_AFF3 +chr2 100721261 100721286 CCG,GCC FRA2A_AFF3 chr2 176957787 176957831 GCN SD5_HOXD13 -chr2 191745599 191745646 GCA GDPAG_GLS +chr2 191745599 191745646 CAG,GCA GDPAG_GLS chr3 63898361 63898391 CAG SCA7_ATXN7 chr3 128891420 128891499 CAGG DM2_CNBP -chr3 138664862 138664904 NGC BPES_FOXL2 +chr3 138664862 138664904 GCN,NGC BPES_FOXL2 chr3 183429976 183430014 TTTCA,TTTTA FAME4_YEATS2 chr4 3076604 3076660 CAG HD_HTT chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41747990 41748049 GCN CCHS_PHOX2B chr4 160263679 160263770 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356456 10356523 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146258291 146258322 GCT SCA12_PPP2R2B -chr5 176981491 176981532 GCC OPDM_FAM193B -chr6 13328709 13328835 GCC OPDM_TBC1D7 +chr5 146258291 146258322 CTG,GCT SCA12_PPP2R2B +chr5 176981491 176981532 CCG,GCC OPDM_FAM193B +chr6 13328709 13328835 CCG,GCC OPDM_TBC1D7 chr6 16327865 16327955 CTG SCA1_ATXN1 chr6 45390488 45390538 GCN CCD_RUNX2 chr6 170870995 170871105 CAG SCA17_TBP -chr7 27239298 27239351 NGC HFG_HOXA13-III -chr7 27239445 27239480 NGC HFG_HOXA13-II -chr7 27239544 27239585 NGC HFG_HOXA13-I -chr7 55955294 55955332 GCG FRA7A_ZNF713 -chr8 105601199 105601227 CGC OPDM1_LRP12 -chr8 119379052 119379157 TGAAA,TAAAA FAME1_SAMD12 +chr7 27239298 27239351 GCN,NGC HFG_HOXA13-III +chr7 27239445 27239480 GCN,NGC HFG_HOXA13-II +chr7 27239544 27239585 GCN,NGC HFG_HOXA13-I +chr7 55955294 55955332 CGG,GCG FRA7A_ZNF713 +chr8 105601199 105601227 CCG,CGC OPDM1_LRP12 +chr8 119379052 119379157 AAATG,TAAAA FAME1_SAMD12 chr9 27573483 27573544 GGCCCC FTDALS1_C9orf72 chr9 71652203 71652220 GAA FRDA_FXN -chr9 133556993 133557028 GCC HSAN-VIII_PRDM12 +chr9 133556993 133557028 CCG,GCC HSAN-VIII_PRDM12 chr9 135946565 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 81586140 81586160 GGC OPML1_NUTM2B-AS1 +chr10 81586140 81586160 CGG,GGC OPML1_NUTM2B-AS1 chr11 119077000 119077033 CGG JBS_CBL chr12 7045880 7045938 CAG DRPLA_ATN1 -chr12 50898785 50898807 GGC FRA12A_DIP2B +chr12 50898785 50898807 CGG,GGC FRA12A_DIP2B chr12 112036754 112036823 CTG SCA2_ATXN2 -chr12 124018268 124018297 GGC OPDM4_RILPL1 +chr12 124018268 124018297 CGG,GGC OPDM4_RILPL1 chr13 70713516 70713561 CTG SCA8_ATXN8OS chr13 100637703 100637748 GCN HPE5_ZIC2 -chr13 102813925 102814076 GAA,GGA,GCA SCA27B_FGF14 +chr13 102813925 102814076 GAA,GGA,CAG SCA27B_FGF14 chr14 23790682 23790712 GCN OPMD_PABPN1 chr14 92537355 92537396 CTG SCA3_ATXN3 -chr15 23086364 23086389 GCG ALS1_NIPA1 +chr15 23086364 23086389 CGG,GCG ALS1_NIPA1 chr15 34711627 34711652 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 89112665 89112683 TTTG CHNG3_MIR7-2 -chr15 89876820 89876860 GCT CPEO_POLG -chr16 17564765 17564779 GCC DBQD2_XYLT1 +chr15 89876820 89876860 CTG,GCT CPEO_POLG +chr16 17564765 17564779 CCG,GCC DBQD2_XYLT1 chr16 24624760 24624853 TTTCA,TTTTA FAME6_TNRC6A chr16 66524300 66524369 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 67876766 67876853 CAG SCA_THAP11 -chr16 72821594 72821657 GCC SCA4_ZFHX3 +chr16 72821594 72821657 CCG,GCC SCA4_ZFHX3 chr16 87637889 87637935 CTG HDL2_JPH3 chr17 17711673 17711774 TTTCA,TTTTA FAME8_RAI1 chr17 78120809 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 45196324 45196360 CGCGGGGCGGGG EPM1_CSTB chr22 19754286 19754330 GCN TOF_TBX1 chr22 38713288 38713380 CCG EPM_CSNK1E chr22 46191235 46191304 ATTCT SCA10_ATXN10 -chrX 25031647 25031682 NGC PRTS_ARX -chrX 25031767 25031814 NGC EIEE1_ARX +chrX 25031647 25031682 GCN,NGC PRTS_ARX +chrX 25031767 25031814 GCN,NGC EIEE1_ARX chrX 31302675 31302722 TTC DMD_DMD -chrX 66765159 66765261 GCA SBMA_AR +chrX 66765159 66765261 CAG,GCA SBMA_AR chrX 70672905 70672981 AGAGGG XDP_TAF1 chrX 136648986 136649015 GCN VACTERLX_ZIC3 -chrX 139586482 139586526 NGC XLID_SOX3 +chrX 139586482 139586526 GCN,NGC XLID_SOX3 chrX 146993568 146993629 CGG FXS_FMR1 -chrX 147582125 147582273 GCC FRAXE_AFF2 +chrX 147582125 147582273 CCG,GCC FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed index f48e358b..582f4802 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed @@ -1,18 +1,18 @@ chr1 1371178 1371198 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 chr1 57832750 57832793 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94883977 94884000 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 145209323 145209354 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 94883977 94884000 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 145209323 145209354 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC chr1 156561557 156561575 GGGCC NME_NAXE NME_NAXE chr2 96862804 96862807 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100721260 100721286 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100721260 100721286 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176957786 176957831 GCN SD5_HOXD13 SD5_HOXD13 -chr2 191745598 191745646 GCA GDPAG_GLS GDPAG_GLS +chr2 191745598 191745646 CAG GDPAG_GLS GDPAG_GLS chr3 63898360 63898391 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63898391 63898403 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 128891419 128891499 CAGG DM2_CNBP DM2_CNBP chr3 128891499 128891539 CAGA DM2_CNBP DM2_CNBP_CAGA chr3 128891539 128891577 CA DM2_CNBP DM2_CNBP_CA -chr3 138664861 138664904 NGC BPES_FOXL2 BPES_FOXL2 +chr3 138664861 138664904 GCN BPES_FOXL2 BPES_FOXL2 chr3 183430010 183430014 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3076603 3076654 CAG HD_HTT HD_HTT chr4 3076660 3076696 CCG HD_HTT HD_HTT_CCG @@ -20,35 +20,35 @@ chr4 39350099 39350103 AAGGG CANVAS_RFC1 CANVAS_RFC1 chr4 41747989 41748049 GCN CCHS_PHOX2B CCHS_PHOX2B chr4 160263763 160263770 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 146258290 146258322 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 176981490 176981532 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13328708 13328835 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 146258290 146258322 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 176981490 176981532 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13328708 13328835 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16327864 16327955 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45390487 45390538 GCN CCD_RUNX2 CCD_RUNX2 chr6 170870994 170871105 CAG SCA17_TBP SCA17_TBP -chr7 27239297 27239351 NGC HFG_HOXA13-III HFG_HOXA13-III -chr7 27239444 27239480 NGC HFG_HOXA13-II HFG_HOXA13-II -chr7 27239543 27239585 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 55955293 55955332 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 105601198 105601227 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 27239297 27239351 GCN HFG_HOXA13-III HFG_HOXA13-III +chr7 27239444 27239480 GCN HFG_HOXA13-II HFG_HOXA13-II +chr7 27239543 27239585 GCN HFG_HOXA13-I HFG_HOXA13-I +chr7 55955293 55955332 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 105601198 105601227 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 119379151 119379157 TGAAA FAME1_SAMD12 FAME1_SAMD12 chr9 27573482 27573544 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 71652186 71652202 A FRDA_FXN FRDA_FXN_A chr9 71652202 71652220 GAA FRDA_FXN FRDA_FXN -chr9 133556992 133557028 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 81586139 81586160 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 133556992 133557028 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 81586139 81586160 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119076999 119077033 CGG JBS_CBL JBS_CBL chr12 7045879 7045938 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50898784 50898807 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50898784 50898807 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 112036753 112036823 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 124018267 124018297 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 124018267 124018297 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 70713485 70713515 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 70713515 70713561 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 100637702 100637748 GCN HPE5_ZIC2 HPE5_ZIC2 chr13 102813924 102814076 GAA SCA27B_FGF14 SCA27B_FGF14 chr14 23790681 23790712 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 92537354 92537396 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 23086363 23086389 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 23086363 23086389 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 34711626 34711652 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A chr15 89112664 89112683 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 89876810 89876816 GCT CPEO_POLG CPEO_POLG_GCT @@ -58,7 +58,7 @@ chr16 17564764 17564779 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA FAME6_TNRC6A FAME6_TNRC6A chr16 66524299 66524369 TGGAA SCA31_BEAN1 SCA31_BEAN1 chr16 67876765 67876853 CAG SCA_THAP11 SCA_THAP11 -chr16 72821593 72821657 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 72821593 72821657 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 87637888 87637935 CTG HDL2_JPH3 HDL2_JPH3 chr17 17711762 17711774 TTTCA FAME8_RAI1 FAME8_RAI1 chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 @@ -77,13 +77,13 @@ chr21 45196323 45196360 CGCGGGGCGGGG EPM1_CSTB EPM1_CSTB chr22 19754285 19754330 GCN TOF_TBX1 TOF_TBX1 chr22 38713287 38713380 CCG EPM_CSNK1E EPM_CSNK1E chr22 46191234 46191304 ATTCT SCA10_ATXN10 SCA10_ATXN10 -chrX 25031646 25031682 NGC PRTS_ARX PRTS_ARX -chrX 25031766 25031814 NGC EIEE1_ARX EIEE1_ARX +chrX 25031646 25031682 GCN PRTS_ARX PRTS_ARX +chrX 25031766 25031814 GCN EIEE1_ARX EIEE1_ARX chrX 31302674 31302722 TTC DMD_DMD DMD_DMD chrX 31302722 31302730 T DMD_DMD DMD_DMD_T -chrX 66765158 66765261 GCA SBMA_AR SBMA_AR +chrX 66765158 66765261 CAG SBMA_AR SBMA_AR chrX 70672904 70672981 AGAGGG XDP_TAF1 XDP_TAF1 chrX 136648985 136649015 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 -chrX 139586481 139586526 NGC XLID_SOX3 XLID_SOX3 +chrX 139586481 139586526 GCN XLID_SOX3 XLID_SOX3 chrX 146993567 146993629 CGG FXS_FMR1 FXS_FMR1 -chrX 147582124 147582273 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 147582124 147582273 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.stranger.json b/data/catalogs/STRchive-disease-loci.hg19.stranger.json index c1dd1fd7..f455dd18 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg19.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57832750-57832793", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "AAATG", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94883977-94884000", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:145209323-145209354", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100721260-100721286", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:191745598-191745646", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -153,11 +153,11 @@ { "LocusId": "BPES_FOXL2", "ReferenceRegion": "chr3:138664861-138664904", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD", "AR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "BPES", "NormalMax": 14, "PathologicMin": 15, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:146258290-146258322", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:176981490-176981532", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13328708-13328835", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -332,11 +332,11 @@ { "LocusId": "HFG_HOXA13-III", "ReferenceRegion": "chr7:27239297-27239351", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-III", "NormalMax": 18, "PathologicMin": 22, @@ -345,11 +345,11 @@ { "LocusId": "HFG_HOXA13-II", "ReferenceRegion": "chr7:27239444-27239480", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-II", "NormalMax": 12, "PathologicMin": 18, @@ -358,11 +358,11 @@ { "LocusId": "HFG_HOXA13-I", "ReferenceRegion": "chr7:27239543-27239585", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-I", "NormalMax": 14, "PathologicMin": 22, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:55955293-55955332", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:105601198-105601227", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:119379151-119379157", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGAAA", + "DisplayRU": "AAATG", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:133556992-133557028", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:81586139-81586160", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50898784-50898807", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:124018267-124018297", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:23086363-23086389", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:89876819-89876860", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17564764-17564779", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:72821593-72821657", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -939,11 +939,11 @@ { "LocusId": "PRTS_ARX", "ReferenceRegion": "chrX:25031646-25031682", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20, @@ -952,11 +952,11 @@ { "LocusId": "EIEE1_ARX", "ReferenceRegion": "chrX:25031766-25031814", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "EIEE1", "NormalMax": 16, "PathologicMin": 17, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:66765158-66765261", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1019,11 +1019,11 @@ { "LocusId": "XLID_SOX3", "ReferenceRegion": "chrX:139586481-139586526", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "XLID, PHPX", "NormalMax": 15, "PathologicMin": 22, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:147582124-147582273", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed index 6d1f281f..662ca536 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed @@ -1,58 +1,58 @@ chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= +chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= +chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= chr2 176093058 176093103 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= chr3 63912684 63912727 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 129172576 129172734 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3074876 3074969 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= +chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= +chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= chr6 16327633 16327724 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45422750 45422801 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170561906 170562017 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= -chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= +chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= +chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= +chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= +chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= +chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= +chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= chr11 119206289 119206323 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6936716 6936775 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= chr12 111598949 111599019 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= chr13 70139353 70139429 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99985448 99985494 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 23321472 23321503 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92071010 92071052 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= chr15 34419425 34419451 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 67842862 67842950 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= chr16 87604282 87604329 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 43776442 43776479 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19766762 19766807 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38317282 38317375 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 45795354 45795424 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= +chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= chrX 71453054 71453131 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 137566826 137566856 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= chrX 147912049 147912111 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed index 53c72b79..a2d85324 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed @@ -1,20 +1,20 @@ #chrom start stop motif motif_len id chr1 1435798 1435818 GGCGCGGAGC 10 HMNR7_VWA1 chr1 57367078 57367121 GAAAT 5 SCA37_DAB1 -chr1 94418421 94418444 GCC 3 OPDM5_ABCD3 -chr1 149390802 149390842 GGC 3 NIID_NOTCH2NLC +chr1 94418421 94418444 CCG 3 OPDM5_ABCD3 +chr1 149390802 149390842 CGG 3 NIID_NOTCH2NLC chr1 155188505 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 chr1 156591765 156591783 GGGCC 5 NME_NAXE chr2 96197066 96197069 AAATG 5 FAME2_STARD7 -chr2 100104798 100104824 GCC 3 FRA2A_AFF3 +chr2 100104798 100104824 CCG 3 FRA2A_AFF3 chr2 176093058 176093103 GCN 3 SD5_HOXD13 -chr2 190880872 190880920 GCA 3 GDPAG_GLS +chr2 190880872 190880920 CAG 3 GDPAG_GLS chr3 63912684 63912715 CAG 3 SCA7_ATXN7 chr3 63912715 63912727 CCG 3 SCA7_ATXN7_flank chr3 129172576 129172656 CAGG 4 DM2_CNBP chr3 129172656 129172696 CAGA 4 DM2_CNBP_flank chr3 129172696 129172734 CA 2 DM2_CNBP_flank -chr3 138946019 138946062 NGC 3 BPES_FOXL2 +chr3 138946019 138946062 GCN 3 BPES_FOXL2 chr3 183712222 183712226 TTTCA 5 FAME4_YEATS2 chr4 3074876 3074927 CAG 3 HD_HTT chr4 3074933 3074969 CCG 3 HD_HTT_flank @@ -22,36 +22,36 @@ chr4 39348479 39348483 AAGGG 5 CANVAS_RFC1 chr4 41745972 41746032 GCN 3 CCHS_PHOX2B chr4 159342611 159342618 TTTCA 5 FAME7_RAPGEF2 chr5 10356403 10356411 TTTCA 5 FAME3_MARCHF6 -chr5 146878727 146878759 GCT 3 SCA12_PPP2R2B -chr5 177554489 177554531 GCC 3 OPDM_FAM193B -chr6 13328476 13328603 GCC 3 OPDM_TBC1D7 +chr5 146878727 146878759 CTG 3 SCA12_PPP2R2B +chr5 177554489 177554531 CCG 3 OPDM_FAM193B +chr6 13328476 13328603 CCG 3 OPDM_TBC1D7 chr6 16327633 16327724 CTG 3 SCA1_ATXN1 chr6 45422750 45422801 GCN 3 CCD_RUNX2 chr6 170561906 170562017 CAG 3 SCA17_TBP -chr7 27199678 27199732 NGC 3 HFG_HOXA13-III -chr7 27199825 27199861 NGC 3 HFG_HOXA13-II -chr7 27199924 27199966 NGC 3 HFG_HOXA13-I -chr7 55887600 55887639 GCG 3 FRA7A_ZNF713 -chr8 104588970 104588999 CGC 3 OPDM1_LRP12 +chr7 27199678 27199732 GCN 3 HFG_HOXA13-III +chr7 27199825 27199861 GCN 3 HFG_HOXA13-II +chr7 27199924 27199966 GCN 3 HFG_HOXA13-I +chr7 55887600 55887639 CGG 3 FRA7A_ZNF713 +chr8 104588970 104588999 CCG 3 OPDM1_LRP12 chr8 118366912 118366918 TGAAA 5 FAME1_SAMD12 chr9 27573484 27573546 GGCCCC 6 FTDALS1_C9orf72 chr9 69037270 69037286 A 1 FRDA_FXN_flank chr9 69037286 69037304 GAA 3 FRDA_FXN -chr9 130681605 130681641 GCC 3 HSAN-VIII_PRDM12 +chr9 130681605 130681641 CCG 3 HSAN-VIII_PRDM12 chr9 133071177 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 79826383 79826404 GGC 3 OPML1_NUTM2B-AS1 +chr10 79826383 79826404 CGG 3 OPML1_NUTM2B-AS1 chr11 119206289 119206323 CGG 3 JBS_CBL chr12 6936716 6936775 CAG 3 DRPLA_ATN1 -chr12 50505001 50505024 GGC 3 FRA12A_DIP2B +chr12 50505001 50505024 CGG 3 FRA12A_DIP2B chr12 111598949 111599019 CTG 3 SCA2_ATXN2 -chr12 123533720 123533750 GGC 3 OPDM4_RILPL1 +chr12 123533720 123533750 CGG 3 OPDM4_RILPL1 chr13 70139353 70139383 CTA 3 SCA8_ATXN8OS_flank chr13 70139383 70139429 CTG 3 SCA8_ATXN8OS chr13 99985448 99985494 GCN 3 HPE5_ZIC2 chr13 102161574 102161726 GAA 3 SCA27B_FGF14 chr14 23321472 23321503 GCN 3 OPMD_PABPN1 chr14 92071010 92071052 CTG 3 SCA3_ATXN3 -chr15 22786677 22786703 GCG 3 ALS1_NIPA1 +chr15 22786677 22786703 CGG 3 ALS1_NIPA1 chr15 34419425 34419451 CT 2 aFTLD-U_GOLGA8A chr15 88569433 88569452 TTTG 4 CHNG3_MIR7-2 chr15 89333579 89333585 GCT 3 CPEO_POLG_flank @@ -61,7 +61,7 @@ chr16 17470907 17470922 GCC 3 DBQD2_XYLT1 chr16 24613488 24613532 TTTCA 5 FAME6_TNRC6A chr16 66490396 66490466 TGGAA 5 SCA31_BEAN1 chr16 67842862 67842950 CAG 3 SCA_THAP11 -chr16 72787694 72787758 GCC 3 SCA4_ZFHX3 +chr16 72787694 72787758 CCG 3 SCA4_ZFHX3 chr16 87604282 87604329 CTG 3 HDL2_JPH3 chr17 17808448 17808460 TTTCA 5 FAME8_RAI1 chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 @@ -80,13 +80,13 @@ chr21 43776442 43776479 CGCGGGGCGGGG 12 EPM1_CSTB chr22 19766762 19766807 GCN 3 TOF_TBX1 chr22 38317282 38317375 CCG 3 EPM_CSNK1E chr22 45795354 45795424 ATTCT 5 SCA10_ATXN10 -chrX 25013529 25013565 NGC 3 PRTS_ARX -chrX 25013649 25013697 NGC 3 EIEE1_ARX +chrX 25013529 25013565 GCN 3 PRTS_ARX +chrX 25013649 25013697 GCN 3 EIEE1_ARX chrX 31284557 31284605 TTC 3 DMD_DMD chrX 31284605 31284613 T 1 DMD_DMD_flank -chrX 67545316 67545419 GCA 3 SBMA_AR +chrX 67545316 67545419 CAG 3 SBMA_AR chrX 71453054 71453131 AGAGGG 6 XDP_TAF1 chrX 137566826 137566856 GCN 3 VACTERLX_ZIC3 -chrX 140504316 140504361 NGC 3 XLID_SOX3 +chrX 140504316 140504361 GCN 3 XLID_SOX3 chrX 147912049 147912111 CGG 3 FXS_FMR1 -chrX 148500604 148500753 GCC 3 FRAXE_AFF2 +chrX 148500604 148500753 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz index 283d10ecfc039af1b72d59de2604bdb4927299a7..5be15b619dc00bd3198bbe4e15f6b1121beda5b0 100644 GIT binary patch literal 1840 zcmV-02haE)iwFb&00000{{{d;LjnL32Zfj2uG_c~fUjrYB>-oJUh&@a(#t^3|O(}*LQ=G~-Wf0|KD4&jR zrkIKw>&NmZfJF#H`a0zKBjWK8YdnfI2RVjvZv+F&e9lXFoU5royd%qw8)&6_-|y>u z!@PW?nNDBA-WoL8<@qZ($Nx9e@awJtD- zLiTJ9a11ep)~c(my8E90_}6d${^zF~Tn9~W-FV*6RqwqsTDuNj!xR@_xU6TBe_SJV z4;7(BBf682EqU3;CZ{}3BkC$^AZ9`>)W9h2jo#OJ$ZI&QJ-Qk^DM5v*gP~VXx%RYW zumK`*)`~O`bfZP&n(|KGN&X8}cv z@=7`om4#Wd_Vl-b^5gq0VK+%zWbIr7!*ygafPpd0XPeWqMH{+ot@et22W>5+sZ4Gw zIhar9upA9rahjZ^2EOEN=rfknC9HUeLgK`F-9U`%)oG7G=O5$c!CiD>l_p0G_?fMj z;YRt__0qgDW~W?9)dp658j{OfxMEkGLM=-92FM<2&5CJCrBcQEe*jXj#dXi;>!?F} zj00<}^9a_8Wr8WBX-Z%rt{-zw-{-w+#u#VOa04p6u5lR8ap0Wd*cO8aZBPkGxw19B zgtk=CQcg>`OtHkee3@b_**BG+LK#`%T3u_+EnlY)!*tZ)Smacy@sv`irMR?~LIm$& z-agjwEw8uha`Xh5lsMx$c=uXtHpdW`fZkdZ2 zluAmThZ(=Ddx)1D@ALW$hKM4iJ6L;^-5K*RuV0Hs;4OMl)*j%Y?0{3BY07&F+aS$F zrBLa*y;gE`Y=h$#DC97X+)c5{vnu1SgwMLP4<=vkj{y(UE9aDxgFGEsJ`HP_@^ss) zsDP{?VQN<+Wos`nwRPdxyxdn~-&G^+S_1kL^;Hx|fwtbW^sASe!lCSuW%|p%k~!sb zn1BT0uR~t9W&LX#Ug}kQZf&HWHK3mj)LRiWi&(9xgZR?ps+a!&3C`<`IqvCH?v~3~%CJ z+bvQfVGS_={aj|}wN-TiSF+Rtds@$X9`BDEb z`JK$$2?;5$>q=YehyfHa<%Hxr8OFC_n8z@q=jvKOW^!6OSjF#VIz?<%%K};^vCIvm ztiI>-TV6L$8%&SGl_%ruS^`tvk`xCX9#<;T0s}5ynK9188@{eVdR)kAA*Ty+7>LBmm zqKWyotw>!($JY9K1aIUZmA9ro{h0SK56$*as&Glu21Z+GuP$I=OhY|TM*p8b|B&um zuUJS_2WKGw!nh3~kB{ZG4*W}~z)JTc^5HfMSy_j+yk9laM?zipuzdM;yaIV`LmuYgO)9@NK!yZ(D6hCZz)D?Kwt>duEVZBJ zki*s_L2XzW-2fROOts2bcnoV!VZDn2ktkRvt@asJCIG6JX{qkR80s1Ky5C8xl eiwFb&00000{{{d;LjnLB00RI3000000001gUziU7 delta 1850 zcmV-A2gUfX4#N(AABzYC000000RIL6LPG)oCI^L=+m7S75kQ}7|D^#|@lxa?O|3gqD)pqX=u-%Q;v2{f8QY@0c{p$||G}?0T_3F{XG^Hs` zQ=Af@g3G*YT)zD?;xE66ZCN{3>>D5k6T2FWw@?%>MA1I;DZT`g2DhPOR0N`RAPQCbf~1Q)=cUkt3Fya z%%*JD+j?tN(7+0H(DneYA?4Ug^|e%g-^=g+`Srhl|NiuV?_lVq8XpgIHH6@;F}_2P z7+OHXWj)*S^@s0bZeHl3K9#ARm9ECrZT|EcLhE-79fvC^;;q;Gz%H#VjV>ih-)W-V;R@auL2o}aTpIynzmR#(z zb0#PqIvD35PcpbI}Gt(W0O<;!|$p0wF3FQMANsZUMvWs6trs#j=Osn7s*fSR*Xp0Z@B zRR2dH1zTG8a=wlxw%2%Y&iR1g96L=g#XLsQeTg2TYd zxdQ^o3OMzVrn2X_4cuH*3YDqLYb8&|GB|FLVu|CZyU9)kt)%^x@FOqngDsc)W5B}< z%6sMcpg;sJa^Gp z4pv*!z>oo%7JVm#s;&>HMAqPcbU!TZ z2CS?QG7r*nT3QtzKDM zj6^aNo!qA!Y0CF=%zL#BXvM~}tJfeMQ9JB1luOP}BWT^OF7&Qy#}9>&A8j{f0Bz8QoNVYi`A==(vc+{e zkF_xtP-*Lo(j5>#$-2yOOPA4g**MQ$2EUU%7F-D|D(kw^)+%BFnXT-QLMN;7WuINf zIAhS&b%0uvH{8M11oL5k=2OC!wH%<+#7^r5EVJ)O#n|GydBz$d*&J5z3O~AQDBJt; zpk5p+>oBNJq>!KSk6dz#QwCxT!Le^3nX5(8Wt=1fM$~WD>-EL;WIH(gnfsy!G98@z ziP}tSy)5&pLp@qdEm?CcKbDbO5|(sstoMP{L7`@#-kVr%+lthGRrK1qP`BWXJY+dF z_2I{|$9ZUW0Lj7JRf!hIx3A4YUT&I<5U@-L6c0bLr#H~q;#%g6v1Jnp{s#(Ut zV^~X$BQ_B=TjxBvgRJYb=37kpyxkshu+d#M$|@u8?(`8c=8xNTDEs~KA#y-ca=5F6 oi2YbOKK3%s+wpeetx`^zPHgpaKk_)seuRGcKea?CqmvK^AoY^4u>b%7 diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi index 86b30c266113483c63969d12f6de90a7f72ab25c..5afce915cb97b5eb05f6f34938b8dd18c9f46617 100644 GIT binary patch delta 232 zcmX@4bWmx-R(=an#<%Cz`Z@QWn8M1lE_y*gMq2`t4ClV$ z5_XmC^CmdWOP)99{r)VSu89{F>wUvjDi!91h6kFR@Uow?O(Yvs(;n3o>f&l z)~(znbycUnF8cDCccmd$pXd5+{(pSMs@>DAR?QRpUG0Cn{`~1#X|=lpL*Gv~4ZZDZ zx;woz@N&+(vcS-O^ZSOAR6J49&(C=vk$=_e*`P9$GgyQqW9}l^YqnRXQ}({CMd1>!zWv14CyA zhVqA-hE88R>*@Kt>{(a$_O1H0XKm)U{}Q34--Fz*UQViipZWWJW#r{G`R1!${av&y zYxjz@?`zIw&pLZoZ`G@hzpULRsd%EI`g^)Sk(2fBpDKW5Ca19oOP)Mj<~vEH{wyy{ W@e{^htl>uS-jf&di%dQ(PzC_I5^>A` diff --git a/data/catalogs/STRchive-disease-loci.hg38.general.bed b/data/catalogs/STRchive-disease-loci.hg38.general.bed index 39d87c24..5abc8e09 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1435798 1435818 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 +chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155188505 155192239 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 156591765 156591783 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100104798 100104824 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100104798 100104824 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176093058 176093103 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 190880872 190880920 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 190880872 190880920 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency chr3 63912684 63912715 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 129172576 129172656 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138946019 138946062 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138946019 138946062 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183712187 183712226 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3074876 3074933 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39348424 39348483 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41745972 41746032 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 159342526 159342618 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356343 10356411 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 177554489 177554531 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 +chr5 177554489 177554531 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy +chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy chr6 16327633 16327724 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45422750 45422801 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170561906 170562017 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27199678 27199732 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27199825 27199861 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27199924 27199966 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 104588970 104588999 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 -chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 +chr7 27199678 27199732 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27199825 27199861 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27199924 27199966 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 +chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 104588970 104588999 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573484 27573546 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 69037286 69037304 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 133071177 133071737 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119206289 119206323 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6936716 6936775 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50505001 50505024 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50505001 50505024 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111598949 111599019 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70139383 70139429 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99985448 99985494 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 102161574 102161726 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 23321472 23321503 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92071010 92071052 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 22786677 22786703 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 22786677 22786703 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis chr15 34419425 34419451 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 88569433 88569452 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89333588 89333629 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 89333588 89333629 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24613438 24613532 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66490396 66490466 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 67842862 67842950 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 chr16 87604282 87604329 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17808358 17808460 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 80147009 80147139 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 43776442 43776479 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 19766762 19766807 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38317282 38317375 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 45795354 45795424 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25013529 25013565 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 25013649 25013697 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy +chrX 25013529 25013565 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome +chrX 25013649 25013697 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy chrX 31284557 31284605 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 67545316 67545419 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 67545316 67545419 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 71453054 71453131 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 137566826 137566856 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 140504316 140504361 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 140504316 140504361 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 147912049 147912111 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 148500604 148500753 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 148500604 148500753 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed index 9000993c..d058a235 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed @@ -1,58 +1,58 @@ chr1 1435799 1435818 GGCGCGGAGC HMNR7_VWA1 -chr1 57367044 57367121 GAAAT,AAAAT SCA37_DAB1 -chr1 94418422 94418444 GCC OPDM5_ABCD3 -chr1 149390803 149390842 GGC NIID_NOTCH2NLC +chr1 57367044 57367121 AAATG,AAAAT SCA37_DAB1 +chr1 94418422 94418444 CCG,GCC OPDM5_ABCD3 +chr1 149390803 149390842 CGG,GGC NIID_NOTCH2NLC chr1 155188506 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156591766 156591783 GGGCC NME_NAXE chr2 96197067 96197124 AAATG,AAAAT FAME2_STARD7 -chr2 100104799 100104824 GCC FRA2A_AFF3 +chr2 100104799 100104824 CCG,GCC FRA2A_AFF3 chr2 176093059 176093103 GCN SD5_HOXD13 -chr2 190880873 190880920 GCA GDPAG_GLS +chr2 190880873 190880920 CAG,GCA GDPAG_GLS chr3 63912685 63912715 CAG SCA7_ATXN7 chr3 129172577 129172656 CAGG DM2_CNBP -chr3 138946020 138946062 NGC BPES_FOXL2 +chr3 138946020 138946062 GCN,NGC BPES_FOXL2 chr3 183712188 183712226 TTTCA,TTTTA FAME4_YEATS2 chr4 3074877 3074933 CAG HD_HTT chr4 39348425 39348483 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41745973 41746032 GCN CCHS_PHOX2B chr4 159342527 159342618 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356344 10356411 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146878728 146878759 GCT SCA12_PPP2R2B -chr5 177554490 177554531 GCC OPDM_FAM193B -chr6 13328477 13328603 GCC OPDM_TBC1D7 +chr5 146878728 146878759 CTG,GCT SCA12_PPP2R2B +chr5 177554490 177554531 CCG,GCC OPDM_FAM193B +chr6 13328477 13328603 CCG,GCC OPDM_TBC1D7 chr6 16327634 16327724 CTG SCA1_ATXN1 chr6 45422751 45422801 GCN CCD_RUNX2 chr6 170561907 170562017 CAG SCA17_TBP -chr7 27199679 27199732 NGC HFG_HOXA13-III -chr7 27199826 27199861 NGC HFG_HOXA13-II -chr7 27199925 27199966 NGC HFG_HOXA13-I -chr7 55887601 55887639 GCG FRA7A_ZNF713 -chr8 104588971 104588999 CGC OPDM1_LRP12 -chr8 118366813 118366918 TGAAA,TAAAA FAME1_SAMD12 +chr7 27199679 27199732 GCN,NGC HFG_HOXA13-III +chr7 27199826 27199861 GCN,NGC HFG_HOXA13-II +chr7 27199925 27199966 GCN,NGC HFG_HOXA13-I +chr7 55887601 55887639 CGG,GCG FRA7A_ZNF713 +chr8 104588971 104588999 CCG,CGC OPDM1_LRP12 +chr8 118366813 118366918 AAATG,TAAAA FAME1_SAMD12 chr9 27573485 27573546 GGCCCC FTDALS1_C9orf72 chr9 69037287 69037304 GAA FRDA_FXN -chr9 130681606 130681641 GCC HSAN-VIII_PRDM12 +chr9 130681606 130681641 CCG,GCC HSAN-VIII_PRDM12 chr9 133071178 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 79826384 79826404 GGC OPML1_NUTM2B-AS1 +chr10 79826384 79826404 CGG,GGC OPML1_NUTM2B-AS1 chr11 119206290 119206323 CGG JBS_CBL chr12 6936717 6936775 CAG DRPLA_ATN1 -chr12 50505002 50505024 GGC FRA12A_DIP2B +chr12 50505002 50505024 CGG,GGC FRA12A_DIP2B chr12 111598950 111599019 CTG SCA2_ATXN2 -chr12 123533721 123533750 GGC OPDM4_RILPL1 +chr12 123533721 123533750 CGG,GGC OPDM4_RILPL1 chr13 70139384 70139429 CTG SCA8_ATXN8OS chr13 99985449 99985494 GCN HPE5_ZIC2 -chr13 102161575 102161726 GAA,GGA,GCA SCA27B_FGF14 +chr13 102161575 102161726 GAA,GGA,CAG SCA27B_FGF14 chr14 23321473 23321503 GCN OPMD_PABPN1 chr14 92071011 92071052 CTG SCA3_ATXN3 -chr15 22786678 22786703 GCG ALS1_NIPA1 +chr15 22786678 22786703 CGG,GCG ALS1_NIPA1 chr15 34419426 34419451 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 88569434 88569452 TTTG CHNG3_MIR7-2 -chr15 89333589 89333629 GCT CPEO_POLG -chr16 17470908 17470922 GCC DBQD2_XYLT1 +chr15 89333589 89333629 CTG,GCT CPEO_POLG +chr16 17470908 17470922 CCG,GCC DBQD2_XYLT1 chr16 24613439 24613532 TTTCA,TTTTA FAME6_TNRC6A chr16 66490397 66490466 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 67842863 67842950 CAG SCA_THAP11 -chr16 72787695 72787758 GCC SCA4_ZFHX3 +chr16 72787695 72787758 CCG,GCC SCA4_ZFHX3 chr16 87604283 87604329 CTG HDL2_JPH3 chr17 17808359 17808460 TTTCA,TTTTA FAME8_RAI1 chr17 80147010 80147139 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 43776443 43776479 CGCGGGGCGGGG EPM1_CSTB chr22 19766763 19766807 GCN TOF_TBX1 chr22 38317283 38317375 CCG EPM_CSNK1E chr22 45795355 45795424 ATTCT SCA10_ATXN10 -chrX 25013530 25013565 NGC PRTS_ARX -chrX 25013650 25013697 NGC EIEE1_ARX +chrX 25013530 25013565 GCN,NGC PRTS_ARX +chrX 25013650 25013697 GCN,NGC EIEE1_ARX chrX 31284558 31284605 TTC DMD_DMD -chrX 67545317 67545419 GCA SBMA_AR +chrX 67545317 67545419 CAG,GCA SBMA_AR chrX 71453055 71453131 AGAGGG XDP_TAF1 chrX 137566827 137566856 GCN VACTERLX_ZIC3 -chrX 140504317 140504361 NGC XLID_SOX3 +chrX 140504317 140504361 GCN,NGC XLID_SOX3 chrX 147912050 147912111 CGG FXS_FMR1 -chrX 148500605 148500753 GCC FRAXE_AFF2 +chrX 148500605 148500753 CCG,GCC FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.straglr.bed b/data/catalogs/STRchive-disease-loci.hg38.straglr.bed index 0e501fb8..16be7b31 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.straglr.bed @@ -1,18 +1,18 @@ chr1 1435798 1435818 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 chr1 57367078 57367121 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94418421 94418444 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 149390802 149390842 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 94418421 94418444 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 149390802 149390842 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC chr1 156591765 156591783 GGGCC NME_NAXE NME_NAXE chr2 96197066 96197069 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100104798 100104824 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100104798 100104824 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176093058 176093103 GCN SD5_HOXD13 SD5_HOXD13 -chr2 190880872 190880920 GCA GDPAG_GLS GDPAG_GLS +chr2 190880872 190880920 CAG GDPAG_GLS GDPAG_GLS chr3 63912684 63912715 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63912715 63912727 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 129172576 129172656 CAGG DM2_CNBP DM2_CNBP chr3 129172656 129172696 CAGA DM2_CNBP DM2_CNBP_CAGA chr3 129172696 129172734 CA DM2_CNBP DM2_CNBP_CA -chr3 138946019 138946062 NGC BPES_FOXL2 BPES_FOXL2 +chr3 138946019 138946062 GCN BPES_FOXL2 BPES_FOXL2 chr3 183712222 183712226 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3074876 3074927 CAG HD_HTT HD_HTT chr4 3074933 3074969 CCG HD_HTT HD_HTT_CCG @@ -20,35 +20,35 @@ chr4 39348479 39348483 AAGGG CANVAS_RFC1 CANVAS_RFC1 chr4 41745972 41746032 GCN CCHS_PHOX2B CCHS_PHOX2B chr4 159342611 159342618 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10356403 10356411 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 146878727 146878759 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 177554489 177554531 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13328476 13328603 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 146878727 146878759 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 177554489 177554531 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13328476 13328603 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16327633 16327724 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45422750 45422801 GCN CCD_RUNX2 CCD_RUNX2 chr6 170561906 170562017 CAG SCA17_TBP SCA17_TBP -chr7 27199678 27199732 NGC HFG_HOXA13-III HFG_HOXA13-III -chr7 27199825 27199861 NGC HFG_HOXA13-II HFG_HOXA13-II -chr7 27199924 27199966 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 55887600 55887639 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 104588970 104588999 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 27199678 27199732 GCN HFG_HOXA13-III HFG_HOXA13-III +chr7 27199825 27199861 GCN HFG_HOXA13-II HFG_HOXA13-II +chr7 27199924 27199966 GCN HFG_HOXA13-I HFG_HOXA13-I +chr7 55887600 55887639 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 104588970 104588999 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 118366912 118366918 TGAAA FAME1_SAMD12 FAME1_SAMD12 chr9 27573484 27573546 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 69037270 69037286 A FRDA_FXN FRDA_FXN_A chr9 69037286 69037304 GAA FRDA_FXN FRDA_FXN -chr9 130681605 130681641 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 79826383 79826404 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 130681605 130681641 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 79826383 79826404 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119206289 119206323 CGG JBS_CBL JBS_CBL chr12 6936716 6936775 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50505001 50505024 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50505001 50505024 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 111598949 111599019 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 123533720 123533750 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 123533720 123533750 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 70139353 70139383 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 70139383 70139429 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 99985448 99985494 GCN HPE5_ZIC2 HPE5_ZIC2 chr13 102161574 102161726 GAA SCA27B_FGF14 SCA27B_FGF14 chr14 23321472 23321503 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 92071010 92071052 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 22786677 22786703 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 22786677 22786703 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 34419425 34419451 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A chr15 88569433 88569452 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 89333579 89333585 GCT CPEO_POLG CPEO_POLG_GCT @@ -58,7 +58,7 @@ chr16 17470907 17470922 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24613488 24613532 TTTCA FAME6_TNRC6A FAME6_TNRC6A chr16 66490396 66490466 TGGAA SCA31_BEAN1 SCA31_BEAN1 chr16 67842862 67842950 CAG SCA_THAP11 SCA_THAP11 -chr16 72787694 72787758 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 72787694 72787758 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 87604282 87604329 CTG HDL2_JPH3 HDL2_JPH3 chr17 17808448 17808460 TTTCA FAME8_RAI1 FAME8_RAI1 chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 @@ -77,13 +77,13 @@ chr21 43776442 43776479 CGCGGGGCGGGG EPM1_CSTB EPM1_CSTB chr22 19766762 19766807 GCN TOF_TBX1 TOF_TBX1 chr22 38317282 38317375 CCG EPM_CSNK1E EPM_CSNK1E chr22 45795354 45795424 ATTCT SCA10_ATXN10 SCA10_ATXN10 -chrX 25013529 25013565 NGC PRTS_ARX PRTS_ARX -chrX 25013649 25013697 NGC EIEE1_ARX EIEE1_ARX +chrX 25013529 25013565 GCN PRTS_ARX PRTS_ARX +chrX 25013649 25013697 GCN EIEE1_ARX EIEE1_ARX chrX 31284557 31284605 TTC DMD_DMD DMD_DMD chrX 31284605 31284613 T DMD_DMD DMD_DMD_T -chrX 67545316 67545419 GCA SBMA_AR SBMA_AR +chrX 67545316 67545419 CAG SBMA_AR SBMA_AR chrX 71453054 71453131 AGAGGG XDP_TAF1 XDP_TAF1 chrX 137566826 137566856 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 -chrX 140504316 140504361 NGC XLID_SOX3 XLID_SOX3 +chrX 140504316 140504361 GCN XLID_SOX3 XLID_SOX3 chrX 147912049 147912111 CGG FXS_FMR1 FXS_FMR1 -chrX 148500604 148500753 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 148500604 148500753 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.stranger.json b/data/catalogs/STRchive-disease-loci.hg38.stranger.json index 0702fe80..eb5c5e63 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg38.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57367078-57367121", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "AAATG", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94418421-94418444", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:149390802-149390842", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100104798-100104824", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:190880872-190880920", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -153,11 +153,11 @@ { "LocusId": "BPES_FOXL2", "ReferenceRegion": "chr3:138946019-138946062", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD", "AR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "BPES", "NormalMax": 14, "PathologicMin": 15, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:146878727-146878759", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:177554489-177554531", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13328476-13328603", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -332,11 +332,11 @@ { "LocusId": "HFG_HOXA13-III", "ReferenceRegion": "chr7:27199678-27199732", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-III", "NormalMax": 18, "PathologicMin": 22, @@ -345,11 +345,11 @@ { "LocusId": "HFG_HOXA13-II", "ReferenceRegion": "chr7:27199825-27199861", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-II", "NormalMax": 12, "PathologicMin": 18, @@ -358,11 +358,11 @@ { "LocusId": "HFG_HOXA13-I", "ReferenceRegion": "chr7:27199924-27199966", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "HFG-I", "NormalMax": 14, "PathologicMin": 22, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:55887600-55887639", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:104588970-104588999", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:118366912-118366918", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGAAA", + "DisplayRU": "AAATG", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:130681605-130681641", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:79826383-79826404", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50505001-50505024", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:123533720-123533750", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:22786677-22786703", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:89333588-89333629", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17470907-17470922", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:72787694-72787758", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -939,11 +939,11 @@ { "LocusId": "PRTS_ARX", "ReferenceRegion": "chrX:25013529-25013565", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20, @@ -952,11 +952,11 @@ { "LocusId": "EIEE1_ARX", "ReferenceRegion": "chrX:25013649-25013697", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "EIEE1", "NormalMax": 16, "PathologicMin": 17, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:67545316-67545419", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1019,11 +1019,11 @@ { "LocusId": "XLID_SOX3", "ReferenceRegion": "chrX:140504316-140504361", - "LocusStructure": "(NGC)*", + "LocusStructure": "(GCN)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "NGC", + "DisplayRU": "GCN", "Disease": "XLID, PHPX", "NormalMax": 15, "PathologicMin": 22, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:148500604-148500753", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/ref-alleles/ref-alleles.T2T-chm13.txt b/data/ref-alleles/ref-alleles.T2T-chm13.txt index 4a360874..bffc3a67 100644 --- a/data/ref-alleles/ref-alleles.T2T-chm13.txt +++ b/data/ref-alleles/ref-alleles.T2T-chm13.txt @@ -5,20 +5,20 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57245935 57245973 AAAAT,GAAAT STRchive -chr1 57245935 57245973 AAAAT,GAAAT TRGT +chr1 57245935 57245973 AAAAT,AAATG STRchive +chr1 57245935 57245973 AAAAT,GAAAT,AAATG TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94266544 94266567 GCC STRchive -chr1 94266544 94266567 GCC TRGT +chr1 94266544 94266567 GCC,CCG STRchive +chr1 94266544 94266567 CCG,GCC TRGT CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC NIID_NOTCH2NLC -chr1 148519695 148519738 GGC STRchive -chr1 148519695 148519738 GGC TRGT +chr1 148519695 148519738 GGC,CGG STRchive +chr1 148519695 148519738 CGG,GGC TRGT GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA @@ -41,8 +41,8 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100563685 100563738 GCC STRchive -chr2 100563685 100563738 GCC TRGT +chr2 100563685 100563738 GCC,CCG STRchive +chr2 100563685 100563738 CCG,GCC TRGT CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC @@ -53,8 +53,8 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191369982 191370024 GCA STRchive -chr2 191369982 191370024 GCA TRGT +chr2 191369982 191370024 GCA,CAG STRchive +chr2 191369982 191370024 CAG,GCA TRGT ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CTGG CA GTAAT ACTCATTCAC BPES_FOXL2 -chr3 141687011 141687054 NGC STRchive -chr3 141687011 141687054 NGC TRGT +chr3 141687011 141687054 NGC,GCN STRchive +chr3 141687011 141687054 GCN,NGC TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -113,20 +113,20 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 147414733 147414780 GCT STRchive -chr5 147414733 147414780 GCT TRGT +chr5 147414733 147414780 GCT,CTG STRchive +chr5 147414733 147414780 CTG,GCT TRGT CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG OPDM_FAM193B -chr5 178096748 178096792 GCC STRchive -chr5 178096748 178096792 GCC TRGT +chr5 178096748 178096792 GCC,CCG STRchive +chr5 178096748 178096792 CCG,GCC TRGT TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC OPDM_TBC1D7 -chr6 13201716 13201843 GCC STRchive -chr6 13201716 13201843 GCC TRGT +chr6 13201716 13201843 GCC,CCG STRchive +chr6 13201716 13201843 CCG,GCC TRGT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT @@ -149,38 +149,38 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27335684 27335720 NGC STRchive -chr7 27335684 27335720 NGC TRGT +chr7 27335684 27335720 NGC,GCN STRchive +chr7 27335684 27335720 GCN,NGC TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27335813 27335849 NGC STRchive -chr7 27335813 27335849 NGC TRGT +chr7 27335813 27335849 NGC,GCN STRchive +chr7 27335813 27335849 GCN,NGC TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27335912 27335954 NGC STRchive -chr7 27335912 27335954 NGC TRGT +chr7 27335912 27335954 NGC,GCN STRchive +chr7 27335912 27335954 GCN,NGC TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 56047900 56047939 GCG STRchive -chr7 56047900 56047939 GCG TRGT +chr7 56047900 56047939 GCG,CGG STRchive +chr7 56047900 56047939 CGG,GCG TRGT CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG OPDM1_LRP12 -chr8 105716409 105716441 CGC STRchive -chr8 105716409 105716441 CGC TRGT +chr8 105716409 105716441 CGC,CCG STRchive +chr8 105716409 105716441 CCG,CGC TRGT AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC FAME1_SAMD12 -chr8 119495247 119495353 TAAAA,TGAAA STRchive -chr8 119495247 119495353 TAAAA,TGAAA TRGT +chr8 119495247 119495353 TAAAA,AAATG STRchive +chr8 119495247 119495353 TAAAA,TGAAA,AAATG TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -197,8 +197,8 @@ AAAAATACAAAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAAAATACAA A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 142886568 142886595 GCC STRchive -chr9 142886568 142886595 GCC TRGT +chr9 142886568 142886595 GCC,CCG STRchive +chr9 142886568 142886595 CCG,GCC TRGT CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG @@ -209,8 +209,8 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 80695718 80695748 GGC STRchive -chr10 80695718 80695748 GGC TRGT +chr10 80695718 80695748 GGC,CGG STRchive +chr10 80695718 80695748 CGG,GGC TRGT GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC @@ -227,8 +227,8 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50468095 50468118 GGC STRchive -chr12 50468095 50468118 GGC TRGT +chr12 50468095 50468118 GGC,CGG STRchive +chr12 50468095 50468118 CGG,GGC TRGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT @@ -239,8 +239,8 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123532573 123532603 GGC STRchive -chr12 123532573 123532603 GGC TRGT +chr12 123532573 123532603 GGC,CGG STRchive +chr12 123532573 123532603 CGG,GGC TRGT CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG @@ -258,7 +258,7 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC SCA27B_FGF14 chr13 101377549 101377792 GAA STRchive -chr13 101377549 101377792 GAA,GGA,GCA TRGT +chr13 101377549 101377792 GAA,GGA,CAG TRGT AACTTTCTGT GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAATGTGTTT AACTTTCTGT GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAATGTGTTT @@ -275,8 +275,8 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 20458510 20458536 GCG STRchive -chr15 20458510 20458536 GCG TRGT +chr15 20458510 20458536 GCG,CGG STRchive +chr15 20458510 20458536 CGG,GCG TRGT AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG @@ -293,16 +293,16 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 87088411 87088452 GCT STRchive -chr15 87088402 87088452 GCT,GTT TRGT +chr15 87088411 87088452 GCT,CTG STRchive +chr15 87088402 87088452 GCT,GTT,CTG TRGT AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17477909 17478002 GCC STRchive -chr16 17477909 17478002 GCC TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCTCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG -TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCTCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG +chr16 17477909 17478002 GCC,CCG STRchive +chr16 17477909 17478002 GCC,CCG TRGT +TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG +TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG FAME6_TNRC6A chr16 24890366 24890430 TTTTA,TTTCA STRchive @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 78605502 78605569 GCC STRchive -chr16 78605502 78605569 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 78605502 78605569 GCC,CCG STRchive +chr16 78605502 78605569 CCG,GCC TRGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 93675723 93675776 CTG STRchive @@ -425,14 +425,14 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 24597766 24597802 NGC STRchive -chrX 24597766 24597802 NGC TRGT +chrX 24597766 24597802 NGC,GCN STRchive +chrX 24597766 24597802 GCN,NGC TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 24597886 24597934 NGC STRchive -chrX 24597886 24597934 NGC TRGT +chrX 24597886 24597934 NGC,GCN STRchive +chrX 24597886 24597934 GCN,NGC TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 65975147 65975250 GCA STRchive -chrX 65975147 65975250 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 65975147 65975250 GCA,CAG STRchive +chrX 65975147 65975250 CAG,GCA TRGT +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG XDP_TAF1 chrX 69887153 69887230 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 138816203 138816248 NGC STRchive -chrX 138816203 138816248 NGC TRGT +chrX 138816203 138816248 NGC,GCN STRchive +chrX 138816203 138816248 GCN,NGC TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 146765190 146765342 GCC STRchive -chrX 146765190 146765342 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 146765190 146765342 GCC,CCG STRchive +chrX 146765190 146765342 CCG,GCC TRGT +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg19.txt b/data/ref-alleles/ref-alleles.hg19.txt index 36022cd4..9100dfb3 100644 --- a/data/ref-alleles/ref-alleles.hg19.txt +++ b/data/ref-alleles/ref-alleles.hg19.txt @@ -5,20 +5,20 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57832715 57832793 AAAAT,GAAAT STRchive -chr1 57832715 57832793 AAAAT,GAAAT TRGT +chr1 57832715 57832793 AAAAT,AAATG STRchive +chr1 57832715 57832793 AAAAT,GAAAT,AAATG TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94883977 94884000 GCC STRchive -chr1 94883977 94884000 GCC TRGT +chr1 94883977 94884000 GCC,CCG STRchive +chr1 94883977 94884000 CCG,GCC TRGT CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC NIID_NOTCH2NLC -chr1 145209323 145209354 GGC STRchive -chr1 145209323 145209354 GGC TRGT +chr1 145209323 145209354 GGC,CGG STRchive +chr1 145209323 145209354 CGG,GGC TRGT GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA @@ -41,8 +41,8 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100721260 100721286 GCC STRchive -chr2 100721260 100721286 GCC TRGT +chr2 100721260 100721286 GCC,CCG STRchive +chr2 100721260 100721286 CCG,GCC TRGT CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC @@ -53,8 +53,8 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191745598 191745646 GCA STRchive -chr2 191745598 191745646 GCA TRGT +chr2 191745598 191745646 GCA,CAG STRchive +chr2 191745598 191745646 CAG,GCA TRGT ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138664861 138664904 NGC STRchive -chr3 138664861 138664904 NGC TRGT +chr3 138664861 138664904 NGC,GCN STRchive +chr3 138664861 138664904 GCN,NGC TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -113,20 +113,20 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146258290 146258322 GCT STRchive -chr5 146258290 146258322 GCT TRGT +chr5 146258290 146258322 GCT,CTG STRchive +chr5 146258290 146258322 CTG,GCT TRGT CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG OPDM_FAM193B -chr5 176981490 176981532 GCC STRchive -chr5 176981490 176981532 GCC TRGT +chr5 176981490 176981532 GCC,CCG STRchive +chr5 176981490 176981532 CCG,GCC TRGT TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC OPDM_TBC1D7 -chr6 13328708 13328835 GCC STRchive -chr6 13328708 13328835 GCC TRGT +chr6 13328708 13328835 GCC,CCG STRchive +chr6 13328708 13328835 CCG,GCC TRGT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT @@ -149,38 +149,38 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27239297 27239351 NGC STRchive -chr7 27239297 27239351 NGC TRGT +chr7 27239297 27239351 NGC,GCN STRchive +chr7 27239297 27239351 GCN,NGC TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27239444 27239480 NGC STRchive -chr7 27239444 27239480 NGC TRGT +chr7 27239444 27239480 NGC,GCN STRchive +chr7 27239444 27239480 GCN,NGC TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27239543 27239585 NGC STRchive -chr7 27239543 27239585 NGC TRGT +chr7 27239543 27239585 NGC,GCN STRchive +chr7 27239543 27239585 GCN,NGC TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 55955293 55955332 GCG STRchive -chr7 55955293 55955332 GCG TRGT +chr7 55955293 55955332 GCG,CGG STRchive +chr7 55955293 55955332 CGG,GCG TRGT CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG OPDM1_LRP12 -chr8 105601198 105601227 CGC STRchive -chr8 105601198 105601227 CGC TRGT +chr8 105601198 105601227 CGC,CCG STRchive +chr8 105601198 105601227 CCG,CGC TRGT ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC FAME1_SAMD12 -chr8 119379051 119379157 TAAAA,TGAAA STRchive -chr8 119379051 119379157 TAAAA,TGAAA TRGT +chr8 119379051 119379157 TAAAA,AAATG STRchive +chr8 119379051 119379157 TAAAA,TGAAA,AAATG TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -197,8 +197,8 @@ TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 133556992 133557028 GCC STRchive -chr9 133556992 133557028 GCC TRGT +chr9 133556992 133557028 GCC,CCG STRchive +chr9 133556992 133557028 CCG,GCC TRGT CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG @@ -209,8 +209,8 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 81586139 81586160 GGC STRchive -chr10 81586139 81586160 GGC TRGT +chr10 81586139 81586160 GGC,CGG STRchive +chr10 81586139 81586160 CGG,GGC TRGT GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC @@ -227,8 +227,8 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50898784 50898807 GGC STRchive -chr12 50898784 50898807 GGC TRGT +chr12 50898784 50898807 GGC,CGG STRchive +chr12 50898784 50898807 CGG,GGC TRGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT @@ -239,8 +239,8 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 124018267 124018297 GGC STRchive -chr12 124018267 124018297 GGC TRGT +chr12 124018267 124018297 GGC,CGG STRchive +chr12 124018267 124018297 CGG,GGC TRGT CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG @@ -258,7 +258,7 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC SCA27B_FGF14 chr13 102813924 102814076 GAA STRchive -chr13 102813924 102814076 GAA,GGA,GCA TRGT +chr13 102813924 102814076 GAA,GGA,CAG TRGT TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG @@ -275,8 +275,8 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 23086363 23086389 GCG STRchive -chr15 23086363 23086389 GCG TRGT +chr15 23086363 23086389 GCG,CGG STRchive +chr15 23086363 23086389 CGG,GCG TRGT CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC @@ -293,14 +293,14 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89876819 89876860 GCT STRchive -chr15 89876810 89876860 GCT,GTT TRGT +chr15 89876819 89876860 GCT,CTG STRchive +chr15 89876810 89876860 GCT,GTT,CTG TRGT AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17564764 17564779 GCC STRchive -chr16 17564764 17564779 GCC TRGT +chr16 17564764 17564779 GCC,CCG STRchive +chr16 17564764 17564779 GCC,CCG TRGT TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72821593 72821657 GCC STRchive -chr16 72821593 72821657 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 72821593 72821657 GCC,CCG STRchive +chr16 72821593 72821657 CCG,GCC TRGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87637888 87637935 CTG STRchive @@ -425,14 +425,14 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25031646 25031682 NGC STRchive -chrX 25031646 25031682 NGC TRGT +chrX 25031646 25031682 NGC,GCN STRchive +chrX 25031646 25031682 GCN,NGC TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 25031766 25031814 NGC STRchive -chrX 25031766 25031814 NGC TRGT +chrX 25031766 25031814 NGC,GCN STRchive +chrX 25031766 25031814 GCN,NGC TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 66765158 66765261 GCA STRchive -chrX 66765158 66765261 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 66765158 66765261 GCA,CAG STRchive +chrX 66765158 66765261 CAG,GCA TRGT +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG XDP_TAF1 chrX 70672904 70672981 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 139586481 139586526 NGC STRchive -chrX 139586481 139586526 NGC TRGT +chrX 139586481 139586526 NGC,GCN STRchive +chrX 139586481 139586526 GCN,NGC TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 147582124 147582273 GCC STRchive -chrX 147582124 147582273 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 147582124 147582273 GCC,CCG STRchive +chrX 147582124 147582273 CCG,GCC TRGT +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg38.txt b/data/ref-alleles/ref-alleles.hg38.txt index b9c4bd8f..7fe83a7b 100644 --- a/data/ref-alleles/ref-alleles.hg38.txt +++ b/data/ref-alleles/ref-alleles.hg38.txt @@ -5,20 +5,20 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57367043 57367121 AAAAT,GAAAT STRchive -chr1 57367043 57367121 AAAAT,GAAAT TRGT +chr1 57367043 57367121 AAAAT,AAATG STRchive +chr1 57367043 57367121 AAAAT,GAAAT,AAATG TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94418421 94418444 GCC STRchive -chr1 94418421 94418444 GCC TRGT +chr1 94418421 94418444 GCC,CCG STRchive +chr1 94418421 94418444 CCG,GCC TRGT CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC NIID_NOTCH2NLC -chr1 149390802 149390842 GGC STRchive -chr1 149390802 149390842 GGC TRGT +chr1 149390802 149390842 GGC,CGG STRchive +chr1 149390802 149390842 CGG,GGC TRGT GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA @@ -41,8 +41,8 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100104798 100104824 GCC STRchive -chr2 100104798 100104824 GCC TRGT +chr2 100104798 100104824 GCC,CCG STRchive +chr2 100104798 100104824 CCG,GCC TRGT CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC @@ -53,8 +53,8 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 190880872 190880920 GCA STRchive -chr2 190880872 190880920 GCA TRGT +chr2 190880872 190880920 GCA,CAG STRchive +chr2 190880872 190880920 CAG,GCA TRGT ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138946019 138946062 NGC STRchive -chr3 138946019 138946062 NGC TRGT +chr3 138946019 138946062 NGC,GCN STRchive +chr3 138946019 138946062 GCN,NGC TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -113,20 +113,20 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146878727 146878759 GCT STRchive -chr5 146878727 146878759 GCT TRGT +chr5 146878727 146878759 GCT,CTG STRchive +chr5 146878727 146878759 CTG,GCT TRGT CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG OPDM_FAM193B -chr5 177554489 177554531 GCC STRchive -chr5 177554489 177554531 GCC TRGT +chr5 177554489 177554531 GCC,CCG STRchive +chr5 177554489 177554531 CCG,GCC TRGT TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC OPDM_TBC1D7 -chr6 13328476 13328603 GCC STRchive -chr6 13328476 13328603 GCC TRGT +chr6 13328476 13328603 GCC,CCG STRchive +chr6 13328476 13328603 CCG,GCC TRGT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT @@ -149,38 +149,38 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27199678 27199732 NGC STRchive -chr7 27199678 27199732 NGC TRGT +chr7 27199678 27199732 NGC,GCN STRchive +chr7 27199678 27199732 GCN,NGC TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27199825 27199861 NGC STRchive -chr7 27199825 27199861 NGC TRGT +chr7 27199825 27199861 NGC,GCN STRchive +chr7 27199825 27199861 GCN,NGC TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27199924 27199966 NGC STRchive -chr7 27199924 27199966 NGC TRGT +chr7 27199924 27199966 NGC,GCN STRchive +chr7 27199924 27199966 GCN,NGC TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 55887600 55887639 GCG STRchive -chr7 55887600 55887639 GCG TRGT +chr7 55887600 55887639 GCG,CGG STRchive +chr7 55887600 55887639 CGG,GCG TRGT CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG OPDM1_LRP12 -chr8 104588970 104588999 CGC STRchive -chr8 104588970 104588999 CGC TRGT +chr8 104588970 104588999 CGC,CCG STRchive +chr8 104588970 104588999 CCG,CGC TRGT ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC FAME1_SAMD12 -chr8 118366812 118366918 TAAAA,TGAAA STRchive -chr8 118366812 118366918 TAAAA,TGAAA TRGT +chr8 118366812 118366918 TAAAA,AAATG STRchive +chr8 118366812 118366918 TAAAA,TGAAA,AAATG TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -197,8 +197,8 @@ TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 130681605 130681641 GCC STRchive -chr9 130681605 130681641 GCC TRGT +chr9 130681605 130681641 GCC,CCG STRchive +chr9 130681605 130681641 CCG,GCC TRGT CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG @@ -209,8 +209,8 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 79826383 79826404 GGC STRchive -chr10 79826383 79826404 GGC TRGT +chr10 79826383 79826404 GGC,CGG STRchive +chr10 79826383 79826404 CGG,GGC TRGT GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC @@ -227,8 +227,8 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50505001 50505024 GGC STRchive -chr12 50505001 50505024 GGC TRGT +chr12 50505001 50505024 GGC,CGG STRchive +chr12 50505001 50505024 CGG,GGC TRGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT @@ -239,8 +239,8 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123533720 123533750 GGC STRchive -chr12 123533720 123533750 GGC TRGT +chr12 123533720 123533750 GGC,CGG STRchive +chr12 123533720 123533750 CGG,GGC TRGT CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG @@ -258,7 +258,7 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC SCA27B_FGF14 chr13 102161574 102161726 GAA STRchive -chr13 102161574 102161726 GAA,GGA,GCA TRGT +chr13 102161574 102161726 GAA,GGA,CAG TRGT TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG @@ -275,8 +275,8 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 22786677 22786703 GCG STRchive -chr15 22786677 22786703 GCG TRGT +chr15 22786677 22786703 GCG,CGG STRchive +chr15 22786677 22786703 CGG,GCG TRGT AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG @@ -293,14 +293,14 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89333588 89333629 GCT STRchive -chr15 89333579 89333629 GCT,GTT TRGT +chr15 89333588 89333629 GCT,CTG STRchive +chr15 89333579 89333629 GCT,GTT,CTG TRGT AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17470907 17470922 GCC STRchive -chr16 17470907 17470922 GCC TRGT +chr16 17470907 17470922 GCC,CCG STRchive +chr16 17470907 17470922 GCC,CCG TRGT TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72787694 72787758 GCC STRchive -chr16 72787694 72787758 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 72787694 72787758 GCC,CCG STRchive +chr16 72787694 72787758 CCG,GCC TRGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87604282 87604329 CTG STRchive @@ -425,14 +425,14 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25013529 25013565 NGC STRchive -chrX 25013529 25013565 NGC TRGT +chrX 25013529 25013565 NGC,GCN STRchive +chrX 25013529 25013565 GCN,NGC TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 25013649 25013697 NGC STRchive -chrX 25013649 25013697 NGC TRGT +chrX 25013649 25013697 NGC,GCN STRchive +chrX 25013649 25013697 GCN,NGC TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 67545316 67545419 GCA STRchive -chrX 67545316 67545419 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 67545316 67545419 GCA,CAG STRchive +chrX 67545316 67545419 CAG,GCA TRGT +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG XDP_TAF1 chrX 71453054 71453131 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 140504316 140504361 NGC STRchive -chrX 140504316 140504361 NGC TRGT +chrX 140504316 140504361 NGC,GCN STRchive +chrX 140504316 140504361 GCN,NGC TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 148500604 148500753 GCC STRchive -chrX 148500604 148500753 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 148500604 148500753 GCC,CCG STRchive +chrX 148500604 148500753 CCG,GCC TRGT +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC From 7192f9d994a7e2f0b602118c47b8c42f09cb459f Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 27 May 2026 11:35:43 -0600 Subject: [PATCH 03/16] Standardize reference allele as well --- scripts/check-loci.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index 7466a48c..d801634c 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -246,6 +246,24 @@ def check_motif_orientation(record): f"Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n" ) record[gene_field] = new_gene_motifs + #3. Standardize reference repeat + if record['reference_motif_reference_orientation'] is not None: + old_reference_motifs = record['reference_motif_reference_orientation'] + new_reference_motifs = [ + standardise_reference_motif(motif) + for motif in old_reference_motifs + ] + + if old_reference_motifs != new_reference_motifs: + for old_motif, new_motif in zip(old_reference_motifs, new_reference_motifs): + if old_motif != new_motif: + sys.stderr.write( + f"Updating {record['id']} reference_motif_reference_orientation from {old_motif} to {new_motif}\n" + ) + + record['reference_motif_reference_orientation'] = new_reference_motifs + + # Replace locus_structure with a string of the motifs in reference orientation if record['locus_structure'] is None: From 373612502afd6ba931d2e0b9c4e22e3d2fb207c1 Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Wed, 27 May 2026 17:42:28 +0000 Subject: [PATCH 04/16] Update data --- data/STRchive-loci.json | 52 ++--- .../STRchive-disease-loci.T2T-chm13.TRGT.bed | 48 ++--- ...TRchive-disease-loci.T2T-chm13.general.bed | 52 ++--- ...STRchive-disease-loci.T2T-chm13.longTR.bed | 52 ++--- .../STRchive-disease-loci.hg19.TRGT.bed | 48 ++--- .../STRchive-disease-loci.hg19.general.bed | 52 ++--- .../STRchive-disease-loci.hg19.longTR.bed | 52 ++--- .../STRchive-disease-loci.hg38.TRGT.bed | 48 ++--- .../STRchive-disease-loci.hg38.general.bed | 52 ++--- .../STRchive-disease-loci.hg38.longTR.bed | 52 ++--- data/ref-alleles/ref-alleles.T2T-chm13.txt | 200 +++++++++--------- data/ref-alleles/ref-alleles.hg19.txt | 196 ++++++++--------- data/ref-alleles/ref-alleles.hg38.txt | 200 +++++++++--------- 13 files changed, 552 insertions(+), 552 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 6001e685..47110e5b 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -29,7 +29,7 @@ "year": "2023 [@pmid:39068203]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -95,7 +95,7 @@ "year": "1993 [@pmid:8334699]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -161,7 +161,7 @@ "year": "2014 [@pmid:24763282]", "location_in_gene": "Intron 3", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -227,7 +227,7 @@ "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", "location_in_gene": "Coding Exon 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -293,7 +293,7 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 110-115", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -359,7 +359,7 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -1602,7 +1602,7 @@ "year": "2007 [@pmid:17236128]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -1876,7 +1876,7 @@ "year": "2026 [@pmid:39868092]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2074,7 +2074,7 @@ "year": "2003 [@pmid:12529855]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2282,7 +2282,7 @@ "year": "2019 [@pmid:30970188]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2419,7 +2419,7 @@ "year": "2004 [@pmid:15385446]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2485,7 +2485,7 @@ "year": "2003 [@pmid:12676922]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2551,7 +2551,7 @@ "year": "2000 [@pmid:10839976]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -2830,7 +2830,7 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CGC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3170,7 +3170,7 @@ "year": "2019 [@pmid:30342764]", "location_in_gene": "Coding Exon 1/Intron 1 depending on transcript", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3312,7 +3312,7 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3378,7 +3378,7 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "Exon 1 of lncRNA (noncoding)", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3642,7 +3642,7 @@ "year": null, "location_in_gene": "Coding Exon 2", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3723,7 +3723,7 @@ "year": "1999 [@pmid:10581021]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -3789,7 +3789,7 @@ "year": "2015 [@pmid:26005867]", "location_in_gene": "Coding Exon 5", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -4159,7 +4159,7 @@ "year": "2022 [@pmid:35148830]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -4367,7 +4367,7 @@ "year": "2002 [@pmid:12428212]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["GCN"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -4575,7 +4575,7 @@ "year": "2026 [@pmid:41959811]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -5113,7 +5113,7 @@ "year": "2019 [@pmid:30554721]", "location_in_gene": "5' promoter region. Note, it can also be annotated coding or introntic depending on the reference, due to missing sequences in some reference genomes.", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -5270,7 +5270,7 @@ "year": "2023 [@pmid:38035881]", "location_in_gene": "Coding, Last Exon (exon number is transcript dependent)", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], @@ -5468,7 +5468,7 @@ "year": "2014 [@pmid:25196122]", "location_in_gene": "Intron 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed index 7b23e014..76f3fbec 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed @@ -1,50 +1,50 @@ chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= -chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= -chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= +chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= +chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176581179 176581224 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= +chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63956302 63956345 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= +chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= -chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= -chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= +chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16200188 16200282 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45257567 45257618 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 171935458 171935569 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= -chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= -chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= -chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= -chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= +chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= +chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= +chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= +chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119226662 119226696 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6947903 6947941 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= +chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 111575873 111575940 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= +chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 69361213 69361270 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99196358 99196404 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 17522488 17522519 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 86300519 86300603 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= +chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 32225152 32225178 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= @@ -52,7 +52,7 @@ chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 73638636 73638724 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= +chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 93675723 93675776 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 42132054 42132091 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 20143615 20143660 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38781587 38781680 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46280059 46280134 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= -chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=GCN;STRUC= +chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= +chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 69887153 69887230 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 135876774 135876804 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= +chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=GCN;STRUC= chrX 146176677 146176769 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= +chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed index c79207eb..bd57934c 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 870158 870178 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 -chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 -chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 154328121 154330802 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 155728131 155728159 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100563685 100563738 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100563685 100563738 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176581179 176581224 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191369982 191370024 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency +chr2 191369982 191370024 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63956302 63956333 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 131917482 131917557 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 141687011 141687054 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 141687011 141687054 BPES_FOXL2 FOXL2 GCN GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 186521667 186521706 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3073603 3073687 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39318077 39318136 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41719745 41719805 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 162693303 162693405 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10295525 10295593 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 -chr5 178096748 178096792 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy -chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy +chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 178096748 178096792 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16200188 16200282 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45257567 45257618 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 171935458 171935569 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27335684 27335720 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 -chr7 27335813 27335849 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 -chr7 27335912 27335954 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 -chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105716409 105716441 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr7 27335684 27335720 HFG_HOXA13-III HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27335813 27335849 HFG_HOXA13-II HOXA13 GCN GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27335912 27335954 HFG_HOXA13-I HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 1 +chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105716409 105716441 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27584063 27584155 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 81210834 81210861 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 145285333 145285861 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119226662 119226696 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6947903 6947941 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50468095 50468118 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type +chr12 50468095 50468118 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111575873 111575940 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 +chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 69361243 69361270 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99196358 99196404 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 101377549 101377792 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 17522488 17522519 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 86300519 86300603 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 20458510 20458536 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis +chr15 20458510 20458536 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 32225152 32225178 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 86324038 86324057 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 87088411 87088452 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 87088411 87088452 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24890366 24890430 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 72284666 72284761 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 73638636 73638724 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 +chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 93675723 93675776 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17754961 17755053 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 81047404 81047534 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 42132054 42132091 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 20143615 20143660 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38781587 38781680 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46280059 46280134 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 24597766 24597802 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome -chrX 24597886 24597934 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy +chrX 24597766 24597802 PRTS_ARX ARX GCN GCN 20 XR Partington syndrome +chrX 24597886 24597934 EIEE1_ARX ARX GCN GCN 17 XR Early-infantile epileptic encephalopathy chrX 30882677 30882743 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 65975147 65975250 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 65975147 65975250 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 69887153 69887230 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 135876774 135876804 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 138816203 138816248 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 138816203 138816248 XLID_SOX3 SOX3 GCN GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146176677 146176769 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 146765190 146765342 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 146765190 146765342 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed index 66187d67..57f0e361 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed @@ -1,58 +1,58 @@ chr1 870159 870178 GGCGCGGAGC HMNR7_VWA1 chr1 57245936 57245973 AAATG,AAAAT SCA37_DAB1 -chr1 94266545 94266567 CCG,GCC OPDM5_ABCD3 -chr1 148519696 148519738 CGG,GGC NIID_NOTCH2NLC +chr1 94266545 94266567 CCG OPDM5_ABCD3 +chr1 148519696 148519738 CGG NIID_NOTCH2NLC chr1 154328122 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 155728132 155728159 GGGCC NME_NAXE chr2 96703675 96703732 AAATG,AAAAT FAME2_STARD7 -chr2 100563686 100563738 CCG,GCC FRA2A_AFF3 +chr2 100563686 100563738 CCG FRA2A_AFF3 chr2 176581180 176581224 GCN SD5_HOXD13 -chr2 191369983 191370024 CAG,GCA GDPAG_GLS +chr2 191369983 191370024 CAG GDPAG_GLS chr3 63956303 63956333 CAG SCA7_ATXN7 chr3 131917483 131917557 CAGG DM2_CNBP -chr3 141687012 141687054 GCN,NGC BPES_FOXL2 +chr3 141687012 141687054 GCN BPES_FOXL2 chr3 186521668 186521706 TTTCA,TTTTA FAME4_YEATS2 chr4 3073604 3073687 CAG HD_HTT chr4 39318078 39318136 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41719746 41719805 GCN CCHS_PHOX2B chr4 162693304 162693405 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10295526 10295593 TTTCA,TTTTA FAME3_MARCHF6 -chr5 147414734 147414780 CTG,GCT SCA12_PPP2R2B -chr5 178096749 178096792 CCG,GCC OPDM_FAM193B -chr6 13201717 13201843 CCG,GCC OPDM_TBC1D7 +chr5 147414734 147414780 CTG SCA12_PPP2R2B +chr5 178096749 178096792 CCG OPDM_FAM193B +chr6 13201717 13201843 CCG OPDM_TBC1D7 chr6 16200189 16200282 CTG SCA1_ATXN1 chr6 45257568 45257618 GCN CCD_RUNX2 chr6 171935459 171935569 CAG SCA17_TBP -chr7 27335685 27335720 GCN,NGC HFG_HOXA13-III -chr7 27335814 27335849 GCN,NGC HFG_HOXA13-II -chr7 27335913 27335954 GCN,NGC HFG_HOXA13-I -chr7 56047901 56047939 CGG,GCG FRA7A_ZNF713 -chr8 105716410 105716441 CCG,CGC OPDM1_LRP12 +chr7 27335685 27335720 GCN HFG_HOXA13-III +chr7 27335814 27335849 GCN HFG_HOXA13-II +chr7 27335913 27335954 GCN HFG_HOXA13-I +chr7 56047901 56047939 CGG FRA7A_ZNF713 +chr8 105716410 105716441 CCG OPDM1_LRP12 chr8 119495248 119495353 AAATG,TAAAA FAME1_SAMD12 chr9 27584064 27584155 GGCCCC FTDALS1_C9orf72 chr9 81210835 81210861 GAA FRDA_FXN -chr9 142886569 142886595 CCG,GCC HSAN-VIII_PRDM12 +chr9 142886569 142886595 CCG HSAN-VIII_PRDM12 chr9 145285334 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 80695719 80695748 CGG,GGC OPML1_NUTM2B-AS1 +chr10 80695719 80695748 CGG OPML1_NUTM2B-AS1 chr11 119226663 119226696 CGG JBS_CBL chr12 6947904 6947941 CAG DRPLA_ATN1 -chr12 50468096 50468118 CGG,GGC FRA12A_DIP2B +chr12 50468096 50468118 CGG FRA12A_DIP2B chr12 111575874 111575940 CTG SCA2_ATXN2 -chr12 123532574 123532603 CGG,GGC OPDM4_RILPL1 +chr12 123532574 123532603 CGG OPDM4_RILPL1 chr13 69361244 69361270 CTG SCA8_ATXN8OS chr13 99196359 99196404 GCN HPE5_ZIC2 chr13 101377550 101377792 GAA,GGA,CAG SCA27B_FGF14 chr14 17522489 17522519 GCN OPMD_PABPN1 chr14 86300520 86300603 CTG SCA3_ATXN3 -chr15 20458511 20458536 CGG,GCG ALS1_NIPA1 +chr15 20458511 20458536 CGG ALS1_NIPA1 chr15 32225153 32225178 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 86324039 86324057 TTTG CHNG3_MIR7-2 -chr15 87088412 87088452 CTG,GCT CPEO_POLG -chr16 17477910 17478002 CCG,GCC DBQD2_XYLT1 +chr15 87088412 87088452 CTG CPEO_POLG +chr16 17477910 17478002 CCG DBQD2_XYLT1 chr16 24890367 24890430 TTTCA,TTTTA FAME6_TNRC6A chr16 72284667 72284761 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 73638637 73638724 CAG SCA_THAP11 -chr16 78605503 78605569 CCG,GCC SCA4_ZFHX3 +chr16 78605503 78605569 CCG SCA4_ZFHX3 chr16 93675724 93675776 CTG HDL2_JPH3 chr17 17754962 17755053 TTTCA,TTTTA FAME8_RAI1 chr17 81047405 81047534 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 42132055 42132091 CGCGGGGCGGGG EPM1_CSTB chr22 20143616 20143660 GCN TOF_TBX1 chr22 38781588 38781680 CCG EPM_CSNK1E chr22 46280060 46280134 ATTCT SCA10_ATXN10 -chrX 24597767 24597802 GCN,NGC PRTS_ARX -chrX 24597887 24597934 GCN,NGC EIEE1_ARX +chrX 24597767 24597802 GCN PRTS_ARX +chrX 24597887 24597934 GCN EIEE1_ARX chrX 30882678 30882743 TTC DMD_DMD -chrX 65975148 65975250 CAG,GCA SBMA_AR +chrX 65975148 65975250 CAG SBMA_AR chrX 69887154 69887230 AGAGGG XDP_TAF1 chrX 135876775 135876804 GCN VACTERLX_ZIC3 -chrX 138816204 138816248 GCN,NGC XLID_SOX3 +chrX 138816204 138816248 GCN XLID_SOX3 chrX 146176678 146176769 CGG FXS_FMR1 -chrX 146765191 146765342 CCG,GCC FRAXE_AFF2 +chrX 146765191 146765342 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed index 6340ba42..467f6d74 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed @@ -1,50 +1,50 @@ chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= -chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= -chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= +chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= +chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176957786 176957831 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= +chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63898360 63898403 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 128891419 128891577 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= +chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3076603 3076696 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= -chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= -chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= +chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327864 16327955 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45390487 45390538 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170870994 170871105 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= -chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= -chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= -chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= -chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= +chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= +chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= +chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= +chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119076999 119077033 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 7045879 7045938 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= +chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 112036753 112036823 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= +chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70713485 70713561 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 100637702 100637748 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 23790681 23790712 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92537354 92537396 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= +chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34711626 34711652 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= @@ -52,7 +52,7 @@ chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 67876765 67876853 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= +chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87637888 87637935 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 45196323 45196360 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19754285 19754330 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38713287 38713380 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46191234 46191304 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= -chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=GCN;STRUC= +chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= +chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 70672904 70672981 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 136648985 136649015 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= +chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=GCN;STRUC= chrX 146993567 146993629 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= +chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index 9b3d34f5..6e8ec293 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 -chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 -chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 156561557 156561575 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100721260 100721286 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100721260 100721286 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176957786 176957831 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191745598 191745646 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency +chr2 191745598 191745646 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63898360 63898391 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 128891419 128891499 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138664861 138664904 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138664861 138664904 BPES_FOXL2 FOXL2 GCN GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3076603 3076660 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 -chr5 176981490 176981532 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy -chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy +chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 176981490 176981532 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16327864 16327955 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45390487 45390538 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170870994 170871105 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27239297 27239351 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 -chr7 27239444 27239480 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 -chr7 27239543 27239585 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 -chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105601198 105601227 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr7 27239297 27239351 HFG_HOXA13-III HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27239444 27239480 HFG_HOXA13-II HOXA13 GCN GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27239543 27239585 HFG_HOXA13-I HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 1 +chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105601198 105601227 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 71652202 71652220 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119076999 119077033 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 7045879 7045938 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50898784 50898807 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type +chr12 50898784 50898807 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 112036753 112036823 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 +chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70713515 70713561 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 100637702 100637748 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 23790681 23790712 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92537354 92537396 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 23086363 23086389 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis +chr15 23086363 23086389 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 34711626 34711652 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89876819 89876860 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 89876819 89876860 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 67876765 67876853 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 +chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87637888 87637935 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 19754285 19754330 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38713287 38713380 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46191234 46191304 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25031646 25031682 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome -chrX 25031766 25031814 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy +chrX 25031646 25031682 PRTS_ARX ARX GCN GCN 20 XR Partington syndrome +chrX 25031766 25031814 EIEE1_ARX ARX GCN GCN 17 XR Early-infantile epileptic encephalopathy chrX 31302674 31302722 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 66765158 66765261 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 66765158 66765261 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 70672904 70672981 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 136648985 136649015 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 139586481 139586526 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 139586481 139586526 XLID_SOX3 SOX3 GCN GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146993567 146993629 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 147582124 147582273 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 147582124 147582273 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed index f9112ba3..511f42f2 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed @@ -1,58 +1,58 @@ chr1 1371179 1371198 GGCGCGGAGC HMNR7_VWA1 chr1 57832716 57832793 AAATG,AAAAT SCA37_DAB1 -chr1 94883978 94884000 CCG,GCC OPDM5_ABCD3 -chr1 145209324 145209354 CGG,GGC NIID_NOTCH2NLC +chr1 94883978 94884000 CCG OPDM5_ABCD3 +chr1 145209324 145209354 CGG NIID_NOTCH2NLC chr1 155160982 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156561558 156561575 GGGCC NME_NAXE chr2 96862805 96862862 AAATG,AAAAT FAME2_STARD7 -chr2 100721261 100721286 CCG,GCC FRA2A_AFF3 +chr2 100721261 100721286 CCG FRA2A_AFF3 chr2 176957787 176957831 GCN SD5_HOXD13 -chr2 191745599 191745646 CAG,GCA GDPAG_GLS +chr2 191745599 191745646 CAG GDPAG_GLS chr3 63898361 63898391 CAG SCA7_ATXN7 chr3 128891420 128891499 CAGG DM2_CNBP -chr3 138664862 138664904 GCN,NGC BPES_FOXL2 +chr3 138664862 138664904 GCN BPES_FOXL2 chr3 183429976 183430014 TTTCA,TTTTA FAME4_YEATS2 chr4 3076604 3076660 CAG HD_HTT chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41747990 41748049 GCN CCHS_PHOX2B chr4 160263679 160263770 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356456 10356523 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146258291 146258322 CTG,GCT SCA12_PPP2R2B -chr5 176981491 176981532 CCG,GCC OPDM_FAM193B -chr6 13328709 13328835 CCG,GCC OPDM_TBC1D7 +chr5 146258291 146258322 CTG SCA12_PPP2R2B +chr5 176981491 176981532 CCG OPDM_FAM193B +chr6 13328709 13328835 CCG OPDM_TBC1D7 chr6 16327865 16327955 CTG SCA1_ATXN1 chr6 45390488 45390538 GCN CCD_RUNX2 chr6 170870995 170871105 CAG SCA17_TBP -chr7 27239298 27239351 GCN,NGC HFG_HOXA13-III -chr7 27239445 27239480 GCN,NGC HFG_HOXA13-II -chr7 27239544 27239585 GCN,NGC HFG_HOXA13-I -chr7 55955294 55955332 CGG,GCG FRA7A_ZNF713 -chr8 105601199 105601227 CCG,CGC OPDM1_LRP12 +chr7 27239298 27239351 GCN HFG_HOXA13-III +chr7 27239445 27239480 GCN HFG_HOXA13-II +chr7 27239544 27239585 GCN HFG_HOXA13-I +chr7 55955294 55955332 CGG FRA7A_ZNF713 +chr8 105601199 105601227 CCG OPDM1_LRP12 chr8 119379052 119379157 AAATG,TAAAA FAME1_SAMD12 chr9 27573483 27573544 GGCCCC FTDALS1_C9orf72 chr9 71652203 71652220 GAA FRDA_FXN -chr9 133556993 133557028 CCG,GCC HSAN-VIII_PRDM12 +chr9 133556993 133557028 CCG HSAN-VIII_PRDM12 chr9 135946565 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 81586140 81586160 CGG,GGC OPML1_NUTM2B-AS1 +chr10 81586140 81586160 CGG OPML1_NUTM2B-AS1 chr11 119077000 119077033 CGG JBS_CBL chr12 7045880 7045938 CAG DRPLA_ATN1 -chr12 50898785 50898807 CGG,GGC FRA12A_DIP2B +chr12 50898785 50898807 CGG FRA12A_DIP2B chr12 112036754 112036823 CTG SCA2_ATXN2 -chr12 124018268 124018297 CGG,GGC OPDM4_RILPL1 +chr12 124018268 124018297 CGG OPDM4_RILPL1 chr13 70713516 70713561 CTG SCA8_ATXN8OS chr13 100637703 100637748 GCN HPE5_ZIC2 chr13 102813925 102814076 GAA,GGA,CAG SCA27B_FGF14 chr14 23790682 23790712 GCN OPMD_PABPN1 chr14 92537355 92537396 CTG SCA3_ATXN3 -chr15 23086364 23086389 CGG,GCG ALS1_NIPA1 +chr15 23086364 23086389 CGG ALS1_NIPA1 chr15 34711627 34711652 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 89112665 89112683 TTTG CHNG3_MIR7-2 -chr15 89876820 89876860 CTG,GCT CPEO_POLG -chr16 17564765 17564779 CCG,GCC DBQD2_XYLT1 +chr15 89876820 89876860 CTG CPEO_POLG +chr16 17564765 17564779 CCG DBQD2_XYLT1 chr16 24624760 24624853 TTTCA,TTTTA FAME6_TNRC6A chr16 66524300 66524369 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 67876766 67876853 CAG SCA_THAP11 -chr16 72821594 72821657 CCG,GCC SCA4_ZFHX3 +chr16 72821594 72821657 CCG SCA4_ZFHX3 chr16 87637889 87637935 CTG HDL2_JPH3 chr17 17711673 17711774 TTTCA,TTTTA FAME8_RAI1 chr17 78120809 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 45196324 45196360 CGCGGGGCGGGG EPM1_CSTB chr22 19754286 19754330 GCN TOF_TBX1 chr22 38713288 38713380 CCG EPM_CSNK1E chr22 46191235 46191304 ATTCT SCA10_ATXN10 -chrX 25031647 25031682 GCN,NGC PRTS_ARX -chrX 25031767 25031814 GCN,NGC EIEE1_ARX +chrX 25031647 25031682 GCN PRTS_ARX +chrX 25031767 25031814 GCN EIEE1_ARX chrX 31302675 31302722 TTC DMD_DMD -chrX 66765159 66765261 CAG,GCA SBMA_AR +chrX 66765159 66765261 CAG SBMA_AR chrX 70672905 70672981 AGAGGG XDP_TAF1 chrX 136648986 136649015 GCN VACTERLX_ZIC3 -chrX 139586482 139586526 GCN,NGC XLID_SOX3 +chrX 139586482 139586526 GCN XLID_SOX3 chrX 146993568 146993629 CGG FXS_FMR1 -chrX 147582125 147582273 CCG,GCC FRAXE_AFF2 +chrX 147582125 147582273 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed index 662ca536..dc288528 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed @@ -1,50 +1,50 @@ chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= -chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG,GCC;STRUC= -chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG,GGC;STRUC= +chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG,GCC;STRUC= +chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176093058 176093103 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG,GCA;STRUC= +chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63912684 63912727 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 129172576 129172734 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=GCN,NGC;STRUC= +chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3074876 3074969 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG,GCT;STRUC= -chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG,GCC;STRUC= -chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG,GCC;STRUC= +chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327633 16327724 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45422750 45422801 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170561906 170562017 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=GCN,NGC;STRUC= -chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=GCN,NGC;STRUC= -chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=GCN,NGC;STRUC= -chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG,GCG;STRUC= -chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG,CGC;STRUC= +chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= +chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= +chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG,GCC;STRUC= +chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG,GGC;STRUC= +chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119206289 119206323 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6936716 6936775 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=CGG,GGC;STRUC= +chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 111598949 111599019 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=CGG,GGC;STRUC= +chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70139353 70139429 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99985448 99985494 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= chr14 23321472 23321503 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92071010 92071052 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=CGG,GCG;STRUC= +chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34419425 34419451 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= @@ -52,7 +52,7 @@ chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= chr16 67842862 67842950 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG,GCC;STRUC= +chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87604282 87604329 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= @@ -69,12 +69,12 @@ chr21 43776442 43776479 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19766762 19766807 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38317282 38317375 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 45795354 45795424 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=GCN,NGC;STRUC= -chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=GCN,NGC;STRUC= +chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=GCN;STRUC= +chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=CAG,GCA;STRUC= +chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 71453054 71453131 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 137566826 137566856 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=GCN,NGC;STRUC= +chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=GCN;STRUC= chrX 147912049 147912111 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=CCG,GCC;STRUC= +chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg38.general.bed b/data/catalogs/STRchive-disease-loci.hg38.general.bed index 5abc8e09..260dfd09 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.general.bed @@ -1,59 +1,59 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1435798 1435818 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 -chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 GCC CCG 118 AD Oculopharyngodistal myopathy type 5 -chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC GGC CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155188505 155192239 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease chr1 156591765 156591783 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100104798 100104824 FRA2A_AFF3 AFF3 GCC CCG 300 AD Intellectual disability associated with fragile site FRA2A +chr2 100104798 100104824 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176093058 176093103 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 190880872 190880920 GDPAG_GLS GLS GCA CAG 680 AR Glutaminase deficiency +chr2 190880872 190880920 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63912684 63912715 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 129172576 129172656 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138946019 138946062 BPES_FOXL2 FOXL2 NGC GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138946019 138946062 BPES_FOXL2 FOXL2 GCN GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183712187 183712226 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3074876 3074933 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39348424 39348483 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41745972 41746032 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome chr4 159342526 159342618 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356343 10356411 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B GCT CTG 51 AD Spinocerebellar ataxia type 12 -chr5 177554489 177554531 OPDM_FAM193B FAM193B GCC CCG 194 AD Oculopharyngodistal myopathy -chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 GCC CCG 83 AD Oculopharyngodistal myopathy +chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 177554489 177554531 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16327633 16327724 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45422750 45422801 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170561906 170562017 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27199678 27199732 HFG_HOXA13-III HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 3 -chr7 27199825 27199861 HFG_HOXA13-II HOXA13 NGC GCN 18 AD Hand-foot-genital syndrome 2 -chr7 27199924 27199966 HFG_HOXA13-I HOXA13 NGC GCN 22 AD Hand-foot-genital syndrome 1 -chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 GCG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 104588970 104588999 OPDM1_LRP12 LRP12 CGC CCG 85 AD Oculopharyngodistal myopathy type 1 +chr7 27199678 27199732 HFG_HOXA13-III HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 3 +chr7 27199825 27199861 HFG_HOXA13-II HOXA13 GCN GCN 18 AD Hand-foot-genital syndrome 2 +chr7 27199924 27199966 HFG_HOXA13-I HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 1 +chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 104588970 104588999 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573484 27573546 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 69037286 69037304 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 GCC CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII chr9 133071177 133071737 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119206289 119206323 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6936716 6936775 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50505001 50505024 FRA12A_DIP2B DIP2B GGC CGG 273 AD Intellectual developmental disorder, FRA12A type +chr12 50505001 50505024 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111598949 111599019 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 GGC CGG 120 AD Oculopharyngodistal myopathy type 4 +chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70139383 70139429 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99985448 99985494 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 chr13 102161574 102161726 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B chr14 23321472 23321503 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92071010 92071052 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 22786677 22786703 ALS1_NIPA1 NIPA1 GCG CGG 11 AD Amyotrophic lateral sclerosis +chr15 22786677 22786703 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 34419425 34419451 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) chr15 88569433 88569452 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89333588 89333629 CPEO_POLG POLG GCT CTG None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 GCC CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 89333588 89333629 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24613438 24613532 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66490396 66490466 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 chr16 67842862 67842950 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 GCC CCG 46 AD Spinocerebellar ataxia 4 +chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87604282 87604329 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17808358 17808460 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 80147009 80147139 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome @@ -70,12 +70,12 @@ chr21 43776442 43776479 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressi chr22 19766762 19766807 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38317282 38317375 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 45795354 45795424 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25013529 25013565 PRTS_ARX ARX NGC GCN 20 XR Partington syndrome -chrX 25013649 25013697 EIEE1_ARX ARX NGC GCN 17 XR Early-infantile epileptic encephalopathy +chrX 25013529 25013565 PRTS_ARX ARX GCN GCN 20 XR Partington syndrome +chrX 25013649 25013697 EIEE1_ARX ARX GCN GCN 17 XR Early-infantile epileptic encephalopathy chrX 31284557 31284605 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 67545316 67545419 SBMA_AR AR GCA CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 67545316 67545419 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 71453054 71453131 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 137566826 137566856 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 140504316 140504361 XLID_SOX3 SOX3 NGC GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 140504316 140504361 XLID_SOX3 SOX3 GCN GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 147912049 147912111 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 148500604 148500753 FRAXE_AFF2 AFF2 GCC CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 148500604 148500753 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed index d058a235..1cd0d50f 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed @@ -1,58 +1,58 @@ chr1 1435799 1435818 GGCGCGGAGC HMNR7_VWA1 chr1 57367044 57367121 AAATG,AAAAT SCA37_DAB1 -chr1 94418422 94418444 CCG,GCC OPDM5_ABCD3 -chr1 149390803 149390842 CGG,GGC NIID_NOTCH2NLC +chr1 94418422 94418444 CCG OPDM5_ABCD3 +chr1 149390803 149390842 CGG NIID_NOTCH2NLC chr1 155188506 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156591766 156591783 GGGCC NME_NAXE chr2 96197067 96197124 AAATG,AAAAT FAME2_STARD7 -chr2 100104799 100104824 CCG,GCC FRA2A_AFF3 +chr2 100104799 100104824 CCG FRA2A_AFF3 chr2 176093059 176093103 GCN SD5_HOXD13 -chr2 190880873 190880920 CAG,GCA GDPAG_GLS +chr2 190880873 190880920 CAG GDPAG_GLS chr3 63912685 63912715 CAG SCA7_ATXN7 chr3 129172577 129172656 CAGG DM2_CNBP -chr3 138946020 138946062 GCN,NGC BPES_FOXL2 +chr3 138946020 138946062 GCN BPES_FOXL2 chr3 183712188 183712226 TTTCA,TTTTA FAME4_YEATS2 chr4 3074877 3074933 CAG HD_HTT chr4 39348425 39348483 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 chr4 41745973 41746032 GCN CCHS_PHOX2B chr4 159342527 159342618 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356344 10356411 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146878728 146878759 CTG,GCT SCA12_PPP2R2B -chr5 177554490 177554531 CCG,GCC OPDM_FAM193B -chr6 13328477 13328603 CCG,GCC OPDM_TBC1D7 +chr5 146878728 146878759 CTG SCA12_PPP2R2B +chr5 177554490 177554531 CCG OPDM_FAM193B +chr6 13328477 13328603 CCG OPDM_TBC1D7 chr6 16327634 16327724 CTG SCA1_ATXN1 chr6 45422751 45422801 GCN CCD_RUNX2 chr6 170561907 170562017 CAG SCA17_TBP -chr7 27199679 27199732 GCN,NGC HFG_HOXA13-III -chr7 27199826 27199861 GCN,NGC HFG_HOXA13-II -chr7 27199925 27199966 GCN,NGC HFG_HOXA13-I -chr7 55887601 55887639 CGG,GCG FRA7A_ZNF713 -chr8 104588971 104588999 CCG,CGC OPDM1_LRP12 +chr7 27199679 27199732 GCN HFG_HOXA13-III +chr7 27199826 27199861 GCN HFG_HOXA13-II +chr7 27199925 27199966 GCN HFG_HOXA13-I +chr7 55887601 55887639 CGG FRA7A_ZNF713 +chr8 104588971 104588999 CCG OPDM1_LRP12 chr8 118366813 118366918 AAATG,TAAAA FAME1_SAMD12 chr9 27573485 27573546 GGCCCC FTDALS1_C9orf72 chr9 69037287 69037304 GAA FRDA_FXN -chr9 130681606 130681641 CCG,GCC HSAN-VIII_PRDM12 +chr9 130681606 130681641 CCG HSAN-VIII_PRDM12 chr9 133071178 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 79826384 79826404 CGG,GGC OPML1_NUTM2B-AS1 +chr10 79826384 79826404 CGG OPML1_NUTM2B-AS1 chr11 119206290 119206323 CGG JBS_CBL chr12 6936717 6936775 CAG DRPLA_ATN1 -chr12 50505002 50505024 CGG,GGC FRA12A_DIP2B +chr12 50505002 50505024 CGG FRA12A_DIP2B chr12 111598950 111599019 CTG SCA2_ATXN2 -chr12 123533721 123533750 CGG,GGC OPDM4_RILPL1 +chr12 123533721 123533750 CGG OPDM4_RILPL1 chr13 70139384 70139429 CTG SCA8_ATXN8OS chr13 99985449 99985494 GCN HPE5_ZIC2 chr13 102161575 102161726 GAA,GGA,CAG SCA27B_FGF14 chr14 23321473 23321503 GCN OPMD_PABPN1 chr14 92071011 92071052 CTG SCA3_ATXN3 -chr15 22786678 22786703 CGG,GCG ALS1_NIPA1 +chr15 22786678 22786703 CGG ALS1_NIPA1 chr15 34419426 34419451 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 88569434 88569452 TTTG CHNG3_MIR7-2 -chr15 89333589 89333629 CTG,GCT CPEO_POLG -chr16 17470908 17470922 CCG,GCC DBQD2_XYLT1 +chr15 89333589 89333629 CTG CPEO_POLG +chr16 17470908 17470922 CCG DBQD2_XYLT1 chr16 24613439 24613532 TTTCA,TTTTA FAME6_TNRC6A chr16 66490397 66490466 TGGAA,TAGAA,AATAA SCA31_BEAN1 chr16 67842863 67842950 CAG SCA_THAP11 -chr16 72787695 72787758 CCG,GCC SCA4_ZFHX3 +chr16 72787695 72787758 CCG SCA4_ZFHX3 chr16 87604283 87604329 CTG HDL2_JPH3 chr17 17808359 17808460 TTTCA,TTTTA FAME8_RAI1 chr17 80147010 80147139 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 @@ -69,12 +69,12 @@ chr21 43776443 43776479 CGCGGGGCGGGG EPM1_CSTB chr22 19766763 19766807 GCN TOF_TBX1 chr22 38317283 38317375 CCG EPM_CSNK1E chr22 45795355 45795424 ATTCT SCA10_ATXN10 -chrX 25013530 25013565 GCN,NGC PRTS_ARX -chrX 25013650 25013697 GCN,NGC EIEE1_ARX +chrX 25013530 25013565 GCN PRTS_ARX +chrX 25013650 25013697 GCN EIEE1_ARX chrX 31284558 31284605 TTC DMD_DMD -chrX 67545317 67545419 CAG,GCA SBMA_AR +chrX 67545317 67545419 CAG SBMA_AR chrX 71453055 71453131 AGAGGG XDP_TAF1 chrX 137566827 137566856 GCN VACTERLX_ZIC3 -chrX 140504317 140504361 GCN,NGC XLID_SOX3 +chrX 140504317 140504361 GCN XLID_SOX3 chrX 147912050 147912111 CGG FXS_FMR1 -chrX 148500605 148500753 CCG,GCC FRAXE_AFF2 +chrX 148500605 148500753 CCG FRAXE_AFF2 diff --git a/data/ref-alleles/ref-alleles.T2T-chm13.txt b/data/ref-alleles/ref-alleles.T2T-chm13.txt index bffc3a67..a894f3e5 100644 --- a/data/ref-alleles/ref-alleles.T2T-chm13.txt +++ b/data/ref-alleles/ref-alleles.T2T-chm13.txt @@ -11,16 +11,16 @@ CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94266544 94266567 GCC,CCG STRchive -chr1 94266544 94266567 CCG,GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94266544 94266567 CCG STRchive +chr1 94266544 94266567 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 148519695 148519738 GGC,CGG STRchive -chr1 148519695 148519738 CGG,GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA +chr1 148519695 148519738 CGG STRchive +chr1 148519695 148519738 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA ADTKD_MUC1 chr1 154328121 154330802 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive @@ -41,10 +41,10 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100563685 100563738 GCC,CCG STRchive -chr2 100563685 100563738 CCG,GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100563685 100563738 CCG STRchive +chr2 100563685 100563738 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176581179 176581224 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191369982 191370024 GCA,CAG STRchive -chr2 191369982 191370024 CAG,GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 191369982 191370024 CAG STRchive +chr2 191369982 191370024 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63956302 63956333 CAG STRchive @@ -71,10 +71,10 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CTGG CA GTAAT ACTCATTCAC BPES_FOXL2 -chr3 141687011 141687054 NGC,GCN STRchive -chr3 141687011 141687054 GCN,NGC TRGT -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG +chr3 141687011 141687054 GCN STRchive +chr3 141687011 141687054 GCN TRGT +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG FAME4_YEATS2 chr3 186521667 186521706 TTTTA,TTTCA STRchive @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 147414733 147414780 GCT,CTG STRchive -chr5 147414733 147414780 CTG,GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 147414733 147414780 CTG STRchive +chr5 147414733 147414780 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 178096748 178096792 GCC,CCG STRchive -chr5 178096748 178096792 CCG,GCC TRGT -TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 178096748 178096792 CCG STRchive +chr5 178096748 178096792 CCG TRGT +TCGCTCCACA CG CCG CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13201716 13201843 GCC,CCG STRchive -chr6 13201716 13201843 CCG,GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13201716 13201843 CCG STRchive +chr6 13201716 13201843 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16200188 16200282 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27335684 27335720 NGC,GCN STRchive -chr7 27335684 27335720 GCN,NGC TRGT -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC +chr7 27335684 27335720 GCN STRchive +chr7 27335684 27335720 GCN TRGT +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCG GCA GC CGACGGGGGC +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCG GCA GC CGACGGGGGC HFG_HOXA13-II -chr7 27335813 27335849 NGC,GCN STRchive -chr7 27335813 27335849 GCN,NGC TRGT -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG +chr7 27335813 27335849 GCN STRchive +chr7 27335813 27335849 GCN TRGT +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG HFG_HOXA13-I -chr7 27335912 27335954 NGC,GCN STRchive -chr7 27335912 27335954 GCN,NGC TRGT -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG +chr7 27335912 27335954 GCN STRchive +chr7 27335912 27335954 GCN TRGT +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG FRA7A_ZNF713 -chr7 56047900 56047939 GCG,CGG STRchive -chr7 56047900 56047939 CGG,GCG TRGT -CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 56047900 56047939 CGG STRchive +chr7 56047900 56047939 CGG TRGT +CACCGCGGCG G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CACCGCGGCG G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 105716409 105716441 CGC,CCG STRchive -chr8 105716409 105716441 CCG,CGC TRGT -AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 105716409 105716441 CCG STRchive +chr8 105716409 105716441 CCG TRGT +AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 119495247 119495353 TAAAA,AAATG STRchive @@ -197,10 +197,10 @@ AAAAATACAAAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAAAATACAA A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 142886568 142886595 GCC,CCG STRchive -chr9 142886568 142886595 CCG,GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 142886568 142886595 CCG STRchive +chr9 142886568 142886595 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL chr9 145285333 145285861 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive @@ -209,10 +209,10 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 80695718 80695748 GGC,CGG STRchive -chr10 80695718 80695748 CGG,GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 80695718 80695748 CGG STRchive +chr10 80695718 80695748 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119226662 119226696 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50468095 50468118 GGC,CGG STRchive -chr12 50468095 50468118 CGG,GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50468095 50468118 CGG STRchive +chr12 50468095 50468118 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 111575873 111575940 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123532573 123532603 GGC,CGG STRchive -chr12 123532573 123532603 CGG,GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 123532573 123532603 CGG STRchive +chr12 123532573 123532603 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 69361243 69361270 CTG STRchive @@ -275,10 +275,10 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 20458510 20458536 GCG,CGG STRchive -chr15 20458510 20458536 CGG,GCG TRGT -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG +chr15 20458510 20458536 CGG STRchive +chr15 20458510 20458536 CGG TRGT +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG aFTLD-U_GOLGA8A chr15 32225152 32225178 TTTC,CT STRchive @@ -293,15 +293,15 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 87088411 87088452 GCT,CTG STRchive +chr15 87088411 87088452 CTG STRchive chr15 87088402 87088452 GCT,GTT,CTG TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17477909 17478002 GCC,CCG STRchive +chr16 17477909 17478002 CCG STRchive chr16 17477909 17478002 GCC,CCG TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG +TCCCGCTCGG G CCG CCG CCG CCG CCG CCG CCTCGGCTCG CCG CTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CCG CCG CCG CCG CCG CC TCCACCGCCG TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG FAME6_TNRC6A @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 78605502 78605569 GCC,CCG STRchive -chr16 78605502 78605569 CCG,GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +chr16 78605502 78605569 CCG STRchive +chr16 78605502 78605569 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 93675723 93675776 CTG STRchive @@ -425,16 +425,16 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 24597766 24597802 NGC,GCN STRchive -chrX 24597766 24597802 GCN,NGC TRGT -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG +chrX 24597766 24597802 GCN STRchive +chrX 24597766 24597802 GCN TRGT +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG EIEE1_ARX -chrX 24597886 24597934 NGC,GCN STRchive -chrX 24597886 24597934 GCN,NGC TRGT -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG +chrX 24597886 24597934 GCN STRchive +chrX 24597886 24597934 GCN TRGT +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG DMD_DMD chrX 30882677 30882743 TTC STRchive @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 65975147 65975250 GCA,CAG STRchive -chrX 65975147 65975250 CAG,GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 65975147 65975250 CAG STRchive +chrX 65975147 65975250 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 69887153 69887230 AGAGGG STRchive @@ -461,10 +461,10 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 138816203 138816248 NGC,GCN STRchive -chrX 138816203 138816248 GCN,NGC TRGT -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC +chrX 138816203 138816248 GCN STRchive +chrX 138816203 138816248 GCN TRGT +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC FXS_FMR1 chrX 146176677 146176769 CGG STRchive @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 146765190 146765342 GCC,CCG STRchive -chrX 146765190 146765342 CCG,GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +chrX 146765190 146765342 CCG STRchive +chrX 146765190 146765342 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg19.txt b/data/ref-alleles/ref-alleles.hg19.txt index 9100dfb3..b81c1127 100644 --- a/data/ref-alleles/ref-alleles.hg19.txt +++ b/data/ref-alleles/ref-alleles.hg19.txt @@ -11,16 +11,16 @@ CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94883977 94884000 GCC,CCG STRchive -chr1 94883977 94884000 CCG,GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94883977 94884000 CCG STRchive +chr1 94883977 94884000 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 145209323 145209354 GGC,CGG STRchive -chr1 145209323 145209354 CGG,GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA +chr1 145209323 145209354 CGG STRchive +chr1 145209323 145209354 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG AGGAGGCG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG AGGAGGCG ACCGAGAAGA ADTKD_MUC1 chr1 155160981 155162030 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive @@ -41,10 +41,10 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100721260 100721286 GCC,CCG STRchive -chr2 100721260 100721286 CCG,GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100721260 100721286 CCG STRchive +chr2 100721260 100721286 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176957786 176957831 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191745598 191745646 GCA,CAG STRchive -chr2 191745598 191745646 CAG,GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 191745598 191745646 CAG STRchive +chr2 191745598 191745646 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63898360 63898391 CAG STRchive @@ -71,10 +71,10 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138664861 138664904 NGC,GCN STRchive -chr3 138664861 138664904 GCN,NGC TRGT -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG +chr3 138664861 138664904 GCN STRchive +chr3 138664861 138664904 GCN TRGT +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG FAME4_YEATS2 chr3 183429975 183430014 TTTTA,TTTCA STRchive @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146258290 146258322 GCT,CTG STRchive -chr5 146258290 146258322 CTG,GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 146258290 146258322 CTG STRchive +chr5 146258290 146258322 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 176981490 176981532 GCC,CCG STRchive -chr5 176981490 176981532 CCG,GCC TRGT -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 176981490 176981532 CCG STRchive +chr5 176981490 176981532 CCG TRGT +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13328708 13328835 GCC,CCG STRchive -chr6 13328708 13328835 CCG,GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13328708 13328835 CCG STRchive +chr6 13328708 13328835 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16327864 16327955 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27239297 27239351 NGC,GCN STRchive -chr7 27239297 27239351 GCN,NGC TRGT -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC +chr7 27239297 27239351 GCN STRchive +chr7 27239297 27239351 GCN TRGT +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCA GCC GCG GCA GCA GCG GCG GCA GC CGACGGGGGC +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCA GCC GCG GCA GCA GCG GCG GCA GC CGACGGGGGC HFG_HOXA13-II -chr7 27239444 27239480 NGC,GCN STRchive -chr7 27239444 27239480 GCN,NGC TRGT -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG +chr7 27239444 27239480 GCN STRchive +chr7 27239444 27239480 GCN TRGT +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG HFG_HOXA13-I -chr7 27239543 27239585 NGC,GCN STRchive -chr7 27239543 27239585 GCN,NGC TRGT -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG +chr7 27239543 27239585 GCN STRchive +chr7 27239543 27239585 GCN TRGT +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG FRA7A_ZNF713 -chr7 55955293 55955332 GCG,CGG STRchive -chr7 55955293 55955332 CGG,GCG TRGT -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 55955293 55955332 CGG STRchive +chr7 55955293 55955332 CGG TRGT +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 105601198 105601227 CGC,CCG STRchive -chr8 105601198 105601227 CCG,CGC TRGT -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 105601198 105601227 CCG STRchive +chr8 105601198 105601227 CCG TRGT +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 119379051 119379157 TAAAA,AAATG STRchive @@ -197,10 +197,10 @@ TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 133556992 133557028 GCC,CCG STRchive -chr9 133556992 133557028 CCG,GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 133556992 133557028 CCG STRchive +chr9 133556992 133557028 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL chr9 135946564 135947124 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive @@ -209,10 +209,10 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 81586139 81586160 GGC,CGG STRchive -chr10 81586139 81586160 CGG,GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 81586139 81586160 CGG STRchive +chr10 81586139 81586160 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119076999 119077033 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50898784 50898807 GGC,CGG STRchive -chr12 50898784 50898807 CGG,GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50898784 50898807 CGG STRchive +chr12 50898784 50898807 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 112036753 112036823 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 124018267 124018297 GGC,CGG STRchive -chr12 124018267 124018297 CGG,GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 124018267 124018297 CGG STRchive +chr12 124018267 124018297 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 70713515 70713561 CTG STRchive @@ -275,8 +275,8 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 23086363 23086389 GCG,CGG STRchive -chr15 23086363 23086389 CGG,GCG TRGT +chr15 23086363 23086389 CGG STRchive +chr15 23086363 23086389 CGG TRGT CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC @@ -293,15 +293,15 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89876819 89876860 GCT,CTG STRchive +chr15 89876819 89876860 CTG STRchive chr15 89876810 89876860 GCT,GTT,CTG TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17564764 17564779 GCC,CCG STRchive +chr16 17564764 17564779 CCG STRchive chr16 17564764 17564779 GCC,CCG TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA +TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72821593 72821657 GCC,CCG STRchive -chr16 72821593 72821657 CCG,GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +chr16 72821593 72821657 CCG STRchive +chr16 72821593 72821657 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87637888 87637935 CTG STRchive @@ -425,16 +425,16 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25031646 25031682 NGC,GCN STRchive -chrX 25031646 25031682 GCN,NGC TRGT -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG +chrX 25031646 25031682 GCN STRchive +chrX 25031646 25031682 GCN TRGT +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG EIEE1_ARX -chrX 25031766 25031814 NGC,GCN STRchive -chrX 25031766 25031814 GCN,NGC TRGT -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG +chrX 25031766 25031814 GCN STRchive +chrX 25031766 25031814 GCN TRGT +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG DMD_DMD chrX 31302674 31302722 TTC STRchive @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 66765158 66765261 GCA,CAG STRchive -chrX 66765158 66765261 CAG,GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 66765158 66765261 CAG STRchive +chrX 66765158 66765261 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 70672904 70672981 AGAGGG STRchive @@ -461,10 +461,10 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 139586481 139586526 NGC,GCN STRchive -chrX 139586481 139586526 GCN,NGC TRGT -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC +chrX 139586481 139586526 GCN STRchive +chrX 139586481 139586526 GCN TRGT +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC FXS_FMR1 chrX 146993567 146993629 CGG STRchive @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 147582124 147582273 GCC,CCG STRchive -chrX 147582124 147582273 CCG,GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +chrX 147582124 147582273 CCG STRchive +chrX 147582124 147582273 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg38.txt b/data/ref-alleles/ref-alleles.hg38.txt index 7fe83a7b..58d9169d 100644 --- a/data/ref-alleles/ref-alleles.hg38.txt +++ b/data/ref-alleles/ref-alleles.hg38.txt @@ -11,16 +11,16 @@ CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94418421 94418444 GCC,CCG STRchive -chr1 94418421 94418444 CCG,GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94418421 94418444 CCG STRchive +chr1 94418421 94418444 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 149390802 149390842 GGC,CGG STRchive -chr1 149390802 149390842 CGG,GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA +chr1 149390802 149390842 CGG STRchive +chr1 149390802 149390842 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA ADTKD_MUC1 chr1 155188505 155192239 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive @@ -41,10 +41,10 @@ ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100104798 100104824 GCC,CCG STRchive -chr2 100104798 100104824 CCG,GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100104798 100104824 CCG STRchive +chr2 100104798 100104824 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176093058 176093103 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 190880872 190880920 GCA,CAG STRchive -chr2 190880872 190880920 CAG,GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 190880872 190880920 CAG STRchive +chr2 190880872 190880920 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63912684 63912715 CAG STRchive @@ -71,10 +71,10 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138946019 138946062 NGC,GCN STRchive -chr3 138946019 138946062 GCN,NGC TRGT -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG -CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG +chr3 138946019 138946062 GCN STRchive +chr3 138946019 138946062 GCN TRGT +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG +CTACCGGGGC CC GCG GCT GCA GCC GCA GCT GCT GCA GCC GCT GCG GCT GCC GC CATCTGGCAG FAME4_YEATS2 chr3 183712187 183712226 TTTTA,TTTCA STRchive @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146878727 146878759 GCT,CTG STRchive -chr5 146878727 146878759 CTG,GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 146878727 146878759 CTG STRchive +chr5 146878727 146878759 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 177554489 177554531 GCC,CCG STRchive -chr5 177554489 177554531 CCG,GCC TRGT -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 177554489 177554531 CCG STRchive +chr5 177554489 177554531 CCG TRGT +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13328476 13328603 GCC,CCG STRchive -chr6 13328476 13328603 CCG,GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13328476 13328603 CCG STRchive +chr6 13328476 13328603 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16327633 16327724 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27199678 27199732 NGC,GCN STRchive -chr7 27199678 27199732 GCN,NGC TRGT -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC -CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC +chr7 27199678 27199732 GCN STRchive +chr7 27199678 27199732 GCN TRGT +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCA GCC GCG GCA GCA GCG GCG GCA GC CGACGGGGGC +CCGAGGACGA C GCG GCG GCG GCG GCG GCG GCT GCA GCG GCA GCC GCG GCA GCA GCG GCG GCA GC CGACGGGGGC HFG_HOXA13-II -chr7 27199825 27199861 NGC,GCN STRchive -chr7 27199825 27199861 GCN,NGC TRGT -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG -GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG +chr7 27199825 27199861 GCN STRchive +chr7 27199825 27199861 GCN TRGT +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG +GGCACTGGTT G GCC GCG GCC GCC GCC GCA GCC GCG GCC GCC GCC GC CACCGAGAAG HFG_HOXA13-I -chr7 27199924 27199966 NGC,GCN STRchive -chr7 27199924 27199966 GCN,NGC TRGT -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG -CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG +chr7 27199924 27199966 GCN STRchive +chr7 27199924 27199966 GCN TRGT +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG +CCCCGGCCCC G GCA GCC GCC GCC GCT GCA GCC GCT GCT GCA GCC GCC GCC GC CCCTTCCATG FRA7A_ZNF713 -chr7 55887600 55887639 GCG,CGG STRchive -chr7 55887600 55887639 CGG,GCG TRGT -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 55887600 55887639 CGG STRchive +chr7 55887600 55887639 CGG TRGT +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 104588970 104588999 CGC,CCG STRchive -chr8 104588970 104588999 CCG,CGC TRGT -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 104588970 104588999 CCG STRchive +chr8 104588970 104588999 CCG TRGT +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 118366812 118366918 TAAAA,AAATG STRchive @@ -197,10 +197,10 @@ TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 130681605 130681641 GCC,CCG STRchive -chr9 130681605 130681641 CCG,GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 130681605 130681641 CCG STRchive +chr9 130681605 130681641 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL chr9 133071177 133071737 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive @@ -209,10 +209,10 @@ GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 79826383 79826404 GGC,CGG STRchive -chr10 79826383 79826404 CGG,GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 79826383 79826404 CGG STRchive +chr10 79826383 79826404 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119206289 119206323 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50505001 50505024 GGC,CGG STRchive -chr12 50505001 50505024 CGG,GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50505001 50505024 CGG STRchive +chr12 50505001 50505024 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 111598949 111599019 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123533720 123533750 GGC,CGG STRchive -chr12 123533720 123533750 CGG,GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 123533720 123533750 CGG STRchive +chr12 123533720 123533750 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 70139383 70139429 CTG STRchive @@ -275,10 +275,10 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 22786677 22786703 GCG,CGG STRchive -chr15 22786677 22786703 CGG,GCG TRGT -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG +chr15 22786677 22786703 CGG STRchive +chr15 22786677 22786703 CGG TRGT +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG aFTLD-U_GOLGA8A chr15 34419425 34419451 TTTC,CT STRchive @@ -293,15 +293,15 @@ ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89333588 89333629 GCT,CTG STRchive +chr15 89333588 89333629 CTG STRchive chr15 89333579 89333629 GCT,GTT,CTG TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17470907 17470922 GCC,CCG STRchive +chr16 17470907 17470922 CCG STRchive chr16 17470907 17470922 GCC,CCG TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA +TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72787694 72787758 GCC,CCG STRchive -chr16 72787694 72787758 CCG,GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC A CCG CCG CCG CCG CCG CCACT GCC A CCG CCG CCG CCG CCG GTGGGGACGT +chr16 72787694 72787758 CCG STRchive +chr16 72787694 72787758 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87604282 87604329 CTG STRchive @@ -425,16 +425,16 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25013529 25013565 NGC,GCN STRchive -chrX 25013529 25013565 GCN,NGC TRGT -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG -GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG +chrX 25013529 25013565 GCN STRchive +chrX 25013529 25013565 GCN TRGT +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG +GCGTGTCCCA G GCC GCG GCG GCC GCG GCC GCG GCT GCC GCG GCG GC CCCTGCGCCG EIEE1_ARX -chrX 25013649 25013697 NGC,GCN STRchive -chrX 25013649 25013697 GCN,NGC TRGT -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG -CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG +chrX 25013649 25013697 GCN STRchive +chrX 25013649 25013697 GCN TRGT +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG +CCGTGGCCGT G GCG GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GC ACCCTGAAGG DMD_DMD chrX 31284557 31284605 TTC STRchive @@ -443,10 +443,10 @@ AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 67545316 67545419 GCA,CAG STRchive -chrX 67545316 67545419 CAG,GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCC CAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 67545316 67545419 CAG STRchive +chrX 67545316 67545419 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 71453054 71453131 AGAGGG STRchive @@ -461,10 +461,10 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 140504316 140504361 NGC,GCN STRchive -chrX 140504316 140504361 GCN,NGC TRGT -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC -CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC +chrX 140504316 140504361 GCN STRchive +chrX 140504316 140504361 GCN TRGT +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC +CCGGACTGCT G GCG GCA GCG GCT GCG GCC GCG GCA GCG GCG GCG GCG GCC GCG GC ACCGGGAGGC FXS_FMR1 chrX 147912049 147912111 CGG STRchive @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 148500604 148500753 GCC,CCG STRchive -chrX 148500604 148500753 CCG,GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCG GCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC C CCG CCG CCG CT GCC GCC GCC G GCCCGCAGCC +chrX 148500604 148500753 CCG STRchive +chrX 148500604 148500753 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC From 9ddb2f667dbf59342336c226657ef55997828cc3 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 27 May 2026 15:19:03 -0600 Subject: [PATCH 05/16] normalize motifs based on gene orientation, and overwrite ref orientation motif if needed --- scripts/check-loci.py | 163 +++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 83 deletions(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index d801634c..c7977497 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -104,7 +104,32 @@ def circular_permuted(x): modified_sequences.extend([x[i:] + x[:i] for i in range(n)]) return modified_sequences -standard_motif_arrangements = [ +def normalise_str(in_dna): + """ + Args: + in_dna (sequence) + Returns: + the normalized output of the string + Find all possible equivalent STR sequences and return the first alphabetically for each + >>> normalise_str('ATAG') + 'AGAT' + >>> normalise_str('NGC') + 'CNG' + >>> normalise_str('AAG') + 'AAG' + """ + if in_dna is None or len(in_dna) == 0: + return '' + + all_possible = [] + for permuted_seq in circular_permuted(in_dna): + all_possible.append(permuted_seq) + + return min(all_possible) + +# Canonical motif reported in the literature, typically in the gene orientation. When there +# are equivalent motifs that are circular permutations of each other, use the canonical one. +CANONICAL_MOTIFS = [ "CAG", "CCG", "CGG", @@ -113,89 +138,87 @@ def circular_permuted(x): "TTTCA", "AAATG", ] - -def standardise_reference_motif(motif): +def standardise_motif(motif): """ Args: motif (str) Returns: str: motif rewritten to the preferred standard arrangement if possible - >>> standardise_reference_motif('GCC') + >>> standardise_motif('GCC') 'CCG' - >>> standardise_reference_motif('CGC') + >>> standardise_motif('CGC') 'CCG' - >>> standardise_reference_motif('CAG') + >>> standardise_motif('CAG') 'CAG' - >>> standardise_reference_motif('XYZ') + >>> standardise_motif('XYZ') 'XYZ' + >>> assert len(set([min(circular_permuted(motif)) for motif in CANONICAL_MOTIFS])) == len(CANONICAL_MOTIFS), f"Canonical motifs {CANONICAL_MOTIFS} are not all unique circular permutations" """ if motif is None or len(motif) == 0: return motif motif = motif.upper() - for standard_motif in standard_motif_arrangements: - standard_motif = standard_motif.upper() + for canonical_motif in CANONICAL_MOTIFS: + canonical_motif = canonical_motif.upper() - if len(motif) != len(standard_motif): + if len(motif) != len(canonical_motif): continue - if standard_motif in circular_permuted(motif): - return standard_motif + if canonical_motif in circular_permuted(motif): + return canonical_motif - return motif + return normalise_str(motif) -def normalise_str(in_dna): - """ - Args: - in_dna (sequence) - Returns: - the normalized output of the string - Find all possible equivalent STR sequences and return the first alphabetically for each - >>> normalise_str('ATAG') - 'AGAT' - >>> normalise_str('NGC') - 'CNG' - >>> normalise_str('AAG') - 'AAG' +def get_other_motif(reference_motif, gene_motif, gene_strand): """ - if in_dna is None or len(in_dna) == 0: - return '' + If only one of reference_motif or gene_motif is provided, infer the other from the gene strand. If both are provided, check that they are consistent with each other and the gene strand, and if they are inconsistent update the ref motif to match the gene motif. - all_possible = [] - for permuted_seq in circular_permuted(in_dna): - all_possible.append(permuted_seq) - - return min(all_possible) - -def get_new_motif(reference_motif, gene_strand): - """ Args: reference_motif (string) + gene_motif (string) gene_strand: either + or - Returns: - motif in gene orientation + (reference_motif, gene_motif) + + If gene_strand is +, gene orientation copies reference orientation. If gene_strand is -, gene orientation is the reverse complement of reference orientation. - >>> get_new_motif('CCG', '+') - 'CCG' - >>> get_new_motif('CCG', '-') - 'CGG' - >>> get_new_motif('CAG', '-') - 'CTG' - >>> get_new_motif('TAG', 'plus') + >>> get_other_motif('CCG', None, '+') + ('CCG', 'CCG') + >>> get_other_motif('CCG', None, '-') + ('CCG', 'CGG') + >>> get_other_motif('CAG', None, '-') + ('CAG', 'CTG') + >>> get_other_motif('TAG', None, 'plus') Traceback (most recent call last): ... AssertionError: Gene strand plus is not +/- """ - if gene_strand == "+": - return reference_motif - elif gene_strand == "-": - seq = Seq(reference_motif) - return str(seq.reverse_complement()) - else: - raise AssertionError(f'Gene strand {gene_strand} is not +/-') - + # If gene motif is missing, infer it from the reference motif and gene strand + if gene_motif is None or gene_motif == "" and reference_motif is not None and reference_motif != "": + if gene_strand == "+": + return reference_motif, reference_motif + elif gene_strand == "-": + seq = Seq(reference_motif) + return reference_motif, str(seq.reverse_complement()) + else: + raise AssertionError(f'Gene strand {gene_strand} is not +/-') + # Check the gene_motif against the canonical motifs + gene_motif = standardise_motif(gene_motif) + + # Infer the reference motif from the gene motif and gene strand + if gene_motif is not None and gene_motif != "": + if gene_strand == "+": + return gene_motif, gene_motif + elif gene_strand == "-": + seq = Seq(gene_motif) + return str(seq.reverse_complement()), gene_motif + else: + raise AssertionError(f'Gene strand {gene_strand} is not +/-') + + return reference_motif, gene_motif + def check_motif_orientation(record): """ Args: @@ -213,15 +236,15 @@ def check_motif_orientation(record): ] for ref_field, gene_field in field_pairs: + # If one in the pair is missing, infer it from the other. + # If both are present, ensure that they are consistent with each other and the gene strand, and update them if not. Gene motif will overwrite ref motif if they are inconsistent. + if record[ref_field] is None: continue - # 1. Standardize reference orientation old_ref_motifs = record[ref_field] - new_ref_motifs = [ - standardise_reference_motif(motif) - for motif in old_ref_motifs - ] + old_gene_motifs = record[gene_field] + new_ref_motifs, new_gene_motifs = get_other_motif(old_ref_motifs, old_gene_motifs, record['gene_strand']) if old_ref_motifs != new_ref_motifs: for old_motif, new_motif in zip(old_ref_motifs, new_ref_motifs): @@ -229,16 +252,8 @@ def check_motif_orientation(record): sys.stderr.write( f"Updating {record['id']} {ref_field} from {old_motif} to {new_motif}\n" ) - record[ref_field] = new_ref_motifs - # 2. Recompute gene orientation from the standardized reference orientation - old_gene_motifs = record[gene_field] - new_gene_motifs = [ - get_new_motif(motif, record['gene_strand']) - for motif in record[ref_field] - ] - if old_gene_motifs != new_gene_motifs: for old_motif, new_motif in zip(old_gene_motifs, new_gene_motifs): if old_motif != new_motif: @@ -246,24 +261,6 @@ def check_motif_orientation(record): f"Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n" ) record[gene_field] = new_gene_motifs - #3. Standardize reference repeat - if record['reference_motif_reference_orientation'] is not None: - old_reference_motifs = record['reference_motif_reference_orientation'] - new_reference_motifs = [ - standardise_reference_motif(motif) - for motif in old_reference_motifs - ] - - if old_reference_motifs != new_reference_motifs: - for old_motif, new_motif in zip(old_reference_motifs, new_reference_motifs): - if old_motif != new_motif: - sys.stderr.write( - f"Updating {record['id']} reference_motif_reference_orientation from {old_motif} to {new_motif}\n" - ) - - record['reference_motif_reference_orientation'] = new_reference_motifs - - # Replace locus_structure with a string of the motifs in reference orientation if record['locus_structure'] is None: From c1a6cc20783035963a306b8dadca3eed8f60c3a5 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 27 May 2026 15:20:57 -0600 Subject: [PATCH 06/16] go back to old data --- data/STRchive-loci.json | 266 ++++++++++++++++++++-------------------- 1 file changed, 133 insertions(+), 133 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 47110e5b..30e17c29 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -29,8 +29,8 @@ "year": "2023 [@pmid:39068203]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -95,8 +95,8 @@ "year": "1993 [@pmid:8334699]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -161,8 +161,8 @@ "year": "2014 [@pmid:24763282]", "location_in_gene": "Intron 3", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -227,12 +227,12 @@ "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", "location_in_gene": "Coding Exon 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["CAG"], - "pathogenic_motif_reference_orientation": ["CAG"], + "reference_motif_reference_orientation": ["GCA"], + "pathogenic_motif_reference_orientation": ["GCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -293,12 +293,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 110-115", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -359,12 +359,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -430,7 +430,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -496,10 +496,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["ATG", "TTG"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAT", "CAA"], + "interruption_gene_orientation": ["ATC", "AAC"], "locus_structure": [], "benign_min": 6, "benign_max": 35, @@ -565,7 +565,7 @@ "pathogenic_motif_gene_orientation": ["ATTCT"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "TTTCT", "ATTTTCT", "ATTCTCT", "GTTTCT", "CTTCT", "ATGCT"], + "interruption_gene_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "CTTTT", "ATTTTCT", "ATTCTCT", "CTGTTT", "CTCTT", "ATGCT"], "locus_structure": [], "benign_min": 10, "benign_max": 32, @@ -628,10 +628,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAA"], + "interruption_gene_orientation": ["AAC"], "locus_structure": [], "benign_min": 14, "benign_max": 28, @@ -694,10 +694,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG", "AGG"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAA", "CCT"], + "interruption_gene_orientation": ["AAC", "CCT"], "locus_structure": [], "benign_min": 11, "benign_max": 44, @@ -760,7 +760,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -839,7 +839,7 @@ "pathogenic_motif_gene_orientation": ["CTG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CCG", "CTA", "CTC", "CCA", "CTT"], + "interruption_gene_orientation": ["CCG", "ACT", "CCT", "ACC", "CTT"], "locus_structure": [ { "motif": "CTA", @@ -912,9 +912,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TGGAA", "TAGAA"], + "pathogenic_motif_gene_orientation": ["AATGG", "AATAG"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], + "unknown_motif_gene_orientation": ["AAAAA", "AAAAC", "AAATG", "AAAAG", "AAGAT", "AAACT", "AACAT", "AATAC", "AAATC", "AATGC"], "interruption_gene_orientation": [], "locus_structure": [], "benign_min": null, @@ -978,7 +978,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GGGGCC"], + "pathogenic_motif_gene_orientation": ["CCGGGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1044,7 +1044,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1176,7 +1176,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], + "pathogenic_motif_gene_orientation": ["ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1249,7 +1249,7 @@ "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCTG"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TCTG"], + "unknown_motif_gene_orientation": ["CTGT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -1328,7 +1328,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GAC"], + "pathogenic_motif_gene_orientation": ["ACG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1527,11 +1527,11 @@ "location_in_gene": "Intron 1 (most isoforms)", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["AAATG"], + "pathogenic_motif_reference_orientation": ["GAAAT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CATTT"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -1602,8 +1602,8 @@ "year": "2007 [@pmid:17236128]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["GGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1673,7 +1673,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GAA"], + "pathogenic_motif_gene_orientation": ["AAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1815,7 +1815,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TCGGCAGCGGCACAGCGAGG"], + "pathogenic_motif_gene_orientation": ["ACAGCGAGGTCGGCAGCGGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1876,8 +1876,8 @@ "year": "2026 [@pmid:39868092]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1944,13 +1944,13 @@ "gene_strand": "-", "reference_motif_reference_orientation": ["GAA"], "pathogenic_motif_reference_orientation": ["GAA"], - "benign_motif_reference_orientation": ["GGA", "CAG"], + "benign_motif_reference_orientation": ["GGA", "GCA"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GAG", "GAAGGA", "GAAGAAAGAA", "GAAAAGAAGAAGGAAGAAGGAA", "GAAAAGAAGAAGGAA", "GCAGAAGAAGAAGAA"], - "pathogenic_motif_gene_orientation": ["TTC"], - "benign_motif_gene_orientation": ["TCC", "CTG"], + "pathogenic_motif_gene_orientation": ["CTT"], + "benign_motif_gene_orientation": ["CCT", "CTG"], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CTC", "TCCTTC", "TTCTTTCTTC", "TTCCTTCTTCCTTCTTCTTTTC", "TTCCTTCTTCTTTTC", "TTCTTCTTCTTCTGC"], + "interruption_gene_orientation": ["CCT", "CCTTCT", "CTTCTTCTTT", "CCTTCTTCCTTCTTCTTTTCTT", "CCTTCTTCTTTTCTT", "CTGCTTCTTCTTCTT"], "locus_structure": [], "benign_min": 8, "benign_max": 179, @@ -2074,12 +2074,12 @@ "year": "2003 [@pmid:12529855]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2145,7 +2145,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GAA"], + "pathogenic_motif_gene_orientation": ["AAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2282,12 +2282,12 @@ "year": "2019 [@pmid:30970188]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["CAG"], - "pathogenic_motif_reference_orientation": ["CAG"], + "reference_motif_reference_orientation": ["GCA"], + "pathogenic_motif_reference_orientation": ["GCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2419,12 +2419,12 @@ "year": "2004 [@pmid:15385446]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2485,12 +2485,12 @@ "year": "2003 [@pmid:12676922]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2551,12 +2551,12 @@ "year": "2000 [@pmid:10839976]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2622,7 +2622,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2688,10 +2688,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAA"], + "interruption_gene_orientation": ["AAC"], "locus_structure": [ { "motif": "CAG", @@ -2830,8 +2830,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["CGC"], + "pathogenic_motif_reference_orientation": ["CGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2901,9 +2901,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TTTCA"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], + "unknown_motif_gene_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -2977,7 +2977,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAAA"], + "pathogenic_motif_gene_orientation": ["AAAC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3043,8 +3043,8 @@ "benign_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA"], - "benign_motif_gene_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], + "pathogenic_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], + "benign_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], "locus_structure": [], @@ -3109,7 +3109,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GGGCC"], + "pathogenic_motif_gene_orientation": ["CCGGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3170,15 +3170,15 @@ "year": "2019 [@pmid:30342764]", "location_in_gene": "Coding Exon 1/Intron 1 depending on transcript", "gene_strand": "+", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GCG"], + "pathogenic_motif_reference_orientation": ["GCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CAG", "CTG", "CCG"], + "interruption_reference_orientation": ["GCA", "GCT", "GCC"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAG", "CTG", "CCG"], + "interruption_gene_orientation": ["AGC", "CTG", "CCG"], "locus_structure": [], "benign_min": 6, "benign_max": 10, @@ -3241,10 +3241,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], - "pathogenic_motif_gene_orientation": ["GGCCTG"], + "pathogenic_motif_gene_orientation": ["CCTGGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], + "interruption_gene_orientation": ["CTGGG", "CCCTGGG", "CCGGG", "CCTTGG"], "locus_structure": [ { "motif": "GGCCTG", @@ -3312,15 +3312,15 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["GGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGA", "CAG"], + "interruption_reference_orientation": ["GGA", "AGC"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["GGA", "CAG"], + "interruption_gene_orientation": ["AGG", "AGC"], "locus_structure": [], "benign_min": 7, "benign_max": 37, @@ -3378,8 +3378,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "Exon 1 of lncRNA (noncoding)", "gene_strand": "+", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["GGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3449,7 +3449,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3515,7 +3515,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3581,7 +3581,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GTAAGACTGTCCTAACAGGTACCAAGGACACCGTCTGTAGTGGGGTGACTGGTGCCATGAATGTGGCCAAAGGAACCATCCAGACCGGCGTGGACACCA"], + "pathogenic_motif_gene_orientation": ["AAAGGAACCATCCAGACCGGCGTGGACACCAGTAAGACTGTCCTAACAGGTACCAAGGACACCGTCTGTAGTGGGGTGACTGGTGCCATGAATGTGGCC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3642,12 +3642,12 @@ "year": null, "location_in_gene": "Coding Exon 2", "gene_strand": "-", - "reference_motif_reference_orientation": ["CTG"], - "pathogenic_motif_reference_orientation": ["CTG"], + "reference_motif_reference_orientation": ["GCT"], + "pathogenic_motif_reference_orientation": ["GCT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3723,12 +3723,12 @@ "year": "1999 [@pmid:10581021]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CTG"], - "pathogenic_motif_reference_orientation": ["CTG"], + "reference_motif_reference_orientation": ["GCT"], + "pathogenic_motif_reference_orientation": ["GCT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3789,8 +3789,8 @@ "year": "2015 [@pmid:26005867]", "location_in_gene": "Coding Exon 5", "gene_strand": "+", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3860,7 +3860,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CCTCATGGTGGTGGCTGGGGGCAG"], + "pathogenic_motif_gene_orientation": ["AGCCTCATGGTGGTGGCTGGGGGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3936,10 +3936,10 @@ "benign_motif_reference_orientation": ["TTTTA"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GGGGT", "GGGAT"], - "pathogenic_motif_gene_orientation": ["TTTCA"], - "benign_motif_gene_orientation": ["TTTTA"], + "pathogenic_motif_gene_orientation": ["ATTTC"], + "benign_motif_gene_orientation": ["ATTTT"], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["GGGGT", "GGGAT"], + "interruption_gene_orientation": ["GGGGT", "ATGGG"], "locus_structure": [ { "motif": "TTTTA", @@ -4012,9 +4012,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "TTATG"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TTTCA"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "TTATG"], + "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4088,9 +4088,9 @@ "benign_motif_reference_orientation": ["AAAAG", "AAAGGG"], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "AGGTG", "ACGGG", "AAAAAG", "AAGGC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "GCCCT"], + "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "CCCTG"], "benign_motif_gene_orientation": ["CTTTT", "CCCTTT"], - "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "GTCTT", "ACCTT", "CCCCT", "CTCTT", "CCTTTT", "CGTTT", "CTGTT", "CACCT", "CCCGT", "CTTTTT", "GCCTT"], + "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "CTTGT", "ACCTT", "CCCCT", "CTCTT", "CCTTTT", "CGTTT", "CTGTT", "ACCTC", "CCCGT", "CTTTTT", "CCTTG"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4159,15 +4159,15 @@ "year": "2022 [@pmid:35148830]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GGC"], + "pathogenic_motif_reference_orientation": ["GGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TGG", "CGT", "AGG"], "pathogenic_motif_gene_orientation": ["CCG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CCA", "ACG", "CCT"], + "interruption_gene_orientation": ["ACC", "ACG", "CCT"], "locus_structure": [], "benign_min": 6, "benign_max": 16, @@ -4230,7 +4230,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4292,13 +4292,13 @@ "location_in_gene": "Intron 4/4", "gene_strand": "-", "reference_motif_reference_orientation": ["TAAAA"], - "pathogenic_motif_reference_orientation": ["AAATG"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "TAAAC", "TAACA", "TACAA", "TACAC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CATTT"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "GTTTA", "TGTTA", "TTGTA", "GTGTA"], + "unknown_motif_gene_orientation": ["TTTTT", "AGTTT", "ATGTT", "ATTGT", "AGTGT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4367,12 +4367,12 @@ "year": "2002 [@pmid:12428212]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "reference_motif_reference_orientation": ["NGC"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["NGC"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4438,9 +4438,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "AACTC", "AACTG", "AATAC", "AATAG", "ATAAC"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CATTT"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "GAGTT", "CAGTT", "GTATT", "CTATT", "GTTAT"], + "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "AGTTG", "AGTTC", "ATTGT", "ATTCT", "ATGTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -4575,8 +4575,8 @@ "year": "2026 [@pmid:41959811]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -4646,10 +4646,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAA"], + "interruption_gene_orientation": ["AAC"], "locus_structure": [], "benign_min": 25, "benign_max": 40, @@ -4712,7 +4712,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4844,10 +4844,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["CAG"], + "pathogenic_motif_gene_orientation": ["AGC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["CAA"], + "interruption_gene_orientation": ["AAC"], "locus_structure": [], "benign_min": 20, "benign_max": 38, @@ -4910,7 +4910,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TTTCA"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -5052,7 +5052,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GGCGCGGAGC"], + "pathogenic_motif_gene_orientation": ["AGCGGCGCGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5113,8 +5113,8 @@ "year": "2019 [@pmid:30554721]", "location_in_gene": "5' promoter region. Note, it can also be annotated coding or introntic depending on the reference, due to missing sequences in some reference genomes.", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5199,9 +5199,9 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "TGTTA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["TTTCA"], + "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], - "unknown_motif_gene_orientation": ["TTTTT", "TGTTA"], + "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], "locus_structure": [ { @@ -5270,8 +5270,8 @@ "year": "2023 [@pmid:38035881]", "location_in_gene": "Coding, Last Exon (exon number is transcript dependent)", "gene_strand": "-", - "reference_motif_reference_orientation": ["CCG"], - "pathogenic_motif_reference_orientation": ["CCG"], + "reference_motif_reference_orientation": ["GCC"], + "pathogenic_motif_reference_orientation": ["GCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5341,7 +5341,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5407,7 +5407,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["GCN"], + "pathogenic_motif_gene_orientation": ["CNG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5468,8 +5468,8 @@ "year": "2014 [@pmid:25196122]", "location_in_gene": "Intron 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["CGG"], - "pathogenic_motif_reference_orientation": ["CGG"], + "reference_motif_reference_orientation": ["GCG"], + "pathogenic_motif_reference_orientation": ["GCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], From 2fe34a0083d27d5f7f10453c5946d4b71affc84b Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 27 May 2026 15:29:33 -0600 Subject: [PATCH 07/16] handle motif lists --- scripts/check-loci.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index c7977497..00447c41 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -135,6 +135,7 @@ def normalise_str(in_dna): "CGG", "CTG", "GCN", + "CAA" "TTTCA", "AAATG", ] @@ -244,7 +245,23 @@ def check_motif_orientation(record): old_ref_motifs = record[ref_field] old_gene_motifs = record[gene_field] - new_ref_motifs, new_gene_motifs = get_other_motif(old_ref_motifs, old_gene_motifs, record['gene_strand']) + + assert isinstance(old_ref_motifs, list), f"{ref_field} should be a list in record {record['id']}" + assert isinstance(old_gene_motifs, list), f"{gene_field} should be a list in record {record['id']}" + + if len(old_ref_motifs) != len(old_gene_motifs): + # Add Nones to the shorter list so they are the same length + if len(old_ref_motifs) < len(old_gene_motifs): + old_ref_motifs = old_ref_motifs + [None] * (len(old_gene_motifs) - len(old_ref_motifs)) + else: + old_gene_motifs = old_gene_motifs + [None] * (len(old_ref_motifs) - len(old_gene_motifs)) + + new_ref_motifs = [] + new_gene_motifs = [] + for old_ref_motif, old_gene_motif in zip(old_ref_motifs, old_gene_motifs): + new_ref_motif, new_gene_motif = get_other_motif(old_ref_motif, old_gene_motif, record['gene_strand']) + new_ref_motifs.append(new_ref_motif) + new_gene_motifs.append(new_gene_motif) if old_ref_motifs != new_ref_motifs: for old_motif, new_motif in zip(old_ref_motifs, new_ref_motifs): From 762a60e2b6177b848249dbc992c4b42abac1863c Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 27 May 2026 15:45:59 -0600 Subject: [PATCH 08/16] normalise ref and leave others unchanged --- scripts/check-loci.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index 00447c41..00766a91 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -167,7 +167,7 @@ def standardise_motif(motif): if canonical_motif in circular_permuted(motif): return canonical_motif - return normalise_str(motif) + return motif def get_other_motif(reference_motif, gene_motif, gene_strand): """ @@ -279,6 +279,16 @@ def check_motif_orientation(record): ) record[gene_field] = new_gene_motifs + # Update the reference motif to canonical + old_ref = record['reference_motif_reference_orientation'] + new_ref = [] + for motif in old_ref: + new_motif = standardise_motif(motif) + if motif != new_motif: + sys.stderr.write(f"Updating {record['id']} reference motif from {motif} to {new_motif}\n") + new_ref.append(new_motif) + record['reference_motif_reference_orientation'] = new_ref + # Replace locus_structure with a string of the motifs in reference orientation if record['locus_structure'] is None: record['locus_structure'] = [] From 0c93a927e927256d3996d9776c2951b9985fe456 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 27 May 2026 15:49:32 -0600 Subject: [PATCH 09/16] update motifs --- data/STRchive-loci.json | 246 ++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 30e17c29..a1ce318e 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -29,8 +29,8 @@ "year": "2023 [@pmid:39068203]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -95,8 +95,8 @@ "year": "1993 [@pmid:8334699]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -161,8 +161,8 @@ "year": "2014 [@pmid:24763282]", "location_in_gene": "Intron 3", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -227,12 +227,12 @@ "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", "location_in_gene": "Coding Exon 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -293,12 +293,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 110-115", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -359,12 +359,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -430,7 +430,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -495,8 +495,8 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["ATG", "TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["GAT", "GTT"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["ATC", "AAC"], @@ -561,7 +561,7 @@ "pathogenic_motif_reference_orientation": ["ATTCT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "TTTCT", "ATTTTCT", "ATTCTCT", "GTTTCT", "CTTCT", "ATGCT"], + "interruption_reference_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "CTTTT", "ATTTTCT", "ATTCTCT", "CTGTTT", "CTCTT", "ATGCT"], "pathogenic_motif_gene_orientation": ["ATTCT"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -627,8 +627,8 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["GTT"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["AAC"], @@ -693,8 +693,8 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["TTG", "AGG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["GTT", "AGG"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["AAC", "CCT"], @@ -760,7 +760,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -835,7 +835,7 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CCG", "CTA", "CTC", "CCA", "CTT"], + "interruption_reference_orientation": ["CCG", "ACT", "CCT", "ACC", "CTT"], "pathogenic_motif_gene_orientation": ["CTG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -908,9 +908,9 @@ "location_in_gene": "Intron 4/4", "gene_strand": "+", "reference_motif_reference_orientation": ["AATAA"], - "pathogenic_motif_reference_orientation": ["TGGAA", "TAGAA"], + "pathogenic_motif_reference_orientation": ["AATGG", "AATAG"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AAAAG", "AAGAT", "AAACT", "AACAT", "AATAC", "AAATC", "AATGC"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["AATGG", "AATAG"], "benign_motif_gene_orientation": [], @@ -974,7 +974,7 @@ "location_in_gene": "Intron 1 or 5' UTR depending on transcript", "gene_strand": "-", "reference_motif_reference_orientation": ["GGCCCC"], - "pathogenic_motif_reference_orientation": ["GGCCCC"], + "pathogenic_motif_reference_orientation": ["CCCCGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1044,7 +1044,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1172,7 +1172,7 @@ "location_in_gene": "Exon 11", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], - "pathogenic_motif_reference_orientation": ["GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], + "pathogenic_motif_reference_orientation": ["ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1245,7 +1245,7 @@ "reference_motif_reference_orientation": ["CAGG"], "pathogenic_motif_reference_orientation": ["CAGG"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["CAGA"], + "unknown_motif_reference_orientation": ["ACAG"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCTG"], "benign_motif_gene_orientation": [], @@ -1324,7 +1324,7 @@ "location_in_gene": "Coding Exon 13", "gene_strand": "-", "reference_motif_reference_orientation": ["GTC"], - "pathogenic_motif_reference_orientation": ["GTC"], + "pathogenic_motif_reference_orientation": ["CGT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1602,8 +1602,8 @@ "year": "2007 [@pmid:17236128]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1669,7 +1669,7 @@ "location_in_gene": "Intron 62", "gene_strand": "-", "reference_motif_reference_orientation": ["TTC"], - "pathogenic_motif_reference_orientation": ["TTC"], + "pathogenic_motif_reference_orientation": ["CTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1811,7 +1811,7 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["CCTCGCTGTGCCGCTGCCGA"], - "pathogenic_motif_reference_orientation": ["CCTCGCTGTGCCGCTGCCGA"], + "pathogenic_motif_reference_orientation": ["GCCGCTGCCGACCTCGCTGT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1876,8 +1876,8 @@ "year": "2026 [@pmid:39868092]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1943,10 +1943,10 @@ "location_in_gene": "Intron 1", "gene_strand": "-", "reference_motif_reference_orientation": ["GAA"], - "pathogenic_motif_reference_orientation": ["GAA"], - "benign_motif_reference_orientation": ["GGA", "GCA"], + "pathogenic_motif_reference_orientation": ["AAG"], + "benign_motif_reference_orientation": ["AGG", "CAG"], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GAG", "GAAGGA", "GAAGAAAGAA", "GAAAAGAAGAAGGAAGAAGGAA", "GAAAAGAAGAAGGAA", "GCAGAAGAAGAAGAA"], + "interruption_reference_orientation": ["AGG", "AGAAGG", "AAAGAAGAAG", "AAGAAAAGAAGAAGGAAGAAGG", "AAGAAAAGAAGAAGG", "AAGAAGAAGAAGCAG"], "pathogenic_motif_gene_orientation": ["CTT"], "benign_motif_gene_orientation": ["CCT", "CTG"], "unknown_motif_gene_orientation": [], @@ -2074,12 +2074,12 @@ "year": "2003 [@pmid:12529855]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2141,7 +2141,7 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GAA"], - "pathogenic_motif_reference_orientation": ["GAA"], + "pathogenic_motif_reference_orientation": ["AAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2282,12 +2282,12 @@ "year": "2019 [@pmid:30970188]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2419,12 +2419,12 @@ "year": "2004 [@pmid:15385446]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2485,12 +2485,12 @@ "year": "2003 [@pmid:12676922]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2551,12 +2551,12 @@ "year": "2000 [@pmid:10839976]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2622,7 +2622,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2687,8 +2687,8 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["AAC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["AAC"], @@ -2830,8 +2830,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CGC"], - "pathogenic_motif_reference_orientation": ["CGC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2897,9 +2897,9 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["TTTCA"], + "pathogenic_motif_reference_orientation": ["ATTTC"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], + "unknown_motif_reference_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], @@ -2973,7 +2973,7 @@ "location_in_gene": "Non-coding", "gene_strand": "-", "reference_motif_reference_orientation": ["TTTG"], - "pathogenic_motif_reference_orientation": ["TTTG"], + "pathogenic_motif_reference_orientation": ["GTTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3039,8 +3039,8 @@ "location_in_gene": "Coding Exon 2", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG"], - "pathogenic_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA"], - "benign_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], + "pathogenic_motif_reference_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], + "benign_motif_reference_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], @@ -3105,7 +3105,7 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GGGCC"], - "pathogenic_motif_reference_orientation": ["GGGCC"], + "pathogenic_motif_reference_orientation": ["CCGGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3170,15 +3170,15 @@ "year": "2019 [@pmid:30342764]", "location_in_gene": "Coding Exon 1/Intron 1 depending on transcript", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GCA", "GCT", "GCC"], + "interruption_reference_orientation": ["CAG", "CTG", "CCG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGC", "CTG", "CCG"], + "interruption_gene_orientation": ["CAG", "CTG", "CCG"], "locus_structure": [], "benign_min": 6, "benign_max": 10, @@ -3237,10 +3237,10 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCCTG"], - "pathogenic_motif_reference_orientation": ["GGCCTG"], + "pathogenic_motif_reference_orientation": ["CCTGGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], + "interruption_reference_orientation": ["CTGGG", "CCCTGGG", "CCGGG", "CCTTGG"], "pathogenic_motif_gene_orientation": ["CCTGGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -3312,15 +3312,15 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGA", "AGC"], + "interruption_reference_orientation": ["AGG", "CAG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGG", "AGC"], + "interruption_gene_orientation": ["AGG", "CAG"], "locus_structure": [], "benign_min": 7, "benign_max": 37, @@ -3378,8 +3378,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "Exon 1 of lncRNA (noncoding)", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3449,7 +3449,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3511,11 +3511,11 @@ "location_in_gene": "Coding Exon 3", "gene_strand": "-", "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3577,7 +3577,7 @@ "location_in_gene": "Coding Exon 3", "gene_strand": "-", "reference_motif_reference_orientation": ["TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC"], - "pathogenic_motif_reference_orientation": ["TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC"], + "pathogenic_motif_reference_orientation": ["GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3642,12 +3642,12 @@ "year": null, "location_in_gene": "Coding Exon 2", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3723,12 +3723,12 @@ "year": "1999 [@pmid:10581021]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3789,8 +3789,8 @@ "year": "2015 [@pmid:26005867]", "location_in_gene": "Coding Exon 5", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3856,7 +3856,7 @@ "location_in_gene": "Coding Exon 2", "gene_strand": "+", "reference_motif_reference_orientation": ["GGTGGTGGCTGGGGGCAGCCTCAT"], - "pathogenic_motif_reference_orientation": ["CCTCATGGTGGTGGCTGGGGGCAG"], + "pathogenic_motif_reference_orientation": ["AGCCTCATGGTGGTGGCTGGGGGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3932,10 +3932,10 @@ "location_in_gene": "Intron 4", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["TTTCA"], - "benign_motif_reference_orientation": ["TTTTA"], + "pathogenic_motif_reference_orientation": ["ATTTC"], + "benign_motif_reference_orientation": ["ATTTT"], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGGGT", "GGGAT"], + "interruption_reference_orientation": ["GGGGT", "ATGGG"], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": ["ATTTT"], "unknown_motif_gene_orientation": [], @@ -4008,9 +4008,9 @@ "location_in_gene": "Intron 14", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["TTTCA"], + "pathogenic_motif_reference_orientation": ["ATTTC"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["TTTTT", "TTATG"], + "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], @@ -4084,9 +4084,9 @@ "location_in_gene": "Intron 2", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAG"], - "pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AAAGG", "AGGGC"], + "pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AAAGG", "CAGGG"], "benign_motif_reference_orientation": ["AAAAG", "AAAGGG"], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "AGGTG", "ACGGG", "AAAAAG", "AAGGC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "ACAAG", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "GAGGT", "ACGGG", "AAAAAG", "CAAGG"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "CCCTG"], "benign_motif_gene_orientation": ["CTTTT", "CCCTTT"], @@ -4159,11 +4159,11 @@ "year": "2022 [@pmid:35148830]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["TGG", "CGT", "AGG"], + "interruption_reference_orientation": ["GGT", "CGT", "AGG"], "pathogenic_motif_gene_orientation": ["CCG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -4230,7 +4230,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4292,9 +4292,9 @@ "location_in_gene": "Intron 4/4", "gene_strand": "-", "reference_motif_reference_orientation": ["TAAAA"], - "pathogenic_motif_reference_orientation": ["TGAAA"], + "pathogenic_motif_reference_orientation": ["GAAAT"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "TAAAC", "TAACA", "TACAA", "TACAC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAACT", "AACAT", "ACAAT", "ACACT"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], @@ -4367,12 +4367,12 @@ "year": "2002 [@pmid:12428212]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4434,9 +4434,9 @@ "location_in_gene": "Intron 1", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["AAATG"], + "pathogenic_motif_reference_orientation": ["GAAAT"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "AACTC", "AACTG", "AATAC", "AATAG", "ATAAC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "CAACT", "GAACT", "ACAAT", "AGAAT", "AACAT"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], @@ -4575,8 +4575,8 @@ "year": "2026 [@pmid:41959811]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -4645,8 +4645,8 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["AAC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["AAC"], @@ -4712,7 +4712,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4843,8 +4843,8 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["AAC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["AAC"], @@ -4906,7 +4906,7 @@ "location_in_gene": "Intron 1/23", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["TTTCA"], + "pathogenic_motif_reference_orientation": ["ATTTC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT"], "interruption_reference_orientation": [], @@ -5048,7 +5048,7 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCGCGGAGC"], - "pathogenic_motif_reference_orientation": ["GGCGCGGAGC"], + "pathogenic_motif_reference_orientation": ["AGCGGCGCGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5113,8 +5113,8 @@ "year": "2019 [@pmid:30554721]", "location_in_gene": "5' promoter region. Note, it can also be annotated coding or introntic depending on the reference, due to missing sequences in some reference genomes.", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5195,9 +5195,9 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["TTTCA"], + "pathogenic_motif_reference_orientation": ["ATTTC"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["TTTTT", "TGTTA"], + "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ATTTC"], "benign_motif_gene_orientation": [], @@ -5270,8 +5270,8 @@ "year": "2023 [@pmid:38035881]", "location_in_gene": "Coding, Last Exon (exon number is transcript dependent)", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5341,7 +5341,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5407,7 +5407,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5468,8 +5468,8 @@ "year": "2014 [@pmid:25196122]", "location_in_gene": "Intron 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], From 0ccf931ea4e2a77a6a0957af7bec519724c9b8f0 Mon Sep 17 00:00:00 2001 From: hdashnow <3794821+hdashnow@users.noreply.github.com> Date: Wed, 27 May 2026 21:56:52 +0000 Subject: [PATCH 10/16] Update data --- .../STRchive-disease-loci.T2T-chm13.TRGT.bed | 64 +++---- ...STRchive-disease-loci.T2T-chm13.atarva.bed | 36 ++-- ...chive-disease-loci.T2T-chm13.atarva.bed.gz | Bin 1834 -> 1835 bytes ...e-disease-loci.T2T-chm13.atarva.bed.gz.tbi | Bin 4452 -> 4452 bytes ...TRchive-disease-loci.T2T-chm13.general.bed | 64 +++---- ...STRchive-disease-loci.T2T-chm13.longTR.bed | 64 +++---- ...TRchive-disease-loci.T2T-chm13.straglr.bed | 34 ++-- ...chive-disease-loci.T2T-chm13.stranger.json | 92 +++++----- .../STRchive-disease-loci.hg19.TRGT.bed | 64 +++---- .../STRchive-disease-loci.hg19.atarva.bed | 36 ++-- .../STRchive-disease-loci.hg19.atarva.bed.gz | Bin 1855 -> 1852 bytes ...Rchive-disease-loci.hg19.atarva.bed.gz.tbi | Bin 4460 -> 4461 bytes .../STRchive-disease-loci.hg19.general.bed | 64 +++---- .../STRchive-disease-loci.hg19.longTR.bed | 64 +++---- .../STRchive-disease-loci.hg19.straglr.bed | 34 ++-- .../STRchive-disease-loci.hg19.stranger.json | 92 +++++----- .../STRchive-disease-loci.hg38.TRGT.bed | 64 +++---- .../STRchive-disease-loci.hg38.atarva.bed | 36 ++-- .../STRchive-disease-loci.hg38.atarva.bed.gz | Bin 1840 -> 1837 bytes ...Rchive-disease-loci.hg38.atarva.bed.gz.tbi | Bin 4417 -> 4418 bytes .../STRchive-disease-loci.hg38.general.bed | 64 +++---- .../STRchive-disease-loci.hg38.longTR.bed | 64 +++---- .../STRchive-disease-loci.hg38.straglr.bed | 34 ++-- .../STRchive-disease-loci.hg38.stranger.json | 92 +++++----- data/ref-alleles/ref-alleles.T2T-chm13.txt | 172 +++++++++--------- data/ref-alleles/ref-alleles.hg19.txt | 172 +++++++++--------- data/ref-alleles/ref-alleles.hg38.txt | 172 +++++++++--------- 27 files changed, 789 insertions(+), 789 deletions(-) diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed index 76f3fbec..7aa0ae8c 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed @@ -1,38 +1,38 @@ -chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= -chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= +chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176581179 176581224 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63956302 63956345 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= -chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= +chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= +chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= -chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= +chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= +chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16200188 16200282 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45257567 45257618 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 171935458 171935569 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= -chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= -chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= -chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= +chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= -chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119226662 119226696 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6947903 6947941 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= @@ -41,40 +41,40 @@ chr12 111575873 111575940 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 69361213 69361270 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99196358 99196404 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= +chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 17522488 17522519 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 86300519 86300603 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 32225152 32225178 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= +chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 73638636 73638724 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 93675723 93675776 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 821235 821905 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55789233 55789288 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4494212 4497342 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4494212 4497342 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13333136 13333176 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14622655 14622692 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18921630 18921645 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18921630 18921645 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 48597739 48597756 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2683189 2683248 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4738606 4738705 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2683189 2683248 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4738606 4738705 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 42132054 42132091 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 20143615 20143660 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38781587 38781680 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46280059 46280134 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=GCN;STRUC= -chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= -chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= +chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 69887153 69887230 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 135876774 135876804 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=GCN;STRUC= +chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 146176677 146176769 ID=FXS_FMR1;MOTIFS=CGG;STRUC= chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed index 8114fb59..f9028103 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed @@ -1,10 +1,10 @@ #chrom start stop motif motif_len id -chr1 870158 870178 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 870158 870178 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57245970 57245973 GAAAT 5 SCA37_DAB1 chr1 94266544 94266567 CCG 3 OPDM5_ABCD3 chr1 148519695 148519738 CGG 3 NIID_NOTCH2NLC -chr1 154328121 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 155728131 155728159 GGGCC 5 NME_NAXE +chr1 154328121 154330802 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 155728131 155728159 CCGGG 5 NME_NAXE chr2 96703674 96703677 AAATG 5 FAME2_STARD7 chr2 100563685 100563738 CCG 3 FRA2A_AFF3 chr2 176581179 176581224 GCN 3 SD5_HOXD13 @@ -14,12 +14,12 @@ chr3 63956333 63956345 CCG 3 SCA7_ATXN7_flank chr3 131917482 131917557 CAGG 4 DM2_CNBP chr3 131917557 131917597 CAGA 4 DM2_CNBP_flank chr3 131917597 131917635 CA 2 DM2_CNBP_flank -chr3 141687011 141687054 GCN 3 BPES_FOXL2 +chr3 141687011 141687054 NGC 3 BPES_FOXL2 chr3 186521702 186521706 TTTCA 5 FAME4_YEATS2 chr4 3073603 3073681 CAG 3 HD_HTT chr4 3073687 3073723 CCG 3 HD_HTT_flank chr4 39318132 39318136 AAGGG 5 CANVAS_RFC1 -chr4 41719745 41719805 GCN 3 CCHS_PHOX2B +chr4 41719745 41719805 NGC 3 CCHS_PHOX2B chr4 162693388 162693405 TTTCA 5 FAME7_RAPGEF2 chr5 10295585 10295593 TTTCA 5 FAME3_MARCHF6 chr5 147414733 147414780 CTG 3 SCA12_PPP2R2B @@ -28,13 +28,13 @@ chr6 13201716 13201843 CCG 3 OPDM_TBC1D7 chr6 16200188 16200282 CTG 3 SCA1_ATXN1 chr6 45257567 45257618 GCN 3 CCD_RUNX2 chr6 171935458 171935569 CAG 3 SCA17_TBP -chr7 27335684 27335720 GCN 3 HFG_HOXA13-III -chr7 27335813 27335849 GCN 3 HFG_HOXA13-II -chr7 27335912 27335954 GCN 3 HFG_HOXA13-I +chr7 27335684 27335720 NGC 3 HFG_HOXA13-III +chr7 27335813 27335849 NGC 3 HFG_HOXA13-II +chr7 27335912 27335954 NGC 3 HFG_HOXA13-I chr7 56047900 56047939 CGG 3 FRA7A_ZNF713 chr8 105716409 105716441 CCG 3 OPDM1_LRP12 chr8 119495347 119495353 TGAAA 5 FAME1_SAMD12 -chr9 27584063 27584155 GGCCCC 6 FTDALS1_C9orf72 +chr9 27584063 27584155 CCCCGG 6 FTDALS1_C9orf72 chr9 81210818 81210834 A 1 FRDA_FXN_flank chr9 81210834 81210861 GAA 3 FRDA_FXN chr9 142886568 142886595 CCG 3 HSAN-VIII_PRDM12 @@ -48,29 +48,29 @@ chr12 123532573 123532603 CGG 3 OPDM4_RILPL1 chr13 69361213 69361243 CTA 3 SCA8_ATXN8OS_flank chr13 69361243 69361270 CTG 3 SCA8_ATXN8OS chr13 99196358 99196404 GCN 3 HPE5_ZIC2 -chr13 101377549 101377792 GAA 3 SCA27B_FGF14 +chr13 101377549 101377792 AAG 3 SCA27B_FGF14 chr14 17522488 17522519 GCN 3 OPMD_PABPN1 chr14 86300519 86300603 CTG 3 SCA3_ATXN3 chr15 20458510 20458536 CGG 3 ALS1_NIPA1 chr15 32225152 32225178 CT 2 aFTLD-U_GOLGA8A -chr15 86324038 86324057 TTTG 4 CHNG3_MIR7-2 +chr15 86324038 86324057 GTTT 4 CHNG3_MIR7-2 chr15 87088402 87088408 GCT 3 CPEO_POLG_flank chr15 87088408 87088411 GTT 3 CPEO_POLG_flank chr15 87088411 87088452 GCT 3 CPEO_POLG chr16 17477909 17478002 GCC 3 DBQD2_XYLT1 chr16 24890416 24890430 TTTCA 5 FAME6_TNRC6A -chr16 72284666 72284761 TGGAA 5 SCA31_BEAN1 +chr16 72284666 72284761 AATGG 5 SCA31_BEAN1 chr16 73638636 73638724 CAG 3 SCA_THAP11 chr16 78605502 78605569 CCG 3 SCA4_ZFHX3 chr16 93675723 93675776 CTG 3 HDL2_JPH3 chr17 17755051 17755053 TTTCA 5 FAME8_RAI1 -chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 81047404 81047534 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 821235 821905 GATGGT 6 CPUM_TYMS chr18 55789233 55789288 CAG 3 FECD3_TCF4 -chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4494212 4497342 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13333136 13333176 CTG 3 SCA6_CACNA1A chr19 14622655 14622692 CCG 3 OPDM2_GIPC1 -chr19 18921630 18921645 GTC 3 EDM1-PSACH_COMP +chr19 18921630 18921645 CGT 3 EDM1-PSACH_COMP chr19 48597739 48597756 CAG 3 DM1_DMPK chr20 2683189 2683230 GGCCTG 6 SCA36_NOP56 chr20 2683230 2683248 CGCCTG 6 SCA36_NOP56_flank @@ -80,13 +80,13 @@ chr21 42132054 42132091 CGCGGGGCGGGG 12 EPM1_CSTB chr22 20143615 20143660 GCN 3 TOF_TBX1 chr22 38781587 38781680 CCG 3 EPM_CSNK1E chr22 46280059 46280134 ATTCT 5 SCA10_ATXN10 -chrX 24597766 24597802 GCN 3 PRTS_ARX -chrX 24597886 24597934 GCN 3 EIEE1_ARX +chrX 24597766 24597802 NGC 3 PRTS_ARX +chrX 24597886 24597934 NGC 3 EIEE1_ARX chrX 30882677 30882743 TTC 3 DMD_DMD chrX 30882743 30882751 T 1 DMD_DMD_flank chrX 65975147 65975250 CAG 3 SBMA_AR chrX 69887153 69887230 AGAGGG 6 XDP_TAF1 chrX 135876774 135876804 GCN 3 VACTERLX_ZIC3 -chrX 138816203 138816248 GCN 3 XLID_SOX3 +chrX 138816203 138816248 NGC 3 XLID_SOX3 chrX 146176677 146176769 CGG 3 FXS_FMR1 chrX 146765190 146765342 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz index 4d84dd9f68f14fc057d5f1d7e3d9792cebe3ae3a..41a7179685aeb903988768423ba9919a387be942 100644 GIT binary patch delta 1752 zcmV;}1}FKd4yz6kABzYC000000RIL6LPG)o4v`Tre~oVGw0%VJS}gwYkG~MSQ)qm> zIS+A4)0E@_sNga$n=9Y`9`Uz7#5Inz2~HirFvS=%7}!%}SB7}%X3%7<(Yk|mkkSMU z_jQ;}iKjGh6H$AE!3NXGu?KlO!*ab2Wx3~cq2-pkZ8VcdYQ)Mbf|z1Tahg(+Kehv)2(_0HfR?Vx4TA2QJi60Z?VL@ zVAqV&)~jF|ARccYw}=$Wa9!!tRYWw-c*mWBe^!vP$a{)ZVty=isKkcJh+4@iezt0u zb=j`B_13B&HDrtQ1kI3gY^C~Is=x2$&wu^??|*)JKp)^pY#x&k`EX#H6tH%J7ywOkx=OJYv_C2e z#14l3x9sX!K&PDxDpc@FiI-&7p8hsae|dbrW$Y$tAC!IqE66#g2nNPDpKZy@mTc^@ zBl6nX4o;S9%ylWsIAA%Q<8l=2*s{0W!IAC@XHT)@OI-0OdaI04)WL=xo%R%U`7vG| z+{M7w8Asf~***s@!;Q+9_0l{C=$tXi-~s%pNxp3Hie2@nw3EqP!LZEgG^H%pe-@fj zo-&YvEvC=Ydm<8B`LR{3{27y87!preJ=U?ym!qQ=L0c!@V@uMcuoVCoW`~o z0%#pHA<5mbCXlEYQmikUP$7zby%sHmqg_vqY&hkO6 zXniO{2rzH&Yy4K$+jTi=g3QdwVYb7wqxTeTNii)EDZmtPrm`9DrT4$_yQtlP5wTn2N!J7roK`Bw$# zX94w51Wm}KjP4LnYFu@>01}+n8B5ypsXjqK3N_mrtOcy<-J9=Y$)|q@c8$IBXuNA+ zz3St2UgwhbdZsDpIZ5tdvP-K)n3$aGDg zy^sE5K*oSh%S5PTjFWOF&>-s|EvNNWl&?F5D#jP_=W<1AWZ6K9K);qXTFD&oAfsWK zBMNhJyVx%tL&oS@S*1*cK?6~#rj|Av4luOPph76Q)ECIT8 zRWm+nsDT5cN2UX;qy}~WNOAcV_p*&GUTDzhFgR@*cx8H1Qo8JAz1$xyo6>-mY+#lo zqLNnTQ(Iiw;yRtj+89~<6pZl)gwLA;mpN|fGTJU{r%^lRgZzI>hJ;ksb)~IU!~i<4 zf|azh)2Coq#yDf(>PlcGg$_G-&97!YC2Ud4N}H6z);Oie)64tYyl$R0WSnI76)gI` zq*5Bn_P#u*7w0gr(Z+YGyKOuQbS1ZW>VO=xv0gWDqRR<0q>9mXG$1=JWggu}#4y~4Cr;+67&)&px+g7BmV$i{` zb_YprAg882{8;ul56uoh&UmdXH$WpHpdC|S%tK8mQ~1wcf4c6=QAl~o3@S(%0)%lJ zVi_OH$2tffLIaC(bgPYPZnIF6_tsc{P&k6nWsl3-x9flPTGnltkNToE_91lFs$O^MHDobk;VILtPuAb<2B++vCOZzFUEhkvw0| zXZ+Auc~7!2S8!UETTMjgp$zlzBFT3R3_=Dxl#hOUfHlDx)UE*;bxhvV9827~6uft6 zxPk1XajH~N#=>J*OO7Kp5!rZWoztEC^HB3GrhMLR4>{QIA$u=tkPp(nxA<|p4rRYT u4pCclR+`+%+2_=K>}8xce0t6r462i)e9wI3ah809e)}&%@u>up3I`y!@OPU4 delta 1732 zcmV;#20Qty4yq0jABzYC000000RIL6LPG)o4UrKqe}h5u*gm3oEf)Xy$6pBEDKx$o z&cigNDNR$H5}<<1ylk#~`+LOS{t(wV(k3`{0K*hx%wS+okzE<$shdHQwMOd>)PPt(!6L*Ve;vyFp73~yapaMoWCF;u2@EXrxh(N3am-TGQk87gtp+a`f=+Mapl6Bep=1ygtM$}c-fX)zU?;98lz0&(S4`q#~wU4d_ ze{WeS5h~PCI9kfJrELWpAf=5rs5|p4B>#m*!blSO~LItnd#*|fifBM@%M)1 z@$%p<2DZ*P;ttOCHE=5>6#t<|Jb+U*?o#^-c0ql%5IK=vrg0e4S&8(-DW0$XTNC zlyj`1SZd2LK?pE!?`!;4*4uSCI$DCv%*eLGbD;MWZAmdL5#yn1OJt4d5a{J-p)y_9 z9>)Wq4}_Lg2W?==vLw&LjBo26)1{>QydHy-Zvz+rBw+*e~?R8eYK53!%skDx#bv}13T zZ6$ME!cophnSS{%D@Q)Z3CKbEGL&^&)}QNOuBB6^)scTyfqqs{A4SlFOv>mE0j0%N zmp_06=XJ)C_I#>O5RgL6wgzhftNQTf`&jbne}P?N?>rjs8d$ITe4W?1q<#5{=PqhX zUMUkAAZw6q(K8`b^?X1jGK1w{EGz0D!*zruS4H>g@d4U=M5XS1j2{Cs26S2$LM3CI z)GH^1?1QwN)>lz}?i8vRU&Np56{(SB11SRiTGnVKbHszpv2IW6c`xJraieZ}YY3g9 ze<#^bF;bWB=a~0uYhdMAPTX4hBLTV$<&yJ@Arqw>OMq@e)r^l8YT&@=k?8;nQ5Gsl zr*fAv*^~yfWCOD#5tXzupZem;7T4)K*3QW0 zr(ldfAbdU?xXf`&m(g}vJB`{gALL&$e$;ouVB$XhifR?`|_Y(ox{LJ8{es( zw(+RYmE6{;19Hs9dfmX$u|+TABoQzozg@4_7uO?q(BW6^iyDwRJQwJz+Dv1Wf27^2 z1BZ`v+Qsr?8C?s)9#9#FR!g$eNOJV&Xkxi-D^gc6=wMj8gCsYQTT`EYEPI@XW(Oc= zyjGSQpfS*{E?{BILro}C_|IQ|y6($WNO{Q&Do7XtgmD{U86V5XJ_sK|1B*RHKHO%Z zChx7W{-AIKq01hZw{O?$wXEAPe;@5dZO9w0t5vAhd)%&iLep+%Q=X#SzGOrR=KJv7 zmvpWdo(I&+q_eg`yVS`Xty|tp+#WBM_uUG7%;focKI4bR%6pQ7xq{QO-D)B_4`rB# z7fF6=U=T9kp?r+n1FQ+opmq(&sAKV-=2+s^rQp3o!wuvljZ=*>79PV|PI4TviO9w~ z>zwZ7pO>0%G3E1id&tFx57~R!gM5%aw%Fr#9m;-x9HO@9taQ1Nv(Kgb*vmL?`1G7L a7*r=o`JVa6<1G0I{q|oJ?6cRC3kM+NHE)*y diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi index 8475a70387f5e7814999dd994ab5ea6a67bb1098..e449153c3e15288e9922a3a24323df71a6461139 100644 GIT binary patch delta 218 zcmaE&^h9aHettWbM1j_e?;8~+y!j_4a*~UyVa*Xime>G>*Z{^N+i83OAsb59EM{m@ zvav~0Og|%NZoHG%=j!W49+Ol&CvA{VLG)x#@R9S-%q`g29v&(D#*SIZ*Le(JtFYt`{o)6@CaO+$AFhMM-R3VS^(&9-du zs(15VC6p~c7I@Xu{`u2Q%OdZeU9qbF>UTX)kX6X&_mgQr?vwrBUw;QNCNJR=W`F;S ObMmWQGbbMuSPTI8wQ#in delta 219 zcmaE&^h9aHettU^MuFCc^^O7_dDokiA{`kgOtxrRz|FOQTgzeI@k@+dTiv1-v`-v!@Yh{hlTrL1+EoXDhI|L1nx -chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= -chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= +chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176957786 176957831 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63898360 63898403 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 128891419 128891577 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= -chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= +chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= +chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr4 3076603 3076696 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= -chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= +chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= +chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327864 16327955 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45390487 45390538 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170870994 170871105 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= -chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= -chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= -chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= +chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= -chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119076999 119077033 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 7045879 7045938 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= @@ -41,40 +41,40 @@ chr12 112036753 112036823 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70713485 70713561 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 100637702 100637748 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= +chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 23790681 23790712 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92537354 92537396 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34711626 34711652 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= +chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67876765 67876853 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87637888 87637935 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 53253384 53253460 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4510739 4513671 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4510739 4513671 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13318672 13318712 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14606853 14606887 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18896844 18896860 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18896844 18896860 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 46273462 46273524 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2633378 2633421 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4680016 4680139 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2633378 2633421 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4680016 4680139 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 45196323 45196360 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19754285 19754330 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38713287 38713380 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46191234 46191304 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=GCN;STRUC= -chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= -chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= +chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 70672904 70672981 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 136648985 136649015 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=GCN;STRUC= +chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 146993567 146993629 ID=FXS_FMR1;MOTIFS=CGG;STRUC= chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed index 376471bb..26e5c179 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed @@ -1,10 +1,10 @@ #chrom start stop motif motif_len id -chr1 1371178 1371198 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 1371178 1371198 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57832750 57832793 GAAAT 5 SCA37_DAB1 chr1 94883977 94884000 CCG 3 OPDM5_ABCD3 chr1 145209323 145209354 CGG 3 NIID_NOTCH2NLC -chr1 155160981 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 156561557 156561575 GGGCC 5 NME_NAXE +chr1 155160981 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 156561557 156561575 CCGGG 5 NME_NAXE chr2 96862804 96862807 AAATG 5 FAME2_STARD7 chr2 100721260 100721286 CCG 3 FRA2A_AFF3 chr2 176957786 176957831 GCN 3 SD5_HOXD13 @@ -14,12 +14,12 @@ chr3 63898391 63898403 CCG 3 SCA7_ATXN7_flank chr3 128891419 128891499 CAGG 4 DM2_CNBP chr3 128891499 128891539 CAGA 4 DM2_CNBP_flank chr3 128891539 128891577 CA 2 DM2_CNBP_flank -chr3 138664861 138664904 GCN 3 BPES_FOXL2 +chr3 138664861 138664904 NGC 3 BPES_FOXL2 chr3 183430010 183430014 TTTCA 5 FAME4_YEATS2 chr4 3076603 3076654 CAG 3 HD_HTT chr4 3076660 3076696 CCG 3 HD_HTT_flank chr4 39350099 39350103 AAGGG 5 CANVAS_RFC1 -chr4 41747989 41748049 GCN 3 CCHS_PHOX2B +chr4 41747989 41748049 NGC 3 CCHS_PHOX2B chr4 160263763 160263770 TTTCA 5 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA 5 FAME3_MARCHF6 chr5 146258290 146258322 CTG 3 SCA12_PPP2R2B @@ -28,13 +28,13 @@ chr6 13328708 13328835 CCG 3 OPDM_TBC1D7 chr6 16327864 16327955 CTG 3 SCA1_ATXN1 chr6 45390487 45390538 GCN 3 CCD_RUNX2 chr6 170870994 170871105 CAG 3 SCA17_TBP -chr7 27239297 27239351 GCN 3 HFG_HOXA13-III -chr7 27239444 27239480 GCN 3 HFG_HOXA13-II -chr7 27239543 27239585 GCN 3 HFG_HOXA13-I +chr7 27239297 27239351 NGC 3 HFG_HOXA13-III +chr7 27239444 27239480 NGC 3 HFG_HOXA13-II +chr7 27239543 27239585 NGC 3 HFG_HOXA13-I chr7 55955293 55955332 CGG 3 FRA7A_ZNF713 chr8 105601198 105601227 CCG 3 OPDM1_LRP12 chr8 119379151 119379157 TGAAA 5 FAME1_SAMD12 -chr9 27573482 27573544 GGCCCC 6 FTDALS1_C9orf72 +chr9 27573482 27573544 CCCCGG 6 FTDALS1_C9orf72 chr9 71652186 71652202 A 1 FRDA_FXN_flank chr9 71652202 71652220 GAA 3 FRDA_FXN chr9 133556992 133557028 CCG 3 HSAN-VIII_PRDM12 @@ -48,29 +48,29 @@ chr12 124018267 124018297 CGG 3 OPDM4_RILPL1 chr13 70713485 70713515 CTA 3 SCA8_ATXN8OS_flank chr13 70713515 70713561 CTG 3 SCA8_ATXN8OS chr13 100637702 100637748 GCN 3 HPE5_ZIC2 -chr13 102813924 102814076 GAA 3 SCA27B_FGF14 +chr13 102813924 102814076 AAG 3 SCA27B_FGF14 chr14 23790681 23790712 GCN 3 OPMD_PABPN1 chr14 92537354 92537396 CTG 3 SCA3_ATXN3 chr15 23086363 23086389 CGG 3 ALS1_NIPA1 chr15 34711626 34711652 CT 2 aFTLD-U_GOLGA8A -chr15 89112664 89112683 TTTG 4 CHNG3_MIR7-2 +chr15 89112664 89112683 GTTT 4 CHNG3_MIR7-2 chr15 89876810 89876816 GCT 3 CPEO_POLG_flank chr15 89876816 89876819 GTT 3 CPEO_POLG_flank chr15 89876819 89876860 GCT 3 CPEO_POLG chr16 17564764 17564779 GCC 3 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA 5 FAME6_TNRC6A -chr16 66524299 66524369 TGGAA 5 SCA31_BEAN1 +chr16 66524299 66524369 AATGG 5 SCA31_BEAN1 chr16 67876765 67876853 CAG 3 SCA_THAP11 chr16 72821593 72821657 CCG 3 SCA4_ZFHX3 chr16 87637888 87637935 CTG 3 HDL2_JPH3 chr17 17711762 17711774 TTTCA 5 FAME8_RAI1 -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 78120808 78120938 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 666891 667632 GATGGT 6 CPUM_TYMS chr18 53253384 53253460 CAG 3 FECD3_TCF4 -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4510739 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13318672 13318712 CTG 3 SCA6_CACNA1A chr19 14606853 14606887 CCG 3 OPDM2_GIPC1 -chr19 18896844 18896860 GTC 3 EDM1-PSACH_COMP +chr19 18896844 18896860 CGT 3 EDM1-PSACH_COMP chr19 46273462 46273524 CAG 3 DM1_DMPK chr20 2633378 2633403 GGCCTG 6 SCA36_NOP56 chr20 2633403 2633421 CGCCTG 6 SCA36_NOP56_flank @@ -80,13 +80,13 @@ chr21 45196323 45196360 CGCGGGGCGGGG 12 EPM1_CSTB chr22 19754285 19754330 GCN 3 TOF_TBX1 chr22 38713287 38713380 CCG 3 EPM_CSNK1E chr22 46191234 46191304 ATTCT 5 SCA10_ATXN10 -chrX 25031646 25031682 GCN 3 PRTS_ARX -chrX 25031766 25031814 GCN 3 EIEE1_ARX +chrX 25031646 25031682 NGC 3 PRTS_ARX +chrX 25031766 25031814 NGC 3 EIEE1_ARX chrX 31302674 31302722 TTC 3 DMD_DMD chrX 31302722 31302730 T 1 DMD_DMD_flank chrX 66765158 66765261 CAG 3 SBMA_AR chrX 70672904 70672981 AGAGGG 6 XDP_TAF1 chrX 136648985 136649015 GCN 3 VACTERLX_ZIC3 -chrX 139586481 139586526 GCN 3 XLID_SOX3 +chrX 139586481 139586526 NGC 3 XLID_SOX3 chrX 146993567 146993629 CGG 3 FXS_FMR1 chrX 147582124 147582273 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz index e428e60fa43cabfb99c8f246a3e5adc599018639..a98b1bf283ff5efa91c1f3a6ff731ccc67edb69a 100644 GIT binary patch delta 1843 zcmV-32h8}t4!jP3ABzYC000000RIL6LPG)o9|wh(+m7S75kQ}7|D^#|70DtWY08ql zW66rdo_OYkSS+vzkRSo}{~rQgWP7p$q|q&&wvS2_FGceAfBcDnEQsP-=^cbAPE#xw zKn0h1*(87cd%$0QFSZfKBt~`c3{wapfq^{+mOO-0H@$X$j=e|)t(1ZoCt$d*!)$Um z#etiM+K}>0e2~#lp3bmbuR~t$DPCx~#cmseR(aQn6e|cZ#2BV2#`0ILQ%tdZSpO|Q zp-_m^l%`a^NKX|kLKxDQAM4Aih-V42T(36FC%6*x4`BKr9NqEl2+)*%0XV9Z#B$1Z`a#; zYgJ$nCCJ*D1_lkp6k1eQqq_T^|M=H$|NiIu4aIAJVDVLr=M7y&r7VjmI!HU1;sOkp z^=$I@Yow=14%w4#q?K3*;dnba<#8HOS6KtGcx4?n5TqXTzRp8l!)fj9tHG0DDFd>B zx39W#t!o>@28fljY`KH;y}`n5$GlwEK7S0b7DP+7gCU3s2!0#WZTR@i)BQFD7rN{` zB3Y|{J2-|Zr34z{Wtlk7*G)d?={m8ljK~HSrkH>suIGDR@3-l#6UFbpZB7{I_7=CjRd*`f_yw$_j) z?;Ei75JIYxvGfDx(>W|hU@K0nSks^cj>j;6$mtSRJVYT!XgoKtMtXkQW6=5gczJLa zomdvS>xQ4V=6!E|jP1+B3Na zbk19+b%&at%g{X!Gk&alh?gAi^LjLYh9zNbsevJVB;FbGFt1;VMo{9AQ~CgbvIb7Q zrz!6#Y=blxl@da->L6!(UB@j@$YC70n_|5Yaki6V?0IP)OupP710JSF(Z+b!$!go{ zISp%=@^ss)sDLapi>swpTst>SVC%%OdAYB~zNtm2B_YJ0sIR0z3bb`fp>3sqW+>w} zl%=E0*!ED5euvVq5cS6Y%P%DY$QcY)F$&#ctFSNky_IpZjm~PKny@Xm)TjXoyAUGX;`L`_D}11&*S}Zqi#CZtQ4vz z{g@+l{&o&&ueJtOVy)sB&+Fy>X!|J*Xcbu&%Lj@2;awzeVV%xneHjC&*x0h^ zX+S@&cbw<2#mi{BtSuQwUUg!6ONB^zU02##MNCZ=4t0?$8K$>nn8z@q=jz&$SjmZU z4IGhp5MlG^R!{Fw37ygkL_8EL*71@2la5GoG9;gBSd9h z;~yobHckadh<({^R$zm@`7h&CGGIXdxL&VsuGYGPQ$KUxRHM0rGe1$AX^c09JU8Id zmdA!#LjJyt(vq+PRK}`$VroS6z6fEyZ7Wh&(X)6o_#}Z;&P~04`!Vlf9-8fe#0YCg z4UE=s98_RTL)}nD|DQkqknT&_MWV9}ND%};7`Gwh@v*$-fqw}NNR`7|eYxf~3z?0v z-cut{d>_8K z9MAR0^8hl6v8-f&4IK48TDP?4usz-^vE6cbq?+^fe8!i?$cn7!20{cW)s5&p<<)MFy4NyjTm!Ptkz4B7t1kK1*~`~7h$h}wG3rkvR-Svrm&<73a` hyd6*1VjPt|YDB`X{mA27_9OJme*ranJKvKZ2O#YZl|BFf delta 1846 zcmV-62g&%n4!;h6ABzYC000000RIL6LPG)oA_s++(XQht5Y+eplN`U=n9KM20D)RKUTO6H6V^shiP%hQLu|2WyOhoF`y-EW_+- zI^}_zh}MybOni{DkSA5|*XvO4kCHERzvXT_hu*}{i3}TnEMUCp0=7#x7{NUV_lHAmyrYHq|UG|w^UQP zW7nJsF-Witf{|X?xF)1jhwDP8t|F3QFOE9}V#r11Ehnm}JnwX-DzWS`!$=2(k^XfkOwmq*iO#YYmUB{_(Hh{{7E?_Z!q};PKUrj~lv*#&{MnbdU`& zOEf-U13~JQK9+f?OFAumd@V$B zEE;sl=8`k@E>GQ_{TS4^XJ2-+|fZ(^W+=fqI>h!ox zDWoochloht+YW)Dlrn*Ze7R3t=j$q;=)(=-gGOWn4^u9{keBnLE|1&v_7dDn>JVWn z|JRNky!qd{ch3Wg^H#j=AW^5L#_j2E1J&oZTfuIU^<;yI9o*itoWQ}D=CiNmzGk1g z?7bsPF*aZyp-lR6ThhULI;ZeUtu_v}ekDe%-UKUM?x8>Bz)c z<)Zm`Dkbg7SZgaOLyR!5UzYT>F1PFbXbCc7@InoAT7FD6UvtX$gy~SNH~Y(N5aexf zp*mfcp2;I%2+;>?J80+@4axH`RT zV-FC}HE`;0n(9{4I!JTT7$GEY4hp`vbzBprn#Pg4DLy)p;5!Azo|m@4)yv~K;9*7- z>s$<-y!E}E)3BtePPc6r4UlJM343V`>*B6s>i3D`>;17f|E8+4mxPdiqP~)U0vWJA z7=yl*miG`&x=QNn*ME&1{hlTuf&A-Gm-W8oR=AE-pXmeLx)YArQjOyzH!juEswF5(-XVKL69j(0}jW=<-E)_Z}-o5 z?xOV=k=Qn1V?n1y-wUDH*9SCzDl_Pk5VVmEBEHg+?4rDTI=>b4eSBo4?!Cs30V#xt zR>Qnej9Ra*5IPU?{j~fb#zSZO!7!<$ydV_r4m)j|t@9N8HYV*@k{ zNdpxeJ-@|u^Q_}&Y>)E)s)pK!zXz!nh8pj?epR9>kZ>fK1;a zA8xad**PC0H4;PNm|0u8fBJg8UhA?B^U+>p7p-?1EJ}iOv{+AEZ9~(}XH%Jyp1*WN z3FgP}E!2G8k33I*AZHkLC2J6lP5H8xt)}()W{K}sz$4Y1ujezqG)6Y$#WoNkDEq2E z%tIaK;Z4%t8nA%^cqp%Nd+vKJhb}ZA|B<{;bE;`=k{Bh^dZqV+Md(=*#no{l5P k_}uC^ugBA^xIp@j8jHX-Ht%#$xC=Wl>hbv7%LVFD#zJY?@k@{f2@-JaJ_J;}W>l&)< zSXFOhRV|h^RO_*-He*%YENZBhV>O9CtZJC#e~eZ2xT<0DulaG+{YxE9=BfE{)oDN9 z`&A8-{@bytUN35x?6DlH>T>y!@e>U0&mBpUB*}yK>xP$;u>lwq^hRmJWquo3#<#cE`Z_0yG(7yC#L;o@!GUjj4h>ZWyt7I?m97Iw90)xxO$(5;fIU!_GloAzGS`L|(J;nfg;(9rKM>VEeBVL59Tx?r*)qsZh>0@VOQfpawg diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index 6e8ec293..db0aac51 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -1,39 +1,39 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease -chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT AAATG 31 AD Spinocerebellar ataxia type 37 +chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 +chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 -chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease -chr1 156561557 156561575 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy -chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 +chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease +chr1 156561557 156561575 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy +chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT GAAAT 274 AD Familial adult myoclonic epilepsy 2 chr2 100721260 100721286 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176957786 176957831 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly chr2 191745598 191745646 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63898360 63898391 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 128891419 128891499 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138664861 138664904 BPES_FOXL2 FOXL2 GCN GCN 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis -chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 +chr3 138664861 138664904 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA ATTTC 1000 AD Familial adult myoclonic epilepsy 4 chr4 3076603 3076660 HD_HTT HTT CAG CAG 36 AD Huntington disease -chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome -chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome -chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 -chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 +chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome +chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome +chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA ATTTC 60 AD Familial adult myoclonic epilepsy type 7 +chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA ATTTC 650 AD Familial adult myoclonic epilepsy type 3 chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 chr5 176981490 176981532 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16327864 16327955 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45390487 45390538 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170870994 170871105 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27239297 27239351 HFG_HOXA13-III HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 3 -chr7 27239444 27239480 HFG_HOXA13-II HOXA13 GCN GCN 18 AD Hand-foot-genital syndrome 2 -chr7 27239543 27239585 HFG_HOXA13-I HOXA13 GCN GCN 22 AD Hand-foot-genital syndrome 1 +chr7 27239297 27239351 HFG_HOXA13-III HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 3 +chr7 27239444 27239480 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital syndrome 2 +chr7 27239543 27239585 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A chr8 105601198 105601227 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 -chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA AAATG 105 AD Familial adult myoclonic epilepsy type 1 -chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) -chr9 71652202 71652220 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia +chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA GAAAT 105 AD Familial adult myoclonic epilepsy type 1 +chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) +chr9 71652202 71652220 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII -chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 +chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC None AD Maturity-Onset Diabetes of the Young Type 8 chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119076999 119077033 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 7045879 7045938 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy @@ -42,40 +42,40 @@ chr12 112036753 112036823 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar atax chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70713515 70713561 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 100637702 100637748 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 -chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B +chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA AAG 320 AD Spinocerebellar ataxia 27B chr14 23790681 23790712 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92537354 92537396 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease chr15 23086363 23086389 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 34711626 34711652 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) -chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 +chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 chr15 89876819 89876860 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 -chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 -chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 +chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA ATTTC 1100 AD Familial adult myoclonic epilepsy type 6 +chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 67876765 67876853 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87637888 87637935 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 -chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 -chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome +chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA ATTTC 9 AD Familial adult myoclonic epilepsy type 8 +chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 666891 667632 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 53253384 53253460 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 -chr19 4510739 4513671 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy +chr19 4510739 4513671 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy chr19 13318672 13318712 SCA6_CACNA1A CACNA1A CTG CTG 21 AD Spinocerebellar ataxia type 6 chr19 14606853 14606887 OPDM2_GIPC1 GIPC1 CCG CCG 73 AD Oculopharyngodistal myopathy type 2 -chr19 18896844 18896860 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia +chr19 18896844 18896860 EDM1-PSACH_COMP COMP GTC CGT 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia chr19 46273462 46273524 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 -chr20 2633378 2633403 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 -chr20 4680043 4680139 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome +chr20 2633378 2633403 SCA36_NOP56 NOP56 GGCCTG CCTGGG 650 AD Spinocerebellar ataxia type 36 +chr20 4680043 4680139 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT AGCCTCATGGTGGTGGCTGGGGGC 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 19754285 19754330 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38713287 38713380 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46191234 46191304 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25031646 25031682 PRTS_ARX ARX GCN GCN 20 XR Partington syndrome -chrX 25031766 25031814 EIEE1_ARX ARX GCN GCN 17 XR Early-infantile epileptic encephalopathy -chrX 31302674 31302722 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy +chrX 25031646 25031682 PRTS_ARX ARX GCN NGC 20 XR Partington syndrome +chrX 25031766 25031814 EIEE1_ARX ARX GCN NGC 17 XR Early-infantile epileptic encephalopathy +chrX 31302674 31302722 DMD_DMD DMD TTC CTT 59 XR Duchenne muscular dystrophy chrX 66765158 66765261 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 70672904 70672981 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 136648985 136649015 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 139586481 139586526 XLID_SOX3 SOX3 GCN GCN 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 139586481 139586526 XLID_SOX3 SOX3 GCN NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146993567 146993629 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 chrX 147582124 147582273 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed index 511f42f2..82005ced 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed @@ -1,38 +1,38 @@ -chr1 1371179 1371198 GGCGCGGAGC HMNR7_VWA1 -chr1 57832716 57832793 AAATG,AAAAT SCA37_DAB1 +chr1 1371179 1371198 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 +chr1 57832716 57832793 GAAAT,AAAAT SCA37_DAB1 chr1 94883978 94884000 CCG OPDM5_ABCD3 chr1 145209324 145209354 CGG NIID_NOTCH2NLC -chr1 155160982 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 -chr1 156561558 156561575 GGGCC NME_NAXE -chr2 96862805 96862862 AAATG,AAAAT FAME2_STARD7 +chr1 155160982 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 +chr1 156561558 156561575 CCGGG,GGGCC NME_NAXE +chr2 96862805 96862862 GAAAT,AAAAT FAME2_STARD7 chr2 100721261 100721286 CCG FRA2A_AFF3 chr2 176957787 176957831 GCN SD5_HOXD13 chr2 191745599 191745646 CAG GDPAG_GLS chr3 63898361 63898391 CAG SCA7_ATXN7 chr3 128891420 128891499 CAGG DM2_CNBP -chr3 138664862 138664904 GCN BPES_FOXL2 -chr3 183429976 183430014 TTTCA,TTTTA FAME4_YEATS2 +chr3 138664862 138664904 NGC,GCN BPES_FOXL2 +chr3 183429976 183430014 ATTTC,TTTTA FAME4_YEATS2 chr4 3076604 3076660 CAG HD_HTT -chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 -chr4 41747990 41748049 GCN CCHS_PHOX2B -chr4 160263679 160263770 TTTCA,TTTTA FAME7_RAPGEF2 -chr5 10356456 10356523 TTTCA,TTTTA FAME3_MARCHF6 +chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 +chr4 41747990 41748049 NGC,GCN CCHS_PHOX2B +chr4 160263679 160263770 ATTTC,TTTTA FAME7_RAPGEF2 +chr5 10356456 10356523 ATTTC,TTTTA FAME3_MARCHF6 chr5 146258291 146258322 CTG SCA12_PPP2R2B chr5 176981491 176981532 CCG OPDM_FAM193B chr6 13328709 13328835 CCG OPDM_TBC1D7 chr6 16327865 16327955 CTG SCA1_ATXN1 chr6 45390488 45390538 GCN CCD_RUNX2 chr6 170870995 170871105 CAG SCA17_TBP -chr7 27239298 27239351 GCN HFG_HOXA13-III -chr7 27239445 27239480 GCN HFG_HOXA13-II -chr7 27239544 27239585 GCN HFG_HOXA13-I +chr7 27239298 27239351 NGC,GCN HFG_HOXA13-III +chr7 27239445 27239480 NGC,GCN HFG_HOXA13-II +chr7 27239544 27239585 NGC,GCN HFG_HOXA13-I chr7 55955294 55955332 CGG FRA7A_ZNF713 chr8 105601199 105601227 CCG OPDM1_LRP12 -chr8 119379052 119379157 AAATG,TAAAA FAME1_SAMD12 -chr9 27573483 27573544 GGCCCC FTDALS1_C9orf72 -chr9 71652203 71652220 GAA FRDA_FXN +chr8 119379052 119379157 GAAAT,TAAAA FAME1_SAMD12 +chr9 27573483 27573544 CCCCGG,GGCCCC FTDALS1_C9orf72 +chr9 71652203 71652220 AAG,GAA FRDA_FXN chr9 133556993 133557028 CCG HSAN-VIII_PRDM12 -chr9 135946565 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL +chr9 135946565 135947124 ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL chr10 81586140 81586160 CGG OPML1_NUTM2B-AS1 chr11 119077000 119077033 CGG JBS_CBL chr12 7045880 7045938 CAG DRPLA_ATN1 @@ -41,40 +41,40 @@ chr12 112036754 112036823 CTG SCA2_ATXN2 chr12 124018268 124018297 CGG OPDM4_RILPL1 chr13 70713516 70713561 CTG SCA8_ATXN8OS chr13 100637703 100637748 GCN HPE5_ZIC2 -chr13 102813925 102814076 GAA,GGA,CAG SCA27B_FGF14 +chr13 102813925 102814076 AAG,AGG,CAG,GAA SCA27B_FGF14 chr14 23790682 23790712 GCN OPMD_PABPN1 chr14 92537355 92537396 CTG SCA3_ATXN3 chr15 23086364 23086389 CGG ALS1_NIPA1 chr15 34711627 34711652 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A -chr15 89112665 89112683 TTTG CHNG3_MIR7-2 +chr15 89112665 89112683 GTTT,TTTG CHNG3_MIR7-2 chr15 89876820 89876860 CTG CPEO_POLG chr16 17564765 17564779 CCG DBQD2_XYLT1 -chr16 24624760 24624853 TTTCA,TTTTA FAME6_TNRC6A -chr16 66524300 66524369 TGGAA,TAGAA,AATAA SCA31_BEAN1 +chr16 24624760 24624853 ATTTC,TTTTA FAME6_TNRC6A +chr16 66524300 66524369 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 67876766 67876853 CAG SCA_THAP11 chr16 72821594 72821657 CCG SCA4_ZFHX3 chr16 87637889 87637935 CTG HDL2_JPH3 -chr17 17711673 17711774 TTTCA,TTTTA FAME8_RAI1 -chr17 78120809 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 +chr17 17711673 17711774 ATTTC,ATTTT,TTTTA FAME8_RAI1 +chr17 78120809 78120938 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 666892 667632 GATGGT CPUM_TYMS chr18 53253385 53253460 CAG FECD3_TCF4 -chr19 4510740 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 +chr19 4510740 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 chr19 13318673 13318712 CTG SCA6_CACNA1A chr19 14606854 14606887 CCG OPDM2_GIPC1 -chr19 18896845 18896860 GTC EDM1-PSACH_COMP +chr19 18896845 18896860 CGT,GTC EDM1-PSACH_COMP chr19 46273463 46273524 CAG DM1_DMPK -chr20 2633379 2633403 GGCCTG SCA36_NOP56 -chr20 4680044 4680139 CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP +chr20 2633379 2633403 CCTGGG,GGCCTG SCA36_NOP56 +chr20 4680044 4680139 AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP chr21 45196324 45196360 CGCGGGGCGGGG EPM1_CSTB chr22 19754286 19754330 GCN TOF_TBX1 chr22 38713288 38713380 CCG EPM_CSNK1E chr22 46191235 46191304 ATTCT SCA10_ATXN10 -chrX 25031647 25031682 GCN PRTS_ARX -chrX 25031767 25031814 GCN EIEE1_ARX -chrX 31302675 31302722 TTC DMD_DMD +chrX 25031647 25031682 NGC,GCN PRTS_ARX +chrX 25031767 25031814 NGC,GCN EIEE1_ARX +chrX 31302675 31302722 CTT,TTC DMD_DMD chrX 66765159 66765261 CAG SBMA_AR chrX 70672905 70672981 AGAGGG XDP_TAF1 chrX 136648986 136649015 GCN VACTERLX_ZIC3 -chrX 139586482 139586526 GCN XLID_SOX3 +chrX 139586482 139586526 NGC,GCN XLID_SOX3 chrX 146993568 146993629 CGG FXS_FMR1 chrX 147582125 147582273 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed index 582f4802..fbb1db85 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed @@ -1,8 +1,8 @@ -chr1 1371178 1371198 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 +chr1 1371178 1371198 AGCGGCGCGG HMNR7_VWA1 HMNR7_VWA1 chr1 57832750 57832793 GAAAT SCA37_DAB1 SCA37_DAB1 chr1 94883977 94884000 CCG OPDM5_ABCD3 OPDM5_ABCD3 chr1 145209323 145209354 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC -chr1 156561557 156561575 GGGCC NME_NAXE NME_NAXE +chr1 156561557 156561575 CCGGG NME_NAXE NME_NAXE chr2 96862804 96862807 AAATG FAME2_STARD7 FAME2_STARD7 chr2 100721260 100721286 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176957786 176957831 GCN SD5_HOXD13 SD5_HOXD13 @@ -12,12 +12,12 @@ chr3 63898391 63898403 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 128891419 128891499 CAGG DM2_CNBP DM2_CNBP chr3 128891499 128891539 CAGA DM2_CNBP DM2_CNBP_CAGA chr3 128891539 128891577 CA DM2_CNBP DM2_CNBP_CA -chr3 138664861 138664904 GCN BPES_FOXL2 BPES_FOXL2 +chr3 138664861 138664904 NGC BPES_FOXL2 BPES_FOXL2 chr3 183430010 183430014 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3076603 3076654 CAG HD_HTT HD_HTT chr4 3076660 3076696 CCG HD_HTT HD_HTT_CCG chr4 39350099 39350103 AAGGG CANVAS_RFC1 CANVAS_RFC1 -chr4 41747989 41748049 GCN CCHS_PHOX2B CCHS_PHOX2B +chr4 41747989 41748049 NGC CCHS_PHOX2B CCHS_PHOX2B chr4 160263763 160263770 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 chr5 146258290 146258322 CTG SCA12_PPP2R2B SCA12_PPP2R2B @@ -26,13 +26,13 @@ chr6 13328708 13328835 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16327864 16327955 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45390487 45390538 GCN CCD_RUNX2 CCD_RUNX2 chr6 170870994 170871105 CAG SCA17_TBP SCA17_TBP -chr7 27239297 27239351 GCN HFG_HOXA13-III HFG_HOXA13-III -chr7 27239444 27239480 GCN HFG_HOXA13-II HFG_HOXA13-II -chr7 27239543 27239585 GCN HFG_HOXA13-I HFG_HOXA13-I +chr7 27239297 27239351 NGC HFG_HOXA13-III HFG_HOXA13-III +chr7 27239444 27239480 NGC HFG_HOXA13-II HFG_HOXA13-II +chr7 27239543 27239585 NGC HFG_HOXA13-I HFG_HOXA13-I chr7 55955293 55955332 CGG FRA7A_ZNF713 FRA7A_ZNF713 chr8 105601198 105601227 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 119379151 119379157 TGAAA FAME1_SAMD12 FAME1_SAMD12 -chr9 27573482 27573544 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 +chr9 27573482 27573544 CCCCGG FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 71652186 71652202 A FRDA_FXN FRDA_FXN_A chr9 71652202 71652220 GAA FRDA_FXN FRDA_FXN chr9 133556992 133557028 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 @@ -45,29 +45,29 @@ chr12 124018267 124018297 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 70713485 70713515 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 70713515 70713561 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 100637702 100637748 GCN HPE5_ZIC2 HPE5_ZIC2 -chr13 102813924 102814076 GAA SCA27B_FGF14 SCA27B_FGF14 +chr13 102813924 102814076 AAG SCA27B_FGF14 SCA27B_FGF14 chr14 23790681 23790712 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 92537354 92537396 CTG SCA3_ATXN3 SCA3_ATXN3 chr15 23086363 23086389 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 34711626 34711652 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A -chr15 89112664 89112683 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 +chr15 89112664 89112683 GTTT CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 89876810 89876816 GCT CPEO_POLG CPEO_POLG_GCT chr15 89876816 89876819 GTT CPEO_POLG CPEO_POLG_GTT chr15 89876819 89876860 GCT CPEO_POLG CPEO_POLG chr16 17564764 17564779 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA FAME6_TNRC6A FAME6_TNRC6A -chr16 66524299 66524369 TGGAA SCA31_BEAN1 SCA31_BEAN1 +chr16 66524299 66524369 AATGG SCA31_BEAN1 SCA31_BEAN1 chr16 67876765 67876853 CAG SCA_THAP11 SCA_THAP11 chr16 72821593 72821657 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 87637888 87637935 CTG HDL2_JPH3 HDL2_JPH3 chr17 17711762 17711774 TTTCA FAME8_RAI1 FAME8_RAI1 -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 +chr17 78120808 78120938 GCCGCTGCCGACCTCGCTGT RCPS_EIF4A3 RCPS_EIF4A3 chr18 666891 667632 GATGGT CPUM_TYMS CPUM_TYMS chr18 53253384 53253460 CAG FECD3_TCF4 FECD3_TCF4 -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 MRUPAV_PLIN4 +chr19 4510739 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT MRUPAV_PLIN4 MRUPAV_PLIN4 chr19 13318672 13318712 CTG SCA6_CACNA1A SCA6_CACNA1A chr19 14606853 14606887 CCG OPDM2_GIPC1 OPDM2_GIPC1 -chr19 18896844 18896860 GTC EDM1-PSACH_COMP EDM1-PSACH_COMP +chr19 18896844 18896860 CGT EDM1-PSACH_COMP EDM1-PSACH_COMP chr19 46273462 46273524 CAG DM1_DMPK DM1_DMPK chr20 2633378 2633403 GGCCTG SCA36_NOP56 SCA36_NOP56 chr20 2633403 2633421 CGCCTG SCA36_NOP56 SCA36_NOP56_CGCCTG @@ -77,13 +77,13 @@ chr21 45196323 45196360 CGCGGGGCGGGG EPM1_CSTB EPM1_CSTB chr22 19754285 19754330 GCN TOF_TBX1 TOF_TBX1 chr22 38713287 38713380 CCG EPM_CSNK1E EPM_CSNK1E chr22 46191234 46191304 ATTCT SCA10_ATXN10 SCA10_ATXN10 -chrX 25031646 25031682 GCN PRTS_ARX PRTS_ARX -chrX 25031766 25031814 GCN EIEE1_ARX EIEE1_ARX +chrX 25031646 25031682 NGC PRTS_ARX PRTS_ARX +chrX 25031766 25031814 NGC EIEE1_ARX EIEE1_ARX chrX 31302674 31302722 TTC DMD_DMD DMD_DMD chrX 31302722 31302730 T DMD_DMD DMD_DMD_T chrX 66765158 66765261 CAG SBMA_AR SBMA_AR chrX 70672904 70672981 AGAGGG XDP_TAF1 XDP_TAF1 chrX 136648985 136649015 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 -chrX 139586481 139586526 GCN XLID_SOX3 XLID_SOX3 +chrX 139586481 139586526 NGC XLID_SOX3 XLID_SOX3 chrX 146993567 146993629 CGG FXS_FMR1 FXS_FMR1 chrX 147582124 147582273 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.stranger.json b/data/catalogs/STRchive-disease-loci.hg19.stranger.json index f455dd18..0bdbb4e5 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg19.stranger.json @@ -2,11 +2,11 @@ { "LocusId": "HMNR7_VWA1", "ReferenceRegion": "chr1:1371178-1371198", - "LocusStructure": "(GGCGCGGAGC)*", + "LocusStructure": "(AGCGGCGCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGCGCGGAGC", + "DisplayRU": "AGCGGCGCGG", "Disease": "HMNR7", "NormalMax": 2, "PathologicMin": 3, @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57832750-57832793", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "GAAAT", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -56,11 +56,11 @@ { "LocusId": "NME_NAXE", "ReferenceRegion": "chr1:156561557-156561575", - "LocusStructure": "(GGGCC)*", + "LocusStructure": "(CCGGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGGCC", + "DisplayRU": "CCGGG", "Disease": "NME", "NormalMax": 7, "PathologicMin": 200, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96862804-96862807", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "GAAAT", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -153,11 +153,11 @@ { "LocusId": "BPES_FOXL2", "ReferenceRegion": "chr3:138664861-138664904", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD", "AR"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "BPES", "NormalMax": 14, "PathologicMin": 15, @@ -172,7 +172,7 @@ "PathologicRegion": "chr3:183430010-183430014", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTCA", + "DisplayRU": "ATTTC", "Disease": "FAME4", "NormalMax": 999, "PathologicMin": 1000, @@ -211,11 +211,11 @@ { "LocusId": "CCHS_PHOX2B", "ReferenceRegion": "chr4:41747989-41748049", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "CCHS", "NormalMax": 20, "PathologicMin": 26, @@ -230,7 +230,7 @@ "PathologicRegion": "chr4:160263763-160263770", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTCA", + "DisplayRU": "ATTTC", "Disease": "FAME7", "NormalMax": 59, "PathologicMin": 60, @@ -245,7 +245,7 @@ "PathologicRegion": "chr5:10356515-10356523", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTCA", + "DisplayRU": "ATTTC", "Disease": "FAME3", "NormalMax": 649, "PathologicMin": 650, @@ -332,11 +332,11 @@ { "LocusId": "HFG_HOXA13-III", "ReferenceRegion": "chr7:27239297-27239351", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "HFG-III", "NormalMax": 18, "PathologicMin": 22, @@ -345,11 +345,11 @@ { "LocusId": "HFG_HOXA13-II", "ReferenceRegion": "chr7:27239444-27239480", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "HFG-II", "NormalMax": 12, "PathologicMin": 18, @@ -358,11 +358,11 @@ { "LocusId": "HFG_HOXA13-I", "ReferenceRegion": "chr7:27239543-27239585", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "HFG-I", "NormalMax": 14, "PathologicMin": 22, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:119379151-119379157", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "GAAAT", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -412,11 +412,11 @@ { "LocusId": "FTDALS1_C9orf72", "ReferenceRegion": "chr9:27573482-27573544", - "LocusStructure": "(GGCCCC)*", + "LocusStructure": "(CCCCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCCC", + "DisplayRU": "CCCCGG", "Disease": "FTDALS1", "NormalMax": 23, "PathologicMin": 31, @@ -431,7 +431,7 @@ "PathologicRegion": "chr9:71652202-71652220", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "FRDA", "NormalMax": 33, "PathologicMin": 56, @@ -559,11 +559,11 @@ { "LocusId": "SCA27B_FGF14", "ReferenceRegion": "chr13:102813924-102814076", - "LocusStructure": "(GAA)*", + "LocusStructure": "(AAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "SCA27B", "NormalMax": 179, "PathologicMin": 320, @@ -626,11 +626,11 @@ { "LocusId": "CHNG3_MIR7-2", "ReferenceRegion": "chr15:89112664-89112683", - "LocusStructure": "(TTTG)*", + "LocusStructure": "(GTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTG", + "DisplayRU": "GTTT", "Disease": "CHNG3", "NormalMax": 4, "PathologicMin": 5, @@ -675,7 +675,7 @@ "PathologicRegion": "chr16:24624809-24624853", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTCA", + "DisplayRU": "ATTTC", "Disease": "FAME6", "NormalMax": 1099, "PathologicMin": 1100, @@ -684,11 +684,11 @@ { "LocusId": "SCA31_BEAN1", "ReferenceRegion": "chr16:66524299-66524369", - "LocusStructure": "(TGGAA)*", + "LocusStructure": "(AATGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGAA", + "DisplayRU": "AATGG", "Disease": "SCA31", "NormalMax": 109, "PathologicMin": 110, @@ -742,7 +742,7 @@ "PathologicRegion": "chr17:17711762-17711774", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTCA", + "DisplayRU": "ATTTC", "Disease": "FAME8", "NormalMax": 8, "PathologicMin": 9, @@ -751,11 +751,11 @@ { "LocusId": "RCPS_EIF4A3", "ReferenceRegion": "chr17:78120808-78120938", - "LocusStructure": "(CCTCGCTGTGCCGCTGCCGA)*", + "LocusStructure": "(GCCGCTGCCGACCTCGCTGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "CCTCGCTGTGCCGCTGCCGA", + "DisplayRU": "GCCGCTGCCGACCTCGCTGT", "Disease": "RCPS", "NormalMax": 12, "PathologicMin": 14, @@ -790,11 +790,11 @@ { "LocusId": "MRUPAV_PLIN4", "ReferenceRegion": "chr19:4510739-4513671", - "LocusStructure": "(TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC)*", + "LocusStructure": "(GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC", + "DisplayRU": "GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT", "Disease": "MRUPAV", "NormalMax": 31, "PathologicMin": 37, @@ -829,11 +829,11 @@ { "LocusId": "EDM1-PSACH_COMP", "ReferenceRegion": "chr19:18896844-18896860", - "LocusStructure": "(GTC)*", + "LocusStructure": "(CGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GTC", + "DisplayRU": "CGT", "Disease": "EDM1, PSACH", "NormalMax": 5, "PathologicMin": 6, @@ -861,7 +861,7 @@ "PathologicRegion": "chr20:2633378-2633403", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCTG", + "DisplayRU": "CCTGGG", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650, @@ -876,7 +876,7 @@ "PathologicRegion": "chr20:4680043-4680139", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CCTCATGGTGGTGGCTGGGGGCAG", + "DisplayRU": "AGCCTCATGGTGGTGGCTGGGGGC", "Disease": "CJD", "NormalMax": 4, "PathologicMin": 5, @@ -939,11 +939,11 @@ { "LocusId": "PRTS_ARX", "ReferenceRegion": "chrX:25031646-25031682", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20, @@ -952,11 +952,11 @@ { "LocusId": "EIEE1_ARX", "ReferenceRegion": "chrX:25031766-25031814", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "EIEE1", "NormalMax": 16, "PathologicMin": 17, @@ -971,7 +971,7 @@ "PathologicRegion": "chrX:31302674-31302722", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "TTC", + "DisplayRU": "CTT", "Disease": "DMD", "NormalMax": 33, "PathologicMin": 59, @@ -1019,11 +1019,11 @@ { "LocusId": "XLID_SOX3", "ReferenceRegion": "chrX:139586481-139586526", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "XLID, PHPX", "NormalMax": 15, "PathologicMin": 22, diff --git a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed index dc288528..41b859a7 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed @@ -1,38 +1,38 @@ -chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,AAATG;STRUC= +chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= -chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= +chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176093058 176093103 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63912684 63912727 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 129172576 129172734 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=GCN;STRUC= -chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= +chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= +chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr4 3074876 3074969 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= -chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= +chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= +chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327633 16327724 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45422750 45422801 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170561906 170562017 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=GCN;STRUC= -chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=GCN;STRUC= -chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=GCN;STRUC= +chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,AAATG;STRUC= -chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= +chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= -chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119206289 119206323 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6936716 6936775 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= @@ -41,40 +41,40 @@ chr12 111598949 111599019 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70139353 70139429 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99985448 99985494 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,CAG;STRUC= +chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 23321472 23321503 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92071010 92071052 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34419425 34419451 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= +chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67842862 67842950 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87604282 87604329 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55586153 55586229 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4510727 4513659 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4510727 4513659 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13207858 13207898 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14496041 14496075 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18786034 18786050 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18786034 18786050 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 45770204 45770266 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2652732 2652775 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4699370 4699493 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2652732 2652775 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4699370 4699493 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 43776442 43776479 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19766762 19766807 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38317282 38317375 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 45795354 45795424 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=GCN;STRUC= -chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=GCN;STRUC= -chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= +chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 71453054 71453131 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 137566826 137566856 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=GCN;STRUC= +chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 147912049 147912111 ID=FXS_FMR1;MOTIFS=CGG;STRUC= chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed index a2d85324..337c4c77 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed @@ -1,10 +1,10 @@ #chrom start stop motif motif_len id -chr1 1435798 1435818 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 1435798 1435818 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57367078 57367121 GAAAT 5 SCA37_DAB1 chr1 94418421 94418444 CCG 3 OPDM5_ABCD3 chr1 149390802 149390842 CGG 3 NIID_NOTCH2NLC -chr1 155188505 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 156591765 156591783 GGGCC 5 NME_NAXE +chr1 155188505 155192239 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 156591765 156591783 CCGGG 5 NME_NAXE chr2 96197066 96197069 AAATG 5 FAME2_STARD7 chr2 100104798 100104824 CCG 3 FRA2A_AFF3 chr2 176093058 176093103 GCN 3 SD5_HOXD13 @@ -14,12 +14,12 @@ chr3 63912715 63912727 CCG 3 SCA7_ATXN7_flank chr3 129172576 129172656 CAGG 4 DM2_CNBP chr3 129172656 129172696 CAGA 4 DM2_CNBP_flank chr3 129172696 129172734 CA 2 DM2_CNBP_flank -chr3 138946019 138946062 GCN 3 BPES_FOXL2 +chr3 138946019 138946062 NGC 3 BPES_FOXL2 chr3 183712222 183712226 TTTCA 5 FAME4_YEATS2 chr4 3074876 3074927 CAG 3 HD_HTT chr4 3074933 3074969 CCG 3 HD_HTT_flank chr4 39348479 39348483 AAGGG 5 CANVAS_RFC1 -chr4 41745972 41746032 GCN 3 CCHS_PHOX2B +chr4 41745972 41746032 NGC 3 CCHS_PHOX2B chr4 159342611 159342618 TTTCA 5 FAME7_RAPGEF2 chr5 10356403 10356411 TTTCA 5 FAME3_MARCHF6 chr5 146878727 146878759 CTG 3 SCA12_PPP2R2B @@ -28,13 +28,13 @@ chr6 13328476 13328603 CCG 3 OPDM_TBC1D7 chr6 16327633 16327724 CTG 3 SCA1_ATXN1 chr6 45422750 45422801 GCN 3 CCD_RUNX2 chr6 170561906 170562017 CAG 3 SCA17_TBP -chr7 27199678 27199732 GCN 3 HFG_HOXA13-III -chr7 27199825 27199861 GCN 3 HFG_HOXA13-II -chr7 27199924 27199966 GCN 3 HFG_HOXA13-I +chr7 27199678 27199732 NGC 3 HFG_HOXA13-III +chr7 27199825 27199861 NGC 3 HFG_HOXA13-II +chr7 27199924 27199966 NGC 3 HFG_HOXA13-I chr7 55887600 55887639 CGG 3 FRA7A_ZNF713 chr8 104588970 104588999 CCG 3 OPDM1_LRP12 chr8 118366912 118366918 TGAAA 5 FAME1_SAMD12 -chr9 27573484 27573546 GGCCCC 6 FTDALS1_C9orf72 +chr9 27573484 27573546 CCCCGG 6 FTDALS1_C9orf72 chr9 69037270 69037286 A 1 FRDA_FXN_flank chr9 69037286 69037304 GAA 3 FRDA_FXN chr9 130681605 130681641 CCG 3 HSAN-VIII_PRDM12 @@ -48,29 +48,29 @@ chr12 123533720 123533750 CGG 3 OPDM4_RILPL1 chr13 70139353 70139383 CTA 3 SCA8_ATXN8OS_flank chr13 70139383 70139429 CTG 3 SCA8_ATXN8OS chr13 99985448 99985494 GCN 3 HPE5_ZIC2 -chr13 102161574 102161726 GAA 3 SCA27B_FGF14 +chr13 102161574 102161726 AAG 3 SCA27B_FGF14 chr14 23321472 23321503 GCN 3 OPMD_PABPN1 chr14 92071010 92071052 CTG 3 SCA3_ATXN3 chr15 22786677 22786703 CGG 3 ALS1_NIPA1 chr15 34419425 34419451 CT 2 aFTLD-U_GOLGA8A -chr15 88569433 88569452 TTTG 4 CHNG3_MIR7-2 +chr15 88569433 88569452 GTTT 4 CHNG3_MIR7-2 chr15 89333579 89333585 GCT 3 CPEO_POLG_flank chr15 89333585 89333588 GTT 3 CPEO_POLG_flank chr15 89333588 89333629 GCT 3 CPEO_POLG chr16 17470907 17470922 GCC 3 DBQD2_XYLT1 chr16 24613488 24613532 TTTCA 5 FAME6_TNRC6A -chr16 66490396 66490466 TGGAA 5 SCA31_BEAN1 +chr16 66490396 66490466 AATGG 5 SCA31_BEAN1 chr16 67842862 67842950 CAG 3 SCA_THAP11 chr16 72787694 72787758 CCG 3 SCA4_ZFHX3 chr16 87604282 87604329 CTG 3 HDL2_JPH3 chr17 17808448 17808460 TTTCA 5 FAME8_RAI1 -chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 80147009 80147139 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 666891 667632 GATGGT 6 CPUM_TYMS chr18 55586153 55586229 CAG 3 FECD3_TCF4 -chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4510727 4513659 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13207858 13207898 CTG 3 SCA6_CACNA1A chr19 14496041 14496075 CCG 3 OPDM2_GIPC1 -chr19 18786034 18786050 GTC 3 EDM1-PSACH_COMP +chr19 18786034 18786050 CGT 3 EDM1-PSACH_COMP chr19 45770204 45770266 CAG 3 DM1_DMPK chr20 2652732 2652757 GGCCTG 6 SCA36_NOP56 chr20 2652757 2652775 CGCCTG 6 SCA36_NOP56_flank @@ -80,13 +80,13 @@ chr21 43776442 43776479 CGCGGGGCGGGG 12 EPM1_CSTB chr22 19766762 19766807 GCN 3 TOF_TBX1 chr22 38317282 38317375 CCG 3 EPM_CSNK1E chr22 45795354 45795424 ATTCT 5 SCA10_ATXN10 -chrX 25013529 25013565 GCN 3 PRTS_ARX -chrX 25013649 25013697 GCN 3 EIEE1_ARX +chrX 25013529 25013565 NGC 3 PRTS_ARX +chrX 25013649 25013697 NGC 3 EIEE1_ARX chrX 31284557 31284605 TTC 3 DMD_DMD chrX 31284605 31284613 T 1 DMD_DMD_flank chrX 67545316 67545419 CAG 3 SBMA_AR chrX 71453054 71453131 AGAGGG 6 XDP_TAF1 chrX 137566826 137566856 GCN 3 VACTERLX_ZIC3 -chrX 140504316 140504361 GCN 3 XLID_SOX3 +chrX 140504316 140504361 NGC 3 XLID_SOX3 chrX 147912049 147912111 CGG 3 FXS_FMR1 chrX 148500604 148500753 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz index 5be15b619dc00bd3198bbe4e15f6b1121beda5b0..7ab401506df10d7464b624a7e16f8e4adead9b8f 100644 GIT binary patch delta 1801 zcmV+k2ln`|4y_JGd#c7J= z1yI3dUN*_!{vPm`--~U8ttfE~kPxO2LIMMO3@mvFr*3+GtF`0*3oK%qE9Z z9Jq<7J$vO89i$1$)ftxSb;!#-#S1OB*llCbIipM^PsAQV3^9gjin08c*D0o0KCExc zPbd`PG^Ht(FVa&5ix7tNb;$Ea#N#5?cob_6atyUuFtE($yoATOnhL}_vh28lR=W57 zzRowy%SW1j>GUP+tziU2Ps)<>otU_v@at*M@pP*mf%PT4wjISl48<9?^%inS3wF(@ z@~f}|9akT>fE4m@UFp$rn6FvSE6aXsJjdcRF?Q(#_F2UlADf3|C2$^UQL z)w6)2MR_G1h+vAbWbNs11Leo}Tf%OVw#eGK28IbzN-=g7oAw8 z$x#D-X6t3RQT}zkG_Q==DOXapfmNS|kIf6+;ZA>uH)=U9APPe4IiI(@{iYD%URkol*+Z6snn0h^Y%9){30U2eqU1 zqy*l>ynU?UTV8M1<>(1ADRIVi@b0zPY>pu=0mGqc?XA+-p_Iqxt%dS*U3(q(fKHrM ztUJ^?R#Nvo%=l&9L%ig8pVwzFL=-81-ND+U?9P~ndHq^60&mfavi1NEmrNSNBuNS)j?fvpS2=Hq;k#}d$=sIQ_x3bgf}r60XO$=gtWZoFmk z%U_xG@;*#J0`b=&uiLWzwGA)zs6Dqf($5;u&j#wL2%1H#*3?0K>2cM|3n0OHoiWEf zo$47SSW;F^2dAWmH{HjaP7mxFJ4Pw%y#haT>%7i6?#ov^cTpShhK+8ZNkdsReJ6yf z-XBmEnZdG0vQ9S;G_SHGRg^bAuYBF0#4UdIUh!i<@+f4)cCw}>>;xLhI*7|@eHY8u zoqQF;oA}pui_}P1LkvJam)Uu3Rb9Z9EWt9BvVW7h1RsCVmq65jU50!~>1BB4cC@~S zsu`~yYGBXWl?-X%xNHsT{Ue6uTiEk9wtAsKqiExWDpjb}}^ zZ0I_^cQbub1{v9Vfl2gFY@=Q(WgGTJU{>%^7G?_}O~NJx2ISK3-f44{Z9 zJ0#!9Fus2k!#su=Jy+KPGLzHN!76??(lOf`OEcseRCa|4i|=O5C2>lF)$>fkH{Kp3|n(G|xt48`rsLLLfFW;`$YhJfuKKhHyN)Sal3Ag)9{h8{X zraymqO=$|{v8+UtV7?FEU5@Aa!1Dky3fYkFU`?N+bxV5=+vCl$x?9#ux6aq|8DAPB zIcsIot&Gy;+MS+h{g%A=L*kfSHLpXJR(`$`R(*asTj1w^2*I_m} zoZ`SuMD5utr|2L}5T~Lp*Xxj%dx{rYZn4|OpmRoY zwLq1ynLjYPG7>_8b(0$q%1k#iHZ9Szn=CSPq*q3SYN_x+f@w2P@G{~Zy|@Y zVAqT)e-(D1utTYE-;8f_G}Jt3^9e)s;jNK`=0;!*Khy+=cgNgTn9~W-FV*6RqwqsTDuNj!xR@_ zxU6TBe_SJV4;7(BBf682EqU3;CZ{}3BkC$^AZ9`>)W9h2jo#OJ$ZI&QJ-Qk^DM5v* zgP~VXx%RYWumK`*)`~O`bF@S+F%x9a^vPB!ZY_0Z+ zeFtqVq^V49D>;}?=dc_NTXCA4r3Sv_ZRj(9meVDyc!)yc#CqL8jO*2Dk3r`jcfRse|~^L9P;$Pb>QX^pvF#!EsX6Ln4bpcnh)B}5ywFDo3)t5ljfL(@sN$F*H z=61EdhpHK`9%^9E+La7x;21(lR49|Md<%Qt##S#hXcTR{Pz9^_7)p$nJ+GJhqn)NS zpz*B9mIGZU_h~GWx3Es`JK$$2?;5$>q=Ye zhyfHa<%Hxr8OFDNVwlG;qvz^cKxT4UI#|W;W;#V|Rm%ceCb7&7q^!Q@^IKjwPa92C z4u=(-!e_XKynQSW>dlEV7QO033gwmmC^@w}6(B+Ht+)pA4F6@EN(BtaU#{2do9oDQ zu=q3gP4%QZSo;&TnZ~%X&8rS&XFiV867r8_lvadGIoHO2IWOuU@86<{`L?Y{T}8*% z`g#O!KTWGH?U|~!{Jy1sfpFjVQ?pv=|NK^-BAppX- z4Iz(@<+Tp{OQ^s~_apM*HVauDn2ktkRvt@asJCIG6JX{qkR80s1ME4F@3cWse>J diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi index 5afce915cb97b5eb05f6f34938b8dd18c9f46617..e1a7226db72689f3000118d06890693665521d6f 100644 GIT binary patch delta 233 zcmX@8bVzB#R(=aH#<%Cz`Z@45ZlQPgUJ3lV<;MN$TmP!#u9>{P-Mgx4|C*Jz z^j6)pUEMt^t$2I)s;WId`zES*PC`K^=a+i}MLylHHuV6@Oy0&QEct0e>|~Xb@x{(C W#gz?pj0Y|2ttKzz7nyuopbP-3G;@0Z delta 232 zcmX@4bWmx-R(=an#<%Cz`Z@QWn8M1lE_y*gMq2`t4ClV$ z5_XmC^CmdWOP)99{r)VSu89{F>wUvjDi!91h6kFR@Uow?O(Yvs(;n3o>f&l z)~(znbycUnF8cDCccmd$pXd5+{(pSMs@>DAR?QRpUG0Cn{`~1#X|=lpL*Gv~4ZZDZ zx;woz@N&+(vcS-O^ZSOAR6J49&(C=vk$=_e*`P9$GgyQq Date: Thu, 28 May 2026 09:40:36 -0600 Subject: [PATCH 11/16] Add canonical array and change "autto_generated" and "hide" bools --- data/STRchive-loci.schema.json | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/data/STRchive-loci.schema.json b/data/STRchive-loci.schema.json index a24788ba..7a246e8a 100644 --- a/data/STRchive-loci.schema.json +++ b/data/STRchive-loci.schema.json @@ -4,6 +4,17 @@ "title": "STRchive-loci", "description": "STRchive tandem repeat disease locus", "citation_format": "In free text strings: 'Some text [@doi:12345; @pmid:12345]'. In regular lists: ['doi:12345', 'pmid:12345']", + "canonical_motifs": [ + "CAG", + "CCG", + "CGG", + "CTG", + "GCN", + "CAA" + "TTTCA", + "AAATG", +] + ] "type": "object", "properties": { "id": { @@ -270,6 +281,7 @@ "type": ["string", "null"], "enum": ["+", "-"] }, + "reference_motif_reference_orientation": { "section": "Alleles", "title": "Reference Motif (Reference Orientation)", @@ -289,6 +301,8 @@ "description": "Pathogenic motif(s) in the + reference orientation. May be the same as the reference motif if it is pathogenic when expanded or contracted.", "examples": ["AAGGG", "ACAGG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -302,6 +316,8 @@ "description": "Benign motif(s) in the + reference orientation. Benign motifs are not known to be pathogenic at any size. May be the same as the reference motif if it is benign.", "examples": ["AAAAG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -315,6 +331,8 @@ "description": "Motif(s) of unknown consequence in the + reference orientation. Only include motifs that have been observed in individuals but are not classified elsewhere.", "examples": ["AAAAG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -328,6 +346,8 @@ "description": "Interruption(s) in the + reference orientation. Only include interruptions that have been observed in individuals. These should typically be reported in the context of the motif, for example a CAG -> CAA interruption would be reported as CAA, not A", "examples": ["CAA"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -341,8 +361,7 @@ "description": "Pathogenic motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from pathogenic_motif_reference_orientation and gene_strand. Should not be manually edited.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", + "uniqueItems": true, "items": { "title": "", @@ -356,8 +375,6 @@ "description": "Benign motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from benign_motif_reference_orientation and gene_strand. Should not be manually edited.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -371,8 +388,6 @@ "description": "Motif(s) of unknown consequence in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from unknown_motif_reference_orientation and gene_strand. Should not be manually edited.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -386,8 +401,6 @@ "description": "Interruption(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from interruption_reference_orientation and gene_strand. Should not be manually edited.", "examples": ["TTG"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", From 862f476d41226430e639707a1ccc634cd79df77b Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 28 May 2026 09:41:16 -0600 Subject: [PATCH 12/16] Editing canonical motifs Adding auto generated to reference descriptions and removing from gene descriptions Editing descriptions to say explicitly say auto generated from gene Fix formatting fix formatting Editing script to derive canonical motifs from schema fixing adding canonical_motifs to function call --- data/STRchive-loci.schema.json | 23 ++++++++++---------- scripts/check-loci.py | 39 +++++++++++++++++----------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/data/STRchive-loci.schema.json b/data/STRchive-loci.schema.json index 7a246e8a..c5b52b3a 100644 --- a/data/STRchive-loci.schema.json +++ b/data/STRchive-loci.schema.json @@ -10,11 +10,10 @@ "CGG", "CTG", "GCN", - "CAA" + "CAA", "TTTCA", - "AAATG", -] - ] + "AAATG" +], "type": "object", "properties": { "id": { @@ -298,7 +297,7 @@ "pathogenic_motif_reference_orientation": { "section": "Alleles", "title": "Pathogenic Motif (Reference Orientation)", - "description": "Pathogenic motif(s) in the + reference orientation. May be the same as the reference motif if it is pathogenic when expanded or contracted.", + "description": "Pathogenic motif(s) in the + reference orientation. May be the same as the reference motif if it is pathogenic when expanded or contracted. Automatically generated from pathogenic_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAGGG", "ACAGG"], "type": "array", "auto_generated": true, @@ -313,7 +312,7 @@ "benign_motif_reference_orientation": { "section": "Alleles", "title": "Benign Motif (Reference Orientation)", - "description": "Benign motif(s) in the + reference orientation. Benign motifs are not known to be pathogenic at any size. May be the same as the reference motif if it is benign.", + "description": "Benign motif(s) in the + reference orientation. Benign motifs are not known to be pathogenic at any size. May be the same as the reference motif if it is benign. Automatically generated from benign_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAAAG"], "type": "array", "auto_generated": true, @@ -328,7 +327,7 @@ "unknown_motif_reference_orientation": { "section": "Alleles", "title": "Unknown Motif (Reference Orientation)", - "description": "Motif(s) of unknown consequence in the + reference orientation. Only include motifs that have been observed in individuals but are not classified elsewhere.", + "description": "Motif(s) of unknown consequence in the + reference orientation. Automatically generated from unknown_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAAAG"], "type": "array", "auto_generated": true, @@ -343,7 +342,7 @@ "interruption_reference_orientation": { "section": "Alleles", "title": "Interruption (Reference Orientation)", - "description": "Interruption(s) in the + reference orientation. Only include interruptions that have been observed in individuals. These should typically be reported in the context of the motif, for example a CAG -> CAA interruption would be reported as CAA, not A", + "description": "Interruption(s) in the + reference orientation. Only include interruptions that have been observed in individuals. Automatically generated from interruption_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["CAA"], "type": "array", "auto_generated": true, @@ -358,7 +357,7 @@ "pathogenic_motif_gene_orientation": { "section": "Alleles", "title": "Pathogenic Motif (Gene Orientation)", - "description": "Pathogenic motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from pathogenic_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Pathogenic motif(s) in the gene orientation, so the reverse complement if gene is on - strand.", "examples": ["CCCTT", "CCTGT"], "type": "array", @@ -372,7 +371,7 @@ "benign_motif_gene_orientation": { "section": "Alleles", "title": "Benign Motif (Gene Orientation)", - "description": "Benign motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from benign_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Benign motif(s) in the gene orientation, so the reverse complement if gene is on - strand.", "examples": ["CCCTT", "CCTGT"], "type": "array", "uniqueItems": true, @@ -385,7 +384,7 @@ "unknown_motif_gene_orientation": { "section": "Alleles", "title": "Unknown Motif (Gene Orientation)", - "description": "Motif(s) of unknown consequence in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from unknown_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Motif(s) of unknown consequence in the gene orientation, so the reverse complement if gene is on - strand. Only include motifs that have been observed in individuals but are not classified elsewhere.", "examples": ["CCCTT", "CCTGT"], "type": "array", "uniqueItems": true, @@ -398,7 +397,7 @@ "interruption_gene_orientation": { "section": "Alleles", "title": "Interruption (Gene Orientation)", - "description": "Interruption(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from interruption_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Interruption(s) in the gene orientation, so the reverse complement if gene is on - strand. These should typically be reported in the context of the motif, for example a CAG -> CAA interruption would be reported as CAA, not A.", "examples": ["TTG"], "type": "array", "uniqueItems": true, diff --git a/scripts/check-loci.py b/scripts/check-loci.py index 00766a91..dd7a65e8 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -127,19 +127,19 @@ def normalise_str(in_dna): return min(all_possible) -# Canonical motif reported in the literature, typically in the gene orientation. When there -# are equivalent motifs that are circular permutations of each other, use the canonical one. -CANONICAL_MOTIFS = [ - "CAG", - "CCG", - "CGG", - "CTG", - "GCN", - "CAA" - "TTTCA", - "AAATG", -] -def standardise_motif(motif): +def get_canonical_motifs(schema): + """ + Args: + schema (dict): the loci JSON schema + Returns: + list: canonical motifs from schema file + >>> get_canonical_motifs({"canonical_motifs": ["CAG", "CCG"]}) + ['CAG', 'CCG'] + """ + canonical_motifs = schema["canonical_motifs"] + return canonical_motifs + +def standardise_motif(motif, canonical_motifs): """ Args: motif (str) @@ -169,7 +169,7 @@ def standardise_motif(motif): return motif -def get_other_motif(reference_motif, gene_motif, gene_strand): +def get_other_motif(reference_motif, gene_motif, gene_strand, canonical_motifs): """ If only one of reference_motif or gene_motif is provided, infer the other from the gene strand. If both are provided, check that they are consistent with each other and the gene strand, and if they are inconsistent update the ref motif to match the gene motif. @@ -206,7 +206,7 @@ def get_other_motif(reference_motif, gene_motif, gene_strand): else: raise AssertionError(f'Gene strand {gene_strand} is not +/-') # Check the gene_motif against the canonical motifs - gene_motif = standardise_motif(gene_motif) + gene_motif = standardise_motif(gene_motif, canonical_motifs) # Infer the reference motif from the gene motif and gene strand if gene_motif is not None and gene_motif != "": @@ -220,7 +220,7 @@ def get_other_motif(reference_motif, gene_motif, gene_strand): return reference_motif, gene_motif -def check_motif_orientation(record): +def check_motif_orientation(record, canonical_motifs): """ Args: record (dict): a dictionary containing a single locus from the STRchive json @@ -259,7 +259,8 @@ def check_motif_orientation(record): new_ref_motifs = [] new_gene_motifs = [] for old_ref_motif, old_gene_motif in zip(old_ref_motifs, old_gene_motifs): - new_ref_motif, new_gene_motif = get_other_motif(old_ref_motif, old_gene_motif, record['gene_strand']) + new_ref_motif, new_gene_motif = get_other_motif(old_ref_motif, old_gene_motif, record['gene_strand'], canonical_motifs) + new_ref_motifs.append(new_ref_motif) new_gene_motifs.append(new_gene_motif) @@ -283,7 +284,7 @@ def check_motif_orientation(record): old_ref = record['reference_motif_reference_orientation'] new_ref = [] for motif in old_ref: - new_motif = standardise_motif(motif) + new_motif = standardise_motif(motif, canonical_motifs) if motif != new_motif: sys.stderr.write(f"Updating {record['id']} reference motif from {motif} to {new_motif}\n") new_ref.append(new_motif) @@ -508,7 +509,7 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, # Check if the field contains a string that should be a list record = check_list_fields(record) - record = check_motif_orientation(record) + record = check_motif_orientation(record, canonical_motifs) # Update disease association tags based on curations if curations_json: From e2140fd0abc23fca25c01fe145bd61dae26d50c1 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 28 May 2026 10:40:06 -0600 Subject: [PATCH 13/16] Adding get_canonical_motifs to main --- scripts/check-loci.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index dd7a65e8..aec211c6 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -158,7 +158,7 @@ def standardise_motif(motif, canonical_motifs): if motif is None or len(motif) == 0: return motif motif = motif.upper() - for canonical_motif in CANONICAL_MOTIFS: + for canonical_motif in canonical_motifs: canonical_motif = canonical_motif.upper() if len(motif) != len(canonical_motif): @@ -497,6 +497,10 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, if json_schema is not None: with open(json_schema, 'r') as schema_file: schema = json.load(schema_file) + if schema is None: + raise AssertionError("--schema is required because canonical_motifs are read from the schema") + +canonical_motifs = get_canonical_motifs(schema) # Fixes to individual records for record in data: From a5309ae75a0523089f043bb6cde1304242737a1b Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 28 May 2026 10:55:36 -0600 Subject: [PATCH 14/16] Debug --- scripts/check-loci.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index aec211c6..4246c354 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -145,15 +145,15 @@ def standardise_motif(motif, canonical_motifs): motif (str) Returns: str: motif rewritten to the preferred standard arrangement if possible - >>> standardise_motif('GCC') + >>> test_motifs = ["CAG", "CCG", "CGG", "CTG", "GCN", "CAA", "TTTCA", "AAATG"] + >>> standardise_motif('GCC', test_motifs) 'CCG' - >>> standardise_motif('CGC') + >>> standardise_motif('CGC', test_motifs) 'CCG' - >>> standardise_motif('CAG') + >>> standardise_motif('CAG', test_motifs) 'CAG' - >>> standardise_motif('XYZ') + >>> standardise_motif('XYZ', test_motifs) 'XYZ' - >>> assert len(set([min(circular_permuted(motif)) for motif in CANONICAL_MOTIFS])) == len(CANONICAL_MOTIFS), f"Canonical motifs {CANONICAL_MOTIFS} are not all unique circular permutations" """ if motif is None or len(motif) == 0: return motif @@ -180,18 +180,17 @@ def get_other_motif(reference_motif, gene_motif, gene_strand, canonical_motifs): Returns: (reference_motif, gene_motif) - - If gene_strand is +, gene orientation copies reference orientation. If gene_strand is -, gene orientation is the reverse complement of reference orientation. - >>> get_other_motif('CCG', None, '+') + >>> test_motifs = ["CAG", "CCG", "CGG", "CTG", "GCN", "CAA", "TTTCA", "AAATG"] + >>> get_other_motif('CCG', None, '+', test_motifs) ('CCG', 'CCG') - >>> get_other_motif('CCG', None, '-') + >>> get_other_motif('CCG', None, '-', test_motifs) ('CCG', 'CGG') - >>> get_other_motif('CAG', None, '-') + >>> get_other_motif('CAG', None, '-', test_motifs) ('CAG', 'CTG') - >>> get_other_motif('TAG', None, 'plus') + >>> get_other_motif('TAG', None, 'plus', test_motifs) Traceback (most recent call last): ... AssertionError: Gene strand plus is not +/- @@ -500,7 +499,7 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, if schema is None: raise AssertionError("--schema is required because canonical_motifs are read from the schema") -canonical_motifs = get_canonical_motifs(schema) + canonical_motifs = get_canonical_motifs(schema) # Fixes to individual records for record in data: From faee6e60a8131191d1c86a5e407be4863e314029 Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Thu, 28 May 2026 17:02:59 +0000 Subject: [PATCH 15/16] Update data --- data/STRchive-loci.json | 56 +++++++++---------- .../STRchive-disease-loci.T2T-chm13.TRGT.bed | 16 +++--- ...TRchive-disease-loci.T2T-chm13.general.bed | 16 +++--- ...STRchive-disease-loci.T2T-chm13.longTR.bed | 16 +++--- ...chive-disease-loci.T2T-chm13.stranger.json | 16 +++--- .../STRchive-disease-loci.hg19.TRGT.bed | 16 +++--- .../STRchive-disease-loci.hg19.general.bed | 16 +++--- .../STRchive-disease-loci.hg19.longTR.bed | 16 +++--- .../STRchive-disease-loci.hg19.stranger.json | 16 +++--- .../STRchive-disease-loci.hg38.TRGT.bed | 16 +++--- .../STRchive-disease-loci.hg38.general.bed | 16 +++--- .../STRchive-disease-loci.hg38.longTR.bed | 16 +++--- .../STRchive-disease-loci.hg38.stranger.json | 16 +++--- data/ref-alleles/ref-alleles.T2T-chm13.txt | 32 +++++------ data/ref-alleles/ref-alleles.hg19.txt | 32 +++++------ data/ref-alleles/ref-alleles.hg38.txt | 32 +++++------ 16 files changed, 172 insertions(+), 172 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index a1ce318e..22126087 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -495,11 +495,11 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GAT", "GTT"], + "interruption_reference_orientation": ["GAT", "TTG"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ATC", "AAC"], + "interruption_gene_orientation": ["ATC", "CAA"], "locus_structure": [], "benign_min": 6, "benign_max": 35, @@ -627,11 +627,11 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GTT"], + "interruption_reference_orientation": ["TTG"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 14, "benign_max": 28, @@ -693,11 +693,11 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GTT", "AGG"], + "interruption_reference_orientation": ["TTG", "AGG"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC", "CCT"], + "interruption_gene_orientation": ["CAA", "CCT"], "locus_structure": [], "benign_min": 11, "benign_max": 44, @@ -1527,11 +1527,11 @@ "location_in_gene": "Intron 1 (most isoforms)", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["GAAAT"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -2687,11 +2687,11 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["AAC"], + "interruption_reference_orientation": ["CAA"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [ { "motif": "CAG", @@ -2897,11 +2897,11 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["ATTTC"], + "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_gene_orientation": [], @@ -3932,11 +3932,11 @@ "location_in_gene": "Intron 4", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["ATTTC"], + "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": ["ATTTT"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["GGGGT", "ATGGG"], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": ["ATTTT"], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["GGGGT", "ATGGG"], @@ -4008,11 +4008,11 @@ "location_in_gene": "Intron 14", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["ATTTC"], + "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], @@ -4292,11 +4292,11 @@ "location_in_gene": "Intron 4/4", "gene_strand": "-", "reference_motif_reference_orientation": ["TAAAA"], - "pathogenic_motif_reference_orientation": ["GAAAT"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAACT", "AACAT", "ACAAT", "ACACT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "AGTTT", "ATGTT", "ATTGT", "AGTGT"], "interruption_gene_orientation": [], @@ -4434,11 +4434,11 @@ "location_in_gene": "Intron 1", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["GAAAT"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "CAACT", "GAACT", "ACAAT", "AGAAT", "AACAT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "AGTTG", "AGTTC", "ATTGT", "ATTCT", "ATGTT"], "interruption_gene_orientation": [], @@ -4645,11 +4645,11 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["AAC"], + "interruption_reference_orientation": ["CAA"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 25, "benign_max": 40, @@ -4843,11 +4843,11 @@ "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["AAC"], + "interruption_reference_orientation": ["CAA"], "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 20, "benign_max": 38, @@ -4906,11 +4906,11 @@ "location_in_gene": "Intron 1/23", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["ATTTC"], + "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -5195,11 +5195,11 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], - "pathogenic_motif_reference_orientation": ["ATTTC"], + "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed index 7aa0ae8c..e87fc27b 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed @@ -1,22 +1,22 @@ chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= -chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= +chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= -chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= +chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176581179 176581224 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63956302 63956345 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= -chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= -chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= -chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= +chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= @@ -28,7 +28,7 @@ chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= @@ -49,12 +49,12 @@ chr15 32225152 32225178 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 73638636 73638724 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 93675723 93675776 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 821235 821905 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55789233 55789288 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed index 34b018c1..aebd3435 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed @@ -1,23 +1,23 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 870158 870178 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 +chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 154328121 154330802 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease chr1 155728131 155728159 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy -chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT GAAAT 274 AD Familial adult myoclonic epilepsy 2 +chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 chr2 100563685 100563738 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176581179 176581224 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly chr2 191369982 191370024 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63956302 63956333 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 131917482 131917557 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 chr3 141687011 141687054 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis -chr3 186521667 186521706 FAME4_YEATS2 YEATS2 TTTTA ATTTC 1000 AD Familial adult myoclonic epilepsy 4 +chr3 186521667 186521706 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3073603 3073687 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39318077 39318136 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41719745 41719805 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome -chr4 162693303 162693405 FAME7_RAPGEF2 RAPGEF2 TTTTA ATTTC 60 AD Familial adult myoclonic epilepsy type 7 -chr5 10295525 10295593 FAME3_MARCHF6 MARCHF6 TTTTA ATTTC 650 AD Familial adult myoclonic epilepsy type 3 +chr4 162693303 162693405 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 +chr5 10295525 10295593 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 chr5 178096748 178096792 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy @@ -29,7 +29,7 @@ chr7 27335813 27335849 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital synd chr7 27335912 27335954 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A chr8 105716409 105716441 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 -chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA GAAAT 105 AD Familial adult myoclonic epilepsy type 1 +chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 chr9 27584063 27584155 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 81210834 81210861 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII @@ -50,12 +50,12 @@ chr15 32225152 32225178 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemp chr15 86324038 86324057 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 chr15 87088411 87088452 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 -chr16 24890366 24890430 FAME6_TNRC6A TNRC6A TTTTA ATTTC 1100 AD Familial adult myoclonic epilepsy type 6 +chr16 24890366 24890430 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 72284666 72284761 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 73638636 73638724 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 93675723 93675776 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 -chr17 17754961 17755053 FAME8_RAI1 RAI1 TTTTA ATTTC 9 AD Familial adult myoclonic epilepsy type 8 +chr17 17754961 17755053 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 81047404 81047534 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 821235 821905 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 55789233 55789288 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed index 33d53b30..8cd57661 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed @@ -1,22 +1,22 @@ chr1 870159 870178 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 -chr1 57245936 57245973 GAAAT,AAAAT SCA37_DAB1 +chr1 57245936 57245973 TGAAA,AAAAT SCA37_DAB1 chr1 94266545 94266567 CCG OPDM5_ABCD3 chr1 148519696 148519738 CGG NIID_NOTCH2NLC chr1 154328122 154330802 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 155728132 155728159 CCGGG,GGGCC NME_NAXE -chr2 96703675 96703732 GAAAT,AAAAT FAME2_STARD7 +chr2 96703675 96703732 TGAAA,AAAAT FAME2_STARD7 chr2 100563686 100563738 CCG FRA2A_AFF3 chr2 176581180 176581224 GCN SD5_HOXD13 chr2 191369983 191370024 CAG GDPAG_GLS chr3 63956303 63956333 CAG SCA7_ATXN7 chr3 131917483 131917557 CAGG DM2_CNBP chr3 141687012 141687054 NGC,GCN BPES_FOXL2 -chr3 186521668 186521706 ATTTC,TTTTA FAME4_YEATS2 +chr3 186521668 186521706 TTTCA,TTTTA FAME4_YEATS2 chr4 3073604 3073687 CAG HD_HTT chr4 39318078 39318136 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 chr4 41719746 41719805 NGC,GCN CCHS_PHOX2B -chr4 162693304 162693405 ATTTC,TTTTA FAME7_RAPGEF2 -chr5 10295526 10295593 ATTTC,TTTTA FAME3_MARCHF6 +chr4 162693304 162693405 TTTCA,TTTTA FAME7_RAPGEF2 +chr5 10295526 10295593 TTTCA,TTTTA FAME3_MARCHF6 chr5 147414734 147414780 CTG SCA12_PPP2R2B chr5 178096749 178096792 CCG OPDM_FAM193B chr6 13201717 13201843 CCG OPDM_TBC1D7 @@ -28,7 +28,7 @@ chr7 27335814 27335849 NGC,GCN HFG_HOXA13-II chr7 27335913 27335954 NGC,GCN HFG_HOXA13-I chr7 56047901 56047939 CGG FRA7A_ZNF713 chr8 105716410 105716441 CCG OPDM1_LRP12 -chr8 119495248 119495353 GAAAT,TAAAA FAME1_SAMD12 +chr8 119495248 119495353 TGAAA,TAAAA FAME1_SAMD12 chr9 27584064 27584155 CCCCGG,GGCCCC FTDALS1_C9orf72 chr9 81210835 81210861 AAG,GAA FRDA_FXN chr9 142886569 142886595 CCG HSAN-VIII_PRDM12 @@ -49,12 +49,12 @@ chr15 32225153 32225178 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 86324039 86324057 GTTT,TTTG CHNG3_MIR7-2 chr15 87088412 87088452 CTG CPEO_POLG chr16 17477910 17478002 CCG DBQD2_XYLT1 -chr16 24890367 24890430 ATTTC,TTTTA FAME6_TNRC6A +chr16 24890367 24890430 TTTCA,TTTTA FAME6_TNRC6A chr16 72284667 72284761 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 73638637 73638724 CAG SCA_THAP11 chr16 78605503 78605569 CCG SCA4_ZFHX3 chr16 93675724 93675776 CTG HDL2_JPH3 -chr17 17754962 17755053 ATTTC,ATTTT,TTTTA FAME8_RAI1 +chr17 17754962 17755053 TTTCA,ATTTT,TTTTA FAME8_RAI1 chr17 81047405 81047534 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 821236 821905 GATGGT CPUM_TYMS chr18 55789234 55789288 CAG FECD3_TCF4 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json index 878bdc51..86dcfe18 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57245970-57245973", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96703674-96703677", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -172,7 +172,7 @@ "PathologicRegion": "chr3:186521702-186521706", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME4", "NormalMax": 999, "PathologicMin": 1000, @@ -230,7 +230,7 @@ "PathologicRegion": "chr4:162693388-162693405", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME7", "NormalMax": 59, "PathologicMin": 60, @@ -245,7 +245,7 @@ "PathologicRegion": "chr5:10295585-10295593", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME3", "NormalMax": 649, "PathologicMin": 650, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:119495347-119495353", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -675,7 +675,7 @@ "PathologicRegion": "chr16:24890416-24890430", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME6", "NormalMax": 1099, "PathologicMin": 1100, @@ -742,7 +742,7 @@ "PathologicRegion": "chr17:17755051-17755053", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME8", "NormalMax": 8, "PathologicMin": 9, diff --git a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed index 28e79f7e..cec298cf 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed @@ -1,22 +1,22 @@ chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= -chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= +chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= -chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= +chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176957786 176957831 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63898360 63898403 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 128891419 128891577 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= -chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3076603 3076696 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= -chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= -chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= +chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= @@ -28,7 +28,7 @@ chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= @@ -49,12 +49,12 @@ chr15 34711626 34711652 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67876765 67876853 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87637888 87637935 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 53253384 53253460 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index db0aac51..aaa22850 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -1,23 +1,23 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 +chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease chr1 156561557 156561575 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy -chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT GAAAT 274 AD Familial adult myoclonic epilepsy 2 +chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 chr2 100721260 100721286 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176957786 176957831 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly chr2 191745598 191745646 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63898360 63898391 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 128891419 128891499 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 chr3 138664861 138664904 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis -chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA ATTTC 1000 AD Familial adult myoclonic epilepsy 4 +chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3076603 3076660 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome -chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA ATTTC 60 AD Familial adult myoclonic epilepsy type 7 -chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA ATTTC 650 AD Familial adult myoclonic epilepsy type 3 +chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 +chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 chr5 176981490 176981532 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy @@ -29,7 +29,7 @@ chr7 27239444 27239480 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital synd chr7 27239543 27239585 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A chr8 105601198 105601227 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 -chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA GAAAT 105 AD Familial adult myoclonic epilepsy type 1 +chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 71652202 71652220 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII @@ -50,12 +50,12 @@ chr15 34711626 34711652 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemp chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 chr15 89876819 89876860 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 -chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA ATTTC 1100 AD Familial adult myoclonic epilepsy type 6 +chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 67876765 67876853 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87637888 87637935 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 -chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA ATTTC 9 AD Familial adult myoclonic epilepsy type 8 +chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 666891 667632 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 53253384 53253460 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 diff --git a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed index 82005ced..4d1c9eea 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed @@ -1,22 +1,22 @@ chr1 1371179 1371198 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 -chr1 57832716 57832793 GAAAT,AAAAT SCA37_DAB1 +chr1 57832716 57832793 TGAAA,AAAAT SCA37_DAB1 chr1 94883978 94884000 CCG OPDM5_ABCD3 chr1 145209324 145209354 CGG NIID_NOTCH2NLC chr1 155160982 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156561558 156561575 CCGGG,GGGCC NME_NAXE -chr2 96862805 96862862 GAAAT,AAAAT FAME2_STARD7 +chr2 96862805 96862862 TGAAA,AAAAT FAME2_STARD7 chr2 100721261 100721286 CCG FRA2A_AFF3 chr2 176957787 176957831 GCN SD5_HOXD13 chr2 191745599 191745646 CAG GDPAG_GLS chr3 63898361 63898391 CAG SCA7_ATXN7 chr3 128891420 128891499 CAGG DM2_CNBP chr3 138664862 138664904 NGC,GCN BPES_FOXL2 -chr3 183429976 183430014 ATTTC,TTTTA FAME4_YEATS2 +chr3 183429976 183430014 TTTCA,TTTTA FAME4_YEATS2 chr4 3076604 3076660 CAG HD_HTT chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 chr4 41747990 41748049 NGC,GCN CCHS_PHOX2B -chr4 160263679 160263770 ATTTC,TTTTA FAME7_RAPGEF2 -chr5 10356456 10356523 ATTTC,TTTTA FAME3_MARCHF6 +chr4 160263679 160263770 TTTCA,TTTTA FAME7_RAPGEF2 +chr5 10356456 10356523 TTTCA,TTTTA FAME3_MARCHF6 chr5 146258291 146258322 CTG SCA12_PPP2R2B chr5 176981491 176981532 CCG OPDM_FAM193B chr6 13328709 13328835 CCG OPDM_TBC1D7 @@ -28,7 +28,7 @@ chr7 27239445 27239480 NGC,GCN HFG_HOXA13-II chr7 27239544 27239585 NGC,GCN HFG_HOXA13-I chr7 55955294 55955332 CGG FRA7A_ZNF713 chr8 105601199 105601227 CCG OPDM1_LRP12 -chr8 119379052 119379157 GAAAT,TAAAA FAME1_SAMD12 +chr8 119379052 119379157 TGAAA,TAAAA FAME1_SAMD12 chr9 27573483 27573544 CCCCGG,GGCCCC FTDALS1_C9orf72 chr9 71652203 71652220 AAG,GAA FRDA_FXN chr9 133556993 133557028 CCG HSAN-VIII_PRDM12 @@ -49,12 +49,12 @@ chr15 34711627 34711652 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 89112665 89112683 GTTT,TTTG CHNG3_MIR7-2 chr15 89876820 89876860 CTG CPEO_POLG chr16 17564765 17564779 CCG DBQD2_XYLT1 -chr16 24624760 24624853 ATTTC,TTTTA FAME6_TNRC6A +chr16 24624760 24624853 TTTCA,TTTTA FAME6_TNRC6A chr16 66524300 66524369 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 67876766 67876853 CAG SCA_THAP11 chr16 72821594 72821657 CCG SCA4_ZFHX3 chr16 87637889 87637935 CTG HDL2_JPH3 -chr17 17711673 17711774 ATTTC,ATTTT,TTTTA FAME8_RAI1 +chr17 17711673 17711774 TTTCA,ATTTT,TTTTA FAME8_RAI1 chr17 78120809 78120938 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 666892 667632 GATGGT CPUM_TYMS chr18 53253385 53253460 CAG FECD3_TCF4 diff --git a/data/catalogs/STRchive-disease-loci.hg19.stranger.json b/data/catalogs/STRchive-disease-loci.hg19.stranger.json index 0bdbb4e5..a4eaf755 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg19.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57832750-57832793", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96862804-96862807", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -172,7 +172,7 @@ "PathologicRegion": "chr3:183430010-183430014", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME4", "NormalMax": 999, "PathologicMin": 1000, @@ -230,7 +230,7 @@ "PathologicRegion": "chr4:160263763-160263770", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME7", "NormalMax": 59, "PathologicMin": 60, @@ -245,7 +245,7 @@ "PathologicRegion": "chr5:10356515-10356523", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME3", "NormalMax": 649, "PathologicMin": 650, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:119379151-119379157", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -675,7 +675,7 @@ "PathologicRegion": "chr16:24624809-24624853", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME6", "NormalMax": 1099, "PathologicMin": 1100, @@ -742,7 +742,7 @@ "PathologicRegion": "chr17:17711762-17711774", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME8", "NormalMax": 8, "PathologicMin": 9, diff --git a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed index 41b859a7..46e289e9 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed @@ -1,22 +1,22 @@ chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= -chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= +chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= -chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,GAAAT;STRUC= +chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176093058 176093103 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63912684 63912727 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 129172576 129172734 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= -chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3074876 3074969 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= -chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= -chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= +chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= @@ -28,7 +28,7 @@ chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= -chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA,GAAAT;STRUC= +chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= @@ -49,12 +49,12 @@ chr15 34419425 34419451 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= -chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA,ATTTC;STRUC= +chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67842862 67842950 ID=SCA_THAP11;MOTIFS=CAG;STRUC= chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87604282 87604329 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTC,ATTTT;STRUC= +chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55586153 55586229 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg38.general.bed b/data/catalogs/STRchive-disease-loci.hg38.general.bed index 5aaa7052..c2f85cf8 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.general.bed @@ -1,23 +1,23 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease chr1 1435798 1435818 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 +chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 chr1 155188505 155192239 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease chr1 156591765 156591783 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy -chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT GAAAT 274 AD Familial adult myoclonic epilepsy 2 +chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 chr2 100104798 100104824 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176093058 176093103 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly chr2 190880872 190880920 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63912684 63912715 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 129172576 129172656 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 chr3 138946019 138946062 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis -chr3 183712187 183712226 FAME4_YEATS2 YEATS2 TTTTA ATTTC 1000 AD Familial adult myoclonic epilepsy 4 +chr3 183712187 183712226 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3074876 3074933 HD_HTT HTT CAG CAG 36 AD Huntington disease chr4 39348424 39348483 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome chr4 41745972 41746032 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome -chr4 159342526 159342618 FAME7_RAPGEF2 RAPGEF2 TTTTA ATTTC 60 AD Familial adult myoclonic epilepsy type 7 -chr5 10356343 10356411 FAME3_MARCHF6 MARCHF6 TTTTA ATTTC 650 AD Familial adult myoclonic epilepsy type 3 +chr4 159342526 159342618 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 +chr5 10356343 10356411 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 chr5 177554489 177554531 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy @@ -29,7 +29,7 @@ chr7 27199825 27199861 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital synd chr7 27199924 27199966 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A chr8 104588970 104588999 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 -chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA GAAAT 105 AD Familial adult myoclonic epilepsy type 1 +chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 chr9 27573484 27573546 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) chr9 69037286 69037304 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII @@ -50,12 +50,12 @@ chr15 34419425 34419451 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemp chr15 88569433 88569452 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 chr15 89333588 89333629 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 -chr16 24613438 24613532 FAME6_TNRC6A TNRC6A TTTTA ATTTC 1100 AD Familial adult myoclonic epilepsy type 6 +chr16 24613438 24613532 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 chr16 66490396 66490466 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 67842862 67842950 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87604282 87604329 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 -chr17 17808358 17808460 FAME8_RAI1 RAI1 TTTTA ATTTC 9 AD Familial adult myoclonic epilepsy type 8 +chr17 17808358 17808460 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 chr17 80147009 80147139 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 666891 667632 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 55586153 55586229 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 diff --git a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed index 6dd9029c..49ae12bc 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed @@ -1,22 +1,22 @@ chr1 1435799 1435818 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 -chr1 57367044 57367121 GAAAT,AAAAT SCA37_DAB1 +chr1 57367044 57367121 TGAAA,AAAAT SCA37_DAB1 chr1 94418422 94418444 CCG OPDM5_ABCD3 chr1 149390803 149390842 CGG NIID_NOTCH2NLC chr1 155188506 155192239 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 chr1 156591766 156591783 CCGGG,GGGCC NME_NAXE -chr2 96197067 96197124 GAAAT,AAAAT FAME2_STARD7 +chr2 96197067 96197124 TGAAA,AAAAT FAME2_STARD7 chr2 100104799 100104824 CCG FRA2A_AFF3 chr2 176093059 176093103 GCN SD5_HOXD13 chr2 190880873 190880920 CAG GDPAG_GLS chr3 63912685 63912715 CAG SCA7_ATXN7 chr3 129172577 129172656 CAGG DM2_CNBP chr3 138946020 138946062 NGC,GCN BPES_FOXL2 -chr3 183712188 183712226 ATTTC,TTTTA FAME4_YEATS2 +chr3 183712188 183712226 TTTCA,TTTTA FAME4_YEATS2 chr4 3074877 3074933 CAG HD_HTT chr4 39348425 39348483 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 chr4 41745973 41746032 NGC,GCN CCHS_PHOX2B -chr4 159342527 159342618 ATTTC,TTTTA FAME7_RAPGEF2 -chr5 10356344 10356411 ATTTC,TTTTA FAME3_MARCHF6 +chr4 159342527 159342618 TTTCA,TTTTA FAME7_RAPGEF2 +chr5 10356344 10356411 TTTCA,TTTTA FAME3_MARCHF6 chr5 146878728 146878759 CTG SCA12_PPP2R2B chr5 177554490 177554531 CCG OPDM_FAM193B chr6 13328477 13328603 CCG OPDM_TBC1D7 @@ -28,7 +28,7 @@ chr7 27199826 27199861 NGC,GCN HFG_HOXA13-II chr7 27199925 27199966 NGC,GCN HFG_HOXA13-I chr7 55887601 55887639 CGG FRA7A_ZNF713 chr8 104588971 104588999 CCG OPDM1_LRP12 -chr8 118366813 118366918 GAAAT,TAAAA FAME1_SAMD12 +chr8 118366813 118366918 TGAAA,TAAAA FAME1_SAMD12 chr9 27573485 27573546 CCCCGG,GGCCCC FTDALS1_C9orf72 chr9 69037287 69037304 AAG,GAA FRDA_FXN chr9 130681606 130681641 CCG HSAN-VIII_PRDM12 @@ -49,12 +49,12 @@ chr15 34419426 34419451 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A chr15 88569434 88569452 GTTT,TTTG CHNG3_MIR7-2 chr15 89333589 89333629 CTG CPEO_POLG chr16 17470908 17470922 CCG DBQD2_XYLT1 -chr16 24613439 24613532 ATTTC,TTTTA FAME6_TNRC6A +chr16 24613439 24613532 TTTCA,TTTTA FAME6_TNRC6A chr16 66490397 66490466 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 67842863 67842950 CAG SCA_THAP11 chr16 72787695 72787758 CCG SCA4_ZFHX3 chr16 87604283 87604329 CTG HDL2_JPH3 -chr17 17808359 17808460 ATTTC,ATTTT,TTTTA FAME8_RAI1 +chr17 17808359 17808460 TTTCA,ATTTT,TTTTA FAME8_RAI1 chr17 80147010 80147139 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 666892 667632 GATGGT CPUM_TYMS chr18 55586154 55586229 CAG FECD3_TCF4 diff --git a/data/catalogs/STRchive-disease-loci.hg38.stranger.json b/data/catalogs/STRchive-disease-loci.hg38.stranger.json index 4dec48a6..530a264f 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg38.stranger.json @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57367078-57367121", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96197066-96197069", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -172,7 +172,7 @@ "PathologicRegion": "chr3:183712222-183712226", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME4", "NormalMax": 999, "PathologicMin": 1000, @@ -230,7 +230,7 @@ "PathologicRegion": "chr4:159342611-159342618", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME7", "NormalMax": 59, "PathologicMin": 60, @@ -245,7 +245,7 @@ "PathologicRegion": "chr5:10356403-10356411", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME3", "NormalMax": 649, "PathologicMin": 650, @@ -403,7 +403,7 @@ "PathologicRegion": "chr8:118366912-118366918", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "FAME1", "NormalMax": 104, "PathologicMin": 105, @@ -675,7 +675,7 @@ "PathologicRegion": "chr16:24613488-24613532", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME6", "NormalMax": 1099, "PathologicMin": 1100, @@ -742,7 +742,7 @@ "PathologicRegion": "chr17:17808448-17808460", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "ATTTC", + "DisplayRU": "TTTCA", "Disease": "FAME8", "NormalMax": 8, "PathologicMin": 9, diff --git a/data/ref-alleles/ref-alleles.T2T-chm13.txt b/data/ref-alleles/ref-alleles.T2T-chm13.txt index 4c866c74..e032892d 100644 --- a/data/ref-alleles/ref-alleles.T2T-chm13.txt +++ b/data/ref-alleles/ref-alleles.T2T-chm13.txt @@ -5,8 +5,8 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57245935 57245973 AAAAT,GAAAT STRchive -chr1 57245935 57245973 AAAAT,GAAAT TRGT +chr1 57245935 57245973 AAAAT,TGAAA STRchive +chr1 57245935 57245973 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC @@ -35,8 +35,8 @@ ACATGCGCCG GGGCC GGGCC GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96703674 96703732 AAAAT,GAAAT STRchive -chr2 96703674 96703732 AAATG,AAAAT,GAAAT TRGT +chr2 96703674 96703732 AAAAT,TGAAA STRchive +chr2 96703674 96703732 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA @@ -77,8 +77,8 @@ CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG FAME4_YEATS2 -chr3 186521667 186521706 TTTTA,ATTTC STRchive -chr3 186521667 186521706 TTTTA,TTTCA,ATTTC TRGT +chr3 186521667 186521706 TTTTA,TTTCA STRchive +chr3 186521667 186521706 TTTTA,TTTCA TRGT TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA @@ -101,14 +101,14 @@ CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT G CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC FAME7_RAPGEF2 -chr4 162693303 162693405 TTTTA,ATTTC STRchive -chr4 162693303 162693405 TTTTA,TTTCA,ATTTC TRGT +chr4 162693303 162693405 TTTTA,TTTCA STRchive +chr4 162693303 162693405 TTTTA,TTTCA TRGT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT FAME3_MARCHF6 -chr5 10295525 10295593 TTTTA,ATTTC STRchive -chr5 10295525 10295593 TTTTA,TTTCA,ATTTC TRGT +chr5 10295525 10295593 TTTTA,TTTCA STRchive +chr5 10295525 10295593 TTTTA,TTTCA TRGT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA @@ -179,8 +179,8 @@ AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 -chr8 119495247 119495353 TAAAA,GAAAT STRchive -chr8 119495247 119495353 TAAAA,TGAAA,GAAAT TRGT +chr8 119495247 119495353 TAAAA,TGAAA STRchive +chr8 119495247 119495353 TAAAA,TGAAA TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -305,8 +305,8 @@ TCCCGCTCGG G CCG CCG CCG CCG CCG CCG CCTCGGCTCG CCG CTGCTCCTCCT CCG CCG CCG CCG TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG FAME6_TNRC6A -chr16 24890366 24890430 TTTTA,ATTTC STRchive -chr16 24890366 24890430 TTTTA,TTTCA,ATTTC TRGT +chr16 24890366 24890430 TTTTA,TTTCA STRchive +chr16 24890366 24890430 TTTTA,TTTCA TRGT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT @@ -335,8 +335,8 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAGATGGTT FAME8_RAI1 -chr17 17754961 17755053 TTTTA,ATTTC STRchive -chr17 17754961 17755053 TTTTA,TTTCA,ATTTC,ATTTT TRGT +chr17 17754961 17755053 TTTTA,TTTCA STRchive +chr17 17754961 17755053 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC diff --git a/data/ref-alleles/ref-alleles.hg19.txt b/data/ref-alleles/ref-alleles.hg19.txt index b1255aa5..8263f3de 100644 --- a/data/ref-alleles/ref-alleles.hg19.txt +++ b/data/ref-alleles/ref-alleles.hg19.txt @@ -5,8 +5,8 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57832715 57832793 AAAAT,GAAAT STRchive -chr1 57832715 57832793 AAAAT,GAAAT TRGT +chr1 57832715 57832793 AAAAT,TGAAA STRchive +chr1 57832715 57832793 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC @@ -35,8 +35,8 @@ ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96862804 96862862 AAAAT,GAAAT STRchive -chr2 96862804 96862862 AAATG,AAAAT,GAAAT TRGT +chr2 96862804 96862862 AAAAT,TGAAA STRchive +chr2 96862804 96862862 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA @@ -77,8 +77,8 @@ CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG FAME4_YEATS2 -chr3 183429975 183430014 TTTTA,ATTTC STRchive -chr3 183429975 183430014 TTTTA,TTTCA,ATTTC TRGT +chr3 183429975 183430014 TTTTA,TTTCA STRchive +chr3 183429975 183430014 TTTTA,TTTCA TRGT TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA @@ -101,14 +101,14 @@ CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT G CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC FAME7_RAPGEF2 -chr4 160263678 160263770 TTTTA,ATTTC STRchive -chr4 160263678 160263770 TTTTA,TTTCA,ATTTC TRGT +chr4 160263678 160263770 TTTTA,TTTCA STRchive +chr4 160263678 160263770 TTTTA,TTTCA TRGT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT FAME3_MARCHF6 -chr5 10356455 10356523 TTTTA,ATTTC STRchive -chr5 10356455 10356523 TTTTA,TTTCA,ATTTC TRGT +chr5 10356455 10356523 TTTTA,TTTCA STRchive +chr5 10356455 10356523 TTTTA,TTTCA TRGT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA @@ -179,8 +179,8 @@ ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 -chr8 119379051 119379157 TAAAA,GAAAT STRchive -chr8 119379051 119379157 TAAAA,TGAAA,GAAAT TRGT +chr8 119379051 119379157 TAAAA,TGAAA STRchive +chr8 119379051 119379157 TAAAA,TGAAA TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -305,8 +305,8 @@ TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A -chr16 24624759 24624853 TTTTA,ATTTC STRchive -chr16 24624759 24624853 TTTTA,TTTCA,ATTTC TRGT +chr16 24624759 24624853 TTTTA,TTTCA STRchive +chr16 24624759 24624853 TTTTA,TTTCA TRGT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT @@ -335,8 +335,8 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAG AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAGATGGTT FAME8_RAI1 -chr17 17711672 17711774 TTTTA,ATTTC STRchive -chr17 17711672 17711774 TTTTA,TTTCA,ATTTC,ATTTT TRGT +chr17 17711672 17711774 TTTTA,TTTCA STRchive +chr17 17711672 17711774 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC diff --git a/data/ref-alleles/ref-alleles.hg38.txt b/data/ref-alleles/ref-alleles.hg38.txt index 629a50bc..9437d479 100644 --- a/data/ref-alleles/ref-alleles.hg38.txt +++ b/data/ref-alleles/ref-alleles.hg38.txt @@ -5,8 +5,8 @@ GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57367043 57367121 AAAAT,GAAAT STRchive -chr1 57367043 57367121 AAAAT,GAAAT TRGT +chr1 57367043 57367121 AAAAT,TGAAA STRchive +chr1 57367043 57367121 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC @@ -35,8 +35,8 @@ ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96197066 96197124 AAAAT,GAAAT STRchive -chr2 96197066 96197124 AAATG,AAAAT,GAAAT TRGT +chr2 96197066 96197124 AAAAT,TGAAA STRchive +chr2 96197066 96197124 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA @@ -77,8 +77,8 @@ CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG FAME4_YEATS2 -chr3 183712187 183712226 TTTTA,ATTTC STRchive -chr3 183712187 183712226 TTTTA,TTTCA,ATTTC TRGT +chr3 183712187 183712226 TTTTA,TTTCA STRchive +chr3 183712187 183712226 TTTTA,TTTCA TRGT TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA TTTTATGTTC TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT TTGAGACAGA @@ -101,14 +101,14 @@ CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT G CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC FAME7_RAPGEF2 -chr4 159342526 159342618 TTTTA,ATTTC STRchive -chr4 159342526 159342618 TTTTA,TTTCA,ATTTC TRGT +chr4 159342526 159342618 TTTTA,TTTCA STRchive +chr4 159342526 159342618 TTTTA,TTTCA TRGT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT CTATCATAGC TTTTA TTTTA TTTTA TTTTA TTTTA TATTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TT ACTAGAGGAT FAME3_MARCHF6 -chr5 10356343 10356411 TTTTA,ATTTC STRchive -chr5 10356343 10356411 TTTTA,TTTCA,ATTTC TRGT +chr5 10356343 10356411 TTTTA,TTTCA STRchive +chr5 10356343 10356411 TTTTA,TTTCA TRGT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA @@ -179,8 +179,8 @@ ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 -chr8 118366812 118366918 TAAAA,GAAAT STRchive -chr8 118366812 118366918 TAAAA,TGAAA,GAAAT TRGT +chr8 118366812 118366918 TAAAA,TGAAA STRchive +chr8 118366812 118366918 TAAAA,TGAAA TRGT ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA @@ -305,8 +305,8 @@ TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A -chr16 24613438 24613532 TTTTA,ATTTC STRchive -chr16 24613438 24613532 TTTTA,TTTCA,ATTTC TRGT +chr16 24613438 24613532 TTTTA,TTTCA STRchive +chr16 24613438 24613532 TTTTA,TTTCA TRGT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT @@ -335,8 +335,8 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAG AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAGATGGTT FAME8_RAI1 -chr17 17808358 17808460 TTTTA,ATTTC STRchive -chr17 17808358 17808460 TTTTA,TTTCA,ATTTC,ATTTT TRGT +chr17 17808358 17808460 TTTTA,TTTCA STRchive +chr17 17808358 17808460 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC From a0dd36f1392f05c532699f47297b0aa89c2cfefd Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 28 May 2026 16:22:20 -0600 Subject: [PATCH 16/16] Preserving schema-optional logic --- scripts/check-loci.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/check-loci.py b/scripts/check-loci.py index 4246c354..a2945661 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -136,8 +136,7 @@ def get_canonical_motifs(schema): >>> get_canonical_motifs({"canonical_motifs": ["CAG", "CCG"]}) ['CAG', 'CCG'] """ - canonical_motifs = schema["canonical_motifs"] - return canonical_motifs + return schema.get("canonical_motifs", []) def standardise_motif(motif, canonical_motifs): """ @@ -496,10 +495,10 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, if json_schema is not None: with open(json_schema, 'r') as schema_file: schema = json.load(schema_file) - if schema is None: - raise AssertionError("--schema is required because canonical_motifs are read from the schema") - canonical_motifs = get_canonical_motifs(schema) + canonical_motifs = [] + if schema is not None: + canonical_motifs = get_canonical_motifs(schema) # Fixes to individual records for record in data: