diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 30e17c29..22126087 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -29,8 +29,8 @@ "year": "2023 [@pmid:39068203]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -95,8 +95,8 @@ "year": "1993 [@pmid:8334699]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -161,8 +161,8 @@ "year": "2014 [@pmid:24763282]", "location_in_gene": "Intron 3", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -227,12 +227,12 @@ "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", "location_in_gene": "Coding Exon 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -293,12 +293,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 110-115", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -359,12 +359,12 @@ "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -430,7 +430,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -495,11 +495,11 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["ATG", "TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "interruption_reference_orientation": ["GAT", "TTG"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["ATC", "AAC"], + "interruption_gene_orientation": ["ATC", "CAA"], "locus_structure": [], "benign_min": 6, "benign_max": 35, @@ -561,7 +561,7 @@ "pathogenic_motif_reference_orientation": ["ATTCT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "TTTCT", "ATTTTCT", "ATTCTCT", "GTTTCT", "CTTCT", "ATGCT"], + "interruption_reference_orientation": ["ATCCT", "ATCCC", "ATTCC", "ATTTCT", "ATATTCT", "ATTCTTCT", "ATTGT", "CTTTT", "ATTTTCT", "ATTCTCT", "CTGTTT", "CTCTT", "ATGCT"], "pathogenic_motif_gene_orientation": ["ATTCT"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -628,10 +628,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 14, "benign_max": 28, @@ -694,10 +694,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["TTG", "AGG"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC", "CCT"], + "interruption_gene_orientation": ["CAA", "CCT"], "locus_structure": [], "benign_min": 11, "benign_max": 44, @@ -760,7 +760,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -835,7 +835,7 @@ "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["CCG", "CTA", "CTC", "CCA", "CTT"], + "interruption_reference_orientation": ["CCG", "ACT", "CCT", "ACC", "CTT"], "pathogenic_motif_gene_orientation": ["CTG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -908,9 +908,9 @@ "location_in_gene": "Intron 4/4", "gene_strand": "+", "reference_motif_reference_orientation": ["AATAA"], - "pathogenic_motif_reference_orientation": ["TGGAA", "TAGAA"], + "pathogenic_motif_reference_orientation": ["AATGG", "AATAG"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AGAAA", "ATAAG", "TAAAC", "TAACA", "TACAA", "TCAAA", "TGCAA"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAATG", "AAAAG", "AAGAT", "AAACT", "AACAT", "AATAC", "AAATC", "AATGC"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["AATGG", "AATAG"], "benign_motif_gene_orientation": [], @@ -974,7 +974,7 @@ "location_in_gene": "Intron 1 or 5' UTR depending on transcript", "gene_strand": "-", "reference_motif_reference_orientation": ["GGCCCC"], - "pathogenic_motif_reference_orientation": ["GGCCCC"], + "pathogenic_motif_reference_orientation": ["CCCCGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1044,7 +1044,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -1172,7 +1172,7 @@ "location_in_gene": "Exon 11", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], - "pathogenic_motif_reference_orientation": ["GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG"], + "pathogenic_motif_reference_orientation": ["ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1245,7 +1245,7 @@ "reference_motif_reference_orientation": ["CAGG"], "pathogenic_motif_reference_orientation": ["CAGG"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["CAGA"], + "unknown_motif_reference_orientation": ["ACAG"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCTG"], "benign_motif_gene_orientation": [], @@ -1324,7 +1324,7 @@ "location_in_gene": "Coding Exon 13", "gene_strand": "-", "reference_motif_reference_orientation": ["GTC"], - "pathogenic_motif_reference_orientation": ["GTC"], + "pathogenic_motif_reference_orientation": ["CGT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1527,11 +1527,11 @@ "location_in_gene": "Intron 1 (most isoforms)", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["GAAAT"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["AAAAA"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -1602,8 +1602,8 @@ "year": "2007 [@pmid:17236128]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1669,7 +1669,7 @@ "location_in_gene": "Intron 62", "gene_strand": "-", "reference_motif_reference_orientation": ["TTC"], - "pathogenic_motif_reference_orientation": ["TTC"], + "pathogenic_motif_reference_orientation": ["CTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1811,7 +1811,7 @@ "location_in_gene": "5' UTR", "gene_strand": "-", "reference_motif_reference_orientation": ["CCTCGCTGTGCCGCTGCCGA"], - "pathogenic_motif_reference_orientation": ["CCTCGCTGTGCCGCTGCCGA"], + "pathogenic_motif_reference_orientation": ["GCCGCTGCCGACCTCGCTGT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1876,8 +1876,8 @@ "year": "2026 [@pmid:39868092]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -1943,10 +1943,10 @@ "location_in_gene": "Intron 1", "gene_strand": "-", "reference_motif_reference_orientation": ["GAA"], - "pathogenic_motif_reference_orientation": ["GAA"], - "benign_motif_reference_orientation": ["GGA", "GCA"], + "pathogenic_motif_reference_orientation": ["AAG"], + "benign_motif_reference_orientation": ["AGG", "CAG"], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GAG", "GAAGGA", "GAAGAAAGAA", "GAAAAGAAGAAGGAAGAAGGAA", "GAAAAGAAGAAGGAA", "GCAGAAGAAGAAGAA"], + "interruption_reference_orientation": ["AGG", "AGAAGG", "AAAGAAGAAG", "AAGAAAAGAAGAAGGAAGAAGG", "AAGAAAAGAAGAAGG", "AAGAAGAAGAAGCAG"], "pathogenic_motif_gene_orientation": ["CTT"], "benign_motif_gene_orientation": ["CCT", "CTG"], "unknown_motif_gene_orientation": [], @@ -2074,12 +2074,12 @@ "year": "2003 [@pmid:12529855]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2141,7 +2141,7 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GAA"], - "pathogenic_motif_reference_orientation": ["GAA"], + "pathogenic_motif_reference_orientation": ["AAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2282,12 +2282,12 @@ "year": "2019 [@pmid:30970188]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCA"], - "pathogenic_motif_reference_orientation": ["GCA"], + "reference_motif_reference_orientation": ["CAG"], + "pathogenic_motif_reference_orientation": ["CAG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2419,12 +2419,12 @@ "year": "2004 [@pmid:15385446]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2485,12 +2485,12 @@ "year": "2003 [@pmid:12676922]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2551,12 +2551,12 @@ "year": "2000 [@pmid:10839976]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2622,7 +2622,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -2688,10 +2688,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [ { "motif": "CAG", @@ -2830,8 +2830,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["CGC"], - "pathogenic_motif_reference_orientation": ["CGC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -2899,9 +2899,9 @@ "reference_motif_reference_orientation": ["TTTTA"], "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["ATGTT", "TAGTT", "TTTTG", "TTTTT"], + "unknown_motif_reference_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["ATGTT", "AGTTT", "GTTTT", "TTTTT"], "interruption_gene_orientation": [], @@ -2973,7 +2973,7 @@ "location_in_gene": "Non-coding", "gene_strand": "-", "reference_motif_reference_orientation": ["TTTG"], - "pathogenic_motif_reference_orientation": ["TTTG"], + "pathogenic_motif_reference_orientation": ["GTTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3039,8 +3039,8 @@ "location_in_gene": "Coding Exon 2", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG"], - "pathogenic_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA"], - "benign_motif_reference_orientation": ["GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA"], + "pathogenic_motif_reference_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], + "benign_motif_reference_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG"], @@ -3105,7 +3105,7 @@ "location_in_gene": "5' UTR", "gene_strand": "+", "reference_motif_reference_orientation": ["GGGCC"], - "pathogenic_motif_reference_orientation": ["GGGCC"], + "pathogenic_motif_reference_orientation": ["CCGGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3170,15 +3170,15 @@ "year": "2019 [@pmid:30342764]", "location_in_gene": "Coding Exon 1/Intron 1 depending on transcript", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GCA", "GCT", "GCC"], + "interruption_reference_orientation": ["CAG", "CTG", "CCG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGC", "CTG", "CCG"], + "interruption_gene_orientation": ["CAG", "CTG", "CCG"], "locus_structure": [], "benign_min": 6, "benign_max": 10, @@ -3237,10 +3237,10 @@ "location_in_gene": "Intron 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCCTG"], - "pathogenic_motif_reference_orientation": ["GGCCTG"], + "pathogenic_motif_reference_orientation": ["CCTGGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGCTG", "GGCCCTG", "GGCCG", "GGCCTT"], + "interruption_reference_orientation": ["CTGGG", "CCCTGGG", "CCGGG", "CCTTGG"], "pathogenic_motif_gene_orientation": ["CCTGGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -3312,15 +3312,15 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGA", "AGC"], + "interruption_reference_orientation": ["AGG", "CAG"], "pathogenic_motif_gene_orientation": ["CGG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AGG", "AGC"], + "interruption_gene_orientation": ["AGG", "CAG"], "locus_structure": [], "benign_min": 7, "benign_max": 37, @@ -3378,8 +3378,8 @@ "year": "2019 [@pmid:31332380]", "location_in_gene": "Exon 1 of lncRNA (noncoding)", "gene_strand": "+", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3449,7 +3449,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3511,11 +3511,11 @@ "location_in_gene": "Coding Exon 3", "gene_strand": "-", "reference_motif_reference_orientation": ["GCN"], - "pathogenic_motif_reference_orientation": ["GCN"], + "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3577,7 +3577,7 @@ "location_in_gene": "Coding Exon 3", "gene_strand": "-", "reference_motif_reference_orientation": ["TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC"], - "pathogenic_motif_reference_orientation": ["TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC"], + "pathogenic_motif_reference_orientation": ["GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3642,12 +3642,12 @@ "year": null, "location_in_gene": "Coding Exon 2", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3723,12 +3723,12 @@ "year": "1999 [@pmid:10581021]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCT"], - "pathogenic_motif_reference_orientation": ["GCT"], + "reference_motif_reference_orientation": ["CTG"], + "pathogenic_motif_reference_orientation": ["CTG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -3789,8 +3789,8 @@ "year": "2015 [@pmid:26005867]", "location_in_gene": "Coding Exon 5", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3856,7 +3856,7 @@ "location_in_gene": "Coding Exon 2", "gene_strand": "+", "reference_motif_reference_orientation": ["GGTGGTGGCTGGGGGCAGCCTCAT"], - "pathogenic_motif_reference_orientation": ["CCTCATGGTGGTGGCTGGGGGCAG"], + "pathogenic_motif_reference_orientation": ["AGCCTCATGGTGGTGGCTGGGGGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -3933,10 +3933,10 @@ "gene_strand": "+", "reference_motif_reference_orientation": ["TTTTA"], "pathogenic_motif_reference_orientation": ["TTTCA"], - "benign_motif_reference_orientation": ["TTTTA"], + "benign_motif_reference_orientation": ["ATTTT"], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["GGGGT", "GGGAT"], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "interruption_reference_orientation": ["GGGGT", "ATGGG"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": ["ATTTT"], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": ["GGGGT", "ATGGG"], @@ -4010,9 +4010,9 @@ "reference_motif_reference_orientation": ["TTTTA"], "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["TTTTT", "TTATG"], + "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], @@ -4084,9 +4084,9 @@ "location_in_gene": "Intron 2", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAG"], - "pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AAAGG", "AGGGC"], + "pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AAAGG", "CAGGG"], "benign_motif_reference_orientation": ["AAAAG", "AAAGGG"], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "AGGTG", "ACGGG", "AAAAAG", "AAGGC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "ACAAG", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "GAGGT", "ACGGG", "AAAAAG", "CAAGG"], "interruption_reference_orientation": [], "pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "CCCTG"], "benign_motif_gene_orientation": ["CTTTT", "CCCTTT"], @@ -4159,11 +4159,11 @@ "year": "2022 [@pmid:35148830]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GGC"], - "pathogenic_motif_reference_orientation": ["GGC"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], - "interruption_reference_orientation": ["TGG", "CGT", "AGG"], + "interruption_reference_orientation": ["GGT", "CGT", "AGG"], "pathogenic_motif_gene_orientation": ["CCG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], @@ -4230,7 +4230,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4294,9 +4294,9 @@ "reference_motif_reference_orientation": ["TAAAA"], "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "TAAAC", "TAACA", "TACAA", "TACAC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAACT", "AACAT", "ACAAT", "ACACT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "AGTTT", "ATGTT", "ATTGT", "AGTGT"], "interruption_gene_orientation": [], @@ -4367,12 +4367,12 @@ "year": "2002 [@pmid:12428212]", "location_in_gene": "Coding Exon 1", "gene_strand": "-", - "reference_motif_reference_orientation": ["NGC"], + "reference_motif_reference_orientation": ["GCN"], "pathogenic_motif_reference_orientation": ["NGC"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4434,11 +4434,11 @@ "location_in_gene": "Intron 1", "gene_strand": "-", "reference_motif_reference_orientation": ["AAAAT"], - "pathogenic_motif_reference_orientation": ["AAATG"], + "pathogenic_motif_reference_orientation": ["TGAAA"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "AACTC", "AACTG", "AATAC", "AATAG", "ATAAC"], + "unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AAACC", "AAACG", "AAACT", "CAACT", "GAACT", "ACAAT", "AGAAT", "AACAT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "GGTTT", "CGTTT", "AGTTT", "AGTTG", "AGTTC", "ATTGT", "ATTCT", "ATGTT"], "interruption_gene_orientation": [], @@ -4575,8 +4575,8 @@ "year": "2026 [@pmid:41959811]", "location_in_gene": "5' UTR", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -4646,10 +4646,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 25, "benign_max": 40, @@ -4712,7 +4712,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -4844,10 +4844,10 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": ["CAA"], - "pathogenic_motif_gene_orientation": ["AGC"], + "pathogenic_motif_gene_orientation": ["CAG"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], - "interruption_gene_orientation": ["AAC"], + "interruption_gene_orientation": ["CAA"], "locus_structure": [], "benign_min": 20, "benign_max": 38, @@ -4910,7 +4910,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": ["TTTTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT"], "interruption_gene_orientation": [], @@ -5048,7 +5048,7 @@ "location_in_gene": "Coding Exon 1", "gene_strand": "+", "reference_motif_reference_orientation": ["GGCGCGGAGC"], - "pathogenic_motif_reference_orientation": ["GGCGCGGAGC"], + "pathogenic_motif_reference_orientation": ["AGCGGCGCGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5113,8 +5113,8 @@ "year": "2019 [@pmid:30554721]", "location_in_gene": "5' promoter region. Note, it can also be annotated coding or introntic depending on the reference, due to missing sequences in some reference genomes.", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5197,9 +5197,9 @@ "reference_motif_reference_orientation": ["TTTTA"], "pathogenic_motif_reference_orientation": ["TTTCA"], "benign_motif_reference_orientation": [], - "unknown_motif_reference_orientation": ["TTTTT", "TGTTA"], + "unknown_motif_reference_orientation": ["TTTTT", "ATGTT"], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["ATTTC"], + "pathogenic_motif_gene_orientation": ["TTTCA"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": ["TTTTT", "ATGTT"], "interruption_gene_orientation": [], @@ -5270,8 +5270,8 @@ "year": "2023 [@pmid:38035881]", "location_in_gene": "Coding, Last Exon (exon number is transcript dependent)", "gene_strand": "-", - "reference_motif_reference_orientation": ["GCC"], - "pathogenic_motif_reference_orientation": ["GCC"], + "reference_motif_reference_orientation": ["CCG"], + "pathogenic_motif_reference_orientation": ["CCG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], @@ -5341,7 +5341,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5407,7 +5407,7 @@ "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], - "pathogenic_motif_gene_orientation": ["CNG"], + "pathogenic_motif_gene_orientation": ["GCN"], "benign_motif_gene_orientation": [], "unknown_motif_gene_orientation": [], "interruption_gene_orientation": [], @@ -5468,8 +5468,8 @@ "year": "2014 [@pmid:25196122]", "location_in_gene": "Intron 1", "gene_strand": "+", - "reference_motif_reference_orientation": ["GCG"], - "pathogenic_motif_reference_orientation": ["GCG"], + "reference_motif_reference_orientation": ["CGG"], + "pathogenic_motif_reference_orientation": ["CGG"], "benign_motif_reference_orientation": [], "unknown_motif_reference_orientation": [], "interruption_reference_orientation": [], diff --git a/data/STRchive-loci.schema.json b/data/STRchive-loci.schema.json index a24788ba..c5b52b3a 100644 --- a/data/STRchive-loci.schema.json +++ b/data/STRchive-loci.schema.json @@ -4,6 +4,16 @@ "title": "STRchive-loci", "description": "STRchive tandem repeat disease locus", "citation_format": "In free text strings: 'Some text [@doi:12345; @pmid:12345]'. In regular lists: ['doi:12345', 'pmid:12345']", + "canonical_motifs": [ + "CAG", + "CCG", + "CGG", + "CTG", + "GCN", + "CAA", + "TTTCA", + "AAATG" +], "type": "object", "properties": { "id": { @@ -270,6 +280,7 @@ "type": ["string", "null"], "enum": ["+", "-"] }, + "reference_motif_reference_orientation": { "section": "Alleles", "title": "Reference Motif (Reference Orientation)", @@ -286,9 +297,11 @@ "pathogenic_motif_reference_orientation": { "section": "Alleles", "title": "Pathogenic Motif (Reference Orientation)", - "description": "Pathogenic motif(s) in the + reference orientation. May be the same as the reference motif if it is pathogenic when expanded or contracted.", + "description": "Pathogenic motif(s) in the + reference orientation. May be the same as the reference motif if it is pathogenic when expanded or contracted. Automatically generated from pathogenic_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAGGG", "ACAGG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -299,9 +312,11 @@ "benign_motif_reference_orientation": { "section": "Alleles", "title": "Benign Motif (Reference Orientation)", - "description": "Benign motif(s) in the + reference orientation. Benign motifs are not known to be pathogenic at any size. May be the same as the reference motif if it is benign.", + "description": "Benign motif(s) in the + reference orientation. Benign motifs are not known to be pathogenic at any size. May be the same as the reference motif if it is benign. Automatically generated from benign_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAAAG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -312,9 +327,11 @@ "unknown_motif_reference_orientation": { "section": "Alleles", "title": "Unknown Motif (Reference Orientation)", - "description": "Motif(s) of unknown consequence in the + reference orientation. Only include motifs that have been observed in individuals but are not classified elsewhere.", + "description": "Motif(s) of unknown consequence in the + reference orientation. Automatically generated from unknown_motif_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["AAAAG"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -325,9 +342,11 @@ "interruption_reference_orientation": { "section": "Alleles", "title": "Interruption (Reference Orientation)", - "description": "Interruption(s) in the + reference orientation. Only include interruptions that have been observed in individuals. These should typically be reported in the context of the motif, for example a CAG -> CAA interruption would be reported as CAA, not A", + "description": "Interruption(s) in the + reference orientation. Only include interruptions that have been observed in individuals. Automatically generated from interruption_gene_orientation and gene_strand. Should not be manually edited.", "examples": ["CAA"], "type": "array", + "auto_generated": true, + "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -338,11 +357,10 @@ "pathogenic_motif_gene_orientation": { "section": "Alleles", "title": "Pathogenic Motif (Gene Orientation)", - "description": "Pathogenic motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from pathogenic_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Pathogenic motif(s) in the gene orientation, so the reverse complement if gene is on - strand.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", + "uniqueItems": true, "items": { "title": "", @@ -353,11 +371,9 @@ "benign_motif_gene_orientation": { "section": "Alleles", "title": "Benign Motif (Gene Orientation)", - "description": "Benign motif(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from benign_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Benign motif(s) in the gene orientation, so the reverse complement if gene is on - strand.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -368,11 +384,9 @@ "unknown_motif_gene_orientation": { "section": "Alleles", "title": "Unknown Motif (Gene Orientation)", - "description": "Motif(s) of unknown consequence in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from unknown_motif_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Motif(s) of unknown consequence in the gene orientation, so the reverse complement if gene is on - strand. Only include motifs that have been observed in individuals but are not classified elsewhere.", "examples": ["CCCTT", "CCTGT"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", @@ -383,11 +397,9 @@ "interruption_gene_orientation": { "section": "Alleles", "title": "Interruption (Gene Orientation)", - "description": "Interruption(s) in the gene orientation, so the reverse complement if gene is on - strand. Automatically generated from interruption_reference_orientation and gene_strand. Should not be manually edited.", + "description": "Interruption(s) in the gene orientation, so the reverse complement if gene is on - strand. These should typically be reported in the context of the motif, for example a CAG -> CAA interruption would be reported as CAA, not A.", "examples": ["TTG"], "type": "array", - "auto_generated": true, - "hide": "true", "uniqueItems": true, "items": { "title": "", diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed index 41f2fcb5..e87fc27b 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed @@ -1,80 +1,80 @@ -chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= -chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr1 870158 870178 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57245935 57245973 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= +chr1 94266544 94266567 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 148519695 148519738 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= +chr1 154328121 154330802 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 155728131 155728159 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96703674 96703732 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= +chr2 100563685 100563738 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176581179 176581224 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 191369982 191370024 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63956302 63956345 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= +chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 147414733 147414780 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 178096748 178096792 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13201716 13201843 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16200188 16200282 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45257567 45257618 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 171935458 171935569 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= +chr7 27335684 27335720 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27335813 27335849 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27335912 27335954 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= +chr7 56047900 56047939 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 105716409 105716441 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 119495247 119495353 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= -chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= -chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr9 27584063 27584155 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 81210818 81210861 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= +chr9 142886568 142886595 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= +chr9 145285333 145285861 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr10 80695718 80695748 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119226662 119226696 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6947903 6947941 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50468095 50468118 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 111575873 111575940 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 69361213 69361270 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99196358 99196404 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 17522488 17522519 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 86300519 86300603 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 32225152 32225178 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 86324038 86324057 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= +chr15 87088402 87088452 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17477909 17478002 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24890366 24890430 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 72284666 72284761 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 73638636 73638724 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 78605502 78605569 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 93675723 93675776 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17754961 17755053 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= +chr17 81047404 81047534 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 821235 821905 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55789233 55789288 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4494212 4497342 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4494212 4497342 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13333136 13333176 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14622655 14622692 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18921630 18921645 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18921630 18921645 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 48597739 48597756 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2683189 2683248 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4738606 4738705 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2683189 2683248 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4738606 4738705 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 42132054 42132091 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 20143615 20143660 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38781587 38781680 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46280059 46280134 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= -chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 24597766 24597802 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 24597886 24597934 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= +chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 69887153 69887230 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 135876774 135876804 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 146176677 146176769 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed index ca754b45..f9028103 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed @@ -1,14 +1,14 @@ #chrom start stop motif motif_len id -chr1 870158 870178 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 870158 870178 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57245970 57245973 GAAAT 5 SCA37_DAB1 -chr1 94266544 94266567 GCC 3 OPDM5_ABCD3 -chr1 148519695 148519738 GGC 3 NIID_NOTCH2NLC -chr1 154328121 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 155728131 155728159 GGGCC 5 NME_NAXE +chr1 94266544 94266567 CCG 3 OPDM5_ABCD3 +chr1 148519695 148519738 CGG 3 NIID_NOTCH2NLC +chr1 154328121 154330802 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 155728131 155728159 CCGGG 5 NME_NAXE chr2 96703674 96703677 AAATG 5 FAME2_STARD7 -chr2 100563685 100563738 GCC 3 FRA2A_AFF3 +chr2 100563685 100563738 CCG 3 FRA2A_AFF3 chr2 176581179 176581224 GCN 3 SD5_HOXD13 -chr2 191369982 191370024 GCA 3 GDPAG_GLS +chr2 191369982 191370024 CAG 3 GDPAG_GLS chr3 63956302 63956333 CAG 3 SCA7_ATXN7 chr3 63956333 63956345 CCG 3 SCA7_ATXN7_flank chr3 131917482 131917557 CAGG 4 DM2_CNBP @@ -19,58 +19,58 @@ chr3 186521702 186521706 TTTCA 5 FAME4_YEATS2 chr4 3073603 3073681 CAG 3 HD_HTT chr4 3073687 3073723 CCG 3 HD_HTT_flank chr4 39318132 39318136 AAGGG 5 CANVAS_RFC1 -chr4 41719745 41719805 GCN 3 CCHS_PHOX2B +chr4 41719745 41719805 NGC 3 CCHS_PHOX2B chr4 162693388 162693405 TTTCA 5 FAME7_RAPGEF2 chr5 10295585 10295593 TTTCA 5 FAME3_MARCHF6 -chr5 147414733 147414780 GCT 3 SCA12_PPP2R2B -chr5 178096748 178096792 GCC 3 OPDM_FAM193B -chr6 13201716 13201843 GCC 3 OPDM_TBC1D7 +chr5 147414733 147414780 CTG 3 SCA12_PPP2R2B +chr5 178096748 178096792 CCG 3 OPDM_FAM193B +chr6 13201716 13201843 CCG 3 OPDM_TBC1D7 chr6 16200188 16200282 CTG 3 SCA1_ATXN1 chr6 45257567 45257618 GCN 3 CCD_RUNX2 chr6 171935458 171935569 CAG 3 SCA17_TBP chr7 27335684 27335720 NGC 3 HFG_HOXA13-III chr7 27335813 27335849 NGC 3 HFG_HOXA13-II chr7 27335912 27335954 NGC 3 HFG_HOXA13-I -chr7 56047900 56047939 GCG 3 FRA7A_ZNF713 -chr8 105716409 105716441 CGC 3 OPDM1_LRP12 +chr7 56047900 56047939 CGG 3 FRA7A_ZNF713 +chr8 105716409 105716441 CCG 3 OPDM1_LRP12 chr8 119495347 119495353 TGAAA 5 FAME1_SAMD12 -chr9 27584063 27584155 GGCCCC 6 FTDALS1_C9orf72 +chr9 27584063 27584155 CCCCGG 6 FTDALS1_C9orf72 chr9 81210818 81210834 A 1 FRDA_FXN_flank chr9 81210834 81210861 GAA 3 FRDA_FXN -chr9 142886568 142886595 GCC 3 HSAN-VIII_PRDM12 +chr9 142886568 142886595 CCG 3 HSAN-VIII_PRDM12 chr9 145285333 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 80695718 80695748 GGC 3 OPML1_NUTM2B-AS1 +chr10 80695718 80695748 CGG 3 OPML1_NUTM2B-AS1 chr11 119226662 119226696 CGG 3 JBS_CBL chr12 6947903 6947941 CAG 3 DRPLA_ATN1 -chr12 50468095 50468118 GGC 3 FRA12A_DIP2B +chr12 50468095 50468118 CGG 3 FRA12A_DIP2B chr12 111575873 111575940 CTG 3 SCA2_ATXN2 -chr12 123532573 123532603 GGC 3 OPDM4_RILPL1 +chr12 123532573 123532603 CGG 3 OPDM4_RILPL1 chr13 69361213 69361243 CTA 3 SCA8_ATXN8OS_flank chr13 69361243 69361270 CTG 3 SCA8_ATXN8OS chr13 99196358 99196404 GCN 3 HPE5_ZIC2 -chr13 101377549 101377792 GAA 3 SCA27B_FGF14 +chr13 101377549 101377792 AAG 3 SCA27B_FGF14 chr14 17522488 17522519 GCN 3 OPMD_PABPN1 chr14 86300519 86300603 CTG 3 SCA3_ATXN3 -chr15 20458510 20458536 GCG 3 ALS1_NIPA1 +chr15 20458510 20458536 CGG 3 ALS1_NIPA1 chr15 32225152 32225178 CT 2 aFTLD-U_GOLGA8A -chr15 86324038 86324057 TTTG 4 CHNG3_MIR7-2 +chr15 86324038 86324057 GTTT 4 CHNG3_MIR7-2 chr15 87088402 87088408 GCT 3 CPEO_POLG_flank chr15 87088408 87088411 GTT 3 CPEO_POLG_flank chr15 87088411 87088452 GCT 3 CPEO_POLG chr16 17477909 17478002 GCC 3 DBQD2_XYLT1 chr16 24890416 24890430 TTTCA 5 FAME6_TNRC6A -chr16 72284666 72284761 TGGAA 5 SCA31_BEAN1 +chr16 72284666 72284761 AATGG 5 SCA31_BEAN1 chr16 73638636 73638724 CAG 3 SCA_THAP11 -chr16 78605502 78605569 GCC 3 SCA4_ZFHX3 +chr16 78605502 78605569 CCG 3 SCA4_ZFHX3 chr16 93675723 93675776 CTG 3 HDL2_JPH3 chr17 17755051 17755053 TTTCA 5 FAME8_RAI1 -chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 81047404 81047534 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 821235 821905 GATGGT 6 CPUM_TYMS chr18 55789233 55789288 CAG 3 FECD3_TCF4 -chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4494212 4497342 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13333136 13333176 CTG 3 SCA6_CACNA1A chr19 14622655 14622692 CCG 3 OPDM2_GIPC1 -chr19 18921630 18921645 GTC 3 EDM1-PSACH_COMP +chr19 18921630 18921645 CGT 3 EDM1-PSACH_COMP chr19 48597739 48597756 CAG 3 DM1_DMPK chr20 2683189 2683230 GGCCTG 6 SCA36_NOP56 chr20 2683230 2683248 CGCCTG 6 SCA36_NOP56_flank @@ -84,9 +84,9 @@ chrX 24597766 24597802 NGC 3 PRTS_ARX chrX 24597886 24597934 NGC 3 EIEE1_ARX chrX 30882677 30882743 TTC 3 DMD_DMD chrX 30882743 30882751 T 1 DMD_DMD_flank -chrX 65975147 65975250 GCA 3 SBMA_AR +chrX 65975147 65975250 CAG 3 SBMA_AR chrX 69887153 69887230 AGAGGG 6 XDP_TAF1 chrX 135876774 135876804 GCN 3 VACTERLX_ZIC3 chrX 138816203 138816248 NGC 3 XLID_SOX3 chrX 146176677 146176769 CGG 3 FXS_FMR1 -chrX 146765190 146765342 GCC 3 FRAXE_AFF2 +chrX 146765190 146765342 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz index b416feff..41a71796 100644 Binary files a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz and b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz differ diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi index 118890a1..e449153c 100644 Binary files a/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi and b/data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi differ diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed index 03c1911e..aebd3435 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed @@ -1,81 +1,81 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease -chr1 870158 870178 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 -chr1 154328121 154330802 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease -chr1 155728131 155728159 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy -chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100563685 100563738 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr1 870158 870178 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 +chr1 57245935 57245973 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 +chr1 94266544 94266567 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 148519695 148519738 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 154328121 154330802 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease +chr1 155728131 155728159 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy +chr2 96703674 96703732 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 +chr2 100563685 100563738 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176581179 176581224 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191369982 191370024 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 191369982 191370024 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63956302 63956333 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 131917482 131917557 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 141687011 141687054 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 141687011 141687054 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 186521667 186521706 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3073603 3073687 HD_HTT HTT CAG CAG 36 AD Huntington disease -chr4 39318077 39318136 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome -chr4 41719745 41719805 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome +chr4 39318077 39318136 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome +chr4 41719745 41719805 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome chr4 162693303 162693405 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10295525 10295593 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 178096748 178096792 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 147414733 147414780 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 178096748 178096792 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13201716 13201843 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16200188 16200282 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45257567 45257618 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 171935458 171935569 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27335684 27335720 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27335813 27335849 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27335912 27335954 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105716409 105716441 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 +chr7 27335684 27335720 HFG_HOXA13-III HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 3 +chr7 27335813 27335849 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital syndrome 2 +chr7 27335912 27335954 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 +chr7 56047900 56047939 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105716409 105716441 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 119495247 119495353 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 -chr9 27584063 27584155 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) -chr9 81210834 81210861 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII -chr9 145285333 145285861 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr9 27584063 27584155 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) +chr9 81210834 81210861 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia +chr9 142886568 142886595 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 145285333 145285861 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC None AD Maturity-Onset Diabetes of the Young Type 8 +chr10 80695718 80695748 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119226662 119226696 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6947903 6947941 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50468095 50468118 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50468095 50468118 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111575873 111575940 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 123532573 123532603 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 69361243 69361270 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99196358 99196404 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 -chr13 101377549 101377792 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B +chr13 101377549 101377792 SCA27B_FGF14 FGF14 GAA AAG 320 AD Spinocerebellar ataxia 27B chr14 17522488 17522519 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 86300519 86300603 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 20458510 20458536 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 20458510 20458536 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 32225152 32225178 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) -chr15 86324038 86324057 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 87088411 87088452 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 86324038 86324057 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 +chr15 87088411 87088452 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17477909 17478002 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24890366 24890430 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 -chr16 72284666 72284761 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 +chr16 72284666 72284761 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 73638636 73638724 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 78605502 78605569 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 93675723 93675776 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17754961 17755053 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 -chr17 81047404 81047534 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome +chr17 81047404 81047534 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 821235 821905 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 55789233 55789288 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 -chr19 4494212 4497342 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy +chr19 4494212 4497342 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy chr19 13333136 13333176 SCA6_CACNA1A CACNA1A CTG CTG 21 AD Spinocerebellar ataxia type 6 chr19 14622655 14622692 OPDM2_GIPC1 GIPC1 CCG CCG 73 AD Oculopharyngodistal myopathy type 2 -chr19 18921630 18921645 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia +chr19 18921630 18921645 EDM1-PSACH_COMP COMP GTC CGT 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia chr19 48597739 48597756 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 -chr20 2683189 2683230 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 -chr20 4738633 4738705 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome +chr20 2683189 2683230 SCA36_NOP56 NOP56 GGCCTG CCTGGG 650 AD Spinocerebellar ataxia type 36 +chr20 4738633 4738705 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT AGCCTCATGGTGGTGGCTGGGGGC 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome chr21 42132054 42132091 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 20143615 20143660 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38781587 38781680 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46280059 46280134 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 24597766 24597802 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 24597886 24597934 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy -chrX 30882677 30882743 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 65975147 65975250 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 24597766 24597802 PRTS_ARX ARX GCN NGC 20 XR Partington syndrome +chrX 24597886 24597934 EIEE1_ARX ARX GCN NGC 17 XR Early-infantile epileptic encephalopathy +chrX 30882677 30882743 DMD_DMD DMD TTC CTT 59 XR Duchenne muscular dystrophy +chrX 65975147 65975250 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 69887153 69887230 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 135876774 135876804 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 138816203 138816248 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 138816203 138816248 XLID_SOX3 SOX3 GCN NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146176677 146176769 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 146765190 146765342 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 146765190 146765342 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed index c67fe4e0..8cd57661 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.longTR.bed @@ -1,80 +1,80 @@ -chr1 870159 870178 GGCGCGGAGC HMNR7_VWA1 -chr1 57245936 57245973 GAAAT,AAAAT SCA37_DAB1 -chr1 94266545 94266567 GCC OPDM5_ABCD3 -chr1 148519696 148519738 GGC NIID_NOTCH2NLC -chr1 154328122 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 -chr1 155728132 155728159 GGGCC NME_NAXE -chr2 96703675 96703732 AAATG,AAAAT FAME2_STARD7 -chr2 100563686 100563738 GCC FRA2A_AFF3 +chr1 870159 870178 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 +chr1 57245936 57245973 TGAAA,AAAAT SCA37_DAB1 +chr1 94266545 94266567 CCG OPDM5_ABCD3 +chr1 148519696 148519738 CGG NIID_NOTCH2NLC +chr1 154328122 154330802 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 +chr1 155728132 155728159 CCGGG,GGGCC NME_NAXE +chr2 96703675 96703732 TGAAA,AAAAT FAME2_STARD7 +chr2 100563686 100563738 CCG FRA2A_AFF3 chr2 176581180 176581224 GCN SD5_HOXD13 -chr2 191369983 191370024 GCA GDPAG_GLS +chr2 191369983 191370024 CAG GDPAG_GLS chr3 63956303 63956333 CAG SCA7_ATXN7 chr3 131917483 131917557 CAGG DM2_CNBP -chr3 141687012 141687054 NGC BPES_FOXL2 +chr3 141687012 141687054 NGC,GCN BPES_FOXL2 chr3 186521668 186521706 TTTCA,TTTTA FAME4_YEATS2 chr4 3073604 3073687 CAG HD_HTT -chr4 39318078 39318136 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 -chr4 41719746 41719805 GCN CCHS_PHOX2B +chr4 39318078 39318136 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 +chr4 41719746 41719805 NGC,GCN CCHS_PHOX2B chr4 162693304 162693405 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10295526 10295593 TTTCA,TTTTA FAME3_MARCHF6 -chr5 147414734 147414780 GCT SCA12_PPP2R2B -chr5 178096749 178096792 GCC OPDM_FAM193B -chr6 13201717 13201843 GCC OPDM_TBC1D7 +chr5 147414734 147414780 CTG SCA12_PPP2R2B +chr5 178096749 178096792 CCG OPDM_FAM193B +chr6 13201717 13201843 CCG OPDM_TBC1D7 chr6 16200189 16200282 CTG SCA1_ATXN1 chr6 45257568 45257618 GCN CCD_RUNX2 chr6 171935459 171935569 CAG SCA17_TBP -chr7 27335685 27335720 NGC HFG_HOXA13-III -chr7 27335814 27335849 NGC HFG_HOXA13-II -chr7 27335913 27335954 NGC HFG_HOXA13-I -chr7 56047901 56047939 GCG FRA7A_ZNF713 -chr8 105716410 105716441 CGC OPDM1_LRP12 +chr7 27335685 27335720 NGC,GCN HFG_HOXA13-III +chr7 27335814 27335849 NGC,GCN HFG_HOXA13-II +chr7 27335913 27335954 NGC,GCN HFG_HOXA13-I +chr7 56047901 56047939 CGG FRA7A_ZNF713 +chr8 105716410 105716441 CCG OPDM1_LRP12 chr8 119495248 119495353 TGAAA,TAAAA FAME1_SAMD12 -chr9 27584064 27584155 GGCCCC FTDALS1_C9orf72 -chr9 81210835 81210861 GAA FRDA_FXN -chr9 142886569 142886595 GCC HSAN-VIII_PRDM12 -chr9 145285334 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 80695719 80695748 GGC OPML1_NUTM2B-AS1 +chr9 27584064 27584155 CCCCGG,GGCCCC FTDALS1_C9orf72 +chr9 81210835 81210861 AAG,GAA FRDA_FXN +chr9 142886569 142886595 CCG HSAN-VIII_PRDM12 +chr9 145285334 145285861 ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL +chr10 80695719 80695748 CGG OPML1_NUTM2B-AS1 chr11 119226663 119226696 CGG JBS_CBL chr12 6947904 6947941 CAG DRPLA_ATN1 -chr12 50468096 50468118 GGC FRA12A_DIP2B +chr12 50468096 50468118 CGG FRA12A_DIP2B chr12 111575874 111575940 CTG SCA2_ATXN2 -chr12 123532574 123532603 GGC OPDM4_RILPL1 +chr12 123532574 123532603 CGG OPDM4_RILPL1 chr13 69361244 69361270 CTG SCA8_ATXN8OS chr13 99196359 99196404 GCN HPE5_ZIC2 -chr13 101377550 101377792 GAA,GGA,GCA SCA27B_FGF14 +chr13 101377550 101377792 AAG,AGG,CAG,GAA SCA27B_FGF14 chr14 17522489 17522519 GCN OPMD_PABPN1 chr14 86300520 86300603 CTG SCA3_ATXN3 -chr15 20458511 20458536 GCG ALS1_NIPA1 +chr15 20458511 20458536 CGG ALS1_NIPA1 chr15 32225153 32225178 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A -chr15 86324039 86324057 TTTG CHNG3_MIR7-2 -chr15 87088412 87088452 GCT CPEO_POLG -chr16 17477910 17478002 GCC DBQD2_XYLT1 +chr15 86324039 86324057 GTTT,TTTG CHNG3_MIR7-2 +chr15 87088412 87088452 CTG CPEO_POLG +chr16 17477910 17478002 CCG DBQD2_XYLT1 chr16 24890367 24890430 TTTCA,TTTTA FAME6_TNRC6A -chr16 72284667 72284761 TGGAA,TAGAA,AATAA SCA31_BEAN1 +chr16 72284667 72284761 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 73638637 73638724 CAG SCA_THAP11 -chr16 78605503 78605569 GCC SCA4_ZFHX3 +chr16 78605503 78605569 CCG SCA4_ZFHX3 chr16 93675724 93675776 CTG HDL2_JPH3 -chr17 17754962 17755053 TTTCA,TTTTA FAME8_RAI1 -chr17 81047405 81047534 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 +chr17 17754962 17755053 TTTCA,ATTTT,TTTTA FAME8_RAI1 +chr17 81047405 81047534 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 821236 821905 GATGGT CPUM_TYMS chr18 55789234 55789288 CAG FECD3_TCF4 -chr19 4494213 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 +chr19 4494213 4497342 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 chr19 13333137 13333176 CTG SCA6_CACNA1A chr19 14622656 14622692 CCG OPDM2_GIPC1 -chr19 18921631 18921645 GTC EDM1-PSACH_COMP +chr19 18921631 18921645 CGT,GTC EDM1-PSACH_COMP chr19 48597740 48597756 CAG DM1_DMPK -chr20 2683190 2683230 GGCCTG SCA36_NOP56 -chr20 4738634 4738705 CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP +chr20 2683190 2683230 CCTGGG,GGCCTG SCA36_NOP56 +chr20 4738634 4738705 AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP chr21 42132055 42132091 CGCGGGGCGGGG EPM1_CSTB chr22 20143616 20143660 GCN TOF_TBX1 chr22 38781588 38781680 CCG EPM_CSNK1E chr22 46280060 46280134 ATTCT SCA10_ATXN10 -chrX 24597767 24597802 NGC PRTS_ARX -chrX 24597887 24597934 NGC EIEE1_ARX -chrX 30882678 30882743 TTC DMD_DMD -chrX 65975148 65975250 GCA SBMA_AR +chrX 24597767 24597802 NGC,GCN PRTS_ARX +chrX 24597887 24597934 NGC,GCN EIEE1_ARX +chrX 30882678 30882743 CTT,TTC DMD_DMD +chrX 65975148 65975250 CAG SBMA_AR chrX 69887154 69887230 AGAGGG XDP_TAF1 chrX 135876775 135876804 GCN VACTERLX_ZIC3 -chrX 138816204 138816248 NGC XLID_SOX3 +chrX 138816204 138816248 NGC,GCN XLID_SOX3 chrX 146176678 146176769 CGG FXS_FMR1 -chrX 146765191 146765342 GCC FRAXE_AFF2 +chrX 146765191 146765342 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed index 6eb055bf..c2ab4143 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.straglr.bed @@ -1,12 +1,12 @@ -chr1 870158 870178 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 +chr1 870158 870178 AGCGGCGCGG HMNR7_VWA1 HMNR7_VWA1 chr1 57245970 57245973 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94266544 94266567 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 148519695 148519738 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC -chr1 155728131 155728159 GGGCC NME_NAXE NME_NAXE +chr1 94266544 94266567 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 148519695 148519738 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 155728131 155728159 CCGGG NME_NAXE NME_NAXE chr2 96703674 96703677 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100563685 100563738 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100563685 100563738 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176581179 176581224 GCN SD5_HOXD13 SD5_HOXD13 -chr2 191369982 191370024 GCA GDPAG_GLS GDPAG_GLS +chr2 191369982 191370024 CAG GDPAG_GLS GDPAG_GLS chr3 63956302 63956333 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63956333 63956345 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 131917482 131917557 CAGG DM2_CNBP DM2_CNBP @@ -17,57 +17,57 @@ chr3 186521702 186521706 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3073603 3073681 CAG HD_HTT HD_HTT chr4 3073687 3073723 CCG HD_HTT HD_HTT_CCG chr4 39318132 39318136 AAGGG CANVAS_RFC1 CANVAS_RFC1 -chr4 41719745 41719805 GCN CCHS_PHOX2B CCHS_PHOX2B +chr4 41719745 41719805 NGC CCHS_PHOX2B CCHS_PHOX2B chr4 162693388 162693405 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10295585 10295593 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 147414733 147414780 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 178096748 178096792 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13201716 13201843 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 147414733 147414780 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 178096748 178096792 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13201716 13201843 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16200188 16200282 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45257567 45257618 GCN CCD_RUNX2 CCD_RUNX2 chr6 171935458 171935569 CAG SCA17_TBP SCA17_TBP chr7 27335684 27335720 NGC HFG_HOXA13-III HFG_HOXA13-III chr7 27335813 27335849 NGC HFG_HOXA13-II HFG_HOXA13-II chr7 27335912 27335954 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 56047900 56047939 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 105716409 105716441 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 56047900 56047939 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 105716409 105716441 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 119495347 119495353 TGAAA FAME1_SAMD12 FAME1_SAMD12 -chr9 27584063 27584155 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 +chr9 27584063 27584155 CCCCGG FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 81210818 81210834 A FRDA_FXN FRDA_FXN_A chr9 81210834 81210861 GAA FRDA_FXN FRDA_FXN -chr9 142886568 142886595 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 80695718 80695748 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 142886568 142886595 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 80695718 80695748 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119226662 119226696 CGG JBS_CBL JBS_CBL chr12 6947903 6947941 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50468095 50468118 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50468095 50468118 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 111575873 111575940 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 123532573 123532603 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 123532573 123532603 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 69361213 69361243 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 69361243 69361270 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 99196358 99196404 GCN HPE5_ZIC2 HPE5_ZIC2 -chr13 101377549 101377792 GAA SCA27B_FGF14 SCA27B_FGF14 +chr13 101377549 101377792 AAG SCA27B_FGF14 SCA27B_FGF14 chr14 17522488 17522519 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 86300519 86300603 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 20458510 20458536 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 20458510 20458536 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 32225152 32225178 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A -chr15 86324038 86324057 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 +chr15 86324038 86324057 GTTT CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 87088402 87088408 GCT CPEO_POLG CPEO_POLG_GCT chr15 87088408 87088411 GTT CPEO_POLG CPEO_POLG_GTT chr15 87088411 87088452 GCT CPEO_POLG CPEO_POLG chr16 17477909 17478002 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24890416 24890430 TTTCA FAME6_TNRC6A FAME6_TNRC6A -chr16 72284666 72284761 TGGAA SCA31_BEAN1 SCA31_BEAN1 +chr16 72284666 72284761 AATGG SCA31_BEAN1 SCA31_BEAN1 chr16 73638636 73638724 CAG SCA_THAP11 SCA_THAP11 -chr16 78605502 78605569 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 78605502 78605569 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 93675723 93675776 CTG HDL2_JPH3 HDL2_JPH3 chr17 17755051 17755053 TTTCA FAME8_RAI1 FAME8_RAI1 -chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 +chr17 81047404 81047534 GCCGCTGCCGACCTCGCTGT RCPS_EIF4A3 RCPS_EIF4A3 chr18 821235 821905 GATGGT CPUM_TYMS CPUM_TYMS chr18 55789233 55789288 CAG FECD3_TCF4 FECD3_TCF4 -chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 MRUPAV_PLIN4 +chr19 4494212 4497342 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT MRUPAV_PLIN4 MRUPAV_PLIN4 chr19 13333136 13333176 CTG SCA6_CACNA1A SCA6_CACNA1A chr19 14622655 14622692 CCG OPDM2_GIPC1 OPDM2_GIPC1 -chr19 18921630 18921645 GTC EDM1-PSACH_COMP EDM1-PSACH_COMP +chr19 18921630 18921645 CGT EDM1-PSACH_COMP EDM1-PSACH_COMP chr19 48597739 48597756 CAG DM1_DMPK DM1_DMPK chr20 2683189 2683230 GGCCTG SCA36_NOP56 SCA36_NOP56 chr20 2683230 2683248 CGCCTG SCA36_NOP56 SCA36_NOP56_CGCCTG @@ -81,9 +81,9 @@ chrX 24597766 24597802 NGC PRTS_ARX PRTS_ARX chrX 24597886 24597934 NGC EIEE1_ARX EIEE1_ARX chrX 30882677 30882743 TTC DMD_DMD DMD_DMD chrX 30882743 30882751 T DMD_DMD DMD_DMD_T -chrX 65975147 65975250 GCA SBMA_AR SBMA_AR +chrX 65975147 65975250 CAG SBMA_AR SBMA_AR chrX 69887153 69887230 AGAGGG XDP_TAF1 XDP_TAF1 chrX 135876774 135876804 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 chrX 138816203 138816248 NGC XLID_SOX3 XLID_SOX3 chrX 146176677 146176769 CGG FXS_FMR1 FXS_FMR1 -chrX 146765190 146765342 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 146765190 146765342 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json index cd667006..86dcfe18 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.stranger.json @@ -2,11 +2,11 @@ { "LocusId": "HMNR7_VWA1", "ReferenceRegion": "chr1:870158-870178", - "LocusStructure": "(GGCGCGGAGC)*", + "LocusStructure": "(AGCGGCGCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGCGCGGAGC", + "DisplayRU": "AGCGGCGCGG", "Disease": "HMNR7", "NormalMax": 2, "PathologicMin": 3, @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57245970-57245973", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94266544-94266567", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:148519695-148519738", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -56,11 +56,11 @@ { "LocusId": "NME_NAXE", "ReferenceRegion": "chr1:155728131-155728159", - "LocusStructure": "(GGGCC)*", + "LocusStructure": "(CCGGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGGCC", + "DisplayRU": "CCGGG", "Disease": "NME", "NormalMax": 7, "PathologicMin": 200, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96703674-96703677", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100563685-100563738", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:191369982-191370024", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -211,11 +211,11 @@ { "LocusId": "CCHS_PHOX2B", "ReferenceRegion": "chr4:41719745-41719805", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "CCHS", "NormalMax": 20, "PathologicMin": 26, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:147414733-147414780", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:178096748-178096792", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13201716-13201843", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:56047900-56047939", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:105716409-105716441", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -412,11 +412,11 @@ { "LocusId": "FTDALS1_C9orf72", "ReferenceRegion": "chr9:27584063-27584155", - "LocusStructure": "(GGCCCC)*", + "LocusStructure": "(CCCCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCCC", + "DisplayRU": "CCCCGG", "Disease": "FTDALS1", "NormalMax": 23, "PathologicMin": 31, @@ -431,7 +431,7 @@ "PathologicRegion": "chr9:81210834-81210861", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "FRDA", "NormalMax": 33, "PathologicMin": 56, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:142886568-142886595", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:80695718-80695748", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50468095-50468118", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:123532573-123532603", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -559,11 +559,11 @@ { "LocusId": "SCA27B_FGF14", "ReferenceRegion": "chr13:101377549-101377792", - "LocusStructure": "(GAA)*", + "LocusStructure": "(AAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "SCA27B", "NormalMax": 179, "PathologicMin": 320, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:20458510-20458536", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -626,11 +626,11 @@ { "LocusId": "CHNG3_MIR7-2", "ReferenceRegion": "chr15:86324038-86324057", - "LocusStructure": "(TTTG)*", + "LocusStructure": "(GTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTG", + "DisplayRU": "GTTT", "Disease": "CHNG3", "NormalMax": 4, "PathologicMin": 5, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:87088411-87088452", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17477909-17478002", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -684,11 +684,11 @@ { "LocusId": "SCA31_BEAN1", "ReferenceRegion": "chr16:72284666-72284761", - "LocusStructure": "(TGGAA)*", + "LocusStructure": "(AATGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGAA", + "DisplayRU": "AATGG", "Disease": "SCA31", "NormalMax": 109, "PathologicMin": 110, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:78605502-78605569", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -751,11 +751,11 @@ { "LocusId": "RCPS_EIF4A3", "ReferenceRegion": "chr17:81047404-81047534", - "LocusStructure": "(CCTCGCTGTGCCGCTGCCGA)*", + "LocusStructure": "(GCCGCTGCCGACCTCGCTGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "CCTCGCTGTGCCGCTGCCGA", + "DisplayRU": "GCCGCTGCCGACCTCGCTGT", "Disease": "RCPS", "NormalMax": 12, "PathologicMin": 14, @@ -790,11 +790,11 @@ { "LocusId": "MRUPAV_PLIN4", "ReferenceRegion": "chr19:4494212-4497342", - "LocusStructure": "(TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC)*", + "LocusStructure": "(GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC", + "DisplayRU": "GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT", "Disease": "MRUPAV", "NormalMax": 31, "PathologicMin": 37, @@ -829,11 +829,11 @@ { "LocusId": "EDM1-PSACH_COMP", "ReferenceRegion": "chr19:18921630-18921645", - "LocusStructure": "(GTC)*", + "LocusStructure": "(CGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GTC", + "DisplayRU": "CGT", "Disease": "EDM1, PSACH", "NormalMax": 5, "PathologicMin": 6, @@ -861,7 +861,7 @@ "PathologicRegion": "chr20:2683189-2683230", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCTG", + "DisplayRU": "CCTGGG", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650, @@ -876,7 +876,7 @@ "PathologicRegion": "chr20:4738633-4738705", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CCTCATGGTGGTGGCTGGGGGCAG", + "DisplayRU": "AGCCTCATGGTGGTGGCTGGGGGC", "Disease": "CJD", "NormalMax": 4, "PathologicMin": 5, @@ -971,7 +971,7 @@ "PathologicRegion": "chrX:30882677-30882743", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "TTC", + "DisplayRU": "CTT", "Disease": "DMD", "NormalMax": 33, "PathologicMin": 59, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:65975147-65975250", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:146765190-146765342", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed index 04b2193a..cec298cf 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.TRGT.bed @@ -1,80 +1,80 @@ -chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= -chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr1 1371178 1371198 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57832715 57832793 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= +chr1 94883977 94884000 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 145209323 145209354 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= +chr1 155160981 155162030 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 156561557 156561575 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96862804 96862862 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= +chr2 100721260 100721286 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176957786 176957831 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 191745598 191745646 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63898360 63898403 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 128891419 128891577 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 138664861 138664904 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= chr3 183429975 183430014 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3076603 3076696 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= +chr4 39350044 39350103 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41747989 41748049 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= chr4 160263678 160263770 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356455 10356523 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 146258290 146258322 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 176981490 176981532 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13328708 13328835 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327864 16327955 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45390487 45390538 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170870994 170871105 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= +chr7 27239297 27239351 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27239444 27239480 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27239543 27239585 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= +chr7 55955293 55955332 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 105601198 105601227 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 119379051 119379157 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= -chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= -chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr9 27573482 27573544 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 71652186 71652220 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= +chr9 133556992 133557028 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= +chr9 135946564 135947124 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr10 81586139 81586160 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119076999 119077033 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 7045879 7045938 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50898784 50898807 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 112036753 112036823 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 124018267 124018297 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70713485 70713561 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 100637702 100637748 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 102813924 102814076 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 23790681 23790712 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92537354 92537396 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 23086363 23086389 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34711626 34711652 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 89112664 89112683 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= +chr15 89876810 89876860 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17564764 17564779 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24624759 24624853 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 66524299 66524369 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67876765 67876853 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 72821593 72821657 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87637888 87637935 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17711672 17711774 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= +chr17 78120808 78120938 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 53253384 53253460 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4510739 4513671 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4510739 4513671 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13318672 13318712 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14606853 14606887 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18896844 18896860 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18896844 18896860 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 46273462 46273524 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2633378 2633421 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4680016 4680139 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2633378 2633421 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4680016 4680139 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 45196323 45196360 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19754285 19754330 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38713287 38713380 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 46191234 46191304 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= -chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 25031646 25031682 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 25031766 25031814 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 31302674 31302730 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= +chrX 66765158 66765261 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 70672904 70672981 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 136648985 136649015 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 139586481 139586526 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 146993567 146993629 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 147582124 147582273 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed index f6abb39f..26e5c179 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed @@ -1,14 +1,14 @@ #chrom start stop motif motif_len id -chr1 1371178 1371198 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 1371178 1371198 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57832750 57832793 GAAAT 5 SCA37_DAB1 -chr1 94883977 94884000 GCC 3 OPDM5_ABCD3 -chr1 145209323 145209354 GGC 3 NIID_NOTCH2NLC -chr1 155160981 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 156561557 156561575 GGGCC 5 NME_NAXE +chr1 94883977 94884000 CCG 3 OPDM5_ABCD3 +chr1 145209323 145209354 CGG 3 NIID_NOTCH2NLC +chr1 155160981 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 156561557 156561575 CCGGG 5 NME_NAXE chr2 96862804 96862807 AAATG 5 FAME2_STARD7 -chr2 100721260 100721286 GCC 3 FRA2A_AFF3 +chr2 100721260 100721286 CCG 3 FRA2A_AFF3 chr2 176957786 176957831 GCN 3 SD5_HOXD13 -chr2 191745598 191745646 GCA 3 GDPAG_GLS +chr2 191745598 191745646 CAG 3 GDPAG_GLS chr3 63898360 63898391 CAG 3 SCA7_ATXN7 chr3 63898391 63898403 CCG 3 SCA7_ATXN7_flank chr3 128891419 128891499 CAGG 4 DM2_CNBP @@ -19,58 +19,58 @@ chr3 183430010 183430014 TTTCA 5 FAME4_YEATS2 chr4 3076603 3076654 CAG 3 HD_HTT chr4 3076660 3076696 CCG 3 HD_HTT_flank chr4 39350099 39350103 AAGGG 5 CANVAS_RFC1 -chr4 41747989 41748049 GCN 3 CCHS_PHOX2B +chr4 41747989 41748049 NGC 3 CCHS_PHOX2B chr4 160263763 160263770 TTTCA 5 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA 5 FAME3_MARCHF6 -chr5 146258290 146258322 GCT 3 SCA12_PPP2R2B -chr5 176981490 176981532 GCC 3 OPDM_FAM193B -chr6 13328708 13328835 GCC 3 OPDM_TBC1D7 +chr5 146258290 146258322 CTG 3 SCA12_PPP2R2B +chr5 176981490 176981532 CCG 3 OPDM_FAM193B +chr6 13328708 13328835 CCG 3 OPDM_TBC1D7 chr6 16327864 16327955 CTG 3 SCA1_ATXN1 chr6 45390487 45390538 GCN 3 CCD_RUNX2 chr6 170870994 170871105 CAG 3 SCA17_TBP chr7 27239297 27239351 NGC 3 HFG_HOXA13-III chr7 27239444 27239480 NGC 3 HFG_HOXA13-II chr7 27239543 27239585 NGC 3 HFG_HOXA13-I -chr7 55955293 55955332 GCG 3 FRA7A_ZNF713 -chr8 105601198 105601227 CGC 3 OPDM1_LRP12 +chr7 55955293 55955332 CGG 3 FRA7A_ZNF713 +chr8 105601198 105601227 CCG 3 OPDM1_LRP12 chr8 119379151 119379157 TGAAA 5 FAME1_SAMD12 -chr9 27573482 27573544 GGCCCC 6 FTDALS1_C9orf72 +chr9 27573482 27573544 CCCCGG 6 FTDALS1_C9orf72 chr9 71652186 71652202 A 1 FRDA_FXN_flank chr9 71652202 71652220 GAA 3 FRDA_FXN -chr9 133556992 133557028 GCC 3 HSAN-VIII_PRDM12 +chr9 133556992 133557028 CCG 3 HSAN-VIII_PRDM12 chr9 135946564 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 81586139 81586160 GGC 3 OPML1_NUTM2B-AS1 +chr10 81586139 81586160 CGG 3 OPML1_NUTM2B-AS1 chr11 119076999 119077033 CGG 3 JBS_CBL chr12 7045879 7045938 CAG 3 DRPLA_ATN1 -chr12 50898784 50898807 GGC 3 FRA12A_DIP2B +chr12 50898784 50898807 CGG 3 FRA12A_DIP2B chr12 112036753 112036823 CTG 3 SCA2_ATXN2 -chr12 124018267 124018297 GGC 3 OPDM4_RILPL1 +chr12 124018267 124018297 CGG 3 OPDM4_RILPL1 chr13 70713485 70713515 CTA 3 SCA8_ATXN8OS_flank chr13 70713515 70713561 CTG 3 SCA8_ATXN8OS chr13 100637702 100637748 GCN 3 HPE5_ZIC2 -chr13 102813924 102814076 GAA 3 SCA27B_FGF14 +chr13 102813924 102814076 AAG 3 SCA27B_FGF14 chr14 23790681 23790712 GCN 3 OPMD_PABPN1 chr14 92537354 92537396 CTG 3 SCA3_ATXN3 -chr15 23086363 23086389 GCG 3 ALS1_NIPA1 +chr15 23086363 23086389 CGG 3 ALS1_NIPA1 chr15 34711626 34711652 CT 2 aFTLD-U_GOLGA8A -chr15 89112664 89112683 TTTG 4 CHNG3_MIR7-2 +chr15 89112664 89112683 GTTT 4 CHNG3_MIR7-2 chr15 89876810 89876816 GCT 3 CPEO_POLG_flank chr15 89876816 89876819 GTT 3 CPEO_POLG_flank chr15 89876819 89876860 GCT 3 CPEO_POLG chr16 17564764 17564779 GCC 3 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA 5 FAME6_TNRC6A -chr16 66524299 66524369 TGGAA 5 SCA31_BEAN1 +chr16 66524299 66524369 AATGG 5 SCA31_BEAN1 chr16 67876765 67876853 CAG 3 SCA_THAP11 -chr16 72821593 72821657 GCC 3 SCA4_ZFHX3 +chr16 72821593 72821657 CCG 3 SCA4_ZFHX3 chr16 87637888 87637935 CTG 3 HDL2_JPH3 chr17 17711762 17711774 TTTCA 5 FAME8_RAI1 -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 78120808 78120938 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 666891 667632 GATGGT 6 CPUM_TYMS chr18 53253384 53253460 CAG 3 FECD3_TCF4 -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4510739 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13318672 13318712 CTG 3 SCA6_CACNA1A chr19 14606853 14606887 CCG 3 OPDM2_GIPC1 -chr19 18896844 18896860 GTC 3 EDM1-PSACH_COMP +chr19 18896844 18896860 CGT 3 EDM1-PSACH_COMP chr19 46273462 46273524 CAG 3 DM1_DMPK chr20 2633378 2633403 GGCCTG 6 SCA36_NOP56 chr20 2633403 2633421 CGCCTG 6 SCA36_NOP56_flank @@ -84,9 +84,9 @@ chrX 25031646 25031682 NGC 3 PRTS_ARX chrX 25031766 25031814 NGC 3 EIEE1_ARX chrX 31302674 31302722 TTC 3 DMD_DMD chrX 31302722 31302730 T 1 DMD_DMD_flank -chrX 66765158 66765261 GCA 3 SBMA_AR +chrX 66765158 66765261 CAG 3 SBMA_AR chrX 70672904 70672981 AGAGGG 6 XDP_TAF1 chrX 136648985 136649015 GCN 3 VACTERLX_ZIC3 chrX 139586481 139586526 NGC 3 XLID_SOX3 chrX 146993567 146993629 CGG 3 FXS_FMR1 -chrX 147582124 147582273 GCC 3 FRAXE_AFF2 +chrX 147582124 147582273 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz index eb912884..a98b1bf2 100644 Binary files a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz and b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz differ diff --git a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi index 20fb52cd..c3f4aa33 100644 Binary files a/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi and b/data/catalogs/STRchive-disease-loci.hg19.atarva.bed.gz.tbi differ diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index 97bbd950..aaa22850 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -1,81 +1,81 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease -chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 -chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease -chr1 156561557 156561575 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy -chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100721260 100721286 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr1 1371178 1371198 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 +chr1 57832715 57832793 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 +chr1 94883977 94884000 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 145209323 145209354 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 155160981 155162030 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease +chr1 156561557 156561575 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy +chr2 96862804 96862862 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 +chr2 100721260 100721286 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176957786 176957831 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 191745598 191745646 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 191745598 191745646 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63898360 63898391 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 128891419 128891499 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138664861 138664904 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138664861 138664904 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183429975 183430014 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3076603 3076660 HD_HTT HTT CAG CAG 36 AD Huntington disease -chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome -chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome +chr4 39350044 39350103 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome +chr4 41747989 41748049 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome chr4 160263678 160263770 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356455 10356523 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 176981490 176981532 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 146258290 146258322 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 176981490 176981532 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13328708 13328835 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16327864 16327955 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45390487 45390538 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170870994 170871105 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27239297 27239351 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27239444 27239480 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27239543 27239585 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 105601198 105601227 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 +chr7 27239297 27239351 HFG_HOXA13-III HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 3 +chr7 27239444 27239480 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital syndrome 2 +chr7 27239543 27239585 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 +chr7 55955293 55955332 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 105601198 105601227 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 119379051 119379157 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 -chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) -chr9 71652202 71652220 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII -chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr9 27573482 27573544 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) +chr9 71652202 71652220 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia +chr9 133556992 133557028 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 135946564 135947124 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC None AD Maturity-Onset Diabetes of the Young Type 8 +chr10 81586139 81586160 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119076999 119077033 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 7045879 7045938 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50898784 50898807 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50898784 50898807 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 112036753 112036823 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 124018267 124018297 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70713515 70713561 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 100637702 100637748 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 -chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B +chr13 102813924 102814076 SCA27B_FGF14 FGF14 GAA AAG 320 AD Spinocerebellar ataxia 27B chr14 23790681 23790712 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92537354 92537396 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 23086363 23086389 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 23086363 23086389 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 34711626 34711652 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) -chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89876819 89876860 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 89112664 89112683 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 +chr15 89876819 89876860 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17564764 17564779 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24624759 24624853 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 -chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 +chr16 66524299 66524369 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 67876765 67876853 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 72821593 72821657 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87637888 87637935 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17711672 17711774 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 -chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome +chr17 78120808 78120938 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 666891 667632 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 53253384 53253460 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 -chr19 4510739 4513671 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy +chr19 4510739 4513671 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy chr19 13318672 13318712 SCA6_CACNA1A CACNA1A CTG CTG 21 AD Spinocerebellar ataxia type 6 chr19 14606853 14606887 OPDM2_GIPC1 GIPC1 CCG CCG 73 AD Oculopharyngodistal myopathy type 2 -chr19 18896844 18896860 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia +chr19 18896844 18896860 EDM1-PSACH_COMP COMP GTC CGT 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia chr19 46273462 46273524 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 -chr20 2633378 2633403 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 -chr20 4680043 4680139 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome +chr20 2633378 2633403 SCA36_NOP56 NOP56 GGCCTG CCTGGG 650 AD Spinocerebellar ataxia type 36 +chr20 4680043 4680139 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT AGCCTCATGGTGGTGGCTGGGGGC 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 19754285 19754330 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38713287 38713380 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 46191234 46191304 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25031646 25031682 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 25031766 25031814 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy -chrX 31302674 31302722 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 66765158 66765261 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 25031646 25031682 PRTS_ARX ARX GCN NGC 20 XR Partington syndrome +chrX 25031766 25031814 EIEE1_ARX ARX GCN NGC 17 XR Early-infantile epileptic encephalopathy +chrX 31302674 31302722 DMD_DMD DMD TTC CTT 59 XR Duchenne muscular dystrophy +chrX 66765158 66765261 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 70672904 70672981 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 136648985 136649015 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 139586481 139586526 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 139586481 139586526 XLID_SOX3 SOX3 GCN NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 146993567 146993629 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 147582124 147582273 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 147582124 147582273 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed index 67839406..4d1c9eea 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.longTR.bed @@ -1,80 +1,80 @@ -chr1 1371179 1371198 GGCGCGGAGC HMNR7_VWA1 -chr1 57832716 57832793 GAAAT,AAAAT SCA37_DAB1 -chr1 94883978 94884000 GCC OPDM5_ABCD3 -chr1 145209324 145209354 GGC NIID_NOTCH2NLC -chr1 155160982 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 -chr1 156561558 156561575 GGGCC NME_NAXE -chr2 96862805 96862862 AAATG,AAAAT FAME2_STARD7 -chr2 100721261 100721286 GCC FRA2A_AFF3 +chr1 1371179 1371198 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 +chr1 57832716 57832793 TGAAA,AAAAT SCA37_DAB1 +chr1 94883978 94884000 CCG OPDM5_ABCD3 +chr1 145209324 145209354 CGG NIID_NOTCH2NLC +chr1 155160982 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 +chr1 156561558 156561575 CCGGG,GGGCC NME_NAXE +chr2 96862805 96862862 TGAAA,AAAAT FAME2_STARD7 +chr2 100721261 100721286 CCG FRA2A_AFF3 chr2 176957787 176957831 GCN SD5_HOXD13 -chr2 191745599 191745646 GCA GDPAG_GLS +chr2 191745599 191745646 CAG GDPAG_GLS chr3 63898361 63898391 CAG SCA7_ATXN7 chr3 128891420 128891499 CAGG DM2_CNBP -chr3 138664862 138664904 NGC BPES_FOXL2 +chr3 138664862 138664904 NGC,GCN BPES_FOXL2 chr3 183429976 183430014 TTTCA,TTTTA FAME4_YEATS2 chr4 3076604 3076660 CAG HD_HTT -chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 -chr4 41747990 41748049 GCN CCHS_PHOX2B +chr4 39350045 39350103 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 +chr4 41747990 41748049 NGC,GCN CCHS_PHOX2B chr4 160263679 160263770 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356456 10356523 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146258291 146258322 GCT SCA12_PPP2R2B -chr5 176981491 176981532 GCC OPDM_FAM193B -chr6 13328709 13328835 GCC OPDM_TBC1D7 +chr5 146258291 146258322 CTG SCA12_PPP2R2B +chr5 176981491 176981532 CCG OPDM_FAM193B +chr6 13328709 13328835 CCG OPDM_TBC1D7 chr6 16327865 16327955 CTG SCA1_ATXN1 chr6 45390488 45390538 GCN CCD_RUNX2 chr6 170870995 170871105 CAG SCA17_TBP -chr7 27239298 27239351 NGC HFG_HOXA13-III -chr7 27239445 27239480 NGC HFG_HOXA13-II -chr7 27239544 27239585 NGC HFG_HOXA13-I -chr7 55955294 55955332 GCG FRA7A_ZNF713 -chr8 105601199 105601227 CGC OPDM1_LRP12 +chr7 27239298 27239351 NGC,GCN HFG_HOXA13-III +chr7 27239445 27239480 NGC,GCN HFG_HOXA13-II +chr7 27239544 27239585 NGC,GCN HFG_HOXA13-I +chr7 55955294 55955332 CGG FRA7A_ZNF713 +chr8 105601199 105601227 CCG OPDM1_LRP12 chr8 119379052 119379157 TGAAA,TAAAA FAME1_SAMD12 -chr9 27573483 27573544 GGCCCC FTDALS1_C9orf72 -chr9 71652203 71652220 GAA FRDA_FXN -chr9 133556993 133557028 GCC HSAN-VIII_PRDM12 -chr9 135946565 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 81586140 81586160 GGC OPML1_NUTM2B-AS1 +chr9 27573483 27573544 CCCCGG,GGCCCC FTDALS1_C9orf72 +chr9 71652203 71652220 AAG,GAA FRDA_FXN +chr9 133556993 133557028 CCG HSAN-VIII_PRDM12 +chr9 135946565 135947124 ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL +chr10 81586140 81586160 CGG OPML1_NUTM2B-AS1 chr11 119077000 119077033 CGG JBS_CBL chr12 7045880 7045938 CAG DRPLA_ATN1 -chr12 50898785 50898807 GGC FRA12A_DIP2B +chr12 50898785 50898807 CGG FRA12A_DIP2B chr12 112036754 112036823 CTG SCA2_ATXN2 -chr12 124018268 124018297 GGC OPDM4_RILPL1 +chr12 124018268 124018297 CGG OPDM4_RILPL1 chr13 70713516 70713561 CTG SCA8_ATXN8OS chr13 100637703 100637748 GCN HPE5_ZIC2 -chr13 102813925 102814076 GAA,GGA,GCA SCA27B_FGF14 +chr13 102813925 102814076 AAG,AGG,CAG,GAA SCA27B_FGF14 chr14 23790682 23790712 GCN OPMD_PABPN1 chr14 92537355 92537396 CTG SCA3_ATXN3 -chr15 23086364 23086389 GCG ALS1_NIPA1 +chr15 23086364 23086389 CGG ALS1_NIPA1 chr15 34711627 34711652 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A -chr15 89112665 89112683 TTTG CHNG3_MIR7-2 -chr15 89876820 89876860 GCT CPEO_POLG -chr16 17564765 17564779 GCC DBQD2_XYLT1 +chr15 89112665 89112683 GTTT,TTTG CHNG3_MIR7-2 +chr15 89876820 89876860 CTG CPEO_POLG +chr16 17564765 17564779 CCG DBQD2_XYLT1 chr16 24624760 24624853 TTTCA,TTTTA FAME6_TNRC6A -chr16 66524300 66524369 TGGAA,TAGAA,AATAA SCA31_BEAN1 +chr16 66524300 66524369 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 67876766 67876853 CAG SCA_THAP11 -chr16 72821594 72821657 GCC SCA4_ZFHX3 +chr16 72821594 72821657 CCG SCA4_ZFHX3 chr16 87637889 87637935 CTG HDL2_JPH3 -chr17 17711673 17711774 TTTCA,TTTTA FAME8_RAI1 -chr17 78120809 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 +chr17 17711673 17711774 TTTCA,ATTTT,TTTTA FAME8_RAI1 +chr17 78120809 78120938 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 666892 667632 GATGGT CPUM_TYMS chr18 53253385 53253460 CAG FECD3_TCF4 -chr19 4510740 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 +chr19 4510740 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 chr19 13318673 13318712 CTG SCA6_CACNA1A chr19 14606854 14606887 CCG OPDM2_GIPC1 -chr19 18896845 18896860 GTC EDM1-PSACH_COMP +chr19 18896845 18896860 CGT,GTC EDM1-PSACH_COMP chr19 46273463 46273524 CAG DM1_DMPK -chr20 2633379 2633403 GGCCTG SCA36_NOP56 -chr20 4680044 4680139 CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP +chr20 2633379 2633403 CCTGGG,GGCCTG SCA36_NOP56 +chr20 4680044 4680139 AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP chr21 45196324 45196360 CGCGGGGCGGGG EPM1_CSTB chr22 19754286 19754330 GCN TOF_TBX1 chr22 38713288 38713380 CCG EPM_CSNK1E chr22 46191235 46191304 ATTCT SCA10_ATXN10 -chrX 25031647 25031682 NGC PRTS_ARX -chrX 25031767 25031814 NGC EIEE1_ARX -chrX 31302675 31302722 TTC DMD_DMD -chrX 66765159 66765261 GCA SBMA_AR +chrX 25031647 25031682 NGC,GCN PRTS_ARX +chrX 25031767 25031814 NGC,GCN EIEE1_ARX +chrX 31302675 31302722 CTT,TTC DMD_DMD +chrX 66765159 66765261 CAG SBMA_AR chrX 70672905 70672981 AGAGGG XDP_TAF1 chrX 136648986 136649015 GCN VACTERLX_ZIC3 -chrX 139586482 139586526 NGC XLID_SOX3 +chrX 139586482 139586526 NGC,GCN XLID_SOX3 chrX 146993568 146993629 CGG FXS_FMR1 -chrX 147582125 147582273 GCC FRAXE_AFF2 +chrX 147582125 147582273 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed index f48e358b..fbb1db85 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.straglr.bed @@ -1,12 +1,12 @@ -chr1 1371178 1371198 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 +chr1 1371178 1371198 AGCGGCGCGG HMNR7_VWA1 HMNR7_VWA1 chr1 57832750 57832793 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94883977 94884000 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 145209323 145209354 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC -chr1 156561557 156561575 GGGCC NME_NAXE NME_NAXE +chr1 94883977 94884000 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 145209323 145209354 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 156561557 156561575 CCGGG NME_NAXE NME_NAXE chr2 96862804 96862807 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100721260 100721286 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100721260 100721286 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176957786 176957831 GCN SD5_HOXD13 SD5_HOXD13 -chr2 191745598 191745646 GCA GDPAG_GLS GDPAG_GLS +chr2 191745598 191745646 CAG GDPAG_GLS GDPAG_GLS chr3 63898360 63898391 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63898391 63898403 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 128891419 128891499 CAGG DM2_CNBP DM2_CNBP @@ -17,57 +17,57 @@ chr3 183430010 183430014 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3076603 3076654 CAG HD_HTT HD_HTT chr4 3076660 3076696 CCG HD_HTT HD_HTT_CCG chr4 39350099 39350103 AAGGG CANVAS_RFC1 CANVAS_RFC1 -chr4 41747989 41748049 GCN CCHS_PHOX2B CCHS_PHOX2B +chr4 41747989 41748049 NGC CCHS_PHOX2B CCHS_PHOX2B chr4 160263763 160263770 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10356515 10356523 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 146258290 146258322 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 176981490 176981532 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13328708 13328835 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 146258290 146258322 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 176981490 176981532 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13328708 13328835 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16327864 16327955 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45390487 45390538 GCN CCD_RUNX2 CCD_RUNX2 chr6 170870994 170871105 CAG SCA17_TBP SCA17_TBP chr7 27239297 27239351 NGC HFG_HOXA13-III HFG_HOXA13-III chr7 27239444 27239480 NGC HFG_HOXA13-II HFG_HOXA13-II chr7 27239543 27239585 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 55955293 55955332 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 105601198 105601227 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 55955293 55955332 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 105601198 105601227 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 119379151 119379157 TGAAA FAME1_SAMD12 FAME1_SAMD12 -chr9 27573482 27573544 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 +chr9 27573482 27573544 CCCCGG FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 71652186 71652202 A FRDA_FXN FRDA_FXN_A chr9 71652202 71652220 GAA FRDA_FXN FRDA_FXN -chr9 133556992 133557028 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 81586139 81586160 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 133556992 133557028 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 81586139 81586160 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119076999 119077033 CGG JBS_CBL JBS_CBL chr12 7045879 7045938 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50898784 50898807 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50898784 50898807 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 112036753 112036823 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 124018267 124018297 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 124018267 124018297 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 70713485 70713515 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 70713515 70713561 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 100637702 100637748 GCN HPE5_ZIC2 HPE5_ZIC2 -chr13 102813924 102814076 GAA SCA27B_FGF14 SCA27B_FGF14 +chr13 102813924 102814076 AAG SCA27B_FGF14 SCA27B_FGF14 chr14 23790681 23790712 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 92537354 92537396 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 23086363 23086389 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 23086363 23086389 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 34711626 34711652 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A -chr15 89112664 89112683 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 +chr15 89112664 89112683 GTTT CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 89876810 89876816 GCT CPEO_POLG CPEO_POLG_GCT chr15 89876816 89876819 GTT CPEO_POLG CPEO_POLG_GTT chr15 89876819 89876860 GCT CPEO_POLG CPEO_POLG chr16 17564764 17564779 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24624809 24624853 TTTCA FAME6_TNRC6A FAME6_TNRC6A -chr16 66524299 66524369 TGGAA SCA31_BEAN1 SCA31_BEAN1 +chr16 66524299 66524369 AATGG SCA31_BEAN1 SCA31_BEAN1 chr16 67876765 67876853 CAG SCA_THAP11 SCA_THAP11 -chr16 72821593 72821657 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 72821593 72821657 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 87637888 87637935 CTG HDL2_JPH3 HDL2_JPH3 chr17 17711762 17711774 TTTCA FAME8_RAI1 FAME8_RAI1 -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 +chr17 78120808 78120938 GCCGCTGCCGACCTCGCTGT RCPS_EIF4A3 RCPS_EIF4A3 chr18 666891 667632 GATGGT CPUM_TYMS CPUM_TYMS chr18 53253384 53253460 CAG FECD3_TCF4 FECD3_TCF4 -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 MRUPAV_PLIN4 +chr19 4510739 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT MRUPAV_PLIN4 MRUPAV_PLIN4 chr19 13318672 13318712 CTG SCA6_CACNA1A SCA6_CACNA1A chr19 14606853 14606887 CCG OPDM2_GIPC1 OPDM2_GIPC1 -chr19 18896844 18896860 GTC EDM1-PSACH_COMP EDM1-PSACH_COMP +chr19 18896844 18896860 CGT EDM1-PSACH_COMP EDM1-PSACH_COMP chr19 46273462 46273524 CAG DM1_DMPK DM1_DMPK chr20 2633378 2633403 GGCCTG SCA36_NOP56 SCA36_NOP56 chr20 2633403 2633421 CGCCTG SCA36_NOP56 SCA36_NOP56_CGCCTG @@ -81,9 +81,9 @@ chrX 25031646 25031682 NGC PRTS_ARX PRTS_ARX chrX 25031766 25031814 NGC EIEE1_ARX EIEE1_ARX chrX 31302674 31302722 TTC DMD_DMD DMD_DMD chrX 31302722 31302730 T DMD_DMD DMD_DMD_T -chrX 66765158 66765261 GCA SBMA_AR SBMA_AR +chrX 66765158 66765261 CAG SBMA_AR SBMA_AR chrX 70672904 70672981 AGAGGG XDP_TAF1 XDP_TAF1 chrX 136648985 136649015 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 chrX 139586481 139586526 NGC XLID_SOX3 XLID_SOX3 chrX 146993567 146993629 CGG FXS_FMR1 FXS_FMR1 -chrX 147582124 147582273 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 147582124 147582273 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg19.stranger.json b/data/catalogs/STRchive-disease-loci.hg19.stranger.json index c1dd1fd7..a4eaf755 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg19.stranger.json @@ -2,11 +2,11 @@ { "LocusId": "HMNR7_VWA1", "ReferenceRegion": "chr1:1371178-1371198", - "LocusStructure": "(GGCGCGGAGC)*", + "LocusStructure": "(AGCGGCGCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGCGCGGAGC", + "DisplayRU": "AGCGGCGCGG", "Disease": "HMNR7", "NormalMax": 2, "PathologicMin": 3, @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57832750-57832793", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94883977-94884000", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:145209323-145209354", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -56,11 +56,11 @@ { "LocusId": "NME_NAXE", "ReferenceRegion": "chr1:156561557-156561575", - "LocusStructure": "(GGGCC)*", + "LocusStructure": "(CCGGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGGCC", + "DisplayRU": "CCGGG", "Disease": "NME", "NormalMax": 7, "PathologicMin": 200, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96862804-96862807", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100721260-100721286", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:191745598-191745646", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -211,11 +211,11 @@ { "LocusId": "CCHS_PHOX2B", "ReferenceRegion": "chr4:41747989-41748049", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "CCHS", "NormalMax": 20, "PathologicMin": 26, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:146258290-146258322", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:176981490-176981532", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13328708-13328835", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:55955293-55955332", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:105601198-105601227", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -412,11 +412,11 @@ { "LocusId": "FTDALS1_C9orf72", "ReferenceRegion": "chr9:27573482-27573544", - "LocusStructure": "(GGCCCC)*", + "LocusStructure": "(CCCCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCCC", + "DisplayRU": "CCCCGG", "Disease": "FTDALS1", "NormalMax": 23, "PathologicMin": 31, @@ -431,7 +431,7 @@ "PathologicRegion": "chr9:71652202-71652220", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "FRDA", "NormalMax": 33, "PathologicMin": 56, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:133556992-133557028", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:81586139-81586160", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50898784-50898807", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:124018267-124018297", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -559,11 +559,11 @@ { "LocusId": "SCA27B_FGF14", "ReferenceRegion": "chr13:102813924-102814076", - "LocusStructure": "(GAA)*", + "LocusStructure": "(AAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "SCA27B", "NormalMax": 179, "PathologicMin": 320, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:23086363-23086389", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -626,11 +626,11 @@ { "LocusId": "CHNG3_MIR7-2", "ReferenceRegion": "chr15:89112664-89112683", - "LocusStructure": "(TTTG)*", + "LocusStructure": "(GTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTG", + "DisplayRU": "GTTT", "Disease": "CHNG3", "NormalMax": 4, "PathologicMin": 5, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:89876819-89876860", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17564764-17564779", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -684,11 +684,11 @@ { "LocusId": "SCA31_BEAN1", "ReferenceRegion": "chr16:66524299-66524369", - "LocusStructure": "(TGGAA)*", + "LocusStructure": "(AATGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGAA", + "DisplayRU": "AATGG", "Disease": "SCA31", "NormalMax": 109, "PathologicMin": 110, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:72821593-72821657", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -751,11 +751,11 @@ { "LocusId": "RCPS_EIF4A3", "ReferenceRegion": "chr17:78120808-78120938", - "LocusStructure": "(CCTCGCTGTGCCGCTGCCGA)*", + "LocusStructure": "(GCCGCTGCCGACCTCGCTGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "CCTCGCTGTGCCGCTGCCGA", + "DisplayRU": "GCCGCTGCCGACCTCGCTGT", "Disease": "RCPS", "NormalMax": 12, "PathologicMin": 14, @@ -790,11 +790,11 @@ { "LocusId": "MRUPAV_PLIN4", "ReferenceRegion": "chr19:4510739-4513671", - "LocusStructure": "(TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC)*", + "LocusStructure": "(GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC", + "DisplayRU": "GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT", "Disease": "MRUPAV", "NormalMax": 31, "PathologicMin": 37, @@ -829,11 +829,11 @@ { "LocusId": "EDM1-PSACH_COMP", "ReferenceRegion": "chr19:18896844-18896860", - "LocusStructure": "(GTC)*", + "LocusStructure": "(CGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GTC", + "DisplayRU": "CGT", "Disease": "EDM1, PSACH", "NormalMax": 5, "PathologicMin": 6, @@ -861,7 +861,7 @@ "PathologicRegion": "chr20:2633378-2633403", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCTG", + "DisplayRU": "CCTGGG", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650, @@ -876,7 +876,7 @@ "PathologicRegion": "chr20:4680043-4680139", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CCTCATGGTGGTGGCTGGGGGCAG", + "DisplayRU": "AGCCTCATGGTGGTGGCTGGGGGC", "Disease": "CJD", "NormalMax": 4, "PathologicMin": 5, @@ -971,7 +971,7 @@ "PathologicRegion": "chrX:31302674-31302722", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "TTC", + "DisplayRU": "CTT", "Disease": "DMD", "NormalMax": 33, "PathologicMin": 59, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:66765158-66765261", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:147582124-147582273", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed index 6d1f281f..46e289e9 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.TRGT.bed @@ -1,80 +1,80 @@ -chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=GGCGCGGAGC;STRUC= -chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT;STRUC= -chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=GCC;STRUC= -chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=GGC;STRUC= -chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA;STRUC= -chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=GGGCC;STRUC= -chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT;STRUC= -chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=GCC;STRUC= +chr1 1435798 1435818 ID=HMNR7_VWA1;MOTIFS=AGCGGCGCGG,GGCGCGGAGC;STRUC= +chr1 57367043 57367121 ID=SCA37_DAB1;MOTIFS=AAAAT,GAAAT,TGAAA;STRUC= +chr1 94418421 94418444 ID=OPDM5_ABCD3;MOTIFS=CCG;STRUC= +chr1 149390802 149390842 ID=NIID_NOTCH2NLC;MOTIFS=CGG;STRUC= +chr1 155188505 155192239 ID=ADTKD_MUC1;MOTIFS=ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG;STRUC= +chr1 156591765 156591783 ID=NME_NAXE;MOTIFS=CCGGG,GGGCC;STRUC= +chr2 96197066 96197124 ID=FAME2_STARD7;MOTIFS=AAATG,AAAAT,TGAAA;STRUC= +chr2 100104798 100104824 ID=FRA2A_AFF3;MOTIFS=CCG;STRUC= chr2 176093058 176093103 ID=SD5_HOXD13;MOTIFS=GCN;STRUC= -chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=GCA;STRUC= +chr2 190880872 190880920 ID=GDPAG_GLS;MOTIFS=CAG;STRUC= chr3 63912684 63912727 ID=SCA7_ATXN7;MOTIFS=CAG,CCG;STRUC= chr3 129172576 129172734 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC= -chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=NGC;STRUC= +chr3 138946019 138946062 ID=BPES_FOXL2;MOTIFS=NGC,GCN;STRUC= chr3 183712187 183712226 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC= chr4 3074876 3074969 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC= -chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC= -chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC= +chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG;STRUC= +chr4 41745972 41746032 ID=CCHS_PHOX2B;MOTIFS=NGC,GCN;STRUC= chr4 159342526 159342618 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC= chr5 10356343 10356411 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC= -chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=GCT;STRUC= -chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=GCC;STRUC= -chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=GCC;STRUC= +chr5 146878727 146878759 ID=SCA12_PPP2R2B;MOTIFS=CTG;STRUC= +chr5 177554489 177554531 ID=OPDM_FAM193B;MOTIFS=CCG;STRUC= +chr6 13328476 13328603 ID=OPDM_TBC1D7;MOTIFS=CCG;STRUC= chr6 16327633 16327724 ID=SCA1_ATXN1;MOTIFS=CTG;STRUC= chr6 45422750 45422801 ID=CCD_RUNX2;MOTIFS=GCN;STRUC= chr6 170561906 170562017 ID=SCA17_TBP;MOTIFS=CAG;STRUC= -chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=NGC;STRUC= -chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=NGC;STRUC= -chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=NGC;STRUC= -chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=GCG;STRUC= -chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CGC;STRUC= +chr7 27199678 27199732 ID=HFG_HOXA13-III;MOTIFS=NGC,GCN;STRUC= +chr7 27199825 27199861 ID=HFG_HOXA13-II;MOTIFS=NGC,GCN;STRUC= +chr7 27199924 27199966 ID=HFG_HOXA13-I;MOTIFS=NGC,GCN;STRUC= +chr7 55887600 55887639 ID=FRA7A_ZNF713;MOTIFS=CGG;STRUC= +chr8 104588970 104588999 ID=OPDM1_LRP12;MOTIFS=CCG;STRUC= chr8 118366812 118366918 ID=FAME1_SAMD12;MOTIFS=TAAAA,TGAAA;STRUC= -chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=GGCCCC;STRUC= -chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC= -chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=GCC;STRUC= -chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= -chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=GGC;STRUC= +chr9 27573484 27573546 ID=FTDALS1_C9orf72;MOTIFS=CCCCGG,GGCCCC;STRUC= +chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA,AAG;STRUC= +chr9 130681605 130681641 ID=HSAN-VIII_PRDM12;MOTIFS=CCG;STRUC= +chr9 133071177 133071737 ID=MODY8_CEL;MOTIFS=GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG;STRUC= +chr10 79826383 79826404 ID=OPML1_NUTM2B-AS1;MOTIFS=CGG;STRUC= chr11 119206289 119206323 ID=JBS_CBL;MOTIFS=CGG;STRUC= chr12 6936716 6936775 ID=DRPLA_ATN1;MOTIFS=CAG;STRUC= -chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=GGC;STRUC= +chr12 50505001 50505024 ID=FRA12A_DIP2B;MOTIFS=CGG;STRUC= chr12 111598949 111599019 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC= -chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC= +chr12 123533720 123533750 ID=OPDM4_RILPL1;MOTIFS=CGG;STRUC= chr13 70139353 70139429 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC= chr13 99985448 99985494 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC= -chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC= +chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=AAG,GAA,AGG,CAG;STRUC= chr14 23321472 23321503 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC= chr14 92071010 92071052 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC= -chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC= +chr15 22786677 22786703 ID=ALS1_NIPA1;MOTIFS=CGG;STRUC= chr15 34419425 34419451 ID=aFTLD-U_GOLGA8A;MOTIFS=CT,TTTC,CCTT,CCCTCT;STRUC= -chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=TTTG;STRUC= -chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT;STRUC= -chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC;STRUC= +chr15 88569433 88569452 ID=CHNG3_MIR7-2;MOTIFS=GTTT,TTTG;STRUC= +chr15 89333579 89333629 ID=CPEO_POLG;MOTIFS=GCT,GTT,CTG;STRUC= +chr16 17470907 17470922 ID=DBQD2_XYLT1;MOTIFS=GCC,CCG;STRUC= chr16 24613438 24613532 ID=FAME6_TNRC6A;MOTIFS=TTTTA,TTTCA;STRUC= -chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=TGGAA,TAGAA,AATAA;STRUC= +chr16 66490396 66490466 ID=SCA31_BEAN1;MOTIFS=AATGG,AATAG,AATAA;STRUC= chr16 67842862 67842950 ID=SCA_THAP11;MOTIFS=CAG;STRUC= -chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=GCC;STRUC= +chr16 72787694 72787758 ID=SCA4_ZFHX3;MOTIFS=CCG;STRUC= chr16 87604282 87604329 ID=HDL2_JPH3;MOTIFS=CTG;STRUC= -chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA;STRUC= -chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=CCTCGCTGTGCCGCTGCCGA;STRUC= +chr17 17808358 17808460 ID=FAME8_RAI1;MOTIFS=TTTTA,TTTCA,ATTTT;STRUC= +chr17 80147009 80147139 ID=RCPS_EIF4A3;MOTIFS=GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA;STRUC= chr18 666891 667632 ID=CPUM_TYMS;MOTIFS=GATGGT;STRUC= chr18 55586153 55586229 ID=FECD3_TCF4;MOTIFS=CAG;STRUC= -chr19 4510727 4513659 ID=MRUPAV_PLIN4;MOTIFS=TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= +chr19 4510727 4513659 ID=MRUPAV_PLIN4;MOTIFS=GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC;STRUC= chr19 13207858 13207898 ID=SCA6_CACNA1A;MOTIFS=CTG;STRUC= chr19 14496041 14496075 ID=OPDM2_GIPC1;MOTIFS=CCG;STRUC= -chr19 18786034 18786050 ID=EDM1-PSACH_COMP;MOTIFS=GTC;STRUC= +chr19 18786034 18786050 ID=EDM1-PSACH_COMP;MOTIFS=CGT,GTC;STRUC= chr19 45770204 45770266 ID=DM1_DMPK;MOTIFS=CAG;STRUC= -chr20 2652732 2652775 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG;STRUC= -chr20 4699370 4699493 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= +chr20 2652732 2652775 ID=SCA36_NOP56;MOTIFS=GGCCTG,CGCCTG,CCTGGG;STRUC= +chr20 4699370 4699493 ID=CJD_PRNP;MOTIFS=CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT;STRUC= chr21 43776442 43776479 ID=EPM1_CSTB;MOTIFS=CGCGGGGCGGGG;STRUC= chr22 19766762 19766807 ID=TOF_TBX1;MOTIFS=GCN;STRUC= chr22 38317282 38317375 ID=EPM_CSNK1E;MOTIFS=CCG;STRUC= chr22 45795354 45795424 ID=SCA10_ATXN10;MOTIFS=ATTCT;STRUC= -chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=NGC;STRUC= -chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=NGC;STRUC= -chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T;STRUC= -chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=GCA;STRUC= +chrX 25013529 25013565 ID=PRTS_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 25013649 25013697 ID=EIEE1_ARX;MOTIFS=NGC,GCN;STRUC= +chrX 31284557 31284613 ID=DMD_DMD;MOTIFS=TTC,T,CTT;STRUC= +chrX 67545316 67545419 ID=SBMA_AR;MOTIFS=CAG;STRUC= chrX 71453054 71453131 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC= chrX 137566826 137566856 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC= -chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=NGC;STRUC= +chrX 140504316 140504361 ID=XLID_SOX3;MOTIFS=NGC,GCN;STRUC= chrX 147912049 147912111 ID=FXS_FMR1;MOTIFS=CGG;STRUC= -chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC= +chrX 148500604 148500753 ID=FRAXE_AFF2;MOTIFS=CCG;STRUC= diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed index 53c72b79..337c4c77 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed @@ -1,14 +1,14 @@ #chrom start stop motif motif_len id -chr1 1435798 1435818 GGCGCGGAGC 10 HMNR7_VWA1 +chr1 1435798 1435818 AGCGGCGCGG 10 HMNR7_VWA1 chr1 57367078 57367121 GAAAT 5 SCA37_DAB1 -chr1 94418421 94418444 GCC 3 OPDM5_ABCD3 -chr1 149390802 149390842 GGC 3 NIID_NOTCH2NLC -chr1 155188505 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA 61 ADTKD_MUC1 -chr1 156591765 156591783 GGGCC 5 NME_NAXE +chr1 94418421 94418444 CCG 3 OPDM5_ABCD3 +chr1 149390802 149390842 CGG 3 NIID_NOTCH2NLC +chr1 155188505 155192239 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG 61 ADTKD_MUC1 +chr1 156591765 156591783 CCGGG 5 NME_NAXE chr2 96197066 96197069 AAATG 5 FAME2_STARD7 -chr2 100104798 100104824 GCC 3 FRA2A_AFF3 +chr2 100104798 100104824 CCG 3 FRA2A_AFF3 chr2 176093058 176093103 GCN 3 SD5_HOXD13 -chr2 190880872 190880920 GCA 3 GDPAG_GLS +chr2 190880872 190880920 CAG 3 GDPAG_GLS chr3 63912684 63912715 CAG 3 SCA7_ATXN7 chr3 63912715 63912727 CCG 3 SCA7_ATXN7_flank chr3 129172576 129172656 CAGG 4 DM2_CNBP @@ -19,58 +19,58 @@ chr3 183712222 183712226 TTTCA 5 FAME4_YEATS2 chr4 3074876 3074927 CAG 3 HD_HTT chr4 3074933 3074969 CCG 3 HD_HTT_flank chr4 39348479 39348483 AAGGG 5 CANVAS_RFC1 -chr4 41745972 41746032 GCN 3 CCHS_PHOX2B +chr4 41745972 41746032 NGC 3 CCHS_PHOX2B chr4 159342611 159342618 TTTCA 5 FAME7_RAPGEF2 chr5 10356403 10356411 TTTCA 5 FAME3_MARCHF6 -chr5 146878727 146878759 GCT 3 SCA12_PPP2R2B -chr5 177554489 177554531 GCC 3 OPDM_FAM193B -chr6 13328476 13328603 GCC 3 OPDM_TBC1D7 +chr5 146878727 146878759 CTG 3 SCA12_PPP2R2B +chr5 177554489 177554531 CCG 3 OPDM_FAM193B +chr6 13328476 13328603 CCG 3 OPDM_TBC1D7 chr6 16327633 16327724 CTG 3 SCA1_ATXN1 chr6 45422750 45422801 GCN 3 CCD_RUNX2 chr6 170561906 170562017 CAG 3 SCA17_TBP chr7 27199678 27199732 NGC 3 HFG_HOXA13-III chr7 27199825 27199861 NGC 3 HFG_HOXA13-II chr7 27199924 27199966 NGC 3 HFG_HOXA13-I -chr7 55887600 55887639 GCG 3 FRA7A_ZNF713 -chr8 104588970 104588999 CGC 3 OPDM1_LRP12 +chr7 55887600 55887639 CGG 3 FRA7A_ZNF713 +chr8 104588970 104588999 CCG 3 OPDM1_LRP12 chr8 118366912 118366918 TGAAA 5 FAME1_SAMD12 -chr9 27573484 27573546 GGCCCC 6 FTDALS1_C9orf72 +chr9 27573484 27573546 CCCCGG 6 FTDALS1_C9orf72 chr9 69037270 69037286 A 1 FRDA_FXN_flank chr9 69037286 69037304 GAA 3 FRDA_FXN -chr9 130681605 130681641 GCC 3 HSAN-VIII_PRDM12 +chr9 130681605 130681641 CCG 3 HSAN-VIII_PRDM12 chr9 133071177 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG 32 MODY8_CEL -chr10 79826383 79826404 GGC 3 OPML1_NUTM2B-AS1 +chr10 79826383 79826404 CGG 3 OPML1_NUTM2B-AS1 chr11 119206289 119206323 CGG 3 JBS_CBL chr12 6936716 6936775 CAG 3 DRPLA_ATN1 -chr12 50505001 50505024 GGC 3 FRA12A_DIP2B +chr12 50505001 50505024 CGG 3 FRA12A_DIP2B chr12 111598949 111599019 CTG 3 SCA2_ATXN2 -chr12 123533720 123533750 GGC 3 OPDM4_RILPL1 +chr12 123533720 123533750 CGG 3 OPDM4_RILPL1 chr13 70139353 70139383 CTA 3 SCA8_ATXN8OS_flank chr13 70139383 70139429 CTG 3 SCA8_ATXN8OS chr13 99985448 99985494 GCN 3 HPE5_ZIC2 -chr13 102161574 102161726 GAA 3 SCA27B_FGF14 +chr13 102161574 102161726 AAG 3 SCA27B_FGF14 chr14 23321472 23321503 GCN 3 OPMD_PABPN1 chr14 92071010 92071052 CTG 3 SCA3_ATXN3 -chr15 22786677 22786703 GCG 3 ALS1_NIPA1 +chr15 22786677 22786703 CGG 3 ALS1_NIPA1 chr15 34419425 34419451 CT 2 aFTLD-U_GOLGA8A -chr15 88569433 88569452 TTTG 4 CHNG3_MIR7-2 +chr15 88569433 88569452 GTTT 4 CHNG3_MIR7-2 chr15 89333579 89333585 GCT 3 CPEO_POLG_flank chr15 89333585 89333588 GTT 3 CPEO_POLG_flank chr15 89333588 89333629 GCT 3 CPEO_POLG chr16 17470907 17470922 GCC 3 DBQD2_XYLT1 chr16 24613488 24613532 TTTCA 5 FAME6_TNRC6A -chr16 66490396 66490466 TGGAA 5 SCA31_BEAN1 +chr16 66490396 66490466 AATGG 5 SCA31_BEAN1 chr16 67842862 67842950 CAG 3 SCA_THAP11 -chr16 72787694 72787758 GCC 3 SCA4_ZFHX3 +chr16 72787694 72787758 CCG 3 SCA4_ZFHX3 chr16 87604282 87604329 CTG 3 HDL2_JPH3 chr17 17808448 17808460 TTTCA 5 FAME8_RAI1 -chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3 +chr17 80147009 80147139 GCCGCTGCCGACCTCGCTGT 20 RCPS_EIF4A3 chr18 666891 667632 GATGGT 6 CPUM_TYMS chr18 55586153 55586229 CAG 3 FECD3_TCF4 -chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 99 MRUPAV_PLIN4 +chr19 4510727 4513659 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 99 MRUPAV_PLIN4 chr19 13207858 13207898 CTG 3 SCA6_CACNA1A chr19 14496041 14496075 CCG 3 OPDM2_GIPC1 -chr19 18786034 18786050 GTC 3 EDM1-PSACH_COMP +chr19 18786034 18786050 CGT 3 EDM1-PSACH_COMP chr19 45770204 45770266 CAG 3 DM1_DMPK chr20 2652732 2652757 GGCCTG 6 SCA36_NOP56 chr20 2652757 2652775 CGCCTG 6 SCA36_NOP56_flank @@ -84,9 +84,9 @@ chrX 25013529 25013565 NGC 3 PRTS_ARX chrX 25013649 25013697 NGC 3 EIEE1_ARX chrX 31284557 31284605 TTC 3 DMD_DMD chrX 31284605 31284613 T 1 DMD_DMD_flank -chrX 67545316 67545419 GCA 3 SBMA_AR +chrX 67545316 67545419 CAG 3 SBMA_AR chrX 71453054 71453131 AGAGGG 6 XDP_TAF1 chrX 137566826 137566856 GCN 3 VACTERLX_ZIC3 chrX 140504316 140504361 NGC 3 XLID_SOX3 chrX 147912049 147912111 CGG 3 FXS_FMR1 -chrX 148500604 148500753 GCC 3 FRAXE_AFF2 +chrX 148500604 148500753 CCG 3 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz index 283d10ec..7ab40150 100644 Binary files a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz and b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz differ diff --git a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi index 86b30c26..e1a7226d 100644 Binary files a/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi and b/data/catalogs/STRchive-disease-loci.hg38.atarva.bed.gz.tbi differ diff --git a/data/catalogs/STRchive-disease-loci.hg38.general.bed b/data/catalogs/STRchive-disease-loci.hg38.general.bed index 39d87c24..c2f85cf8 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.general.bed @@ -1,81 +1,81 @@ #chrom start stop id gene reference_motif_reference_orientation pathogenic_motif_reference_orientation pathogenic_min inheritance disease -chr1 1435798 1435818 HMNR7_VWA1 VWA1 GGCGCGGAGC GGCGCGGAGC 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 -chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT GAAAT 31 AD Spinocerebellar ataxia type 37 -chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 GCC GCC 118 AD Oculopharyngodistal myopathy type 5 -chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC GGC GGC 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 -chr1 155188505 155192239 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA None AD Autosomal dominant tubulointerstitial kidney disease -chr1 156591765 156591783 NME_NAXE NAXE GGGCC GGGCC 200 AR NAXE-related mitochondrial encephalopathy -chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT AAATG 274 AD Familial adult myoclonic epilepsy 2 -chr2 100104798 100104824 FRA2A_AFF3 AFF3 GCC GCC 300 AD Intellectual disability associated with fragile site FRA2A +chr1 1435798 1435818 HMNR7_VWA1 VWA1 GGCGCGGAGC AGCGGCGCGG 1 AR Neuronopathy, distal hereditary motor, autosomal recessive 7 +chr1 57367043 57367121 SCA37_DAB1 DAB1 AAAAT TGAAA 31 AD Spinocerebellar ataxia type 37 +chr1 94418421 94418444 OPDM5_ABCD3 ABCD3 CCG CCG 118 AD Oculopharyngodistal myopathy type 5 +chr1 149390802 149390842 NIID_NOTCH2NLC NOTCH2NLC CGG CGG 66 AD Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6 +chr1 155188505 155192239 ADTKD_MUC1 MUC1 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG None AD Autosomal dominant tubulointerstitial kidney disease +chr1 156591765 156591783 NME_NAXE NAXE GGGCC CCGGG 200 AR NAXE-related mitochondrial encephalopathy +chr2 96197066 96197124 FAME2_STARD7 STARD7 AAAAT TGAAA 274 AD Familial adult myoclonic epilepsy 2 +chr2 100104798 100104824 FRA2A_AFF3 AFF3 CCG CCG 300 AD Intellectual disability associated with fragile site FRA2A chr2 176093058 176093103 SD5_HOXD13 HOXD13 GCN GCN 22 AD Syndactyly -chr2 190880872 190880920 GDPAG_GLS GLS GCA GCA 680 AR Glutaminase deficiency +chr2 190880872 190880920 GDPAG_GLS GLS CAG CAG 680 AR Glutaminase deficiency chr3 63912684 63912715 SCA7_ATXN7 ATXN7 CAG CAG 37 AD Spinocerebellar ataxia type 7 chr3 129172576 129172656 DM2_CNBP CNBP CAGG CAGG 75 AD Myotonic dystrophy type 2 -chr3 138946019 138946062 BPES_FOXL2 FOXL2 NGC NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis +chr3 138946019 138946062 BPES_FOXL2 FOXL2 GCN NGC 15 AD,AR Blepharophimosis, epicanthus inversus, and ptosis chr3 183712187 183712226 FAME4_YEATS2 YEATS2 TTTTA TTTCA 1000 AD Familial adult myoclonic epilepsy 4 chr4 3074876 3074933 HD_HTT HTT CAG CAG 36 AD Huntington disease -chr4 39348424 39348483 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,AGGGC 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome -chr4 41745972 41746032 CCHS_PHOX2B PHOX2B GCN GCN 26 AD Congenital central hypoventilation syndrome +chr4 39348424 39348483 CANVAS_RFC1 RFC1 AAAAG AAGGG,ACAGG,AAAGG,CAGGG 400 AR Cerebellar ataxia, neuropathy, and vestibular areflexia syndrome +chr4 41745972 41746032 CCHS_PHOX2B PHOX2B GCN NGC 26 AD Congenital central hypoventilation syndrome chr4 159342526 159342618 FAME7_RAPGEF2 RAPGEF2 TTTTA TTTCA 60 AD Familial adult myoclonic epilepsy type 7 chr5 10356343 10356411 FAME3_MARCHF6 MARCHF6 TTTTA TTTCA 650 AD Familial adult myoclonic epilepsy type 3 -chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B GCT GCT 51 AD Spinocerebellar ataxia type 12 -chr5 177554489 177554531 OPDM_FAM193B FAM193B GCC GCC 194 AD Oculopharyngodistal myopathy -chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 GCC GCC 83 AD Oculopharyngodistal myopathy +chr5 146878727 146878759 SCA12_PPP2R2B PPP2R2B CTG CTG 51 AD Spinocerebellar ataxia type 12 +chr5 177554489 177554531 OPDM_FAM193B FAM193B CCG CCG 194 AD Oculopharyngodistal myopathy +chr6 13328476 13328603 OPDM_TBC1D7 TBC1D7 CCG CCG 83 AD Oculopharyngodistal myopathy chr6 16327633 16327724 SCA1_ATXN1 ATXN1 CTG CTG 39 AD Spinocerebellar ataxia type 1 chr6 45422750 45422801 CCD_RUNX2 RUNX2 GCN GCN 20 AD Cleidocranial dysplasia chr6 170561906 170562017 SCA17_TBP TBP CAG CAG 49 AD Spinocerebellar ataxia type 17 -chr7 27199678 27199732 HFG_HOXA13-III HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 3 -chr7 27199825 27199861 HFG_HOXA13-II HOXA13 NGC NGC 18 AD Hand-foot-genital syndrome 2 -chr7 27199924 27199966 HFG_HOXA13-I HOXA13 NGC NGC 22 AD Hand-foot-genital syndrome 1 -chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 GCG GCG 450 AD Autism spectrum disorder associated with fragile site FRA7A -chr8 104588970 104588999 OPDM1_LRP12 LRP12 CGC CGC 85 AD Oculopharyngodistal myopathy type 1 +chr7 27199678 27199732 HFG_HOXA13-III HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 3 +chr7 27199825 27199861 HFG_HOXA13-II HOXA13 GCN NGC 18 AD Hand-foot-genital syndrome 2 +chr7 27199924 27199966 HFG_HOXA13-I HOXA13 GCN NGC 22 AD Hand-foot-genital syndrome 1 +chr7 55887600 55887639 FRA7A_ZNF713 ZNF713 CGG CGG 450 AD Autism spectrum disorder associated with fragile site FRA7A +chr8 104588970 104588999 OPDM1_LRP12 LRP12 CCG CCG 85 AD Oculopharyngodistal myopathy type 1 chr8 118366812 118366918 FAME1_SAMD12 SAMD12 TAAAA TGAAA 105 AD Familial adult myoclonic epilepsy type 1 -chr9 27573484 27573546 FTDALS1_C9orf72 C9orf72 GGCCCC GGCCCC 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) -chr9 69037286 69037304 FRDA_FXN FXN GAA GAA 56 AR Friedreich ataxia -chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 GCC GCC 18 AR Hereditary sensory and autonomic neuropathy type VIII -chr9 133071177 133071737 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG None AD Maturity-Onset Diabetes of the Young Type 8 -chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 GGC GGC 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 +chr9 27573484 27573546 FTDALS1_C9orf72 C9orf72 GGCCCC CCCCGG 31 AD Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS) +chr9 69037286 69037304 FRDA_FXN FXN GAA AAG 56 AR Friedreich ataxia +chr9 130681605 130681641 HSAN-VIII_PRDM12 PRDM12 CCG CCG 18 AR Hereditary sensory and autonomic neuropathy type VIII +chr9 133071177 133071737 MODY8_CEL CEL GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC None AD Maturity-Onset Diabetes of the Young Type 8 +chr10 79826383 79826404 OPML1_NUTM2B-AS1 NUTM2B-AS1 CGG CGG 161 AD Oculopharyngeal myopathy with leukoencephalopathy 1 chr11 119206289 119206323 JBS_CBL CBL CGG CGG 101 AD Jacobsen syndrome (FRAX11B fragile site) chr12 6936716 6936775 DRPLA_ATN1 ATN1 CAG CAG 48 AD Dentatorubral-Pallidoluysian Atrophy -chr12 50505001 50505024 FRA12A_DIP2B DIP2B GGC GGC 273 AD Intellectual developmental disorder, FRA12A type +chr12 50505001 50505024 FRA12A_DIP2B DIP2B CGG CGG 273 AD Intellectual developmental disorder, FRA12A type chr12 111598949 111599019 SCA2_ATXN2 ATXN2 CTG CTG 35 AD,AR Spinocerebellar ataxia type 2 -chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 GGC GGC 120 AD Oculopharyngodistal myopathy type 4 +chr12 123533720 123533750 OPDM4_RILPL1 RILPL1 CGG CGG 120 AD Oculopharyngodistal myopathy type 4 chr13 70139383 70139429 SCA8_ATXN8OS ATXN8OS CTG CTG 71 AD Spinocerebellar ataxia type 8 chr13 99985448 99985494 HPE5_ZIC2 ZIC2 GCN GCN 25 AD Holoprosencephaly-5 -chr13 102161574 102161726 SCA27B_FGF14 FGF14 GAA GAA 320 AD Spinocerebellar ataxia 27B +chr13 102161574 102161726 SCA27B_FGF14 FGF14 GAA AAG 320 AD Spinocerebellar ataxia 27B chr14 23321472 23321503 OPMD_PABPN1 PABPN1 GCN GCN 12 AD,AR Oculopharyngeal muscular dystrophy chr14 92071010 92071052 SCA3_ATXN3 ATXN3 CTG CTG 60 AD Spinocerebellar ataxia type 3/Machado-Joseph disease -chr15 22786677 22786703 ALS1_NIPA1 NIPA1 GCG GCG 11 AD Amyotrophic lateral sclerosis +chr15 22786677 22786703 ALS1_NIPA1 NIPA1 CGG CGG 11 AD Amyotrophic lateral sclerosis chr15 34419425 34419451 aFTLD-U_GOLGA8A GOLGA8A TTTC CT 190 Atypical frontotemporal lobar degeneration with ubiquitinated inclusions (aFTLD-U) -chr15 88569433 88569452 CHNG3_MIR7-2 MIR7-2 TTTG TTTG 3 AD Nongoitrous congenital hypothyroidism-3 -chr15 89333588 89333629 CPEO_POLG POLG GCT GCT None Progressive external ophthalmoplegia, Parkinson's disease -chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 GCC GCC 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 +chr15 88569433 88569452 CHNG3_MIR7-2 MIR7-2 TTTG GTTT 3 AD Nongoitrous congenital hypothyroidism-3 +chr15 89333588 89333629 CPEO_POLG POLG CTG CTG None Progressive external ophthalmoplegia, Parkinson's disease +chr16 17470907 17470922 DBQD2_XYLT1 XYLT1 CCG CCG 72 AR Baratela-Scott Syndrome/Desbuquois dysplasia 2 chr16 24613438 24613532 FAME6_TNRC6A TNRC6A TTTTA TTTCA 1100 AD Familial adult myoclonic epilepsy type 6 -chr16 66490396 66490466 SCA31_BEAN1 BEAN1 AATAA TGGAA,TAGAA 110 AD Spinocerebellar ataxia type 31 +chr16 66490396 66490466 SCA31_BEAN1 BEAN1 AATAA AATGG,AATAG 110 AD Spinocerebellar ataxia type 31 chr16 67842862 67842950 SCA_THAP11 THAP11 CAG CAG 45 AD Spinocerebellar ataxia 51 -chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 GCC GCC 46 AD Spinocerebellar ataxia 4 +chr16 72787694 72787758 SCA4_ZFHX3 ZFHX3 CCG CCG 46 AD Spinocerebellar ataxia 4 chr16 87604282 87604329 HDL2_JPH3 JPH3 CTG CTG 40 AD Huntington disease-like 2 chr17 17808358 17808460 FAME8_RAI1 RAI1 TTTTA TTTCA 9 AD Familial adult myoclonic epilepsy type 8 -chr17 80147009 80147139 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA 14 AR Richieri-Costa-Pereira syndrome +chr17 80147009 80147139 RCPS_EIF4A3 EIF4A3 CCTCGCTGTGCCGCTGCCGA GCCGCTGCCGACCTCGCTGT 14 AR Richieri-Costa-Pereira syndrome chr18 666891 667632 CPUM_TYMS TYMS GATGGT GATGGT 210 AR Congenital Progressive Universal Melanosis chr18 55586153 55586229 FECD3_TCF4 TCF4 CAG CAG 51 AD Fuchs endothelial corneal dystrophy 3 -chr19 4510727 4513659 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy +chr19 4510727 4513659 MRUPAV_PLIN4 PLIN4 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT 37 AD Myopathy with Rimmed Ubiquitin-Positive Autophagic Vacuolation, PLIN4-Related Myopathy chr19 13207858 13207898 SCA6_CACNA1A CACNA1A CTG CTG 21 AD Spinocerebellar ataxia type 6 chr19 14496041 14496075 OPDM2_GIPC1 GIPC1 CCG CCG 73 AD Oculopharyngodistal myopathy type 2 -chr19 18786034 18786050 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia +chr19 18786034 18786050 EDM1-PSACH_COMP COMP GTC CGT 6 AD Multiple epiphyseal dysplasia, Pseudoachondroplasia chr19 45770204 45770266 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 -chr20 2652732 2652757 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 -chr20 4699397 4699493 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome +chr20 2652732 2652757 SCA36_NOP56 NOP56 GGCCTG CCTGGG 650 AD Spinocerebellar ataxia type 36 +chr20 4699397 4699493 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT AGCCTCATGGTGGTGGCTGGGGGC 5 AD Creutzfeldt-Jakob disease and Gerstmann-Straussler-Schneiker syndrome chr21 43776442 43776479 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 19766762 19766807 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 38317282 38317375 EPM_CSNK1E CSNK1E CCG CCG 745 AR Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy chr22 45795354 45795424 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 -chrX 25013529 25013565 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome -chrX 25013649 25013697 EIEE1_ARX ARX NGC NGC 17 XR Early-infantile epileptic encephalopathy -chrX 31284557 31284605 DMD_DMD DMD TTC TTC 59 XR Duchenne muscular dystrophy -chrX 67545316 67545419 SBMA_AR AR GCA GCA 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease +chrX 25013529 25013565 PRTS_ARX ARX GCN NGC 20 XR Partington syndrome +chrX 25013649 25013697 EIEE1_ARX ARX GCN NGC 17 XR Early-infantile epileptic encephalopathy +chrX 31284557 31284605 DMD_DMD DMD TTC CTT 59 XR Duchenne muscular dystrophy +chrX 67545316 67545419 SBMA_AR AR CAG CAG 38 XR Spinal and bulbar muscular atrophy, Kennedy Disease chrX 71453054 71453131 XDP_TAF1 TAF1 AGAGGG AGAGGG 35 XR X-linked dystonia-parkinsonism (XDP) a.k.a. Dystonia 3, torsion, X-linked (DYT3) chrX 137566826 137566856 VACTERLX_ZIC3 ZIC3 GCN GCN 12 XR X-linked VACTERL syndrome -chrX 140504316 140504361 XLID_SOX3 SOX3 NGC NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) +chrX 140504316 140504361 XLID_SOX3 SOX3 GCN NGC 22 XR X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX) chrX 147912049 147912111 FXS_FMR1 FMR1 CGG CGG 201 XD Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1 -chrX 148500604 148500753 FRAXE_AFF2 AFF2 GCC GCC 201 XR Intellectual developmental disorder, Fragile X intellectual disability +chrX 148500604 148500753 FRAXE_AFF2 AFF2 CCG CCG 201 XR Intellectual developmental disorder, Fragile X intellectual disability diff --git a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed index 9000993c..49ae12bc 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.longTR.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.longTR.bed @@ -1,80 +1,80 @@ -chr1 1435799 1435818 GGCGCGGAGC HMNR7_VWA1 -chr1 57367044 57367121 GAAAT,AAAAT SCA37_DAB1 -chr1 94418422 94418444 GCC OPDM5_ABCD3 -chr1 149390803 149390842 GGC NIID_NOTCH2NLC -chr1 155188506 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 -chr1 156591766 156591783 GGGCC NME_NAXE -chr2 96197067 96197124 AAATG,AAAAT FAME2_STARD7 -chr2 100104799 100104824 GCC FRA2A_AFF3 +chr1 1435799 1435818 AGCGGCGCGG,GGCGCGGAGC HMNR7_VWA1 +chr1 57367044 57367121 TGAAA,AAAAT SCA37_DAB1 +chr1 94418422 94418444 CCG OPDM5_ABCD3 +chr1 149390803 149390842 CGG NIID_NOTCH2NLC +chr1 155188506 155192239 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG ADTKD_MUC1 +chr1 156591766 156591783 CCGGG,GGGCC NME_NAXE +chr2 96197067 96197124 TGAAA,AAAAT FAME2_STARD7 +chr2 100104799 100104824 CCG FRA2A_AFF3 chr2 176093059 176093103 GCN SD5_HOXD13 -chr2 190880873 190880920 GCA GDPAG_GLS +chr2 190880873 190880920 CAG GDPAG_GLS chr3 63912685 63912715 CAG SCA7_ATXN7 chr3 129172577 129172656 CAGG DM2_CNBP -chr3 138946020 138946062 NGC BPES_FOXL2 +chr3 138946020 138946062 NGC,GCN BPES_FOXL2 chr3 183712188 183712226 TTTCA,TTTTA FAME4_YEATS2 chr4 3074877 3074933 CAG HD_HTT -chr4 39348425 39348483 AAGGG,ACAGG,AAAGG,AGGGC,AAAAG,AAAGGG CANVAS_RFC1 -chr4 41745973 41746032 GCN CCHS_PHOX2B +chr4 39348425 39348483 AAGGG,ACAGG,AAAGG,CAGGG,AAAAG,AAAGGG CANVAS_RFC1 +chr4 41745973 41746032 NGC,GCN CCHS_PHOX2B chr4 159342527 159342618 TTTCA,TTTTA FAME7_RAPGEF2 chr5 10356344 10356411 TTTCA,TTTTA FAME3_MARCHF6 -chr5 146878728 146878759 GCT SCA12_PPP2R2B -chr5 177554490 177554531 GCC OPDM_FAM193B -chr6 13328477 13328603 GCC OPDM_TBC1D7 +chr5 146878728 146878759 CTG SCA12_PPP2R2B +chr5 177554490 177554531 CCG OPDM_FAM193B +chr6 13328477 13328603 CCG OPDM_TBC1D7 chr6 16327634 16327724 CTG SCA1_ATXN1 chr6 45422751 45422801 GCN CCD_RUNX2 chr6 170561907 170562017 CAG SCA17_TBP -chr7 27199679 27199732 NGC HFG_HOXA13-III -chr7 27199826 27199861 NGC HFG_HOXA13-II -chr7 27199925 27199966 NGC HFG_HOXA13-I -chr7 55887601 55887639 GCG FRA7A_ZNF713 -chr8 104588971 104588999 CGC OPDM1_LRP12 +chr7 27199679 27199732 NGC,GCN HFG_HOXA13-III +chr7 27199826 27199861 NGC,GCN HFG_HOXA13-II +chr7 27199925 27199966 NGC,GCN HFG_HOXA13-I +chr7 55887601 55887639 CGG FRA7A_ZNF713 +chr8 104588971 104588999 CCG OPDM1_LRP12 chr8 118366813 118366918 TGAAA,TAAAA FAME1_SAMD12 -chr9 27573485 27573546 GGCCCC FTDALS1_C9orf72 -chr9 69037287 69037304 GAA FRDA_FXN -chr9 130681606 130681641 GCC HSAN-VIII_PRDM12 -chr9 133071178 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL -chr10 79826384 79826404 GGC OPML1_NUTM2B-AS1 +chr9 27573485 27573546 CCCCGG,GGCCCC FTDALS1_C9orf72 +chr9 69037287 69037304 AAG,GAA FRDA_FXN +chr9 130681606 130681641 CCG HSAN-VIII_PRDM12 +chr9 133071178 133071737 ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG MODY8_CEL +chr10 79826384 79826404 CGG OPML1_NUTM2B-AS1 chr11 119206290 119206323 CGG JBS_CBL chr12 6936717 6936775 CAG DRPLA_ATN1 -chr12 50505002 50505024 GGC FRA12A_DIP2B +chr12 50505002 50505024 CGG FRA12A_DIP2B chr12 111598950 111599019 CTG SCA2_ATXN2 -chr12 123533721 123533750 GGC OPDM4_RILPL1 +chr12 123533721 123533750 CGG OPDM4_RILPL1 chr13 70139384 70139429 CTG SCA8_ATXN8OS chr13 99985449 99985494 GCN HPE5_ZIC2 -chr13 102161575 102161726 GAA,GGA,GCA SCA27B_FGF14 +chr13 102161575 102161726 AAG,AGG,CAG,GAA SCA27B_FGF14 chr14 23321473 23321503 GCN OPMD_PABPN1 chr14 92071011 92071052 CTG SCA3_ATXN3 -chr15 22786678 22786703 GCG ALS1_NIPA1 +chr15 22786678 22786703 CGG ALS1_NIPA1 chr15 34419426 34419451 CT,CCTT,CCCTCT,TTTC aFTLD-U_GOLGA8A -chr15 88569434 88569452 TTTG CHNG3_MIR7-2 -chr15 89333589 89333629 GCT CPEO_POLG -chr16 17470908 17470922 GCC DBQD2_XYLT1 +chr15 88569434 88569452 GTTT,TTTG CHNG3_MIR7-2 +chr15 89333589 89333629 CTG CPEO_POLG +chr16 17470908 17470922 CCG DBQD2_XYLT1 chr16 24613439 24613532 TTTCA,TTTTA FAME6_TNRC6A -chr16 66490397 66490466 TGGAA,TAGAA,AATAA SCA31_BEAN1 +chr16 66490397 66490466 AATGG,AATAG,AATAA SCA31_BEAN1 chr16 67842863 67842950 CAG SCA_THAP11 -chr16 72787695 72787758 GCC SCA4_ZFHX3 +chr16 72787695 72787758 CCG SCA4_ZFHX3 chr16 87604283 87604329 CTG HDL2_JPH3 -chr17 17808359 17808460 TTTCA,TTTTA FAME8_RAI1 -chr17 80147010 80147139 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 +chr17 17808359 17808460 TTTCA,ATTTT,TTTTA FAME8_RAI1 +chr17 80147010 80147139 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 chr18 666892 667632 GATGGT CPUM_TYMS chr18 55586154 55586229 CAG FECD3_TCF4 -chr19 4510728 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 +chr19 4510728 4513659 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 chr19 13207859 13207898 CTG SCA6_CACNA1A chr19 14496042 14496075 CCG OPDM2_GIPC1 -chr19 18786035 18786050 GTC EDM1-PSACH_COMP +chr19 18786035 18786050 CGT,GTC EDM1-PSACH_COMP chr19 45770205 45770266 CAG DM1_DMPK -chr20 2652733 2652757 GGCCTG SCA36_NOP56 -chr20 4699398 4699493 CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP +chr20 2652733 2652757 CCTGGG,GGCCTG SCA36_NOP56 +chr20 4699398 4699493 AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT CJD_PRNP chr21 43776443 43776479 CGCGGGGCGGGG EPM1_CSTB chr22 19766763 19766807 GCN TOF_TBX1 chr22 38317283 38317375 CCG EPM_CSNK1E chr22 45795355 45795424 ATTCT SCA10_ATXN10 -chrX 25013530 25013565 NGC PRTS_ARX -chrX 25013650 25013697 NGC EIEE1_ARX -chrX 31284558 31284605 TTC DMD_DMD -chrX 67545317 67545419 GCA SBMA_AR +chrX 25013530 25013565 NGC,GCN PRTS_ARX +chrX 25013650 25013697 NGC,GCN EIEE1_ARX +chrX 31284558 31284605 CTT,TTC DMD_DMD +chrX 67545317 67545419 CAG SBMA_AR chrX 71453055 71453131 AGAGGG XDP_TAF1 chrX 137566827 137566856 GCN VACTERLX_ZIC3 -chrX 140504317 140504361 NGC XLID_SOX3 +chrX 140504317 140504361 NGC,GCN XLID_SOX3 chrX 147912050 147912111 CGG FXS_FMR1 -chrX 148500605 148500753 GCC FRAXE_AFF2 +chrX 148500605 148500753 CCG FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.straglr.bed b/data/catalogs/STRchive-disease-loci.hg38.straglr.bed index 0e501fb8..46668cc8 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.straglr.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.straglr.bed @@ -1,12 +1,12 @@ -chr1 1435798 1435818 GGCGCGGAGC HMNR7_VWA1 HMNR7_VWA1 +chr1 1435798 1435818 AGCGGCGCGG HMNR7_VWA1 HMNR7_VWA1 chr1 57367078 57367121 GAAAT SCA37_DAB1 SCA37_DAB1 -chr1 94418421 94418444 GCC OPDM5_ABCD3 OPDM5_ABCD3 -chr1 149390802 149390842 GGC NIID_NOTCH2NLC NIID_NOTCH2NLC -chr1 156591765 156591783 GGGCC NME_NAXE NME_NAXE +chr1 94418421 94418444 CCG OPDM5_ABCD3 OPDM5_ABCD3 +chr1 149390802 149390842 CGG NIID_NOTCH2NLC NIID_NOTCH2NLC +chr1 156591765 156591783 CCGGG NME_NAXE NME_NAXE chr2 96197066 96197069 AAATG FAME2_STARD7 FAME2_STARD7 -chr2 100104798 100104824 GCC FRA2A_AFF3 FRA2A_AFF3 +chr2 100104798 100104824 CCG FRA2A_AFF3 FRA2A_AFF3 chr2 176093058 176093103 GCN SD5_HOXD13 SD5_HOXD13 -chr2 190880872 190880920 GCA GDPAG_GLS GDPAG_GLS +chr2 190880872 190880920 CAG GDPAG_GLS GDPAG_GLS chr3 63912684 63912715 CAG SCA7_ATXN7 SCA7_ATXN7 chr3 63912715 63912727 CCG SCA7_ATXN7 SCA7_ATXN7_CCG chr3 129172576 129172656 CAGG DM2_CNBP DM2_CNBP @@ -17,57 +17,57 @@ chr3 183712222 183712226 TTTCA FAME4_YEATS2 FAME4_YEATS2 chr4 3074876 3074927 CAG HD_HTT HD_HTT chr4 3074933 3074969 CCG HD_HTT HD_HTT_CCG chr4 39348479 39348483 AAGGG CANVAS_RFC1 CANVAS_RFC1 -chr4 41745972 41746032 GCN CCHS_PHOX2B CCHS_PHOX2B +chr4 41745972 41746032 NGC CCHS_PHOX2B CCHS_PHOX2B chr4 159342611 159342618 TTTCA FAME7_RAPGEF2 FAME7_RAPGEF2 chr5 10356403 10356411 TTTCA FAME3_MARCHF6 FAME3_MARCHF6 -chr5 146878727 146878759 GCT SCA12_PPP2R2B SCA12_PPP2R2B -chr5 177554489 177554531 GCC OPDM_FAM193B OPDM_FAM193B -chr6 13328476 13328603 GCC OPDM_TBC1D7 OPDM_TBC1D7 +chr5 146878727 146878759 CTG SCA12_PPP2R2B SCA12_PPP2R2B +chr5 177554489 177554531 CCG OPDM_FAM193B OPDM_FAM193B +chr6 13328476 13328603 CCG OPDM_TBC1D7 OPDM_TBC1D7 chr6 16327633 16327724 CTG SCA1_ATXN1 SCA1_ATXN1 chr6 45422750 45422801 GCN CCD_RUNX2 CCD_RUNX2 chr6 170561906 170562017 CAG SCA17_TBP SCA17_TBP chr7 27199678 27199732 NGC HFG_HOXA13-III HFG_HOXA13-III chr7 27199825 27199861 NGC HFG_HOXA13-II HFG_HOXA13-II chr7 27199924 27199966 NGC HFG_HOXA13-I HFG_HOXA13-I -chr7 55887600 55887639 GCG FRA7A_ZNF713 FRA7A_ZNF713 -chr8 104588970 104588999 CGC OPDM1_LRP12 OPDM1_LRP12 +chr7 55887600 55887639 CGG FRA7A_ZNF713 FRA7A_ZNF713 +chr8 104588970 104588999 CCG OPDM1_LRP12 OPDM1_LRP12 chr8 118366912 118366918 TGAAA FAME1_SAMD12 FAME1_SAMD12 -chr9 27573484 27573546 GGCCCC FTDALS1_C9orf72 FTDALS1_C9orf72 +chr9 27573484 27573546 CCCCGG FTDALS1_C9orf72 FTDALS1_C9orf72 chr9 69037270 69037286 A FRDA_FXN FRDA_FXN_A chr9 69037286 69037304 GAA FRDA_FXN FRDA_FXN -chr9 130681605 130681641 GCC HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 -chr10 79826383 79826404 GGC OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 +chr9 130681605 130681641 CCG HSAN-VIII_PRDM12 HSAN-VIII_PRDM12 +chr10 79826383 79826404 CGG OPML1_NUTM2B-AS1 OPML1_NUTM2B-AS1 chr11 119206289 119206323 CGG JBS_CBL JBS_CBL chr12 6936716 6936775 CAG DRPLA_ATN1 DRPLA_ATN1 -chr12 50505001 50505024 GGC FRA12A_DIP2B FRA12A_DIP2B +chr12 50505001 50505024 CGG FRA12A_DIP2B FRA12A_DIP2B chr12 111598949 111599019 CTG SCA2_ATXN2 SCA2_ATXN2 -chr12 123533720 123533750 GGC OPDM4_RILPL1 OPDM4_RILPL1 +chr12 123533720 123533750 CGG OPDM4_RILPL1 OPDM4_RILPL1 chr13 70139353 70139383 CTA SCA8_ATXN8OS SCA8_ATXN8OS_CTA chr13 70139383 70139429 CTG SCA8_ATXN8OS SCA8_ATXN8OS chr13 99985448 99985494 GCN HPE5_ZIC2 HPE5_ZIC2 -chr13 102161574 102161726 GAA SCA27B_FGF14 SCA27B_FGF14 +chr13 102161574 102161726 AAG SCA27B_FGF14 SCA27B_FGF14 chr14 23321472 23321503 GCN OPMD_PABPN1 OPMD_PABPN1 chr14 92071010 92071052 CTG SCA3_ATXN3 SCA3_ATXN3 -chr15 22786677 22786703 GCG ALS1_NIPA1 ALS1_NIPA1 +chr15 22786677 22786703 CGG ALS1_NIPA1 ALS1_NIPA1 chr15 34419425 34419451 CT aFTLD-U_GOLGA8A aFTLD-U_GOLGA8A -chr15 88569433 88569452 TTTG CHNG3_MIR7-2 CHNG3_MIR7-2 +chr15 88569433 88569452 GTTT CHNG3_MIR7-2 CHNG3_MIR7-2 chr15 89333579 89333585 GCT CPEO_POLG CPEO_POLG_GCT chr15 89333585 89333588 GTT CPEO_POLG CPEO_POLG_GTT chr15 89333588 89333629 GCT CPEO_POLG CPEO_POLG chr16 17470907 17470922 GCC DBQD2_XYLT1 DBQD2_XYLT1 chr16 24613488 24613532 TTTCA FAME6_TNRC6A FAME6_TNRC6A -chr16 66490396 66490466 TGGAA SCA31_BEAN1 SCA31_BEAN1 +chr16 66490396 66490466 AATGG SCA31_BEAN1 SCA31_BEAN1 chr16 67842862 67842950 CAG SCA_THAP11 SCA_THAP11 -chr16 72787694 72787758 GCC SCA4_ZFHX3 SCA4_ZFHX3 +chr16 72787694 72787758 CCG SCA4_ZFHX3 SCA4_ZFHX3 chr16 87604282 87604329 CTG HDL2_JPH3 HDL2_JPH3 chr17 17808448 17808460 TTTCA FAME8_RAI1 FAME8_RAI1 -chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA RCPS_EIF4A3 RCPS_EIF4A3 +chr17 80147009 80147139 GCCGCTGCCGACCTCGCTGT RCPS_EIF4A3 RCPS_EIF4A3 chr18 666891 667632 GATGGT CPUM_TYMS CPUM_TYMS chr18 55586153 55586229 CAG FECD3_TCF4 FECD3_TCF4 -chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC MRUPAV_PLIN4 MRUPAV_PLIN4 +chr19 4510727 4513659 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT MRUPAV_PLIN4 MRUPAV_PLIN4 chr19 13207858 13207898 CTG SCA6_CACNA1A SCA6_CACNA1A chr19 14496041 14496075 CCG OPDM2_GIPC1 OPDM2_GIPC1 -chr19 18786034 18786050 GTC EDM1-PSACH_COMP EDM1-PSACH_COMP +chr19 18786034 18786050 CGT EDM1-PSACH_COMP EDM1-PSACH_COMP chr19 45770204 45770266 CAG DM1_DMPK DM1_DMPK chr20 2652732 2652757 GGCCTG SCA36_NOP56 SCA36_NOP56 chr20 2652757 2652775 CGCCTG SCA36_NOP56 SCA36_NOP56_CGCCTG @@ -81,9 +81,9 @@ chrX 25013529 25013565 NGC PRTS_ARX PRTS_ARX chrX 25013649 25013697 NGC EIEE1_ARX EIEE1_ARX chrX 31284557 31284605 TTC DMD_DMD DMD_DMD chrX 31284605 31284613 T DMD_DMD DMD_DMD_T -chrX 67545316 67545419 GCA SBMA_AR SBMA_AR +chrX 67545316 67545419 CAG SBMA_AR SBMA_AR chrX 71453054 71453131 AGAGGG XDP_TAF1 XDP_TAF1 chrX 137566826 137566856 GCN VACTERLX_ZIC3 VACTERLX_ZIC3 chrX 140504316 140504361 NGC XLID_SOX3 XLID_SOX3 chrX 147912049 147912111 CGG FXS_FMR1 FXS_FMR1 -chrX 148500604 148500753 GCC FRAXE_AFF2 FRAXE_AFF2 +chrX 148500604 148500753 CCG FRAXE_AFF2 FRAXE_AFF2 diff --git a/data/catalogs/STRchive-disease-loci.hg38.stranger.json b/data/catalogs/STRchive-disease-loci.hg38.stranger.json index 0702fe80..530a264f 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.stranger.json +++ b/data/catalogs/STRchive-disease-loci.hg38.stranger.json @@ -2,11 +2,11 @@ { "LocusId": "HMNR7_VWA1", "ReferenceRegion": "chr1:1435798-1435818", - "LocusStructure": "(GGCGCGGAGC)*", + "LocusStructure": "(AGCGGCGCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGCGCGGAGC", + "DisplayRU": "AGCGGCGCGG", "Disease": "HMNR7", "NormalMax": 2, "PathologicMin": 3, @@ -21,7 +21,7 @@ "PathologicRegion": "chr1:57367078-57367121", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAAAT", + "DisplayRU": "TGAAA", "Disease": "SCA37", "NormalMax": 30, "PathologicMin": 31, @@ -30,11 +30,11 @@ { "LocusId": "OPDM5_ABCD3", "ReferenceRegion": "chr1:94418421-94418444", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM5", "NormalMax": 44, "PathologicMin": 118, @@ -43,11 +43,11 @@ { "LocusId": "NIID_NOTCH2NLC", "ReferenceRegion": "chr1:149390802-149390842", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "NIID", "NormalMax": 37, "PathologicMin": 66, @@ -56,11 +56,11 @@ { "LocusId": "NME_NAXE", "ReferenceRegion": "chr1:156591765-156591783", - "LocusStructure": "(GGGCC)*", + "LocusStructure": "(CCGGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GGGCC", + "DisplayRU": "CCGGG", "Disease": "NME", "NormalMax": 7, "PathologicMin": 200, @@ -75,7 +75,7 @@ "PathologicRegion": "chr2:96197066-96197069", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "AAATG", + "DisplayRU": "TGAAA", "Disease": "FAME2", "NormalMax": 273, "PathologicMin": 274, @@ -84,11 +84,11 @@ { "LocusId": "FRA2A_AFF3", "ReferenceRegion": "chr2:100104798-100104824", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRA2A", "NormalMax": 20, "PathologicMin": 300, @@ -110,11 +110,11 @@ { "LocusId": "GDPAG_GLS", "ReferenceRegion": "chr2:190880872-190880920", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "GDPAG", "NormalMax": 38, "PathologicMin": 680, @@ -211,11 +211,11 @@ { "LocusId": "CCHS_PHOX2B", "ReferenceRegion": "chr4:41745972-41746032", - "LocusStructure": "(GCN)*", + "LocusStructure": "(NGC)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCN", + "DisplayRU": "NGC", "Disease": "CCHS", "NormalMax": 20, "PathologicMin": 26, @@ -254,11 +254,11 @@ { "LocusId": "SCA12_PPP2R2B", "ReferenceRegion": "chr5:146878727-146878759", - "LocusStructure": "(GCT)*", + "LocusStructure": "(CTG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "SCA12", "NormalMax": 32, "PathologicMin": 51, @@ -267,11 +267,11 @@ { "LocusId": "OPDM_FAM193B", "ReferenceRegion": "chr5:177554489-177554531", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 50, "PathologicMin": 194, @@ -280,11 +280,11 @@ { "LocusId": "OPDM_TBC1D7", "ReferenceRegion": "chr6:13328476-13328603", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "OPDM", "NormalMax": 60, "PathologicMin": 83, @@ -371,11 +371,11 @@ { "LocusId": "FRA7A_ZNF713", "ReferenceRegion": "chr7:55887600-55887639", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "FRA7A", "NormalMax": 22, "PathologicMin": 450, @@ -384,11 +384,11 @@ { "LocusId": "OPDM1_LRP12", "ReferenceRegion": "chr8:104588970-104588999", - "LocusStructure": "(CGC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CGC", + "DisplayRU": "CCG", "Disease": "OPDM1", "NormalMax": 45, "PathologicMin": 85, @@ -412,11 +412,11 @@ { "LocusId": "FTDALS1_C9orf72", "ReferenceRegion": "chr9:27573484-27573546", - "LocusStructure": "(GGCCCC)*", + "LocusStructure": "(CCCCGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCCC", + "DisplayRU": "CCCCGG", "Disease": "FTDALS1", "NormalMax": 23, "PathologicMin": 31, @@ -431,7 +431,7 @@ "PathologicRegion": "chr9:69037286-69037304", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "FRDA", "NormalMax": 33, "PathologicMin": 56, @@ -440,11 +440,11 @@ { "LocusId": "HSAN-VIII_PRDM12", "ReferenceRegion": "chr9:130681605-130681641", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "HSAN VIII", "NormalMax": 14, "PathologicMin": 18, @@ -453,11 +453,11 @@ { "LocusId": "OPML1_NUTM2B-AS1", "ReferenceRegion": "chr10:79826383-79826404", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPML1", "NormalMax": 16, "PathologicMin": 161, @@ -492,11 +492,11 @@ { "LocusId": "FRA12A_DIP2B", "ReferenceRegion": "chr12:50505001-50505024", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "FRA12A", "NormalMax": 23, "PathologicMin": 273, @@ -518,11 +518,11 @@ { "LocusId": "OPDM4_RILPL1", "ReferenceRegion": "chr12:123533720-123533750", - "LocusStructure": "(GGC)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGC", + "DisplayRU": "CGG", "Disease": "OPDM4", "NormalMax": 16, "PathologicMin": 120, @@ -559,11 +559,11 @@ { "LocusId": "SCA27B_FGF14", "ReferenceRegion": "chr13:102161574-102161726", - "LocusStructure": "(GAA)*", + "LocusStructure": "(AAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GAA", + "DisplayRU": "AAG", "Disease": "SCA27B", "NormalMax": 179, "PathologicMin": 320, @@ -598,11 +598,11 @@ { "LocusId": "ALS1_NIPA1", "ReferenceRegion": "chr15:22786677-22786703", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCG", + "DisplayRU": "CGG", "Disease": "ALS1", "NormalMax": 10, "PathologicMin": 11, @@ -626,11 +626,11 @@ { "LocusId": "CHNG3_MIR7-2", "ReferenceRegion": "chr15:88569433-88569452", - "LocusStructure": "(TTTG)*", + "LocusStructure": "(GTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TTTG", + "DisplayRU": "GTTT", "Disease": "CHNG3", "NormalMax": 4, "PathologicMin": 5, @@ -645,7 +645,7 @@ "PathologicRegion": "chr15:89333588-89333629", "HGNCId": null, "InheritanceMode": [], - "DisplayRU": "GCT", + "DisplayRU": "CTG", "Disease": "CPEO", "NormalMax": 10, "PathologicMin": 11, @@ -660,7 +660,7 @@ "PathologicRegion": "chr16:17470907-17470922", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "DBQD2, BSS", "NormalMax": 20, "PathologicMin": 72, @@ -684,11 +684,11 @@ { "LocusId": "SCA31_BEAN1", "ReferenceRegion": "chr16:66490396-66490466", - "LocusStructure": "(TGGAA)*", + "LocusStructure": "(AATGG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGAA", + "DisplayRU": "AATGG", "Disease": "SCA31", "NormalMax": 109, "PathologicMin": 110, @@ -710,11 +710,11 @@ { "LocusId": "SCA4_ZFHX3", "ReferenceRegion": "chr16:72787694-72787758", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "SCA4", "NormalMax": 26, "PathologicMin": 46, @@ -751,11 +751,11 @@ { "LocusId": "RCPS_EIF4A3", "ReferenceRegion": "chr17:80147009-80147139", - "LocusStructure": "(CCTCGCTGTGCCGCTGCCGA)*", + "LocusStructure": "(GCCGCTGCCGACCTCGCTGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AR"], - "DisplayRU": "CCTCGCTGTGCCGCTGCCGA", + "DisplayRU": "GCCGCTGCCGACCTCGCTGT", "Disease": "RCPS", "NormalMax": 12, "PathologicMin": 14, @@ -790,11 +790,11 @@ { "LocusId": "MRUPAV_PLIN4", "ReferenceRegion": "chr19:4510727-4513659", - "LocusStructure": "(TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC)*", + "LocusStructure": "(GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC", + "DisplayRU": "GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT", "Disease": "MRUPAV", "NormalMax": 31, "PathologicMin": 37, @@ -829,11 +829,11 @@ { "LocusId": "EDM1-PSACH_COMP", "ReferenceRegion": "chr19:18786034-18786050", - "LocusStructure": "(GTC)*", + "LocusStructure": "(CGT)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GTC", + "DisplayRU": "CGT", "Disease": "EDM1, PSACH", "NormalMax": 5, "PathologicMin": 6, @@ -861,7 +861,7 @@ "PathologicRegion": "chr20:2652732-2652757", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "GGCCTG", + "DisplayRU": "CCTGGG", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650, @@ -876,7 +876,7 @@ "PathologicRegion": "chr20:4699397-4699493", "HGNCId": null, "InheritanceMode": ["AD"], - "DisplayRU": "CCTCATGGTGGTGGCTGGGGGCAG", + "DisplayRU": "AGCCTCATGGTGGTGGCTGGGGGC", "Disease": "CJD", "NormalMax": 4, "PathologicMin": 5, @@ -971,7 +971,7 @@ "PathologicRegion": "chrX:31284557-31284605", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "TTC", + "DisplayRU": "CTT", "Disease": "DMD", "NormalMax": 33, "PathologicMin": 59, @@ -980,11 +980,11 @@ { "LocusId": "SBMA_AR", "ReferenceRegion": "chrX:67545316-67545419", - "LocusStructure": "(GCA)*", + "LocusStructure": "(CAG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCA", + "DisplayRU": "CAG", "Disease": "SBMA", "NormalMax": 34, "PathologicMin": 38, @@ -1045,11 +1045,11 @@ { "LocusId": "FRAXE_AFF2", "ReferenceRegion": "chrX:148500604-148500753", - "LocusStructure": "(GCC)*", + "LocusStructure": "(CCG)*", "VariantType": "Repeat", "HGNCId": null, "InheritanceMode": ["XR"], - "DisplayRU": "GCC", + "DisplayRU": "CCG", "Disease": "FRAXE", "NormalMax": 39, "PathologicMin": 201, diff --git a/data/ref-alleles/ref-alleles.T2T-chm13.txt b/data/ref-alleles/ref-alleles.T2T-chm13.txt index 4a360874..e032892d 100644 --- a/data/ref-alleles/ref-alleles.T2T-chm13.txt +++ b/data/ref-alleles/ref-alleles.T2T-chm13.txt @@ -1,50 +1,50 @@ HMNR7_VWA1 -chr1 870158 870178 GGCGCGGAGC STRchive -chr1 870158 870178 GGCGCGGAGC TRGT +chr1 870158 870178 GGCGCGGAGC,AGCGGCGCGG STRchive +chr1 870158 870178 AGCGGCGCGG,GGCGCGGAGC TRGT GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57245935 57245973 AAAAT,GAAAT STRchive -chr1 57245935 57245973 AAAAT,GAAAT TRGT +chr1 57245935 57245973 AAAAT,TGAAA STRchive +chr1 57245935 57245973 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94266544 94266567 GCC STRchive -chr1 94266544 94266567 GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94266544 94266567 CCG STRchive +chr1 94266544 94266567 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 148519695 148519738 GGC STRchive -chr1 148519695 148519738 GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA +chr1 148519695 148519738 CGG STRchive +chr1 148519695 148519738 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA ADTKD_MUC1 -chr1 154328121 154330802 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive -chr1 154328121 154330802 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA TRGT +chr1 154328121 154330802 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG STRchive +chr1 154328121 154330802 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG TRGT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT NME_NAXE -chr1 155728131 155728159 GGGCC STRchive -chr1 155728131 155728159 GGGCC TRGT +chr1 155728131 155728159 GGGCC,CCGGG STRchive +chr1 155728131 155728159 CCGGG,GGGCC TRGT ACATGCGCCG GGGCC GGGCC GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96703674 96703732 AAAAT,AAATG STRchive -chr2 96703674 96703732 AAATG,AAAAT TRGT +chr2 96703674 96703732 AAAAT,TGAAA STRchive +chr2 96703674 96703732 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100563685 100563738 GCC STRchive -chr2 100563685 100563738 GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100563685 100563738 CCG STRchive +chr2 100563685 100563738 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176581179 176581224 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCA GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191369982 191370024 GCA STRchive -chr2 191369982 191370024 GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 191369982 191370024 CAG STRchive +chr2 191369982 191370024 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63956302 63956333 CAG STRchive @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CTGG CA GTAAT ACTCATTCAC BPES_FOXL2 -chr3 141687011 141687054 NGC STRchive -chr3 141687011 141687054 NGC TRGT +chr3 141687011 141687054 GCN,NGC STRchive +chr3 141687011 141687054 NGC,GCN TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -89,14 +89,14 @@ CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CCG CCA CCG CCG CCG CCG CCG CCG CCG CCT CCTCAGCTTC CANVAS_RFC1 -chr4 39318077 39318136 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC STRchive -chr4 39318077 39318136 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG TRGT +chr4 39318077 39318136 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG STRchive +chr4 39318077 39318136 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG TRGT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT CCHS_PHOX2B -chr4 41719745 41719805 GCN STRchive -chr4 41719745 41719805 GCN TRGT +chr4 41719745 41719805 GCN,NGC STRchive +chr4 41719745 41719805 NGC,GCN TRGT CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 147414733 147414780 GCT STRchive -chr5 147414733 147414780 GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 147414733 147414780 CTG STRchive +chr5 147414733 147414780 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 178096748 178096792 GCC STRchive -chr5 178096748 178096792 GCC TRGT -TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 178096748 178096792 CCG STRchive +chr5 178096748 178096792 CCG TRGT +TCGCTCCACA CG CCG CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13201716 13201843 GCC STRchive -chr6 13201716 13201843 GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13201716 13201843 CCG STRchive +chr6 13201716 13201843 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16200188 16200282 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27335684 27335720 NGC STRchive -chr7 27335684 27335720 NGC TRGT +chr7 27335684 27335720 GCN,NGC STRchive +chr7 27335684 27335720 NGC,GCN TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27335813 27335849 NGC STRchive -chr7 27335813 27335849 NGC TRGT +chr7 27335813 27335849 GCN,NGC STRchive +chr7 27335813 27335849 NGC,GCN TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27335912 27335954 NGC STRchive -chr7 27335912 27335954 NGC TRGT +chr7 27335912 27335954 GCN,NGC STRchive +chr7 27335912 27335954 NGC,GCN TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 56047900 56047939 GCG STRchive -chr7 56047900 56047939 GCG TRGT -CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CACCGCGGCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 56047900 56047939 CGG STRchive +chr7 56047900 56047939 CGG TRGT +CACCGCGGCG G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CACCGCGGCG G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 105716409 105716441 CGC STRchive -chr8 105716409 105716441 CGC TRGT -AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -AGGTAGACGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 105716409 105716441 CCG STRchive +chr8 105716409 105716441 CCG TRGT +AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +AGGTAGACGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 119495247 119495353 TAAAA,TGAAA STRchive @@ -185,34 +185,34 @@ ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA FTDALS1_C9orf72 -chr9 27584063 27584155 GGCCCC STRchive -chr9 27584063 27584155 GGCCCC TRGT -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA +chr9 27584063 27584155 GGCCCC,CCCCGG STRchive +chr9 27584063 27584155 CCCCGG,GGCCCC TRGT +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA FRDA_FXN -chr9 81210834 81210861 GAA STRchive -chr9 81210818 81210861 A,GAA TRGT +chr9 81210834 81210861 GAA,AAG STRchive +chr9 81210818 81210861 A,GAA,AAG TRGT AAAAATACAAAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AATAAAGAAA AAAAATACAA A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 142886568 142886595 GCC STRchive -chr9 142886568 142886595 GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 142886568 142886595 CCG STRchive +chr9 142886568 142886595 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL -chr9 145285333 145285861 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive -chr9 145285333 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT +chr9 145285333 145285861 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC STRchive +chr9 145285333 145285861 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 80695718 80695748 GGC STRchive -chr10 80695718 80695748 GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 80695718 80695748 CGG STRchive +chr10 80695718 80695748 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119226662 119226696 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50468095 50468118 GGC STRchive -chr12 50468095 50468118 GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50468095 50468118 CGG STRchive +chr12 50468095 50468118 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 111575873 111575940 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123532573 123532603 GGC STRchive -chr12 123532573 123532603 GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 123532573 123532603 CGG STRchive +chr12 123532573 123532603 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 69361243 69361270 CTG STRchive @@ -257,8 +257,8 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGCGGT SCA27B_FGF14 -chr13 101377549 101377792 GAA STRchive -chr13 101377549 101377792 GAA,GGA,GCA TRGT +chr13 101377549 101377792 GAA,AAG STRchive +chr13 101377549 101377792 AAG,GAA,AGG,CAG TRGT AACTTTCTGT GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAATGTGTTT AACTTTCTGT GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA AAATGTGTTT @@ -275,10 +275,10 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 20458510 20458536 GCG STRchive -chr15 20458510 20458536 GCG TRGT -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG +chr15 20458510 20458536 CGG STRchive +chr15 20458510 20458536 CGG TRGT +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG aFTLD-U_GOLGA8A chr15 32225152 32225178 TTTC,CT STRchive @@ -287,22 +287,22 @@ CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CHNG3_MIR7-2 -chr15 86324038 86324057 TTTG STRchive -chr15 86324038 86324057 TTTG TRGT +chr15 86324038 86324057 TTTG,GTTT STRchive +chr15 86324038 86324057 GTTT,TTTG TRGT ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 87088411 87088452 GCT STRchive -chr15 87088402 87088452 GCT,GTT TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +chr15 87088411 87088452 CTG STRchive +chr15 87088402 87088452 GCT,GTT,CTG TRGT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17477909 17478002 GCC STRchive -chr16 17477909 17478002 GCC TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCTCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG -TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCTCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG +chr16 17477909 17478002 CCG STRchive +chr16 17477909 17478002 GCC,CCG TRGT +TCCCGCTCGG G CCG CCG CCG CCG CCG CCG CCTCGGCTCG CCG CTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CCG CCG CCG CCG CCG CC TCCACCGCCG +TCCCGCTCGG GCC GCC GCC GCC GCC GCC GCC TCGGCTC GCC GCTGCTCCTCCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CT GCC GCC GCC GCC GCC GCC GCC TCCACCGCCG FAME6_TNRC6A chr16 24890366 24890430 TTTTA,TTTCA STRchive @@ -311,8 +311,8 @@ CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA T CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT SCA31_BEAN1 -chr16 72284666 72284761 AATAA,TGGAA,TAGAA STRchive -chr16 72284666 72284761 TGGAA,TAGAA,AATAA TRGT +chr16 72284666 72284761 AATAA,AATGG,AATAG STRchive +chr16 72284666 72284761 AATGG,AATAG,AATAA TRGT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 78605502 78605569 GCC STRchive -chr16 78605502 78605569 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 78605502 78605569 CCG STRchive +chr16 78605502 78605569 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 93675723 93675776 CTG STRchive @@ -336,15 +336,15 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C FAME8_RAI1 chr17 17754961 17755053 TTTTA,TTTCA STRchive -chr17 17754961 17755053 TTTTA,TTTCA TRGT -TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +chr17 17754961 17755053 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC RCPS_EIF4A3 -chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA STRchive -chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA TRGT -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC +chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA,GCCGCTGCCGACCTCGCTGT STRchive +chr17 81047404 81047534 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA TRGT +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC CPUM_TYMS chr18 821235 821905 GATGGT STRchive @@ -359,8 +359,8 @@ GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAC CAG CAG CAG CAG CAG CAG CAG CAG CA GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAC CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TGAAAGAGCC MRUPAV_PLIN4 -chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC STRchive -chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT +chr19 4494212 4497342 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC,GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT STRchive +chr19 4494212 4497342 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTGCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGATACGGTGTCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCCGTCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCACAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCGTCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACGGCCCCTTTGGCCACATTCACAGCACCGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGCGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACTGGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCGCCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTGCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGATACGGTGTCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCCGTCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCACAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCGTCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACGGCCCCTTTGGCCACATTCACAGCACCGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGCGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACTGGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCGCCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG @@ -377,8 +377,8 @@ CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC EDM1-PSACH_COMP -chr19 18921630 18921645 GTC STRchive -chr19 18921630 18921645 GTC TRGT +chr19 18921630 18921645 GTC,CGT STRchive +chr19 18921630 18921645 CGT,GTC TRGT CTCCGTCATT GTC GTC GTC GTC GTC GCAGGCATCA CTCCGTCATT GTC GTC GTC GTC GTC GCAGGCATCA @@ -389,15 +389,15 @@ GTGATCCCCC CAG CAG CAG CAG CAG CA TTCCCGGCTA GTGATCCCCC CAG CAG CAG CAG CAG CA TTCCCGGCTA SCA36_NOP56 -chr20 2683189 2683230 GGCCTG STRchive -chr20 2683189 2683248 GGCCTG,CGCCTG TRGT -CGCAGACAGA GCCTG GGCCTG GGCCTG GGCCTG GGCCTG GGCCTG GGCCTG CGCCTGCGCCTGCCCTGGGAACGGGTTC -CGCAGACAGA GCCTG GGCCTG GGCCTG GGCCTG GGCCTG GGCCTG GGCCTG CGCCTG CGCCTG CCCTGG GAACGGGTTC +chr20 2683189 2683230 GGCCTG,CCTGGG STRchive +chr20 2683189 2683248 GGCCTG,CGCCTG,CCTGGG TRGT +CGCAGACAGA G CCTGGG CCTGGG CCTGGG CCTGGG CCTGGG CCTGGG CCTG CGCCTGCGCCTGCCCTGGGAACGGGTTC +CGCAGACAGA G CCTGGG CCTGGG CCTGGG CCTGGG CCTGGG CCTGGG CCTG CGCCTG CGCCTG CCCTGG GAACGGGTTC CJD_PRNP -chr20 4738633 4738705 GGTGGTGGCTGGGGGCAGCCTCAT,CCTCATGGTGGTGGCTGGGGGCAG STRchive -chr20 4738606 4738705 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT TRGT -CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA +chr20 4738633 4738705 GGTGGTGGCTGGGGGCAGCCTCAT,AGCCTCATGGTGGTGGCTGGGGGC STRchive +chr20 4738606 4738705 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT TRGT +CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAGCCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA CCGCTACCCA CCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA EPM1_CSTB @@ -425,28 +425,28 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 24597766 24597802 NGC STRchive -chrX 24597766 24597802 NGC TRGT +chrX 24597766 24597802 GCN,NGC STRchive +chrX 24597766 24597802 NGC,GCN TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 24597886 24597934 NGC STRchive -chrX 24597886 24597934 NGC TRGT +chrX 24597886 24597934 GCN,NGC STRchive +chrX 24597886 24597934 NGC,GCN TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG DMD_DMD -chrX 30882677 30882743 TTC STRchive -chrX 30882677 30882751 TTC,T TRGT +chrX 30882677 30882743 TTC,CTT STRchive +chrX 30882677 30882751 TTC,T,CTT TRGT AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTTTTTTTGGCAGAGGTG AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 65975147 65975250 GCA STRchive -chrX 65975147 65975250 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 65975147 65975250 CAG STRchive +chrX 65975147 65975250 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 69887153 69887230 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 138816203 138816248 NGC STRchive -chrX 138816203 138816248 NGC TRGT +chrX 138816203 138816248 GCN,NGC STRchive +chrX 138816203 138816248 NGC,GCN TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 146765190 146765342 GCC STRchive -chrX 146765190 146765342 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 146765190 146765342 CCG STRchive +chrX 146765190 146765342 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg19.txt b/data/ref-alleles/ref-alleles.hg19.txt index 36022cd4..8263f3de 100644 --- a/data/ref-alleles/ref-alleles.hg19.txt +++ b/data/ref-alleles/ref-alleles.hg19.txt @@ -1,50 +1,50 @@ HMNR7_VWA1 -chr1 1371178 1371198 GGCGCGGAGC STRchive -chr1 1371178 1371198 GGCGCGGAGC TRGT +chr1 1371178 1371198 GGCGCGGAGC,AGCGGCGCGG STRchive +chr1 1371178 1371198 AGCGGCGCGG,GGCGCGGAGC TRGT GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57832715 57832793 AAAAT,GAAAT STRchive -chr1 57832715 57832793 AAAAT,GAAAT TRGT +chr1 57832715 57832793 AAAAT,TGAAA STRchive +chr1 57832715 57832793 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94883977 94884000 GCC STRchive -chr1 94883977 94884000 GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94883977 94884000 CCG STRchive +chr1 94883977 94884000 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 145209323 145209354 GGC STRchive -chr1 145209323 145209354 GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC G ACCGAGAAGA +chr1 145209323 145209354 CGG STRchive +chr1 145209323 145209354 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG AGGAGGCG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG AGGAGGCG ACCGAGAAGA ADTKD_MUC1 -chr1 155160981 155162030 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive -chr1 155160981 155162030 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA TRGT +chr1 155160981 155162030 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG STRchive +chr1 155160981 155162030 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG TRGT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT NME_NAXE -chr1 156561557 156561575 GGGCC STRchive -chr1 156561557 156561575 GGGCC TRGT +chr1 156561557 156561575 GGGCC,CCGGG STRchive +chr1 156561557 156561575 CCGGG,GGGCC TRGT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96862804 96862862 AAAAT,AAATG STRchive -chr2 96862804 96862862 AAATG,AAAAT TRGT +chr2 96862804 96862862 AAAAT,TGAAA STRchive +chr2 96862804 96862862 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100721260 100721286 GCC STRchive -chr2 100721260 100721286 GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100721260 100721286 CCG STRchive +chr2 100721260 100721286 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176957786 176957831 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 191745598 191745646 GCA STRchive -chr2 191745598 191745646 GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 191745598 191745646 CAG STRchive +chr2 191745598 191745646 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63898360 63898391 CAG STRchive @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138664861 138664904 NGC STRchive -chr3 138664861 138664904 NGC TRGT +chr3 138664861 138664904 GCN,NGC STRchive +chr3 138664861 138664904 NGC,GCN TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -89,14 +89,14 @@ CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CCG CCA CCG CCG CCG CCG CCG CCG CCG CCT CCTCAGCTTC CANVAS_RFC1 -chr4 39350044 39350103 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC STRchive -chr4 39350044 39350103 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG TRGT +chr4 39350044 39350103 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG STRchive +chr4 39350044 39350103 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG TRGT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT CCHS_PHOX2B -chr4 41747989 41748049 GCN STRchive -chr4 41747989 41748049 GCN TRGT +chr4 41747989 41748049 GCN,NGC STRchive +chr4 41747989 41748049 NGC,GCN TRGT CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146258290 146258322 GCT STRchive -chr5 146258290 146258322 GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 146258290 146258322 CTG STRchive +chr5 146258290 146258322 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 176981490 176981532 GCC STRchive -chr5 176981490 176981532 GCC TRGT -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 176981490 176981532 CCG STRchive +chr5 176981490 176981532 CCG TRGT +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13328708 13328835 GCC STRchive -chr6 13328708 13328835 GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13328708 13328835 CCG STRchive +chr6 13328708 13328835 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16327864 16327955 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27239297 27239351 NGC STRchive -chr7 27239297 27239351 NGC TRGT +chr7 27239297 27239351 GCN,NGC STRchive +chr7 27239297 27239351 NGC,GCN TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27239444 27239480 NGC STRchive -chr7 27239444 27239480 NGC TRGT +chr7 27239444 27239480 GCN,NGC STRchive +chr7 27239444 27239480 NGC,GCN TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27239543 27239585 NGC STRchive -chr7 27239543 27239585 NGC TRGT +chr7 27239543 27239585 GCN,NGC STRchive +chr7 27239543 27239585 NGC,GCN TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 55955293 55955332 GCG STRchive -chr7 55955293 55955332 GCG TRGT -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 55955293 55955332 CGG STRchive +chr7 55955293 55955332 CGG TRGT +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 105601198 105601227 CGC STRchive -chr8 105601198 105601227 CGC TRGT -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 105601198 105601227 CCG STRchive +chr8 105601198 105601227 CCG TRGT +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 119379051 119379157 TAAAA,TGAAA STRchive @@ -185,34 +185,34 @@ ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA FTDALS1_C9orf72 -chr9 27573482 27573544 GGCCCC STRchive -chr9 27573482 27573544 GGCCCC TRGT -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA +chr9 27573482 27573544 GGCCCC,CCCCGG STRchive +chr9 27573482 27573544 CCCCGG,GGCCCC TRGT +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA FRDA_FXN -chr9 71652202 71652220 GAA STRchive -chr9 71652186 71652220 A,GAA TRGT +chr9 71652202 71652220 GAA,AAG STRchive +chr9 71652186 71652220 A,GAA,AAG TRGT TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 133556992 133557028 GCC STRchive -chr9 133556992 133557028 GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 133556992 133557028 CCG STRchive +chr9 133556992 133557028 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL -chr9 135946564 135947124 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive -chr9 135946564 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT +chr9 135946564 135947124 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC STRchive +chr9 135946564 135947124 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 81586139 81586160 GGC STRchive -chr10 81586139 81586160 GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 81586139 81586160 CGG STRchive +chr10 81586139 81586160 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119076999 119077033 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50898784 50898807 GGC STRchive -chr12 50898784 50898807 GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50898784 50898807 CGG STRchive +chr12 50898784 50898807 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 112036753 112036823 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 124018267 124018297 GGC STRchive -chr12 124018267 124018297 GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 124018267 124018297 CGG STRchive +chr12 124018267 124018297 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 70713515 70713561 CTG STRchive @@ -257,10 +257,10 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGCGGT SCA27B_FGF14 -chr13 102813924 102814076 GAA STRchive -chr13 102813924 102814076 GAA,GGA,GCA TRGT -TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG -TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG +chr13 102813924 102814076 GAA,AAG STRchive +chr13 102813924 102814076 AAG,GAA,AGG,CAG TRGT +TGAAGAAAGA AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AA TAGAAATGTG +TGAAGAAAGA AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AA TAGAAATGTG OPMD_PABPN1 chr14 23790681 23790712 GCN STRchive @@ -275,8 +275,8 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 23086363 23086389 GCG STRchive -chr15 23086363 23086389 GCG TRGT +chr15 23086363 23086389 CGG STRchive +chr15 23086363 23086389 CGG TRGT CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC CCCCCTCCCC GGCCGCCGCCGCCGCCGCCGCCGCCG CTGCCGCAGC @@ -287,21 +287,21 @@ CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CHNG3_MIR7-2 -chr15 89112664 89112683 TTTG STRchive -chr15 89112664 89112683 TTTG TRGT +chr15 89112664 89112683 TTTG,GTTT STRchive +chr15 89112664 89112683 GTTT,TTTG TRGT ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89876819 89876860 GCT STRchive -chr15 89876810 89876860 GCT,GTT TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +chr15 89876819 89876860 CTG STRchive +chr15 89876810 89876860 GCT,GTT,CTG TRGT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17564764 17564779 GCC STRchive -chr16 17564764 17564779 GCC TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA +chr16 17564764 17564779 CCG STRchive +chr16 17564764 17564779 GCC,CCG TRGT +TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A @@ -311,8 +311,8 @@ CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA T CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT SCA31_BEAN1 -chr16 66524299 66524369 AATAA,TGGAA,TAGAA STRchive -chr16 66524299 66524369 TGGAA,TAGAA,AATAA TRGT +chr16 66524299 66524369 AATAA,AATGG,AATAG STRchive +chr16 66524299 66524369 AATGG,AATAG,AATAA TRGT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72821593 72821657 GCC STRchive -chr16 72821593 72821657 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 72821593 72821657 CCG STRchive +chr16 72821593 72821657 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87637888 87637935 CTG STRchive @@ -336,15 +336,15 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAG FAME8_RAI1 chr17 17711672 17711774 TTTTA,TTTCA STRchive -chr17 17711672 17711774 TTTTA,TTTCA TRGT -TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +chr17 17711672 17711774 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC RCPS_EIF4A3 -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA STRchive -chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA TRGT -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC +chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA,GCCGCTGCCGACCTCGCTGT STRchive +chr17 78120808 78120938 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA TRGT +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC CPUM_TYMS chr18 666891 667632 GATGGT STRchive @@ -359,8 +359,8 @@ GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TGAAAGAGCC MRUPAV_PLIN4 -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC STRchive -chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT +chr19 4510739 4513671 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC,GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT STRchive +chr19 4510739 4513671 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTCCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACGGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACTTTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCATCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAACTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCTTGGATGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCTGTCTGGGCAGCCCCTTTGGCCACATTCACAGCACTGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGTGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCACCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTCCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACGGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACTTTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCATCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAACTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCTTGGATGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCTGTCTGGGCAGCCCCTTTGGCCACATTCACAGCACTGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGTGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCACCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG @@ -377,8 +377,8 @@ CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC EDM1-PSACH_COMP -chr19 18896844 18896860 GTC STRchive -chr19 18896844 18896860 GTC TRGT +chr19 18896844 18896860 GTC,CGT STRchive +chr19 18896844 18896860 CGT,GTC TRGT CTCCGTCATT GTC GTC GTC GTC GTC G CAGGCATCAC CTCCGTCATT GTC GTC GTC GTC GTC G CAGGCATCAC @@ -389,15 +389,15 @@ GTGATCCCCC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C GTGATCCCCC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TTCCCGGCTA SCA36_NOP56 -chr20 2633378 2633403 GGCCTG STRchive -chr20 2633378 2633421 GGCCTG,CGCCTG TRGT +chr20 2633378 2633403 GGCCTG,CCTGGG STRchive +chr20 2633378 2633421 GGCCTG,CGCCTG,CCTGGG TRGT GCCGCAGACA G GGCCTG GGCCTG GGCCTG GGCCTG CGCCTGCGCCTGCGCCTGCCCTGGGAAC GCCGCAGACA G GGCCTG GGCCTG GGCCTG GGCCTG CGCCTG CGCCTG CGCCTG CCCTGGGAAC CJD_PRNP -chr20 4680043 4680139 GGTGGTGGCTGGGGGCAGCCTCAT,CCTCATGGTGGTGGCTGGGGGCAG STRchive -chr20 4680016 4680139 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT TRGT -CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA +chr20 4680043 4680139 GGTGGTGGCTGGGGGCAGCCTCAT,AGCCTCATGGTGGTGGCTGGGGGC STRchive +chr20 4680016 4680139 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT TRGT +CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCAT GGTGGTGGCTGGGGGCAGCCTCAT GGTGGTGGCTGGGGGCAGCCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA CCGCTACCCA CCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA EPM1_CSTB @@ -425,28 +425,28 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25031646 25031682 NGC STRchive -chrX 25031646 25031682 NGC TRGT +chrX 25031646 25031682 GCN,NGC STRchive +chrX 25031646 25031682 NGC,GCN TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 25031766 25031814 NGC STRchive -chrX 25031766 25031814 NGC TRGT +chrX 25031766 25031814 GCN,NGC STRchive +chrX 25031766 25031814 NGC,GCN TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG DMD_DMD -chrX 31302674 31302722 TTC STRchive -chrX 31302674 31302730 TTC,T TRGT +chrX 31302674 31302722 TTC,CTT STRchive +chrX 31302674 31302730 TTC,T,CTT TRGT AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTTTTTTTGGCAGAGGTG AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 66765158 66765261 GCA STRchive -chrX 66765158 66765261 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 66765158 66765261 CAG STRchive +chrX 66765158 66765261 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 70672904 70672981 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 139586481 139586526 NGC STRchive -chrX 139586481 139586526 NGC TRGT +chrX 139586481 139586526 GCN,NGC STRchive +chrX 139586481 139586526 NGC,GCN TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 147582124 147582273 GCC STRchive -chrX 147582124 147582273 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 147582124 147582273 CCG STRchive +chrX 147582124 147582273 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC diff --git a/data/ref-alleles/ref-alleles.hg38.txt b/data/ref-alleles/ref-alleles.hg38.txt index b9c4bd8f..9437d479 100644 --- a/data/ref-alleles/ref-alleles.hg38.txt +++ b/data/ref-alleles/ref-alleles.hg38.txt @@ -1,50 +1,50 @@ HMNR7_VWA1 -chr1 1435798 1435818 GGCGCGGAGC STRchive -chr1 1435798 1435818 GGCGCGGAGC TRGT +chr1 1435798 1435818 GGCGCGGAGC,AGCGGCGCGG STRchive +chr1 1435798 1435818 AGCGGCGCGG,GGCGCGGAGC TRGT GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG GGCTGGCGCT GGCGCGGAGC GGCGCGGAGC GCGGTGAGTG SCA37_DAB1 -chr1 57367043 57367121 AAAAT,GAAAT STRchive -chr1 57367043 57367121 AAAAT,GAAAT TRGT +chr1 57367043 57367121 AAAAT,TGAAA STRchive +chr1 57367043 57367121 AAAAT,GAAAT,TGAAA TRGT CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC CTGTCTCCAC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA TAAATTAGCC OPDM5_ABCD3 -chr1 94418421 94418444 GCC STRchive -chr1 94418421 94418444 GCC TRGT -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC -CAGTAAGGTA GCC GCC GCC GCC GCC GCC GCC GC GTCCCCTCGC +chr1 94418421 94418444 CCG STRchive +chr1 94418421 94418444 CCG TRGT +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC +CAGTAAGGTA G CCG CCG CCG CCG CCG CCG CCG C GTCCCCTCGC NIID_NOTCH2NLC -chr1 149390802 149390842 GGC STRchive -chr1 149390802 149390842 GGC TRGT -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA -GATCTGCCCA GGC GGC GGC GGC GGC GGC GGC GGC GGC GGAGGA GGC GGC G ACCGAGAAGA +chr1 149390802 149390842 CGG STRchive +chr1 149390802 149390842 CGG TRGT +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA +GATCTGCCCA GG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGGAGG CGG CG ACCGAGAAGA ADTKD_MUC1 -chr1 155188505 155192239 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA STRchive -chr1 155188505 155192239 GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCA,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,GCCCACGGTGTCACCTCGGCCCCGGACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCA TRGT +chr1 155188505 155192239 GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG STRchive +chr1 155188505 155192239 ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG,GGCTNNGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG,ACACCAGGCCGGCCCCGGGCTCCACCGCCCCCCCAGCCCACGGTGTCACCTCGGCCCCGG TRGT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGTGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGACTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGGGGCCGGCCTGGTGTCCGGTGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGAGCCGGTCTGTTGTCCGGGGCCGAGGTGACACCGTGTGCTGGAGTGTCGGTGGAGCCCGAGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACTGTGAGCTGGGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCTGTTTTAAATATACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGACGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCTCTCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGTAGCCCGGGGCGGGCCTTGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGACTGGTGTCCGGGGCCGAGGTGACACCGTG GGTTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGCGCGGTGGAGCCCGGGCCAGAACTGCTTTCCGGGGCCGTGGTGACTCCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCTGGGGCCGAGGTGACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCGGAACTCTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGGGGGGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCTGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGTCGAGGTGACACCGTGGGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCTGGGGGGGCGGTGGAGCCGGGGCCGGCCTGGTGTCCGGGGTTGGAAAACTGGGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCAGGGGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGTCCGTCCTTCTCTCCGAGGGCCGAGGTGACATCGTAGACTGCGGGCGCGGTGGAGCCCGGGTCCGGCCTGCTCTCCGAGGCCGAGGTGACACCGTAGACTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGAACCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGACCGACCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCGGACCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT TGCAGAGCCT GAGGCCGAGGTGACATTGTGGACTGGAGGGGCGGTGGAGCCCAAGGCGGGCCTGTTGTCCGGGGCCGAGGTGACACCATG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGGGTGGAGCCCGGGGCCCGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGTGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGTGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGACTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGGGGCCGGCCTGGTGTCCGGTGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGAGCCGGTCTGTTGTCCGGGGCCGAGGTGACACCGTGTGCTGGAGTGTCGGTGGAGCCCGAGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACTGTGAGCTGGGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCTGTTTTAAATATACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGACGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCTCTCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGTAGCCCGGGGCGGGCCTTGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGACTGGTGTCCGGGGCCGAGGTGACACCGTG GGTTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGCGCGGTGGAGCCCGGGCCAGAACTGCTTTCCGGGGCCGTGGTGACTCCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCCGGCCTGCTCTCTGGGGCCGAGGTGACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCGGAACTCTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGGGGGGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCTGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGTCGAGGTGACACCGTGGGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCTGGGGGGGCGGTGGAGCCGGGGCCGGCCTGGTGTCCGGGGTTGGAAAACTGGGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCAGGGGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGCGGGCGCGGTGGAGCCCGGGTCCGTCCTTCTCTCCGAGGGCCGAGGTGACATCGTAGACTGCGGGCGCGGTGGAGCCCGGGTCCGGCCTGCTCTCCGAGGCCGAGGTGACACCGTAGACTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGGGGGGGCGGTGGAGCCCGGAACCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGACCGACCTGGTGTCCGGGGCCGAGGTGACACCGTGGGCTGCGGGCGCGGTGGAGCCCGGGGCGGACCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCGGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTTGGGGGGCGGTGGAGCCCGGGGCCGGCCTGGTGTCCGGGGCCGAGGTGACACCGTG GGCTGGGGGGGCGGTGGAGCCCGGGGCTGGCTTGTTGTCCGGGGCTGAGGTGACATCGTGGGCTGGCGGGGTGGTGGAGCCCAGGGCTGGCCTGGTGACTGGGACCGAGGTGACATCCTGTCCCCAGGTGGCAGCTGAACCTGAAGCTGGTTCCGTGGCCGGGGCCAGAGTGACATCCTGTCCCTGAGTGGTGGAGGAGCCTGAACCGGGGCTGTGGCTGGAGAGTACGCTGCTGGTCATACTCACAGCATTCTTCTCAGTAGAGCTGGGCACTGAACTTCTCTGGGTAGCCGAAGTCTCCTT TTCTCCACCT NME_NAXE -chr1 156591765 156591783 GGGCC STRchive -chr1 156591765 156591783 GGGCC TRGT +chr1 156591765 156591783 GGGCC,CCGGG STRchive +chr1 156591765 156591783 CCGGG,GGGCC TRGT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT ACATGCGCCG GGGCC GGGCC GGGCC GGG GGCGCGCGCT FAME2_STARD7 -chr2 96197066 96197124 AAAAT,AAATG STRchive -chr2 96197066 96197124 AAATG,AAAAT TRGT +chr2 96197066 96197124 AAAAT,TGAAA STRchive +chr2 96197066 96197124 AAATG,AAAAT,TGAAA TRGT ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA ACTCCGTCTC AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAAAT AAA GCCAAGCACA FRA2A_AFF3 -chr2 100104798 100104824 GCC STRchive -chr2 100104798 100104824 GCC TRGT -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC -CGCTGCTGCA GCC GCC GCC GCC GCC GCC GCC GCC GC GGTGCTCTGC +chr2 100104798 100104824 CCG STRchive +chr2 100104798 100104824 CCG TRGT +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC +CGCTGCTGCA G CCG CCG CCG CCG CCG CCG CCG CCG C GGTGCTCTGC SD5_HOXD13 chr2 176093058 176093103 GCN STRchive @@ -53,10 +53,10 @@ TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTT TTCGGGGCGG GCG GCG GCG GCG GCA GCG GCG GCT GCG GCG GCG GCG GCG GCA GCC TCCGGCTTTG GDPAG_GLS -chr2 190880872 190880920 GCA STRchive -chr2 190880872 190880920 GCA TRGT -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG -ATCCTAGCGC GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA CCCGCATCCG +chr2 190880872 190880920 CAG STRchive +chr2 190880872 190880920 CAG TRGT +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG +ATCCTAGCGC G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA CCCGCATCCG SCA7_ATXN7 chr3 63912684 63912715 CAG STRchive @@ -71,8 +71,8 @@ GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CAGC CAGG CAGG GTGAGACAGA CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGG CAGA CAGG CA GC CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGG CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CAGA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CA CT GGCAGTAATA BPES_FOXL2 -chr3 138946019 138946062 NGC STRchive -chr3 138946019 138946062 NGC TRGT +chr3 138946019 138946062 GCN,NGC STRchive +chr3 138946019 138946062 NGC,GCN TRGT CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG CTACCGGGGC C CGC GGC TGC AGC CGC AGC TGC TGC AGC CGC TGC GGC TGC CGC CATCTGGCAG @@ -89,14 +89,14 @@ CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CAAGTCCTTC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CCG CCA CCG CCG CCG CCG CCG CCG CCG CCT CCTCAGCTTC CANVAS_RFC1 -chr4 39348424 39348483 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC STRchive -chr4 39348424 39348483 AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG TRGT +chr4 39348424 39348483 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG STRchive +chr4 39348424 39348483 AAAAG,AAGGG,ACAGG,AAAGG,CAGGG,AAAGGG TRGT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT TCTGTTTCAA AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAAG AAAA AGCATGTTCT CCHS_PHOX2B -chr4 41745972 41746032 GCN STRchive -chr4 41745972 41746032 GCN TRGT +chr4 41745972 41746032 GCN,NGC STRchive +chr4 41745972 41746032 NGC,GCN TRGT CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC CAGGCCTCCA GCT GCC GCC GCT GCC GCT GCC GCC GCC GCC GCT GCC GCG GCC GCC GCC GCT GCT GCT GCG CCGCCCTTGC @@ -113,22 +113,22 @@ CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CTGTTTTTTA TTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTT CCGAGATGGA SCA12_PPP2R2B -chr5 146878727 146878759 GCT STRchive -chr5 146878727 146878759 GCT TRGT -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG -CGCACTCGCA GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC AGGAGGCTGG +chr5 146878727 146878759 CTG STRchive +chr5 146878727 146878759 CTG TRGT +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG +CGCACTCGCA G CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C AGGAGGCTGG OPDM_FAM193B -chr5 177554489 177554531 GCC STRchive -chr5 177554489 177554531 GCC TRGT -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC -TCGCTCCACA C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC TACCGCTCCC +chr5 177554489 177554531 CCG STRchive +chr5 177554489 177554531 CCG TRGT +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC +TCGCTCCACA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TACCGCTCCC OPDM_TBC1D7 -chr6 13328476 13328603 GCC STRchive -chr6 13328476 13328603 GCC TRGT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT -GCGCGCGCCC AGACG GCC CGGGAGACAAAACTCAGC GCC GCT GCC GCT GCC GCT GCC GCC GCC GCC GGACGTGACATCAACTCCAGGTC GCC GGGCGGGC GCC GGGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +chr6 13328476 13328603 CCG STRchive +chr6 13328476 13328603 CCG TRGT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT +GCGCGCGCCC AGACGGC CCG GGAGACAAAACTCAGCG CCG CTG CCG CTG CCG CTG CCG CCG CCG CCG GACGTGACATCAACTCCAGGTCG CCG GGCGGGCG CCG GGCGGGCGCATGCGCAGAGGGCGCGGGCAGGAA GCTCCACCCT SCA1_ATXN1 chr6 16327633 16327724 CTG STRchive @@ -149,34 +149,34 @@ AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA C AGCAACAAAG G CAG CAG CAG CAACAACAA CAG CAG CAG CAG CAG CAG CAG CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA ACAGGCAGTG HFG_HOXA13-III -chr7 27199678 27199732 NGC STRchive -chr7 27199678 27199732 NGC TRGT +chr7 27199678 27199732 GCN,NGC STRchive +chr7 27199678 27199732 NGC,GCN TRGT CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC CCGAGGACGA CGC GGC GGC GGC GGC GGC GGC TGC AGC GGC AGC CGC GGC AGC AGC GGC GGC AGC CGACGGGGGC HFG_HOXA13-II -chr7 27199825 27199861 NGC STRchive -chr7 27199825 27199861 NGC TRGT +chr7 27199825 27199861 GCN,NGC STRchive +chr7 27199825 27199861 NGC,GCN TRGT GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG GGCACTGGTT GGC CGC GGC CGC CGC CGC AGC CGC GGC CGC CGC CGC CACCGAGAAG HFG_HOXA13-I -chr7 27199924 27199966 NGC STRchive -chr7 27199924 27199966 NGC TRGT +chr7 27199924 27199966 GCN,NGC STRchive +chr7 27199924 27199966 NGC,GCN TRGT CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG CCCCGGCCCC GGC AGC CGC CGC CGC TGC AGC CGC TGC TGC AGC CGC CGC CGC CCCTTCCATG FRA7A_ZNF713 -chr7 55887600 55887639 GCG STRchive -chr7 55887600 55887639 GCG TRGT -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG -CGGGTCCACC GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG GCG TCAGGGGGCG +chr7 55887600 55887639 CGG STRchive +chr7 55887600 55887639 CGG TRGT +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG +CGGGTCCACC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG CG TCAGGGGGCG OPDM1_LRP12 -chr8 104588970 104588999 CGC STRchive -chr8 104588970 104588999 CGC TRGT -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC -ACGACGCCGA CGC CGC CGC CGC CGC CGC CGC CGC CGC CG AGCCACCGGC +chr8 104588970 104588999 CCG STRchive +chr8 104588970 104588999 CCG TRGT +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC +ACGACGCCGA CG CCG CCG CCG CCG CCG CCG CCG CCG CCG AGCCACCGGC FAME1_SAMD12 chr8 118366812 118366918 TAAAA,TGAAA STRchive @@ -185,34 +185,34 @@ ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ACTCTGTCTC AAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA TAAAA ATGAACAAAA FTDALS1_C9orf72 -chr9 27573484 27573546 GGCCCC STRchive -chr9 27573484 27573546 GGCCCC TRGT -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA -CGCAACCGCA GCCCCGCCCCGGGCCCGCCCCCGGGCCCGCCCCGACCACGCCCC GGCCCC GGCCCC GGCCCC TAGCGCGCGA +chr9 27573484 27573546 GGCCCC,CCCCGG STRchive +chr9 27573484 27573546 CCCCGG,GGCCCC TRGT +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA +CGCAACCGCA GCCCCG CCCCGG GCCCGC CCCCGG GCCCGCCCCGACCACG CCCCGG CCCCGG CCCCGG CCCC TAGCGCGCGA FRDA_FXN -chr9 69037286 69037304 GAA STRchive -chr9 69037270 69037304 A,GAA TRGT +chr9 69037286 69037304 GAA,AAG STRchive +chr9 69037270 69037304 A,GAA,AAG TRGT TAAAAAATACAAAAAAAAAAAAAAAA GAA GAA GAA GAA GAA GAA AATAAAGAAA TAAAAAATAC A A A A A A A A A A A A A A A A GAA GAA GAA GAA GAA GAA AATAAAGAAA HSAN-VIII_PRDM12 -chr9 130681605 130681641 GCC STRchive -chr9 130681605 130681641 GCC TRGT -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG -CGCCCGCGCT C GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GC GCACCACCTG +chr9 130681605 130681641 CCG STRchive +chr9 130681605 130681641 CCG TRGT +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG +CGCCCGCGCT CG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C GCACCACCTG MODY8_CEL -chr9 133071177 133071737 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG STRchive -chr9 133071177 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT +chr9 133071177 133071737 GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC STRchive +chr9 133071177 133071737 GGCCCCCCCGTGCCGCCCACGGGTGACTCCGG,ACGGGTGACTCCGGGGCCCCCCCGTGCCGCCC,GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG TRGT GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG GAGGCCACCC CTGTGCCCCCCACAGGGGACTCCGAGGCCACTCCCGTGCCCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG CGCCCCCCCCGTGCCGCCCACGGGTGACGCCGGGCCCCCCCCCGTGCCGCCCACGGGTGACTCCGGCGCCCCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCCGTGACCCCCACGGGTGACTCCGAGACCGCCCCCGTGCCGCCCACGGGTGACTCCGGGGCCCCCCCTGTGCCCCCCACGGGTGACTCTGAGGCTGCCCCTGTGCCCCCCACAGATGACTCCAAGGAAGCT CAGATGCCTG OPML1_NUTM2B-AS1 -chr10 79826383 79826404 GGC STRchive -chr10 79826383 79826404 GGC TRGT -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC -GAAGCGGCGG GGC GGC GGC GGC GGC GGC GGC CGGGAAGAAC +chr10 79826383 79826404 CGG STRchive +chr10 79826383 79826404 CGG TRGT +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC +GAAGCGGCGG GG CGG CGG CGG CGG CGG CGG C CGGGAAGAAC JBS_CBL chr11 119206289 119206323 CGG STRchive @@ -227,10 +227,10 @@ CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C CCATCACCAC CAG CAA CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TCACGGAAAC FRA12A_DIP2B -chr12 50505001 50505024 GGC STRchive -chr12 50505001 50505024 GGC TRGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT -CTTTGCTCAT GGC GGC GGC GGC GGC GGC GGC GG TGCTGGTGGT +chr12 50505001 50505024 CGG STRchive +chr12 50505001 50505024 CGG TRGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT +CTTTGCTCAT GG CGG CGG CGG CGG CGG CGG CGG TGCTGGTGGT SCA2_ATXN2 chr12 111598949 111599019 CTG STRchive @@ -239,10 +239,10 @@ GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG GCGGGCGGCG G CTG CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG GGGCTTCAGC OPDM4_RILPL1 -chr12 123533720 123533750 GGC STRchive -chr12 123533720 123533750 GGC TRGT -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG -CTCCCGAGTG GGC GGC GGC GGC GGC GGC GGC GGC GGC GGC AGCGGGGAGG +chr12 123533720 123533750 CGG STRchive +chr12 123533720 123533750 CGG TRGT +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG +CTCCCGAGTG GG CGG CGG CGG CGG CGG CGG CGG CGG CGG C AGCGGGGAGG SCA8_ATXN8OS chr13 70139383 70139429 CTG STRchive @@ -257,10 +257,10 @@ CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGC CCTGTCCCCA GCG GCG GCG GCA GCG GCG GCG GCG GCT GCG GCG GCG GCG GCC GCG G TGTCCGCGGT SCA27B_FGF14 -chr13 102161574 102161726 GAA STRchive -chr13 102161574 102161726 GAA,GGA,GCA TRGT -TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG -TGAAGAAAGA AA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA GAA TAGAAATGTG +chr13 102161574 102161726 GAA,AAG STRchive +chr13 102161574 102161726 AAG,GAA,AGG,CAG TRGT +TGAAGAAAGA AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AA TAGAAATGTG +TGAAGAAAGA AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AAG AA TAGAAATGTG OPMD_PABPN1 chr14 23321472 23321503 GCN STRchive @@ -275,10 +275,10 @@ ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ATAGGTCCCC CTG CTG CTG CTG CTG CTG CTG CTG TTG CTG CTTTTG CTG CTG TCTGAAACAT ALS1_NIPA1 -chr15 22786677 22786703 GCG STRchive -chr15 22786677 22786703 GCG TRGT -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG -AGCTGCGGCA GCG GCG GCG GCG GCG GCG GCG GCG GC CGGGGAGGGG +chr15 22786677 22786703 CGG STRchive +chr15 22786677 22786703 CGG TRGT +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG +AGCTGCGGCA G CGG CGG CGG CGG CGG CGG CGG CGG C CGGGGAGGGG aFTLD-U_GOLGA8A chr15 34419425 34419451 TTTC,CT STRchive @@ -287,21 +287,21 @@ CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CAGCTCCCGT TT CT TT CT TT CT TT CT TT CT TT CT TT TTTTTCTGAG CHNG3_MIR7-2 -chr15 88569433 88569452 TTTG STRchive -chr15 88569433 88569452 TTTG TRGT +chr15 88569433 88569452 TTTG,GTTT STRchive +chr15 88569433 88569452 GTTT,TTTG TRGT ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA ATTGTTTTTC TTTG TTTG TTTG TTTG TTT TTCTTTGAGA CPEO_POLG -chr15 89333588 89333629 GCT STRchive -chr15 89333579 89333629 GCT,GTT TRGT -AGCACTTGCGGCTGCTGAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT +chr15 89333588 89333629 CTG STRchive +chr15 89333579 89333629 GCT,GTT,CTG TRGT +AGCACTTGCGGCTGCTGAG G CTG CTG TTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG C CGCCGCCGCT AGCACTTGCG GCT GCT GAG GCT GCT GTT GCT GCT GCT GCT GCT GCT GCT GCT GCT GCT GC CGCCGCCGCT DBQD2_XYLT1 -chr16 17470907 17470922 GCC STRchive -chr16 17470907 17470922 GCC TRGT -TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA +chr16 17470907 17470922 CCG STRchive +chr16 17470907 17470922 GCC,CCG TRGT +TCCCGCTCGG G CCG CCG CCG CCG CC CCCCTCCCCA TCCCGCTCGG GCC GCC GCC GCC GCC CCCCTCCCCA FAME6_TNRC6A @@ -311,8 +311,8 @@ CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA T CTATTAAAGC A TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT ACTTATTTAT SCA31_BEAN1 -chr16 66490396 66490466 AATAA,TGGAA,TAGAA STRchive -chr16 66490396 66490466 TGGAA,TAGAA,AATAA TRGT +chr16 66490396 66490466 AATAA,AATGG,AATAG STRchive +chr16 66490396 66490466 AATGG,AATAG,AATAA TRGT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT ACTCTGTTTC AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA AATAA TAA AATAA AA AGAAACCTCT @@ -323,10 +323,10 @@ CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CCCGCCGCAG G CAG CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAA CAG CAG CAG CAG CAG CAA CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG TCCTCACCCT SCA4_ZFHX3 -chr16 72787694 72787758 GCC STRchive -chr16 72787694 72787758 GCC TRGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT -AGTGGTACGA GCC GCC GCC GCC GCC GCC GCC GCC ACC GCC GCC GCC GCC GCC ACT GCC ACC GCC GCC GCC GCC G GTGGGGACGT +chr16 72787694 72787758 CCG STRchive +chr16 72787694 72787758 CCG TRGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT +AGTGGTACGA G CCG CCG CCG CCG CCG CCG CCG CCA CCG CCG CCG CCG CCG CCACTGCCA CCG CCG CCG CCG CCG GTGGGGACGT HDL2_JPH3 chr16 87604282 87604329 CTG STRchive @@ -336,15 +336,15 @@ AAGCCAGGGA G CTG C CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG CTG TAAG FAME8_RAI1 chr17 17808358 17808460 TTTTA,TTTCA STRchive -chr17 17808358 17808460 TTTTA,TTTCA TRGT -TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +chr17 17808358 17808460 TTTTA,TTTCA,ATTTT TRGT TTATTTTTAA A TTTTA TTTTA TTTTA TA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTTTA TTT CATCTCAGAC +TTATTTTTAA ATTTT ATTTT ATTTT AT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTTT ATTT CATCTCAGAC RCPS_EIF4A3 -chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA STRchive -chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA TRGT -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC -CCGACCTCGC TGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA CCTCGCTGTGCCGCTGCCGA GAACAGACGC +chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA,GCCGCTGCCGACCTCGCTGT STRchive +chr17 80147009 80147139 GCCGCTGCCGACCTCGCTGT,CCTCGCTGTGCCGCTGCCGA TRGT +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC +CCGACCTCGC T GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGACCTCGCTGT GCCGCTGCCGA GAACAGACGC CPUM_TYMS chr18 666891 667632 GATGGT STRchive @@ -359,8 +359,8 @@ GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA GAGGAGGAGG AG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TGAAAGAGCC MRUPAV_PLIN4 -chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC STRchive -chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT +chr19 4510727 4513659 TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC,GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT STRchive +chr19 4510727 4513659 GGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTACTGGTGTCCACGCCGGTCTGGATGGTTCCTTT,TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TRGT GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTCCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACGGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACTTTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCATCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAACTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCTTGGATGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCTGTCTGGGCAGCCCCTTTGGCCACATTCACAGCACTGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGTGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCACCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG GCAGTCTGCT CCCCACCATTGTCTGTGGTCCTGGAACTGGTGAGTCCACCCCAGGAGGTGGCGGGGGTACTAGGTAACCAGTTCTGGAAGGTGCTGAGGCCAGTGTGGGTGGCCCCTGTCGCCACGTTCCCTGACCCCATGAGCCCAGCGGACACTGCGTCTTTGGTTCCGGTCAGCACTGTCTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCATGCTCATGGCACCGGTAACCCCACTGAAGACAGTGTCCTTGGTACCCATAAGCACAGCCTTGGAGGCGTCCACGCCGGTCTGCACGGTTCCTTTGGCCACATTCACTGCCCCCGTGACTCCAGTAGTCACTGCATCCTTAGCGCCACTCAGCACCGTCTTGGCTGTGTCCACACCTGTCTGGACGGTCCCTTTGGCCACATTTACGGCACCAGTGACTCCACTGCAGACGGTGTCCTTGGTACCGGTCAGGACAGTCTTGCTGGTGTCCACGCCGGTCTGGACAGTCCCTTTGGCCAAGTTCACAGCCCCTGTGAGCCCAGTGGACACGGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACTTTCGCAGCACCGGTCACCCCACTGCCAAGGGTGTTCTTTGTACCTGTTGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCACCCTTGGCCACGTTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCGGTCAGCACGGTCTTGGCCGTGTCTACACCCATCTGGACGGCCCCCTTGGCCACATTCGCAGCACCGGTCACCCCACTGCACACAGCATCCTTGGTACCAGTTAACACAGTCTTGGTGGTGTCCATGCCGGTCTGGACAGTCCCTTTGGCCAACTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCCCCTTGGATGGCCCCTTTGGCCACATTCGCAGCACCGGTCACCCCACTGCAGACGGTGTCCTTGGTACCAGTTAGGACAGTCTTGGTGGTGTCCACACTGGTCTGGACAGTCCCTTTGGCGACATTCACTGCCCCCATGAGCCCAGTAGTGACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCCGTGTCTACACCTGTCTGGGCAGCCCCTTTGGCCACATTCACAGCACTGGTCACCCCACTGCCAAAGGTGTTCTTTGTACCTGTCGCGATATTTTGGGTCGTTTTCAGCCCAGTTTGCACAGCCCCCTTGGCCACGTTCACGGCACTGGTGACCCCACTGTAGATGGTGTCCTTGGTACCGGTTAGGACAGTTTTGGTGGTGTCCATGCCTGTCTGGACGGTCCCTTTGGCGACATTCACTGCCCCCACGAGCCCAGTAGTCACTGTGTCCTTGGTGCCGGTCAGCACGGTCTTGGCTGTGTCTACACCTGTCTGGACAGCCCCCTTGGCCACATTCGCTGCCCCCGTGAGCCCAGTGGACATCGTGTCTTTTGTACCTATGACCACAGACTTGGTGGTGTCCAGGCCCCCCTGGACGGCCCCTTTGGCCACGTTCACAGCACTGGTCACCCCACTGCAGACGGTGTCCTTGGTGCCGGTTAGGACAGTCTTGGTGGTGTCTACGCCGGTCTGGACGGTCCCTTTGGCCACGTTCACAGCCCCTGTGAGCCCAGTGGACACAGCATCTTTAGTGCCAGTCAGGACAGACTTTGTAGTGTCCAGGCCGCCCTGGACGGCCCCTTTGGCCACATTCGCAGCACCGGTGACCCCACTGCAGACAGTGTCCTTGGTACCAGTTAGAACGATCTTGGTGGTGTCCACGCCTGTCTGGATGGTTCCTCTGGCCAAATTCATGGCACCAGTCACCCCACTGCAGACGGTGTCCTTTGTACCTGTTGCGATATTTTGGGTTGTGTTCAGCCCAGTTTGCATGGCCCCCTTGGCCACATTCGCTGCCCCTGTGAGCCCAGTGGACATCGTGTCTTTCGTACCCATGACCATAGACTTGGTGGTATCCAGGCCCCCCTGGATGGCCTCTTTGGCCAAGTTCACGGCACCGGTCACCCCACTGCAGACAGTGTTCTTGGTGCCAGTTAGGACAGTCTTGG TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACGCCGGTCTGGATGGTTCCTTTGGCCACATTCATGGCACCAGTCACCCCACTACAGACGGTGTCCTTGGTACCTGTTAGGACAGTCTTAC TGGTGTCCACACCGGTCTGAATGCTTCCTCTGGCCACATTCACTGCCCCTGTGAGCCCAGTGGACACAGCATCTTTGGTGCCGGTCAGCACAGCCTTGGAGGTTTCCACGCCAGTCTGGACAGTCCCTTTGGCCAAGTTCACTGCCCCCATGACCCCAGTAGTCACTGTGTCTTTGGTGCCGGTCAGCACAGTCTTGGTGGTGTCCACACCGGCCTGTACGGTCCCTTTGGCCACATTCACTGCCCCCGTGAGCCCAGTGGACACCGTGTCCTTGGTGCCGGTGAGGACAGCCTTCGAGGTGTCCAGACCCCCTTGGACGGCCCCCTTAGCCATGTCCATGGCCCCTGTGACCCCGCTGGACACCACCTCCTTGGTGCCCGTAAGTGCAGACCGAGTGGTGTCCAGGCCTCCCTGGACCACTCCCTTAGCCACGTCCACCACGCTGGCCACCCCGGAGGACACGGCATCCTTGGCCCT GGACATCTTG @@ -377,8 +377,8 @@ CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC CTCACCTGCT CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG C TGCCTCCGCC EDM1-PSACH_COMP -chr19 18786034 18786050 GTC STRchive -chr19 18786034 18786050 GTC TRGT +chr19 18786034 18786050 GTC,CGT STRchive +chr19 18786034 18786050 CGT,GTC TRGT CTCCGTCATT GTC GTC GTC GTC GTC G CAGGCATCAC CTCCGTCATT GTC GTC GTC GTC GTC G CAGGCATCAC @@ -389,15 +389,15 @@ GTGATCCCCC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG C GTGATCCCCC CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CA TTCCCGGCTA SCA36_NOP56 -chr20 2652732 2652757 GGCCTG STRchive -chr20 2652732 2652775 GGCCTG,CGCCTG TRGT +chr20 2652732 2652757 GGCCTG,CCTGGG STRchive +chr20 2652732 2652775 GGCCTG,CGCCTG,CCTGGG TRGT GCCGCAGACA G GGCCTG GGCCTG GGCCTG GGCCTG CGCCTGCGCCTGCGCCTGCCCTGGGAAC GCCGCAGACA G GGCCTG GGCCTG GGCCTG GGCCTG CGCCTG CGCCTG CGCCTG CCCTGGGAAC CJD_PRNP -chr20 4699397 4699493 GGTGGTGGCTGGGGGCAGCCTCAT,CCTCATGGTGGTGGCTGGGGGCAG STRchive -chr20 4699370 4699493 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,GGTGGTGGCTGGGGGCAGCCTCAT TRGT -CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA +chr20 4699397 4699493 GGTGGTGGCTGGGGGCAGCCTCAT,AGCCTCATGGTGGTGGCTGGGGGC STRchive +chr20 4699370 4699493 CCTCAGGGCGGTGGTGGCTGGGGGCAG,CCTCATGGTGGTGGCTGGGGGCAG,AGCCTCATGGTGGTGGCTGGGGGC,GGTGGTGGCTGGGGGCAGCCTCAT TRGT +CCGCTACCCACCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCAT GGTGGTGGCTGGGGGCAGCCTCAT GGTGGTGGCTGGGGGCAGCCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA CCGCTACCCA CCTCAGGGCGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCTCATGGTGGTGGCTGGGGGCAG CCCCATGGTGGTGGCTGGGGACAGCCTCATGGTGGTGGCTGGGGTCAA GGAGGTGGCA EPM1_CSTB @@ -425,28 +425,28 @@ ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATT ACTAGAATGG ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT ATTCT TTTTGAGATG PRTS_ARX -chrX 25013529 25013565 NGC STRchive -chrX 25013529 25013565 NGC TRGT +chrX 25013529 25013565 GCN,NGC STRchive +chrX 25013529 25013565 NGC,GCN TRGT GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG GCGTGTCCCA GGC CGC GGC GGC CGC GGC CGC GGC TGC CGC GGC GGC CCCTGCGCCG EIEE1_ARX -chrX 25013649 25013697 NGC STRchive -chrX 25013649 25013697 NGC TRGT +chrX 25013649 25013697 GCN,NGC STRchive +chrX 25013649 25013697 NGC,GCN TRGT CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG CCGTGGCCGT GGC GGC CGC TGC CGC CGC CGC CGC CGC CGC CGC CGC CGC CGC TGC CGC ACCCTGAAGG DMD_DMD -chrX 31284557 31284605 TTC STRchive -chrX 31284557 31284613 TTC,T TRGT +chrX 31284557 31284605 TTC,CTT STRchive +chrX 31284557 31284613 TTC,T,CTT TRGT AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTTTTTTTGGCAGAGGTG AACGAACTGT TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC TTC T T T T T T T T GGCAGAGGTG SBMA_AR -chrX 67545316 67545419 GCA STRchive -chrX 67545316 67545419 GCA TRGT -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG -TGCTGCTGCT GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA GCA AGAGACTAGCCCCAG GCA GCA GCA GCA GCA GCA G GGTGAGGATG +chrX 67545316 67545419 CAG STRchive +chrX 67545316 67545419 CAG TRGT +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG +TGCTGCTGCT G CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAG CAAGAGACTAGCCC CAG G CAG CAG CAG CAG CAG CAG GGTGAGGATG XDP_TAF1 chrX 71453054 71453131 AGAGGG STRchive @@ -461,8 +461,8 @@ CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA CTCAACCCAC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC TTCAAGCTGA XLID_SOX3 -chrX 140504316 140504361 NGC STRchive -chrX 140504316 140504361 NGC TRGT +chrX 140504316 140504361 GCN,NGC STRchive +chrX 140504316 140504361 NGC,GCN TRGT CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC CCGGACTGCT GGC GGC AGC GGC TGC GGC CGC GGC AGC GGC GGC GGC GGC CGC GGC ACCGGGAGGC @@ -473,8 +473,8 @@ GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG GTGCGGCAGC G CGG CGG CGG CGG CGG CGG CGG CGG CGG CGG AGG CGG CGG CGG CGG CGG CGG CGG CGG CGG C TGGGCCTCGA FRAXE_AFF2 -chrX 148500604 148500753 GCC STRchive -chrX 148500604 148500753 GCC TRGT -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC -GTGTGATGCT GCC GCG GCC GCC GCC GCC GCC TGTGCA GCC GCT GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCC GCT GCC GCC CCGGCT GCC GC GCC GC GCC GCT GCC TCT GCC CCG GCC GCC CCC GCC GCC GCT GCC GCC GCC G GCCCGCAGCC +chrX 148500604 148500753 CCG STRchive +chrX 148500604 148500753 CCG TRGT +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC +GTGTGATGCT G CCG CGG CCG CCG CCG CCG CCTGTGCAG CCG CTG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CCG CTG CCG CC CCG GCTG CCG CG CCG CG CCG CTGCCTCTGCC CCG G CCG CCC CCG CCG CCG CTG CCG CCG CCG GCCCGCAGCC diff --git a/scripts/check-loci.py b/scripts/check-loci.py index cec4b52e..a2945661 100644 --- a/scripts/check-loci.py +++ b/scripts/check-loci.py @@ -127,67 +127,176 @@ def normalise_str(in_dna): return min(all_possible) -def get_new_motif(motif, gene_strand): +def get_canonical_motifs(schema): """ Args: - motif (string) + schema (dict): the loci JSON schema + Returns: + list: canonical motifs from schema file + >>> get_canonical_motifs({"canonical_motifs": ["CAG", "CCG"]}) + ['CAG', 'CCG'] + """ + return schema.get("canonical_motifs", []) + +def standardise_motif(motif, canonical_motifs): + """ + Args: + motif (str) + Returns: + str: motif rewritten to the preferred standard arrangement if possible + >>> test_motifs = ["CAG", "CCG", "CGG", "CTG", "GCN", "CAA", "TTTCA", "AAATG"] + >>> standardise_motif('GCC', test_motifs) + 'CCG' + >>> standardise_motif('CGC', test_motifs) + 'CCG' + >>> standardise_motif('CAG', test_motifs) + 'CAG' + >>> standardise_motif('XYZ', test_motifs) + 'XYZ' + """ + if motif is None or len(motif) == 0: + return motif + motif = motif.upper() + for canonical_motif in canonical_motifs: + canonical_motif = canonical_motif.upper() + + if len(motif) != len(canonical_motif): + continue + + if canonical_motif in circular_permuted(motif): + return canonical_motif + + return motif + +def get_other_motif(reference_motif, gene_motif, gene_strand, canonical_motifs): + """ + If only one of reference_motif or gene_motif is provided, infer the other from the gene strand. If both are provided, check that they are consistent with each other and the gene strand, and if they are inconsistent update the ref motif to match the gene motif. + + Args: + reference_motif (string) + gene_motif (string) gene_strand: either + or - Returns: - the normalized output of the string from ref to gene orientation - Get the new normalized motif for each row. - If gene_strand is +, reference orientation = gene orientation - If gene_strand is -, reverse_complement ref_ori for gene_ori - >>> get_new_motif('GAG', '+') - 'AGG' - >>> get_new_motif('GAG', '-') - 'CCT' - >>> get_new_motif('TCATC', '-') - 'AGATG' - >>> get_new_motif('TAG', 'plus') + (reference_motif, gene_motif) + + If gene_strand is +, gene orientation copies reference orientation. + If gene_strand is -, gene orientation is the reverse complement of reference orientation. + + >>> test_motifs = ["CAG", "CCG", "CGG", "CTG", "GCN", "CAA", "TTTCA", "AAATG"] + >>> get_other_motif('CCG', None, '+', test_motifs) + ('CCG', 'CCG') + >>> get_other_motif('CCG', None, '-', test_motifs) + ('CCG', 'CGG') + >>> get_other_motif('CAG', None, '-', test_motifs) + ('CAG', 'CTG') + >>> get_other_motif('TAG', None, 'plus', test_motifs) Traceback (most recent call last): ... AssertionError: Gene strand plus is not +/- """ - if gene_strand == "+": - normalized_motif = normalise_str(motif) - elif gene_strand == "-": - seq = Seq(motif) - reverse_comp = str(seq.reverse_complement()) - normalized_motif = normalise_str(reverse_comp) - else: - raise AssertionError(f'Gene strand {gene_strand} is not +/-') - return normalized_motif - -def check_motif_orientation(record): + # If gene motif is missing, infer it from the reference motif and gene strand + if gene_motif is None or gene_motif == "" and reference_motif is not None and reference_motif != "": + if gene_strand == "+": + return reference_motif, reference_motif + elif gene_strand == "-": + seq = Seq(reference_motif) + return reference_motif, str(seq.reverse_complement()) + else: + raise AssertionError(f'Gene strand {gene_strand} is not +/-') + # Check the gene_motif against the canonical motifs + gene_motif = standardise_motif(gene_motif, canonical_motifs) + + # Infer the reference motif from the gene motif and gene strand + if gene_motif is not None and gene_motif != "": + if gene_strand == "+": + return gene_motif, gene_motif + elif gene_strand == "-": + seq = Seq(gene_motif) + return str(seq.reverse_complement()), gene_motif + else: + raise AssertionError(f'Gene strand {gene_strand} is not +/-') + + return reference_motif, gene_motif + +def check_motif_orientation(record, canonical_motifs): """ Args: record (dict): a dictionary containing a single locus from the STRchive json Returns: - record (dict): the record with any motif fields with incorrect orientation updated + record (dict): the record with motif reference orientations standardized + and gene orientations recalculated from the standardized reference motifs """ + field_pairs = [ ('pathogenic_motif_reference_orientation', 'pathogenic_motif_gene_orientation'), ('benign_motif_reference_orientation', 'benign_motif_gene_orientation'), ('unknown_motif_reference_orientation', 'unknown_motif_gene_orientation'), ('interruption_reference_orientation', 'interruption_gene_orientation') ] + for ref_field, gene_field in field_pairs: + # If one in the pair is missing, infer it from the other. + # If both are present, ensure that they are consistent with each other and the gene strand, and update them if not. Gene motif will overwrite ref motif if they are inconsistent. + if record[ref_field] is None: continue - old = record[gene_field] - new = [get_new_motif(x, record['gene_strand']) for x in record[ref_field]] - if old != new: - for old_motif, new_motif in zip(old, new): + + old_ref_motifs = record[ref_field] + old_gene_motifs = record[gene_field] + + assert isinstance(old_ref_motifs, list), f"{ref_field} should be a list in record {record['id']}" + assert isinstance(old_gene_motifs, list), f"{gene_field} should be a list in record {record['id']}" + + if len(old_ref_motifs) != len(old_gene_motifs): + # Add Nones to the shorter list so they are the same length + if len(old_ref_motifs) < len(old_gene_motifs): + old_ref_motifs = old_ref_motifs + [None] * (len(old_gene_motifs) - len(old_ref_motifs)) + else: + old_gene_motifs = old_gene_motifs + [None] * (len(old_ref_motifs) - len(old_gene_motifs)) + + new_ref_motifs = [] + new_gene_motifs = [] + for old_ref_motif, old_gene_motif in zip(old_ref_motifs, old_gene_motifs): + new_ref_motif, new_gene_motif = get_other_motif(old_ref_motif, old_gene_motif, record['gene_strand'], canonical_motifs) + + new_ref_motifs.append(new_ref_motif) + new_gene_motifs.append(new_gene_motif) + + if old_ref_motifs != new_ref_motifs: + for old_motif, new_motif in zip(old_ref_motifs, new_ref_motifs): if old_motif != new_motif: - sys.stderr.write(f'Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n') - record[gene_field] = new + sys.stderr.write( + f"Updating {record['id']} {ref_field} from {old_motif} to {new_motif}\n" + ) + record[ref_field] = new_ref_motifs + + if old_gene_motifs != new_gene_motifs: + for old_motif, new_motif in zip(old_gene_motifs, new_gene_motifs): + if old_motif != new_motif: + sys.stderr.write( + f"Updating {record['id']} {gene_field} from {old_motif} to {new_motif}\n" + ) + record[gene_field] = new_gene_motifs + + # Update the reference motif to canonical + old_ref = record['reference_motif_reference_orientation'] + new_ref = [] + for motif in old_ref: + new_motif = standardise_motif(motif, canonical_motifs) + if motif != new_motif: + sys.stderr.write(f"Updating {record['id']} reference motif from {motif} to {new_motif}\n") + new_ref.append(new_motif) + record['reference_motif_reference_orientation'] = new_ref # Replace locus_structure with a string of the motifs in reference orientation - # example [ { "motif": "CAGG", "count": null, "type": "pathogenic_repeat" } ] if record['locus_structure'] is None: record['locus_structure'] = [] for motif in record['pathogenic_motif_reference_orientation']: - record['locus_structure'].append({"motif": motif, "count": None, "type": "pathogenic_repeat"}) + record['locus_structure'].append({ + "motif": motif, + "count": None, + "type": "pathogenic_repeat" + }) return record @@ -387,6 +496,10 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, with open(json_schema, 'r') as schema_file: schema = json.load(schema_file) + canonical_motifs = [] + if schema is not None: + canonical_motifs = get_canonical_motifs(schema) + # Fixes to individual records for record in data: @@ -398,7 +511,7 @@ def main(json_fname, json_schema = None, curations_json = None, out_json = None, # Check if the field contains a string that should be a list record = check_list_fields(record) - record = check_motif_orientation(record) + record = check_motif_orientation(record, canonical_motifs) # Update disease association tags based on curations if curations_json: