From 270547974ad2066d18e848ac895bba1b026f6c76 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 2 Jun 2026 14:14:33 -0600 Subject: [PATCH 01/29] Clean up citations --- data/STRchive-loci.json | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 30e17c29..946e0bab 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -551,7 +551,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 12.0, "typ_age_onset_max": 48.0, - "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@doi:10.21203/rs.3.rs-5989910/v1]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]", + "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. 
Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]", "mechanism": "GoF", "mechanism_detail": "Transdominant mechanism theorized [@pmid:38467784].", "year": "2000 [@pmid:11017075]", @@ -590,7 +590,7 @@ "webstr_hg19": ["STR_909210"], "locus_tags": ["somatic_instability", "anticipation", "paternal_expansion", "length_affects_onset", "length_affects_penetrance", "length_affects_phenotype", "motif_affects_instability", "motif_affects_penetrance", "motif_affects_phenotype"], "disease_tags": ["spinocerebellar_ataxia"], - "references": ["genereviews:NBK1175", "pmid:38467784", "pmid:36199580", "pmid:19234597", "pmid:24318420", "doi:10.21203/rs.3.rs-5989910/v1", "pmid:36092952", "pmid:11017075", "mondo:0011330"], + "references": ["genereviews:NBK1175", "pmid:38467784", "pmid:36199580", "pmid:19234597", "pmid:24318420", "pmid:41229449", "pmid:36092952", "pmid:11017075", "mondo:0011330"], "additional_literature": ["pmid:41229449", "pmid:41074692", "pmid:40900235", "pmid:40898875", "pmid:40488180", "pmid:40067487", "pmid:39820777", "pmid:38961870", "pmid:38832639", "pmid:35103298", "pmid:34970537", "pmid:33502644", "pmid:32520333", "pmid:32160188", "pmid:31737797", "pmid:31445906", "pmid:31342269", "pmid:29922950", "pmid:29316893", "pmid:28890930", "pmid:28423040", "pmid:27248057", "pmid:26374734", "pmid:26295943", "pmid:26077168", "pmid:26039897", "pmid:25466696", "pmid:24278426", 
"pmid:24269018", "pmid:23443018", "pmid:23083689", "pmid:23026538", "pmid:22065565", "pmid:22053702", "pmid:21282659", "pmid:20065034", "pmid:19651850", "pmid:19306311", "pmid:19171184", "pmid:19147916", "pmid:17961920", "pmid:17846122", "pmid:16924013", "pmid:16498633", "pmid:16385455", "pmid:15505178", "pmid:15201271", "pmid:15148151", "pmid:15096564", "pmid:12764052", "pmid:12589756", "pmid:11839840", "pmid:11160961", "pmid:9973298"] }, { @@ -820,7 +820,7 @@ "hpo_terms": null, "prevalence": "0.5/100000", "prevalence_details": "<1/100,000 [@pmid:29100084]; expansion in 1:100-1200 chromosomes [@genereviews:NBK1268]. Found across ethnicities/ancestries, with population-dependent prevalence [@genereviews:NBK1268].", - "age_onset": "Typical: third to fifth decade (20-49); Range: 0 [@genereviews:NBK1268] - 76 [@doi:10.1101/gr.279634.124].", + "age_onset": "Typical: third to fifth decade (20-49); Range: 0 [@genereviews:NBK1268] - 76 [@pmid:40015980].", "age_onset_min": 0.0, "age_onset_max": 76.0, "typ_age_onset_min": 20.0, @@ -874,7 +874,7 @@ "webstr_hg19": ["Expansion_SCA8/ATXN8"], "locus_tags": ["somatic_instability", "anticipation", "maternal_expansion", "motif_affects_onset", "motif_affects_penetrance"], "disease_tags": ["spinocerebellar_ataxia"], - "references": ["genereviews:NBK1268", "doi:10.1101/gr.279634.124", "omim:608768", "pmid:16804541", "pmid:20373340", "pmid:28451643", "pmid:34632710", "pmid:29100084", "pmid:10192387", "mondo:0012116"], + "references": ["genereviews:NBK1268", "pmid:40015980", "omim:608768", "pmid:16804541", "pmid:20373340", "pmid:28451643", "pmid:34632710", "pmid:29100084", "pmid:10192387", "mondo:0012116"], "additional_literature": ["pmid:41771688", "pmid:41762523", "pmid:41353794", "pmid:41082794", "pmid:41079917", "pmid:41074692", "pmid:41001200", "pmid:40906330", "pmid:40890648", "pmid:40844737", "pmid:40765612", "pmid:40488180", "pmid:40007153", "pmid:38961870", "pmid:38227102", "pmid:38165578", "pmid:38152578", "pmid:37906407", 
"pmid:37848721", "pmid:37146135", "pmid:37003406", "pmid:36703300", "pmid:36530930", "pmid:34622207", "pmid:34600502", "pmid:34284285", "pmid:33526774", "pmid:33502644", "pmid:31471687", "pmid:30109267", "pmid:29316893", "pmid:29111027", "pmid:28782341", "pmid:28229454", "pmid:27896316", "pmid:26374734", "pmid:26077168", "pmid:25466696", "pmid:23711133", "pmid:23026538", "pmid:22581592", "pmid:22520093", "pmid:22297462", "pmid:22053702", "pmid:21173221", "pmid:20403608", "pmid:19259763", "pmid:19229559", "pmid:18684474", "pmid:18418692", "pmid:17961920", "pmid:17005861", "pmid:16184604", "pmid:16054804", "pmid:15553088", "pmid:15148151", "pmid:15080863", "pmid:14972680", "pmid:14966163", "pmid:14960773", "pmid:14756671", "pmid:12838526", "pmid:12764052", "pmid:12545428", "pmid:12505613", "pmid:12470185", "pmid:12431257", "pmid:12372061", "pmid:12140678", "pmid:12042281", "pmid:11939898", "pmid:11839840", "pmid:11807410", "pmid:11708995", "pmid:11591855", "pmid:11448300", "pmid:11160961", "pmid:11121196", "pmid:11102643", "pmid:11030410", "pmid:10976642", "pmid:10958651", "pmid:10785256", "pmid:10712198", "pmid:10700168", "pmid:10690991"] }, { @@ -1033,7 +1033,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 43.0, "typ_age_onset_max": 52.0, - "details": "The intermediate range (19-20 motifs) [@doi:10.1212/NXG.0000000000200245; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@doi:10.1212/NXG.0000000000200245]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@doi:10.1212/NXG.0000000000200245]. 
Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733].", + "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansions associated increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", "year": "1997 [@pmid:8988170]", @@ -1072,7 +1072,7 @@ "webstr_hg19": ["Expansion_SCA6/CACNA1A"], "locus_tags": ["length_affects_phenotype", "length_affects_penetrance", "length_affects_onset"], "disease_tags": ["spinocerebellar_ataxia"], - "references": ["genereviews:NBK1140", "pmid:23331413", "doi:10.1212/NXG.0000000000200245", "pmid:29100084", "pmid:8988170", "mondo:0008457", "pmid:41358280"], + "references": ["genereviews:NBK1140", "pmid:23331413", "pmid:39996131", "pmid:29100084", "pmid:8988170", "mondo:0008457", "pmid:41358280"], "additional_literature": ["pmid:42038259", "pmid:41951733", "pmid:41771688", "pmid:41762523", "pmid:41612618", "pmid:41082794", 
"pmid:41009775", "pmid:40906330", "pmid:40900235", "pmid:40879304", "pmid:40746751", "pmid:40488180", "pmid:40189664", "pmid:39812846", "pmid:39571249", "pmid:39152783", "pmid:39048885", "pmid:38961870", "pmid:38227102", "pmid:38165578", "pmid:38152578", "pmid:37848721", "pmid:37307504", "pmid:37301203", "pmid:36618024", "pmid:36599645", "pmid:36530930", "pmid:35962273", "pmid:35188716", "pmid:35182509", "pmid:35052497", "pmid:34647648", "pmid:34600502", "pmid:34565721", "pmid:34371182", "pmid:34159894", "pmid:33502644", "pmid:33121221", "pmid:32888184", "pmid:32822634", "pmid:31522753", "pmid:30891880", "pmid:30591349", "pmid:30342765", "pmid:30314815", "pmid:30120431", "pmid:30078120", "pmid:29959555", "pmid:29553382", "pmid:29367260", "pmid:29316893", "pmid:29249939", "pmid:29111027", "pmid:29057148", "pmid:28946818", "pmid:28782341", "pmid:28585930", "pmid:28444220", "pmid:28131213", "pmid:27979829", "pmid:27896316", "pmid:27848087", "pmid:27806289", "pmid:27412786", "pmid:27400454", "pmid:27333979", "pmid:26730403", "pmid:26377379", "pmid:26374734", "pmid:26354989", "pmid:26077168", "pmid:26054379", "pmid:25634432", "pmid:25624155", "pmid:25466696", "pmid:24780882", "pmid:24534762", "pmid:24486772", "pmid:24209901", "pmid:23423669", "pmid:23407676", "pmid:23368522", "pmid:23026538", "pmid:22520093", "pmid:26859398", "pmid:26676458", "pmid:21832228", "pmid:21550405", "pmid:20069235", "pmid:19631275", "pmid:19429075", "pmid:19259763", "pmid:19224313", "pmid:18949263", "pmid:18759344", "pmid:18687887", "pmid:18685131", "pmid:18684474", "pmid:18506570", "pmid:18418678", "pmid:18285829", "pmid:18074367", "pmid:17961920", "pmid:17682009", "pmid:17516099", "pmid:17420317", "pmid:16396623", "pmid:16389595", "pmid:16310805", "pmid:16000334", "pmid:15875905", "pmid:15747371", "pmid:15553088", "pmid:15148151", "pmid:15080863", "pmid:15026782", "pmid:14967767", "pmid:14966163", "pmid:14756671", "pmid:14534930", "pmid:12810491", "pmid:12764052", "pmid:12676347", 
"pmid:12614315", "pmid:12545428", "pmid:11939898", "pmid:11889231", "pmid:11839840", "pmid:11804332", "pmid:11717352", "pmid:11708993", "pmid:11448300", "pmid:11355155", "pmid:11341481", "pmid:11311290", "pmid:11176970", "pmid:11160961", "pmid:10369884", "pmid:11081813", "pmid:11030410", "pmid:10985694", "pmid:10964945", "pmid:10945665", "pmid:10942107", "pmid:10894992", "pmid:10785256", "pmid:10768629", "pmid:10766906", "pmid:10690991", "pmid:10674974", "pmid:10601803", "pmid:10453742", "pmid:10442462", "pmid:10369863", "pmid:10369828", "pmid:10366652", "pmid:10225349", "pmid:9973298", "pmid:9915947", "pmid:9879686", "pmid:9855520", "pmid:9779664", "pmid:9758625", "pmid:9741473", "pmid:9696528", "pmid:9674805", "pmid:9613852", "pmid:9600677", "pmid:9559993", "pmid:9507387", "pmid:9436730", "pmid:9385362", "pmid:9371901", "pmid:9371900", "pmid:9403486", "pmid:9403480", "pmid:9345107", "pmid:9339681", "pmid:9302278", "pmid:9311738", "pmid:9259275", "pmid:9259274", "pmid:10464657", "pmid:9043864"] }, { @@ -1738,7 +1738,7 @@ "age_onset_max": 74.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 30.0, - "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @doi:10.1016/j.mcp.2024.102005]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@doi:10.1093/hmg/ddae186].", + "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. 
Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849].", "mechanism": "GoF", "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded-repeat RNA sequesters the muscleblind-like (MBNL) family of RNA-binding proteins as part of the disruption of pre-mRNA processing, contributing to cardiac phenotypes [@pmid:39932794]. Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. 
Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", "year": "1992 [@pmid:1310900]", @@ -1777,7 +1777,7 @@ "webstr_hg19": ["Expansion_DM1/DMPK"], "locus_tags": ["somatic_instability", "anticipation", "maternal_expansion", "length_affects_onset", "length_affects_phenotype", "length_affects_severity", "motif_affects_instability", "motif_affects_onset", "motif_affects_phenotype", "motif_affects_severity"], "disease_tags": ["myotonic_dystrophy"], - "references": ["genereviews:NBK1165", "pmid:38454488", "pmid:36169768", "pmid:39932794", "pmid:40259070", "pmid:39643839", "pmid:32851192", "doi:10.1016/j.mcp.2024.102005", "pmid:35741732", "doi:10.1093/hmg/ddae186", "pmid:29100084", "pmid:31159885", "pmid:35483324", "pmid:1310900", "mondo:0008056", "pmid:29361396", "pmid:8810716", "pmid:27695335", "pmid:29871899", "pmid:37209486"], + "references": ["genereviews:NBK1165", "pmid:38454488", "pmid:36169768", "pmid:39932794", "pmid:40259070", "pmid:39643839", "pmid:32851192", "pmid:39710066", "pmid:35741732", "pmid:39679849", "pmid:29100084", "pmid:31159885", "pmid:35483324", "pmid:1310900", "mondo:0008056", "pmid:29361396", "pmid:8810716", "pmid:27695335", "pmid:29871899", "pmid:37209486"], "additional_literature": ["pmid:41996006", "pmid:41974889", "pmid:41951733", "pmid:41946260", "pmid:41855125", "pmid:41848171", "pmid:41766784", "pmid:41762523", "pmid:41722569", "pmid:41710065", "pmid:41707138", "pmid:41672630", "pmid:41610137", "pmid:41533635", "pmid:41379996", "pmid:41260620", "pmid:41250834", "pmid:41226829", "pmid:41212113", "pmid:41161721", "pmid:41074692", "pmid:40903903", "pmid:40896579", "pmid:40879030", "pmid:40712995", "pmid:40606545", "pmid:40599975", "pmid:40417743", "pmid:40296143", "pmid:40113266", "pmid:40092662", "pmid:40004498", "pmid:39710066", "pmid:39679849", "pmid:39492694", "pmid:39433769", "pmid:39415708", "pmid:39391712", "pmid:39383229", "pmid:39278936", "pmid:39267217", 
"pmid:39273681", "pmid:39232665", "pmid:39180495", "pmid:39126705", "pmid:38709060", "pmid:38704930", "pmid:38490135", "pmid:38314057", "pmid:37829280", "pmid:37744174", "pmid:37645891", "pmid:37638448", "pmid:37521782", "pmid:37397246", "pmid:37373276", "pmid:37352653", "pmid:37200862", "pmid:37146135", "pmid:37143315", "pmid:36892629", "pmid:36778282", "pmid:36701310", "pmid:36627397", "pmid:36352383", "pmid:36230978", "pmid:36222125", "pmid:36099027", "pmid:36084803", "pmid:36011377", "pmid:35770133", "pmid:35767654", "pmid:35567413", "pmid:35328504", "pmid:35243403", "pmid:35182509", "pmid:34976437", "pmid:34915310", "pmid:34513303", "pmid:34472530", "pmid:34432028", "pmid:34386887", "pmid:34372915", "pmid:34371182", "pmid:34262431", "pmid:34114350", "pmid:34025359", "pmid:33682722", "pmid:33624941", "pmid:33575482", "pmid:33526774", "pmid:33497365", "pmid:33363709", "pmid:33362853", "pmid:33235377", "pmid:32929188", "pmid:32823742", "pmid:32717741", "pmid:32656337", "pmid:32607474", "pmid:32350131", "pmid:32203199", "pmid:32109384", "pmid:32063450", "pmid:31996899", "pmid:31873063", "pmid:31759551", "pmid:31649961", "pmid:31624084", "pmid:31570586", "pmid:31395669", "pmid:31334355", "pmid:31316546", "pmid:31253581", "pmid:31227653", "pmid:31220271", "pmid:31164682", "pmid:31027145", "pmid:30891637", "pmid:30700578", "pmid:30615214", "pmid:30546383", "pmid:30425655", "pmid:30304901", "pmid:30216892", "pmid:30140252", "pmid:29967337", "pmid:29947794", "pmid:29592894", "pmid:29551391", "pmid:29381654", "pmid:29334465", "pmid:29274549", "pmid:29246312", "pmid:29114849", "pmid:28942489", "pmid:28886202", "pmid:28810563", "pmid:28782311", "pmid:28623239", "pmid:28435090", "pmid:28363916", "pmid:28211918", "pmid:28129118", "pmid:28102759", "pmid:27854230", "pmid:27727437", "pmid:27358583", "pmid:27245480", "pmid:27222292", "pmid:26708183", "pmid:26640575", "pmid:26586700", "pmid:26498872", "pmid:26190529", "pmid:25958258", "pmid:25712547", "pmid:25655594", 
"pmid:25606394", "pmid:25307018", "pmid:25303993", "pmid:25168381", "pmid:24824895", "pmid:24795756", "pmid:24781112", "pmid:24715907", "pmid:24705798", "pmid:24455202", "pmid:24269018", "pmid:24196578", "pmid:24092878", "pmid:23811192", "pmid:23570879", "pmid:23308382", "pmid:26317000", "pmid:23263591", "pmid:23209425", "pmid:23183533", "pmid:23161457", "pmid:23159592", "pmid:23139243", "pmid:22643181", "pmid:22595968", "pmid:22459146", "pmid:22427994", "pmid:22078098", "pmid:22062891", "pmid:21971425", "pmid:21949239", "pmid:21511730", "pmid:21303839", "pmid:21245981", "pmid:21204798", "pmid:21103235", "pmid:20801043", "pmid:20635151", "pmid:20603324", "pmid:20346670", "pmid:20228473", "pmid:20179953", "pmid:20171614", "pmid:20074967", "pmid:19946639", "pmid:19715468", "pmid:19632331", "pmid:19516957", "pmid:19470458", "pmid:18798829", "pmid:18729234", "pmid:18611984", "pmid:18563724", "pmid:18561181", "pmid:18559347", "pmid:18299519", "pmid:18228241", "pmid:18213375", "pmid:17987120", "pmid:17950578", "pmid:17877752", "pmid:17728322", "pmid:17487865", "pmid:17158949", "pmid:17150182", "pmid:17145685", "pmid:17114933", "pmid:16978612", "pmid:16927100", "pmid:16716318", "pmid:16624843", "pmid:16401743", "pmid:16376058", "pmid:16193250", "pmid:16027111", "pmid:15972723", "pmid:15961406", "pmid:15750273", "pmid:15684391", "pmid:15576360", "pmid:15489504", "pmid:15462191", "pmid:15459182", "pmid:15336691", "pmid:15215218", "pmid:15114529", "pmid:15019706", "pmid:14734627", "pmid:14597103", "pmid:12970845", "pmid:12630069", "pmid:12614928", "pmid:12427866", "pmid:11978764", "pmid:11809728", "pmid:11793472", "pmid:11726559", "pmid:11686919", "pmid:11592825", "pmid:11590133", "pmid:11555624", "pmid:11526199", "pmid:11260612", "pmid:11124939", "pmid:11013451", "pmid:11001736", "pmid:10970838", "pmid:10958655", "pmid:10951446", "pmid:10909850", "pmid:10802668", "pmid:10802667", "pmid:10767343", "pmid:10699184", "pmid:10668800", "pmid:10480373", "pmid:10454725", 
"pmid:10435210", "pmid:10332037", "pmid:10332033", "pmid:9950368", "pmid:9887331", "pmid:9858828", "pmid:9668171", "pmid:9537423", "pmid:9402536", "pmid:9401353", "pmid:9371827", "pmid:9294109", "pmid:9241283", "pmid:9241282", "pmid:9207101", "pmid:8948631", "pmid:8923304", "pmid:8673131", "pmid:8659513", "pmid:8784809", "pmid:8595416", "pmid:7626046", "pmid:7590731", "pmid:7726160", "pmid:7896884", "pmid:8288237"] }, { @@ -1927,7 +1927,7 @@ "disease": "Spinocerebellar ataxia 27B", "inheritance": ["AD"], "association_type": ["Mendelian", "Risk"], - "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy [@pmid:39349043; @pmid:42044943]. Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@doi:10.1212/NXG.0000000000200253].", + "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy [@pmid:39349043; @pmid:42044943]. Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@pmid:39996128].", "hpo_terms": null, "prevalence": null, "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. 
Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, and Italy [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898].", @@ -1975,7 +1975,7 @@ "webstr_hg19": [], "locus_tags": ["maternal_expansion", "length_affects_onset", "length_affects_penetrance", "motif_affects_penetrance", "length_affects_phenotype"], "disease_tags": ["spinocerebellar_ataxia"], - "references": ["genereviews:NBK599589", "pmid:39263992", "pmid:36516086", "pmid:37399286", "pmid:39227614", "pmid:40007153", "pmid:40379261", "pmid:38937606", "pmid:39604554", "pmid:41327893", "pmid:41277530", "pmid:38876750", "pmid:38886208", "pmid:37267898", "pmid:36493768", "pmid:39349043", "doi:10.1212/NXG.0000000000200253"], + "references": ["genereviews:NBK599589", "pmid:39263992", "pmid:36516086", "pmid:37399286", "pmid:39227614", "pmid:40007153", "pmid:40379261", "pmid:38937606", "pmid:39604554", "pmid:41327893", "pmid:41277530", "pmid:38876750", "pmid:38886208", "pmid:37267898", "pmid:36493768", "pmid:39349043", "pmid:39996128"], "additional_literature": ["pmid:42055934", "pmid:42044943", "pmid:41698164", "pmid:41504274", "pmid:41118032", "pmid:41065930", "pmid:41055766", "pmid:40974444", "pmid:40906330", "pmid:40898875", "pmid:40894141", "pmid:40879304", "pmid:40835733", "pmid:40679574", "pmid:40637932", "pmid:40623333", "pmid:40579842", "pmid:40556471", "pmid:40488180", "pmid:40273470", "pmid:40239008", "pmid:40191983", "pmid:40141365", "pmid:40024931", "pmid:40017559", "pmid:39996128", "pmid:39821862", "pmid:39801711", "pmid:39723156", "pmid:39666057", "pmid:39666053", "pmid:39574782", "pmid:39571249", "pmid:39392764", "pmid:39378335", "pmid:39152783", "pmid:39006414", "pmid:38976084", "pmid:38949032", "pmid:38866925", "pmid:38816190", "pmid:38513302", "pmid:38487929", "pmid:38472396", "pmid:38405699", "pmid:38381176", "pmid:38221848", "pmid:38170134", "pmid:38150853", "pmid:38058854", "pmid:37916889", 
"pmid:37646005", "pmid:37578187", "pmid:37577458", "pmid:37322040", "pmid:37165652", "pmid:32717741", "pmid:30017992", "pmid:28444220", "pmid:26677414", "pmid:26149656", "pmid:15470364", "pmid:15148151"] }, { @@ -3032,7 +3032,7 @@ "age_onset_max": 70.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@doi:10.1101/2025.03.31.646505]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif.", + "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. 
Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif.", "mechanism": "GoF", "mechanism_detail": "Toxic protein product accumulates in kidneys [@genereviews:NBK153723]", "year": "2013 [@pmid:23396133]", @@ -3071,7 +3071,7 @@ "webstr_hg19": [], "locus_tags": [], "disease_tags": [], - "references": ["pmid:41000883", "genereviews:NBK153723", "genereviews:NBK535148", "pmid:23396133", "pmid:39781475", "doi:10.1101/2025.03.31.646505", "mondo:0020726"], + "references": ["pmid:41000883", "genereviews:NBK153723", "genereviews:NBK535148", "pmid:23396133", "pmid:39781475", "pmid:41285770", "mondo:0020726"], "additional_literature": ["pmid:41961547", "pmid:41832605", "pmid:41345522", "pmid:40244446", "pmid:39848530", "pmid:39576755", "pmid:39325540", "pmid:39314239", "pmid:38605207", "pmid:37547453", "pmid:37456840", "pmid:37316299", "pmid:35982790", "pmid:35497811", "pmid:34641504", "pmid:34452200", "pmid:33672244", "pmid:33001366", "pmid:32451462", "pmid:32293552", "pmid:31213370", "pmid:30593830", "pmid:29520014", "pmid:29328069", "pmid:29217307", "pmid:29156055", "pmid:29052568", "pmid:28581490", "pmid:28407289", "pmid:27957769", "pmid:27036738", "pmid:27367740", "pmid:27340743", "pmid:27157321", "pmid:26943180", "pmid:26838233", "pmid:26693201", "pmid:26692014", "pmid:26498650", "pmid:25753030", "pmid:24718884", "pmid:24509297", "pmid:24416403", "pmid:24246952", "pmid:24233342", "pmid:23778023", "pmid:23770070", "pmid:23652307", "pmid:23317217", "pmid:23259747", "pmid:22970023", "pmid:21998660", "pmid:21385452", "pmid:20876819", "pmid:20562098", "pmid:20470225", "pmid:21637607", "pmid:19811637", "pmid:19625949", "pmid:19534821", "pmid:18619437", "pmid:18094420", "pmid:18021186", "pmid:17974963", "pmid:17694298", "pmid:17581677", "pmid:17203187", "pmid:17050588", "pmid:16711252", "pmid:16631167", 
"pmid:16302687", "pmid:16101182", "pmid:15991935", "pmid:15944787", "pmid:15814824", "pmid:15729696", "pmid:15604091", "pmid:15387710", "pmid:15115750", "pmid:15041735", "pmid:14871854", "pmid:14707484", "pmid:12747745", "pmid:12646731", "pmid:12626424", "pmid:12090474", "pmid:11923240", "pmid:11445551", "pmid:11391628", "pmid:11358826", "pmid:11295060", "pmid:11169964", "pmid:10797294", "pmid:10741704", "pmid:10653872", "pmid:10652432", "pmid:10541345", "pmid:10430099", "pmid:10389761", "pmid:10383817", "pmid:10235488", "pmid:10206297", "pmid:10052816", "pmid:10024673", "pmid:10022471", "pmid:9935162", "pmid:9823312", "pmid:9755875", "pmid:9727561", "pmid:9690452", "pmid:9591045", "pmid:9579805", "pmid:9575675", "pmid:9551361", "pmid:9427605", "pmid:9419405", "pmid:8967520", "pmid:8643693", "pmid:7594478", "pmid:8579785", "pmid:8567787", "pmid:8519447", "pmid:7628867", "pmid:7816840", "pmid:7946402", "pmid:7514493", "pmid:7690213", "pmid:7685318"] }, { @@ -3308,7 +3308,7 @@ "typ_age_onset_max": 70.0, "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. 
Detection may be challenging due to parology between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19.", "mechanism": "GoF", - "mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. Proposed mechanisms include toxic uN2CpolyG/polyglycine aggregation, RNA pathogenicity, impaired autophagy, mitochondrial dysfunction, and innate immune activation [@pmid:42058219]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@doi:10.1186/s12964-025-02079-1]. Tau pathology is evident, changes in p-tau levels and tau deposition have been reported [@pmid:41539185]. Expanded polyG proteins also induce nucleolar stress through interaction with NPM1 and rRNA. This disrupts ribosomal homeostasis and alters 3D chromatin organization through reduced CTCF/RAD21 expression [@pmid:41942455].", + "mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. Proposed mechanisms include toxic uN2CpolyG/polyglycine aggregation, RNA pathogenicity, impaired autophagy, mitochondrial dysfunction, and innate immune activation [@pmid:42058219]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@pmid:39920690]. Tau pathology is evident, changes in p-tau levels and tau deposition have been reported [@pmid:41539185]. Expanded polyG proteins also induce nucleolar stress through interaction with NPM1 and rRNA. 
This disrupts ribosomal homeostasis and alters 3D chromatin organization through reduced CTCF/RAD21 expression [@pmid:41942455].", "year": "2019 [@pmid:31332380]", "location_in_gene": "5' UTR", "gene_strand": "+", @@ -3345,7 +3345,7 @@ "webstr_hg19": [], "locus_tags": ["somatic_instability", "paternal_expansion", "length_affects_onset", "length_affects_phenotype", "motif_affects_onset", "motif_affects_phenotype"], "disease_tags": ["phenotypic_spectrum"], - "references": ["omim:603472", "pmid:37090934", "pmid:37305750", "pmid:36169768", "pmid:38467784", "doi:10.1186/s12964-025-02079-1", "pmid:41539185", "genereviews:NBK535148", "pmid:39055960", "pmid:39496005", "pmid:31178126", "pmid:38377026", "pmid:34718964", "pmid:35245110", "pmid:37371433", "pmid:38876750", "pmid:31332380", "mondo:0011327"], + "references": ["omim:603472", "pmid:37090934", "pmid:37305750", "pmid:36169768", "pmid:38467784", "pmid:39920690", "pmid:41539185", "genereviews:NBK535148", "pmid:39055960", "pmid:39496005", "pmid:31178126", "pmid:38377026", "pmid:34718964", "pmid:35245110", "pmid:37371433", "pmid:38876750", "pmid:31332380", "mondo:0011327"], "additional_literature": ["pmid:42058219", "pmid:42033810", "pmid:42021030", "pmid:42015293", "pmid:42005169", "pmid:41964975", "pmid:41942455", "pmid:41929501", "pmid:41888971", "pmid:41882342", "pmid:41862851", "pmid:41792844", "pmid:41762523", "pmid:41756172", "pmid:41731582", "pmid:41688968", "pmid:41634634", "pmid:41556371", "pmid:41526374", "pmid:41235412", "pmid:41154122", "pmid:41074692", "pmid:40934004", "pmid:40879637", "pmid:40765612", "pmid:40708231", "pmid:40645757", "pmid:40635536", "pmid:40609325", "pmid:40517194", "pmid:40515658", "pmid:40514451", "pmid:40267536", "pmid:40084170", "pmid:39936620", "pmid:39920690", "pmid:39609868", "pmid:39529621", "pmid:39505310", "pmid:39492694", "pmid:39418922", "pmid:39167540", "pmid:39078482", "pmid:38779172", "pmid:38667292", "pmid:38579412", "pmid:38477063", "pmid:38288273", "pmid:38145851", 
"pmid:37975799", "pmid:37923380", "pmid:37864208", "pmid:37823700", "pmid:37644522", "pmid:37365282", "pmid:37271829", "pmid:37237429", "pmid:37184590", "pmid:37131242", "pmid:37001413", "pmid:36948577", "pmid:36942588", "pmid:36825461", "pmid:36823368", "pmid:36809423", "pmid:36715780", "pmid:36672065", "pmid:36621630", "pmid:36588885", "pmid:36570826", "pmid:36545534", "pmid:36483830", "pmid:36458450", "pmid:36417528", "pmid:36263606", "pmid:36216675", "pmid:36207023", "pmid:36191230", "pmid:36172483", "pmid:36150977", "pmid:36086903", "pmid:36061987", "pmid:36041634", "pmid:36033605", "pmid:35974122", "pmid:35866887", "pmid:35857137", "pmid:35838850", "pmid:35788208", "pmid:35772299", "pmid:35700120", "pmid:35419641", "pmid:35411397", "pmid:35402653", "pmid:35366689", "pmid:35314910", "pmid:35297556", "pmid:35180462", "pmid:35152460", "pmid:35148830", "pmid:35147270", "pmid:34927285", "pmid:34797461", "pmid:34774111", "pmid:34750918", "pmid:34694469", "pmid:34675106", "pmid:34641814", "pmid:34392981", "pmid:34306035", "pmid:34243731", "pmid:34054431", "pmid:34017298", "pmid:33943039", "pmid:33887199", "pmid:33871559", "pmid:33871549", "pmid:33766934", "pmid:33693509", "pmid:33679585", "pmid:33626493", "pmid:33625684", "pmid:33388663", "pmid:33377220", "pmid:33377207", "pmid:33239111", "pmid:33201994", "pmid:33201988", "pmid:33146692", "pmid:33146671", "pmid:33026126", "pmid:33016348", "pmid:32989102", "pmid:32931575", "pmid:32852534", "pmid:32827029", "pmid:32817896", "pmid:32777174", "pmid:32768149", "pmid:32602554", "pmid:32535679", "pmid:32516806", "pmid:32495371", "pmid:32449905", "pmid:32268889", "pmid:32250060", "pmid:32081467", "pmid:32039647", "pmid:31886491", "pmid:31819945", "pmid:31433517", "pmid:31413119", "pmid:31332381"] }, { @@ -4966,19 +4966,19 @@ "disease": "Congenital Progressive Universal Melanosis", "inheritance": ["AR"], "association_type": ["Mendelian"], - "disease_description": "CPUM is characterized by progressive widespread 
hyperpigmentation beginning at birth without accompanying symptoms. Studied children with CPUM have been born to unaffected parents. The children studied have been born with diffuse, universal, and progressive hyperpigmentaion at 15 years of age. At this time the hyperpigmentation had fully progressed [@doi:10.3892/br.2025.2016]. It is not entirely clear that this is a distinct disease as it shares features with acquired universal melanosis (most similar), erythema dyschromicum perstans, lichen planus pigmentosus, and familial progressive hypigmentation.", + "disease_description": "CPUM is characterized by progressive widespread hyperpigmentation beginning at birth without accompanying symptoms. Studied children with CPUM have been born to unaffected parents. The children studied have been born with diffuse, universal, and progressive hyperpigmentation at 15 years of age. At this time the hyperpigmentation had fully progressed [@pmid:40589716]. It is not entirely clear that this is a distinct disease as it shares features with acquired universal melanosis (most similar), erythema dyschromicum perstans, lichen planus pigmentosus, and familial progressive hyperpigmentation.", "hpo_terms": null, "prevalence": null, - "prevalence_details": "Found in two monozygotic twins in Thailand [@doi:10.3892/br.2025.2016].", - "age_onset": "0 (birth) [@doi:10.3892/br.2025.2016]", + "prevalence_details": "Found in two monozygotic twins in Thailand [@pmid:40589716].", + "age_onset": "0 (birth) [@pmid:40589716]", "age_onset_min": 0.0, "age_onset_max": 0.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "A study has identified an intronic GATGGT hexanucleotide tandem repeat in the TYMS gene. Both parents were found to be heterozygous carriers of the expansion, suggesting a recessive inheritance pattern. 
Evidence is limited, only a single family with monozygotic twins have been reoprted and no change in expression of the gene has been observed [@doi:10.3892/br.2025.2016].", + "details": "A study has identified an intronic GATGGT hexanucleotide tandem repeat in the TYMS gene. Both parents were found to be heterozygous carriers of the expansion, suggesting a recessive inheritance pattern. Evidence is limited; only a single family with monozygotic twins has been reported and no change in expression of the gene has been observed [@pmid:40589716].", "mechanism": "LoF", - "mechanism_detail": "Proposed mechanism involves repeat expansions in non-coding regions of the gene, reducing expression in melanocytes or keratinocytes, leading to a disruption in nucleotide balance in DNA repair and hyperpigmentation. Missense mutations disrupt nucleotide metabolsim, resulting in loss-of-function and genome instability [@doi:10.3892/br.2025.2016].", - "year": "2025 [@doi:10.3892/br.2025.2016]", + "mechanism_detail": "Proposed mechanism involves repeat expansions in non-coding regions of the gene, reducing expression in melanocytes or keratinocytes, leading to a disruption in nucleotide balance in DNA repair and hyperpigmentation. 
Missense mutations disrupt nucleotide metabolism, resulting in loss-of-function and genome instability [@pmid:40589716].", + "year": "2025 [@pmid:40589716]", "location_in_gene": "Intron 3", "gene_strand": "-", "reference_motif_reference_orientation": ["GATGGT"], @@ -5014,7 +5014,7 @@ "webstr_hg19": [], "locus_tags": [], "disease_tags": [], - "references": ["doi:10.3892/br.2025.2016"], + "references": ["pmid:40589716"], "additional_literature": ["pmid:41507280", "pmid:41181325", "pmid:41074680", "pmid:41024012", "pmid:40890629", "pmid:40589716", "pmid:40195828", "pmid:38625071", "pmid:38411001", "pmid:38379425", "pmid:38311638", "pmid:38280392", "pmid:38134936", "pmid:36398398", "pmid:36209056", "pmid:35608245", "pmid:35233526", "pmid:35055435", "pmid:34650802", "pmid:34526668", "pmid:34197619", "pmid:34149004", "pmid:33805940", "pmid:33086767", "pmid:32813677", "pmid:32695278", "pmid:32612964", "pmid:32110891", "pmid:31817852", "pmid:31370354", "pmid:31161452", "pmid:30838312", "pmid:30823845", "pmid:30533396", "pmid:30464574", "pmid:30122542", "pmid:29995270", "pmid:29660633", "pmid:29394274", "pmid:29321350", "pmid:29162511", "pmid:28895423", "pmid:28349270", "pmid:28280649", "pmid:28074308", "pmid:28074022", "pmid:27995989", "pmid:27868347", "pmid:27864985", "pmid:27685916", "pmid:27375073", "pmid:29767611", "pmid:26745074", "pmid:26621114", "pmid:26277606", "pmid:26196219", "pmid:26189437", "pmid:25955097", "pmid:25648260", "pmid:25536611", "pmid:25366766", "pmid:25341694", "pmid:25294632", "pmid:25279663", "pmid:25258183", "pmid:25246386", "pmid:25028118", "pmid:25007187", "pmid:24726028", "pmid:24686188", "pmid:24641398", "pmid:24596472", "pmid:24554028", "pmid:24422758", "pmid:24415354", "pmid:24302747", "pmid:24137384", "pmid:23968134", "pmid:23571497", "pmid:23481061", "pmid:23232805", "pmid:23226765", "pmid:23148637", "pmid:22799365", "pmid:22763757", "pmid:22576918", "pmid:22086855", "pmid:22044939", "pmid:21630057", "pmid:21449681", "pmid:21269855", 
"pmid:21196216", "pmid:21075014", "pmid:20966539", "pmid:20962453", "pmid:20932673", "pmid:20880668", "pmid:20824655", "pmid:20651387", "pmid:20645403", "pmid:20570968", "pmid:20531375", "pmid:20372856", "pmid:20005374", "pmid:19998340", "pmid:19917450", "pmid:19798689", "pmid:19632929", "pmid:19349389", "pmid:19306093", "pmid:19200948", "pmid:19082493", "pmid:18843018", "pmid:18728661", "pmid:18704422", "pmid:18661526", "pmid:18406541", "pmid:18273818", "pmid:18267032", "pmid:18203297", "pmid:18095031", "pmid:17508355", "pmid:17396161", "pmid:17278107", "pmid:17273745", "pmid:17220568", "pmid:17201138", "pmid:17187508", "pmid:17047490", "pmid:17018589", "pmid:16929515", "pmid:16818689", "pmid:16596248", "pmid:16456808", "pmid:16334126", "pmid:16333305", "pmid:16234002", "pmid:16182121", "pmid:15897576", "pmid:15817609", "pmid:15749593", "pmid:15646842", "pmid:15579479", "pmid:15571262", "pmid:15510613", "pmid:15457444", "pmid:15386371", "pmid:15284183", "pmid:15115918", "pmid:14522928", "pmid:12684695", "pmid:12604405", "pmid:12460463", "pmid:12232785", "pmid:12215845", "pmid:12039668", "pmid:11913730", "pmid:11867566", "pmid:11751507", "pmid:11529907", "pmid:11445856", "pmid:10652619", "pmid:10636923", "pmid:10625460", "pmid:8751943", "pmid:3002689"] }, { From a773fc50912f15a4e7edfb2c7ca0c01f133d3518 Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Tue, 2 Jun 2026 20:25:18 +0000 Subject: [PATCH 02/29] Update data --- data/STRchive-citations.json | 145 ++++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 11 deletions(-) diff --git a/data/STRchive-citations.json b/data/STRchive-citations.json index bdb34f8d..c944ee63 100644 --- a/data/STRchive-citations.json +++ b/data/STRchive-citations.json @@ -163179,21 +163179,127 @@ "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.malacards.org/card/KNS007']' timed out after 3 seconds" }, { - "id": 
"genereviews:NBK1384", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" + "id": "pmid:40015980", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/40015980", + "title": "A prospective trial comparing programmable targeted long-read sequencing and short-read genome sequencing for genetic diagnosis of cerebellar ataxia.", + "type": "article-journal", + "doi": "10.1101/gr.279634.124", + "authors": [ + ["Haloom", "Rafehi"], + ["Liam G", "Fearnley"], + ["Justin", "Read"], + ["Penny", "Snell"], + ["Kayli C", "Davies"], + ["Liam", "Scott"], + ["Greta", "Gillies"], + ["Genevieve C", "Thompson"], + ["Tess A", "Field"], + ["Aleena", "Eldo"], + ["Simon", "Bodek"], + ["Ernest", "Butler"], + ["Luke", "Chen"], + ["John", "Drago"], + ["Himanshu", "Goel"], + ["Anna", "Hackett"], + ["G Michael", "Halmagyi"], + ["Andrew", "Hannaford"], + ["Katya", "Kotschet"], + ["Kishore R", "Kumar"], + ["Smitha", "Kumble"], + ["Matthew", "Lee-Archer"], + ["Abhishek", "Malhotra"], + ["Mark", "Paine"], + ["Michael", "Poon"], + ["Kate", "Pope"], + ["Katrina", "Reardon"], + ["Steven", "Ring"], + ["Anne", "Ronan"], + ["Matthew", "Silsby"], + ["Renee", "Smyth"], + ["Chloe", "Stutterd"], + ["Mathew", "Wallis"], + ["John", "Waterston"], + ["Thomas", "Wellings"], + ["Kirsty", "West"], + ["Christine", "Wools"], + ["Kathy H C", "Wu"], + ["David J", "Szmulewicz"], + ["Martin B", "Delatycki"], + ["Melanie", "Bahlo"], + ["Paul J", "Lockhart"] + ], + "publisher": "Genome research", + "issn": "1549-5469", + "date": "2025-04-14", + "abstract": "The cerebellar ataxias (CAs) are a heterogeneous group of disorders characterized by progressive incoordination. Seventeen repeat expansion (RE) loci have been identified as the primary genetic cause and account for >80% of genetic diagnoses. 
Despite this, diagnostic testing is limited and inefficient, often utilizing single gene assays. This study evaluates the effectiveness of long- and short-read sequencing as diagnostic tools for CA. We recruited 110 individuals (48 females, 62 males) with a clinical diagnosis of CA. Short-read genome sequencing (SR-GS) was performed to identify pathogenic RE and also non-RE variants in 356 genes associated with CA. Independently, long-read sequencing with adaptive sampling (LR-AS) was performed to identify pathogenic RE. SR-GS provided a genetic diagnosis for 38% of the cohort (40/110) including seven non-RE pathogenic variants. RE causes disease in 33 individuals, with the most common condition being SCA27B (", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40015980" }, { - "id": "genereviews:NBK1305", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" + "id": "pmid:39996131", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/39996131", + "title": "Redefining the Pathogenic CAG Repeat Units Threshold in", + "type": "article-journal", + "doi": "10.1212/nxg.0000000000200245", + "authors": [ + ["Yuya", "Hatano"], + ["Tomohiko", "Ishihara"], + ["Sachiko", "Hirokawa"], + ["Hidetoshi", "Date"], + ["Yuji", "Takahashi"], + ["Hidehiro", "Mizusawa"], + ["Osamu", "Onodera"] + ], + "publisher": "Neurology. 
Genetics", + "issn": "2376-7839", + "date": "2025-02-21", + "abstract": "Spinocerebellar ataxia type 6 (SCA6) is caused by expansion of CAG repeat units (RUs) in", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39996131" }, { - "id": "isbn:978-3-031-66932-3", - "manubot_success": false, - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" + "id": "pmid:41285770", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/41285770", + "title": "Targeted sequencing and iterative assembly of near-complete genomes.", + "type": "article-journal", + "doi": "10.1038/s41467-025-65410-x", + "authors": [ + ["Hasindu", "Gamaarachchi"], + ["Igor", "Stevanovski"], + ["Jillian M", "Hammond"], + ["Andre L", "M Reis"], + ["Melissa", "Rapadas"], + ["Kavindu", "Jayasooriya"], + ["Tonia", "Russell"], + ["Dennis", "Yeow"], + ["Yvonne", "Hort"], + ["Chirag", "Patel"], + ["Andrew J", "Mallett"], + ["Elaine", "Stackpoole"], + ["Lauren", "Roman"], + ["Luke W", "Silver"], + ["Carolyn J", "Hogg"], + ["Louise M", "Streeting"], + ["Ozren", "Bogdanovic"], + ["Renata", "Coelho Rodrigues Noronha"], + ["Lu\u00eds Adriano", "Santos do Nascimento"], + ["Adauto", "Lima Cardoso"], + ["Arthur", "Georges"], + ["Haoyu", "Cheng"], + ["Hardip R", "Patel"], + ["Kishore Raj", "Kumar"], + ["Amali C", "Mallawaarachchi"], + ["Ira W", "Deveson"] + ], + "publisher": "Nature communications", + "issn": "2041-1723", + "date": "2025-11-24", + "abstract": "Advances in long-read sequencing (LRS) and assembly algorithms have made it possible to create highly complete genome assemblies for humans, animals and plants. However, ongoing development is needed to improve accessibility, affordability, and assembly quality and completeness. 
'Cornetto' is a new strategy in which we use programmable selective nanopore sequencing to focus LRS data production onto the unsolved regions of a nascent assembly. This improves assembly quality and streamlines the process, both for humans and non-human vertebrates. Cornetto enables us to generate highly complete diploid human genome assemblies using only nanopore LRS data, surpassing the quality of previous efforts at a fraction of the cost. Cornetto enables genome assembly from challenging sample types like human saliva. Finally, we obtain accurate assemblies for clinically-relevant repetitive loci at the extremes of the genome, demonstrating valid approaches for genetic diagnosis in facioscapulohumeral muscular dystrophy (FSHD) and MUC1-autosomal dominant tubulointerstitial kidney disease (MUC1-ADTKD).", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41285770" }, { "id": "pmid:25101480", @@ -163218,4 +163324,21 @@ "manubot_success": false, "link": "https://pubmed.ncbi.nlm.nih.gov/39666847", "note": "WARNING: Couldn't parse Manubot response: list index out of range" +}, +{ + "id": "genereviews:NBK1384", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" +}, +{ + "id": "genereviews:NBK1305", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" +}, +{ + "id": "isbn:978-3-031-66932-3", + "manubot_success": false, + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }] \ No newline at end of file 
From c7949c5c96bce599dc946e5d8d54df4827bec5a4 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 2 Jun 2026 17:42:09 -0600 Subject: [PATCH 03/29] SCA27B_FGF14 edits Update data Soften claim about age of onset by repeat size correlation pmid 42096001 found an insignificant correlation between repeat size and age retrying failed run --- data/STRchive-citations.json | 268 +++++++++++++++++------------------ data/STRchive-loci.json | 4 +- 2 files changed, 136 insertions(+), 136 deletions(-) diff --git a/data/STRchive-citations.json b/data/STRchive-citations.json index c944ee63..774053af 100644 --- a/data/STRchive-citations.json +++ b/data/STRchive-citations.json @@ -162938,6 +162938,129 @@ "language": "eng", "note": "PMID: 20301458\nThis CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: url:https://www.ncbi.nlm.nih.gov/books/NBK1281" }, +{ + "id": "pmid:40015980", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/40015980", + "title": "A prospective trial comparing programmable targeted long-read sequencing and short-read genome sequencing for genetic diagnosis of cerebellar ataxia.", + "type": "article-journal", + "doi": "10.1101/gr.279634.124", + "authors": [ + ["Haloom", "Rafehi"], + ["Liam G", "Fearnley"], + ["Justin", "Read"], + ["Penny", "Snell"], + ["Kayli C", "Davies"], + ["Liam", "Scott"], + ["Greta", "Gillies"], + ["Genevieve C", "Thompson"], + ["Tess A", "Field"], + ["Aleena", "Eldo"], + ["Simon", "Bodek"], + ["Ernest", "Butler"], + ["Luke", "Chen"], + ["John", "Drago"], + ["Himanshu", "Goel"], + ["Anna", "Hackett"], + ["G Michael", "Halmagyi"], + ["Andrew", "Hannaford"], + ["Katya", "Kotschet"], + ["Kishore R", "Kumar"], + ["Smitha", "Kumble"], + ["Matthew", "Lee-Archer"], + ["Abhishek", "Malhotra"], + ["Mark", "Paine"], + ["Michael", "Poon"], + ["Kate", "Pope"], + ["Katrina", "Reardon"], + ["Steven", "Ring"], + ["Anne", "Ronan"], + ["Matthew", "Silsby"], + ["Renee", "Smyth"], 
+ ["Chloe", "Stutterd"], + ["Mathew", "Wallis"], + ["John", "Waterston"], + ["Thomas", "Wellings"], + ["Kirsty", "West"], + ["Christine", "Wools"], + ["Kathy H C", "Wu"], + ["David J", "Szmulewicz"], + ["Martin B", "Delatycki"], + ["Melanie", "Bahlo"], + ["Paul J", "Lockhart"] + ], + "publisher": "Genome research", + "issn": "1549-5469", + "date": "2025-04-14", + "abstract": "The cerebellar ataxias (CAs) are a heterogeneous group of disorders characterized by progressive incoordination. Seventeen repeat expansion (RE) loci have been identified as the primary genetic cause and account for >80% of genetic diagnoses. Despite this, diagnostic testing is limited and inefficient, often utilizing single gene assays. This study evaluates the effectiveness of long- and short-read sequencing as diagnostic tools for CA. We recruited 110 individuals (48 females, 62 males) with a clinical diagnosis of CA. Short-read genome sequencing (SR-GS) was performed to identify pathogenic RE and also non-RE variants in 356 genes associated with CA. Independently, long-read sequencing with adaptive sampling (LR-AS) was performed to identify pathogenic RE. SR-GS provided a genetic diagnosis for 38% of the cohort (40/110) including seven non-RE pathogenic variants. RE causes disease in 33 individuals, with the most common condition being SCA27B (", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40015980" +}, +{ + "id": "pmid:39996131", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/39996131", + "title": "Redefining the Pathogenic CAG Repeat Units Threshold in", + "type": "article-journal", + "doi": "10.1212/nxg.0000000000200245", + "authors": [ + ["Yuya", "Hatano"], + ["Tomohiko", "Ishihara"], + ["Sachiko", "Hirokawa"], + ["Hidetoshi", "Date"], + ["Yuji", "Takahashi"], + ["Hidehiro", "Mizusawa"], + ["Osamu", "Onodera"] + ], + "publisher": "Neurology. 
Genetics", + "issn": "2376-7839", + "date": "2025-02-21", + "abstract": "Spinocerebellar ataxia type 6 (SCA6) is caused by expansion of CAG repeat units (RUs) in", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39996131" +}, +{ + "id": "pmid:41285770", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/41285770", + "title": "Targeted sequencing and iterative assembly of near-complete genomes.", + "type": "article-journal", + "doi": "10.1038/s41467-025-65410-x", + "authors": [ + ["Hasindu", "Gamaarachchi"], + ["Igor", "Stevanovski"], + ["Jillian M", "Hammond"], + ["Andre L", "M Reis"], + ["Melissa", "Rapadas"], + ["Kavindu", "Jayasooriya"], + ["Tonia", "Russell"], + ["Dennis", "Yeow"], + ["Yvonne", "Hort"], + ["Chirag", "Patel"], + ["Andrew J", "Mallett"], + ["Elaine", "Stackpoole"], + ["Lauren", "Roman"], + ["Luke W", "Silver"], + ["Carolyn J", "Hogg"], + ["Louise M", "Streeting"], + ["Ozren", "Bogdanovic"], + ["Renata", "Coelho Rodrigues Noronha"], + ["Lu\u00eds Adriano", "Santos do Nascimento"], + ["Adauto", "Lima Cardoso"], + ["Arthur", "Georges"], + ["Haoyu", "Cheng"], + ["Hardip R", "Patel"], + ["Kishore Raj", "Kumar"], + ["Amali C", "Mallawaarachchi"], + ["Ira W", "Deveson"] + ], + "publisher": "Nature communications", + "issn": "2041-1723", + "date": "2025-11-24", + "abstract": "Advances in long-read sequencing (LRS) and assembly algorithms have made it possible to create highly complete genome assemblies for humans, animals and plants. However, ongoing development is needed to improve accessibility, affordability, and assembly quality and completeness. 'Cornetto' is a new strategy in which we use programmable selective nanopore sequencing to focus LRS data production onto the unsolved regions of a nascent assembly. This improves assembly quality and streamlines the process, both for humans and non-human vertebrates. 
Cornetto enables us to generate highly complete diploid human genome assemblies using only nanopore LRS data, surpassing the quality of previous efforts at a fraction of the cost. Cornetto enables genome assembly from challenging sample types like human saliva. Finally, we obtain accurate assemblies for clinically-relevant repetitive loci at the extremes of the genome, demonstrating valid approaches for genetic diagnosis in facioscapulohumeral muscular dystrophy (FSHD) and MUC1-autosomal dominant tubulointerstitial kidney disease (MUC1-ADTKD).", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41285770" +}, { "id": "omim:309548", "manubot_success": false, @@ -163179,127 +163302,21 @@ "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.malacards.org/card/KNS007']' timed out after 3 seconds" }, { - "id": "pmid:40015980", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/40015980", - "title": "A prospective trial comparing programmable targeted long-read sequencing and short-read genome sequencing for genetic diagnosis of cerebellar ataxia.", - "type": "article-journal", - "doi": "10.1101/gr.279634.124", - "authors": [ - ["Haloom", "Rafehi"], - ["Liam G", "Fearnley"], - ["Justin", "Read"], - ["Penny", "Snell"], - ["Kayli C", "Davies"], - ["Liam", "Scott"], - ["Greta", "Gillies"], - ["Genevieve C", "Thompson"], - ["Tess A", "Field"], - ["Aleena", "Eldo"], - ["Simon", "Bodek"], - ["Ernest", "Butler"], - ["Luke", "Chen"], - ["John", "Drago"], - ["Himanshu", "Goel"], - ["Anna", "Hackett"], - ["G Michael", "Halmagyi"], - ["Andrew", "Hannaford"], - ["Katya", "Kotschet"], - ["Kishore R", "Kumar"], - ["Smitha", "Kumble"], - ["Matthew", "Lee-Archer"], - ["Abhishek", "Malhotra"], - ["Mark", "Paine"], - ["Michael", "Poon"], - ["Kate", "Pope"], - ["Katrina", "Reardon"], - ["Steven", "Ring"], - ["Anne", "Ronan"], - 
["Matthew", "Silsby"], - ["Renee", "Smyth"], - ["Chloe", "Stutterd"], - ["Mathew", "Wallis"], - ["John", "Waterston"], - ["Thomas", "Wellings"], - ["Kirsty", "West"], - ["Christine", "Wools"], - ["Kathy H C", "Wu"], - ["David J", "Szmulewicz"], - ["Martin B", "Delatycki"], - ["Melanie", "Bahlo"], - ["Paul J", "Lockhart"] - ], - "publisher": "Genome research", - "issn": "1549-5469", - "date": "2025-04-14", - "abstract": "The cerebellar ataxias (CAs) are a heterogeneous group of disorders characterized by progressive incoordination. Seventeen repeat expansion (RE) loci have been identified as the primary genetic cause and account for >80% of genetic diagnoses. Despite this, diagnostic testing is limited and inefficient, often utilizing single gene assays. This study evaluates the effectiveness of long- and short-read sequencing as diagnostic tools for CA. We recruited 110 individuals (48 females, 62 males) with a clinical diagnosis of CA. Short-read genome sequencing (SR-GS) was performed to identify pathogenic RE and also non-RE variants in 356 genes associated with CA. Independently, long-read sequencing with adaptive sampling (LR-AS) was performed to identify pathogenic RE. SR-GS provided a genetic diagnosis for 38% of the cohort (40/110) including seven non-RE pathogenic variants. 
RE causes disease in 33 individuals, with the most common condition being SCA27B (", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40015980" + "id": "genereviews:NBK1384", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" }, { - "id": "pmid:39996131", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/39996131", - "title": "Redefining the Pathogenic CAG Repeat Units Threshold in", - "type": "article-journal", - "doi": "10.1212/nxg.0000000000200245", - "authors": [ - ["Yuya", "Hatano"], - ["Tomohiko", "Ishihara"], - ["Sachiko", "Hirokawa"], - ["Hidetoshi", "Date"], - ["Yuji", "Takahashi"], - ["Hidehiro", "Mizusawa"], - ["Osamu", "Onodera"] - ], - "publisher": "Neurology. Genetics", - "issn": "2376-7839", - "date": "2025-02-21", - "abstract": "Spinocerebellar ataxia type 6 (SCA6) is caused by expansion of CAG repeat units (RUs) in", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39996131" + "id": "genereviews:NBK1305", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" }, { - "id": "pmid:41285770", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/41285770", - "title": "Targeted sequencing and iterative assembly of near-complete genomes.", - "type": "article-journal", - "doi": "10.1038/s41467-025-65410-x", - "authors": [ - ["Hasindu", "Gamaarachchi"], - ["Igor", "Stevanovski"], - ["Jillian M", "Hammond"], - ["Andre L", "M Reis"], - 
["Melissa", "Rapadas"], - ["Kavindu", "Jayasooriya"], - ["Tonia", "Russell"], - ["Dennis", "Yeow"], - ["Yvonne", "Hort"], - ["Chirag", "Patel"], - ["Andrew J", "Mallett"], - ["Elaine", "Stackpoole"], - ["Lauren", "Roman"], - ["Luke W", "Silver"], - ["Carolyn J", "Hogg"], - ["Louise M", "Streeting"], - ["Ozren", "Bogdanovic"], - ["Renata", "Coelho Rodrigues Noronha"], - ["Lu\u00eds Adriano", "Santos do Nascimento"], - ["Adauto", "Lima Cardoso"], - ["Arthur", "Georges"], - ["Haoyu", "Cheng"], - ["Hardip R", "Patel"], - ["Kishore Raj", "Kumar"], - ["Amali C", "Mallawaarachchi"], - ["Ira W", "Deveson"] - ], - "publisher": "Nature communications", - "issn": "2041-1723", - "date": "2025-11-24", - "abstract": "Advances in long-read sequencing (LRS) and assembly algorithms have made it possible to create highly complete genome assemblies for humans, animals and plants. However, ongoing development is needed to improve accessibility, affordability, and assembly quality and completeness. 'Cornetto' is a new strategy in which we use programmable selective nanopore sequencing to focus LRS data production onto the unsolved regions of a nascent assembly. This improves assembly quality and streamlines the process, both for humans and non-human vertebrates. Cornetto enables us to generate highly complete diploid human genome assemblies using only nanopore LRS data, surpassing the quality of previous efforts at a fraction of the cost. Cornetto enables genome assembly from challenging sample types like human saliva. 
Finally, we obtain accurate assemblies for clinically-relevant repetitive loci at the extremes of the genome, demonstrating valid approaches for genetic diagnosis in facioscapulohumeral muscular dystrophy (FSHD) and MUC1-autosomal dominant tubulointerstitial kidney disease (MUC1-ADTKD).", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41285770" + "id": "isbn:978-3-031-66932-3", + "manubot_success": false, + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }, { "id": "pmid:25101480", @@ -163324,21 +163341,4 @@ "manubot_success": false, "link": "https://pubmed.ncbi.nlm.nih.gov/39666847", "note": "WARNING: Couldn't parse Manubot response: list index out of range" -}, -{ - "id": "genereviews:NBK1384", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" -}, -{ - "id": "genereviews:NBK1305", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" -}, -{ - "id": "isbn:978-3-031-66932-3", - "manubot_success": false, - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }] \ No newline at end of file diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 946e0bab..766b7fb0 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1930,13 +1930,13 @@ "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy 
[@pmid:39349043; @pmid:42044943]. Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@pmid:39996128].", "hpo_terms": null, "prevalence": null, - "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, and Italy [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898].", + "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, Italy, and Poland [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898; @pmid:42096001].", "age_onset": "Typical: 42-70; Range: 21-87 [@genereviews:NBK599589; @pmid:39263992].", "age_onset_min": 21.0, "age_onset_max": 87.0, "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, - "details": "Higher repeat size is associated with earlier age of onset [@pmid:39263992]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. 
Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. Finally, a complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530].", + "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. 
Finally, a complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530].", "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", From c1a75c2a190f2669b28a3dc0da738b6b91b1a6dc Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 3 Jun 2026 11:58:52 -0600 Subject: [PATCH 04/29] OPMD_PABPN1 edits minimum OPMD prevalence of 1:600 in this Jewish Bukharian population. Also, added "usually" to autosomal dominant claim because it can be recessive in rare cases, especially when 11/11. --- data/STRchive-loci.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 766b7fb0..a94f4dcf 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -3429,10 +3429,10 @@ "disease": "Oculopharyngeal muscular dystrophy", "inheritance": ["AD", "AR"], "association_type": ["Mendelian"], - "disease_description": "OPMD is an autosomal dominant late-onset neuromuscular disease, with ptosis, dysphagia, and general facial weakness [@omim:164300; @pmid:39349043; @pmid:38876750].", + "disease_description": "OPMD is a usually autosomal dominant, late-onset neuromuscular disease, characterized by ptosis, dysphagia, and general facial weakness [@omim:164300; @pmid:39349043; @pmid:38876750]. Proximal limb weakness commonly develops later in the disease course [@pmid:42157275].", "hpo_terms": null, "prevalence": "1/100000", - "prevalence_details": "1/100,000 (population specific) [@pmid:29100084]. Frequency of (GCN)11 alleles is 1-2% of North America/Europe/Japan [@genereviews:NBK1126]. Disease is found worldwide, in more than 30 countries [@genereviews:NBK1126].", + "prevalence_details": "1/100,000 (population specific) [@pmid:29100084]. Frequency of (GCN)11 alleles is 1-2% of North America/Europe/Japan [@genereviews:NBK1126]. 
Disease is significantly enriched in individuals of Jewish Bukharian descent[@pmid:42157275]. Disease is found worldwide, in more than 30 countries [@genereviews:NBK1126].", "age_onset": "Typical: 40-59 [@pmid:37519616]; Range: 20-79 [@pmid:35112761].", "age_onset_min": 20.0, "age_onset_max": 79.0, From 28a0f9ffa77cead181ae81d8bf4dba0f33e53f4b Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 3 Jun 2026 14:34:50 -0600 Subject: [PATCH 05/29] Adding Diagnostic Relevance --- data/STRchive-loci.json | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index a94f4dcf..10ee56ff 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -221,7 +221,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516].", + "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. 
Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. Although this expansion can be detected in srWGS screens [@pmid:36797998], sizing needs to be validated with standard PCR fragment analysis or RP-PCR [@genereviews:NBK1333].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine alters protein conformation leading to gain-of-function neurodegeneration [@pmid:29398703; @pmid:36169768]. Transcriptional dysregulation, axonal transport disruption, and mitochondrial dysfunction also play causative roles in the neurodegeneration [@pmid:22609045].", "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", @@ -419,7 +419,7 @@ "age_onset_max": 72.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 40.0, - "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. CAA interruptions have been observed without known clinical association [@pmid:35245110]. 
Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955].", + "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. These repeats are typically sized with fragment analysis and/or RP-PCR, especially on repeat-expansion panels. Routine PCR works well for moderate alleles, but very large or juvenile-onset expansions may exceed exact sizing limits [@genereviews:NBK1491]. Expansions are usually sized by PCR fragment analysis, often with RP-PCR as a reflex [geneReviews:NBK1184]. Standard fragment analysis gives total repeat length but does not resolve CAT interruptions, which typically requires targeted sequencing methods, such as Sanger sequencing [@pmid:34635619].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansions leading to gain of function [@genereviews:NBK1491].", "year": "1994 [@pmid:7842016]", @@ -551,7 +551,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 12.0, "typ_age_onset_max": 48.0, - "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. 
The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]", + "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]. This expansion is commonly detected with RP-PCR and fragment analysis [@pmid:32160188]. 
Pathogenicity depends heavily on interruptions, so long-read sequencing approaches are useful to fully resolve structure [@pmid:26295943; @pmid:32160188].", "mechanism": "GoF", "mechanism_detail": "Transdominant mechanism theorized [@pmid:38467784].", "year": "2000 [@pmid:11017075]", @@ -617,7 +617,7 @@ "age_onset_max": 86.0, "typ_age_onset_min": 30.0, "typ_age_onset_max": 39.0, - "details": "Full penetrance of single alleles occurs at ~35 repeats [@genereviews:NBK1275; @pmid:37906407] and pathogenic expansions have been documented as large as 500 repeats [@pmid:12116207]. 33-34 length repeats are associated with reduced penetrance and later onset (age >50 years) [@genereviews:NBK1275]. Homozygous 31 repeat alleles may lead to recessive disease [@pmid:30533529], while a single 29-32 repeat is associated with increased ALS risk [@genereviews:NBK1275; @pmid:25285812; @pmid:32954321]. There is some evidence that all CAG-repeat expansions in ATXN2 may be a risk factor for ALS, regardless of length and interruptions [@pmid:39956874]. CAA interruptions have been observed which appear to stabilize the allele in transmission [@genereviews:NBK1275].", + "details": "Full penetrance of single alleles occurs at ~35 repeats [@genereviews:NBK1275; @pmid:37906407] and pathogenic expansions have been documented as large as 500 repeats [@pmid:12116207]. 33-34 length repeats are associated with reduced penetrance and later onset (age >50 years) [@genereviews:NBK1275]. Homozygous 31 repeat alleles may lead to recessive disease [@pmid:30533529], while a single 29-32 repeat is associated with increased ALS risk [@genereviews:NBK1275; @pmid:25285812; @pmid:32954321]. There is some evidence that all CAG-repeat expansions in ATXN2 may be a risk factor for ALS, regardless of length and interruptions [@pmid:39956874]. CAA interruptions have been observed which appear to stabilize the allele in transmission [@genereviews:NBK1275]. 
This expansion is commonly detected using RP-PCR with fragment analysis while Southern blots are used to approximate size over 100 repeats [@genereviews:NBK1275].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine cytoplasmic aggregates leading to cellular apoptosis; RAN translation implicated [@genereviews:NBK1275].", "year": "1996 [@pmid:8896556]", @@ -683,7 +683,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 49.0, - "details": "Benign alleles range from 11-44 repeats [@pmid:37906407], with intermediate alleles (45-59) associated with incomplete penetrance and non-classic phenotypes [@genereviews:NBK1196]. The threshold between incomplete and full penetrance is unclear, but presumed to occur at ~60 repeats [@genereviews:NBK1196; @pmid:37906407]. The interruption CAA has been observed [@pmid:35245110]; AAG is present in hg38 reference sequence. The APOE ε4 allele appears to act as a disease modifier [@pmid:39731318]; GLS expansions may also function as disease modifiers [@pmid:39699045].", + "details": "Benign alleles range from 11-44 repeats [@pmid:37906407], with intermediate alleles (45-59) associated with incomplete penetrance and non-classic phenotypes [@genereviews:NBK1196]. The threshold between incomplete and full penetrance is unclear, but presumed to occur at ~60 repeats [@genereviews:NBK1196; @pmid:37906407]. The interruption CAA has been observed [@pmid:35245110]; AAG is present in hg38 reference sequence. The APOE ε4 allele appears to act as a disease modifier [@pmid:39731318]; GLS expansions may also function as disease modifiers [@pmid:39699045]. These expansions are commonly sized by PCR fragment analysis or RP-PCR [@genereviews:NBK1196]. 
For very large expansions or interruptions, long read targeted sequencing has been used [@pmid:40890629].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; aggregated and mislocalized proteins in neurons [@pmid:36169768; @genereviews:NBK1196].", "year": "1994 [@pmid:7874163]", @@ -749,7 +749,7 @@ "age_onset_max": 65.0, "typ_age_onset_min": 4.0, "typ_age_onset_max": 48.0, - "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110].", + "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions in this locus. Expansions are usually detected by PCR fragment analysis or RP-PCR. Most normal and moderate pathogenic alleles can be sized exactly, but very large expansions may need long-read sequencing or Southern blotting [@genereviews:NBK1256].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; toxic misfolded intermediated suspected [@genereviews:NBK1256; @pmid:18418675].", "year": "1996 [@pmid:8908515]", @@ -825,7 +825,7 @@ "age_onset_max": 76.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. 
suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268].", + "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; @genereviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and Southern blotting can approximate size [@genereviews:NBK1268].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine/toxic gain-of-function [@omim:608768; @genereviews:NBK1268].", "year": "1999 [@pmid:10192387]", @@ -967,7 +967,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. 
The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733]. ", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. 
>250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate sizing [@pmid:23566336], while long-read sequencing can provide more direct sizing and sequence characterization [@pmid:30126445]. ", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. 
Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167].", "year": "2011 [@pmid:21944778]", @@ -1033,7 +1033,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 43.0, "typ_age_onset_max": 52.0, - "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733].", + "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. 
Expansions are often detected using PCR fragment analysis [@pmid:35573049].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansions associated increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", "year": "1997 [@pmid:8988170]", @@ -1236,7 +1236,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 28.0, "typ_age_onset_max": 56.0, - "details": "Detailed overview of disease locus through 2024 by Rimoldi et al [@pmid:39643839]. ≤30 uninterrupted CCTG repeats or 11-26 CCTG repeats with GCTC/TCTG interruptions are considered benign; 27-29 repeats with interruptions have currently unknown significance, ~30-~54 repeats are considered premutations, ~55-74 repeats are premutations with possible reduced penetrance, and >74 repeat alleles are considered pathogenic [@genereviews:NBK1466]. Penetrance is age-dependent and approaches 100%. Locus structure is (TG)n(TCTG)n(CCTG)n. CCTG expansion causes DM2 but the other repeat units are also variable. Interruptions include GCTG/TCTG/GGCT [@pmid:35245110]. The effect of the (TCTG)n repeat remains to be determined, but it is potentially common in the repeat structure of this locus [@pmid:39703464].", + "details": "Detailed overview of disease locus through 2024 by Rimoldi et al [@pmid:39643839]. ≤30 uninterrupted CCTG repeats or 11-26 CCTG repeats with GCTC/TCTG interruptions are considered benign; 27-29 repeats with interruptions have currently unknown significance, ~30-~54 repeats are considered premutations, ~55-74 repeats are premutations with possible reduced penetrance, and >74 repeat alleles are considered pathogenic [@genereviews:NBK1466]. Penetrance is age-dependent and approaches 100%. Locus structure is (TG)n(TCTG)n(CCTG)n. CCTG expansion causes DM2 but the other repeat units are also variable. Interruptions include GCTG/TCTG/GGCT [@pmid:35245110]. 
The effect of the (TCTG)n repeat remains to be determined, but it is potentially common in the repeat structure of this locus [@pmid:39703464]. Because these expansions can be very large and complex, bidirectional RP-PCR is used to detect expansions [@genereviews:NBK1466], while Southern blot can estimate size [@pmid:34234810]. Long read sequencing is used to resolve size and internal structure [@pmid:36018009].", "mechanism": "GoF", "mechanism_detail": "Aberrant splicing, RAN translation [@pmid:22140091; @pmid:38467784]. Proposed pathogenisis contributions include nucleolar stress, autophagy dysregulation, and stress granule formation [@pmid:42003432].", "year": "2001 [@pmid:11486088]", @@ -1738,7 +1738,7 @@ "age_onset_max": 74.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 30.0, - "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849].", + "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. 
In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849]. Flanking PCR detects alleles up to ~150 repeats while RP-PCR may detect missed alleles [@genereviews:NBK1165; @pmid:24795756]. Southern blotting can approximate the size of large expansions [@pmid:22643181] while long read sequencing resolves repeat size and structure [@pmid:41974889].", "mechanism": "GoF", "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded-repeat RNA sequesters the muscleblind-like (MBNL) family of RNA-binding proteins as part of the disruption of pre-mRNA processing, contributing to cardiac phenotypes [@pmid:39932794]. Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", "year": "1992 [@pmid:1310900]", @@ -1936,7 +1936,7 @@ "age_onset_max": 87.0, "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, - "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. 
Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. Finally, a complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530].", + "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. 
Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. Finally, a complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. These expansions are not reliably detected by short read genome or exome sequencing [@genereviews:NBK599589]. Long-range PCR and bidirectional RP-PCR are used for detecting expansions [@genereviews:NBK599589; @pmid:36516086]. Long read sequencing is used to determine repeat structure and purity [@genereviews:NBK599589; @pmid:36516086].", "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", @@ -2002,7 +2002,7 @@ "age_onset_max": 78.0, "typ_age_onset_min": 1.0, "typ_age_onset_max": 65.0, - "details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported showing episodic memory deficits, similar to those seen in AD [@pmid:41555826]. AGG interruptions are frequently reported in all associated diseases and appear to stabilize alleles; the length of the longest pure stretch predicts repeat instability [@pmid:7987398]. Elevated POI risk was observed starting at 36 repeats, increasing continuously with repeat length [@pmid:42001465].", + "details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. 
FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported showing episodic memory deficits, similar to those seen in AD [@pmid:41555826]. AGG interruptions are frequently reported in all associated diseases and appear to stabilize alleles; the length of the longest pure stretch predicts repeat instability [@pmid:7987398]. Elevated POI risk was observed starting at 36 repeats, increasing continuously with repeat length [@pmid:42001465]. Modern PCR techniques detect virtually all sizes of FMR1 expansions, while RP-PCR detects AGG interspersions. Southern blot analysis approximates size and indicates methylation status [@genereviews:NBK1384]. Long-read sequencing is ideal for full characterization of repeat size, interruptions, methylation, and mosaicism [@pmid:29868108; @pmid:31740840].", "mechanism": "LoF/GoF", "mechanism_detail": "Loss of function via transcriptional silencing in FXS, RNA gain of function in FXTAS/FXPOI [@pmid:16205714; @pmid:36169768]. PRKGG appears to modulate neurotoxicity [@pmid:41507195].", "year": "1992 [@pmid:1605194]; causative gene discovered in 1991 [@pmid:1710175]", @@ -2134,7 +2134,7 @@ "age_onset_max": 80.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 15.0, - "details": "96% of FA patients have biallelic GAA expansions in intron 1 (compared to compound heterozygous with another mutation type), in which the reference allele is conventionally 5-33 repeats [@genereviews:NBK1281]. Intermediate alleles (34-55) are associated with premutations, but may lead to disease as exact pathogenicity/penetrance thresholds have not been demarcated [@genereviews:NBK1281]. The expanded repeats can interrupted either with GAAGAG, GAAGGA, or GAAGAAAA sequences, leading to differential phenotypes [@pmid:11748752]. 
Allele size is correlated with disease severity and inversely correlated to age of onset, and expansions can reach 1700 repeats [@pmid:8815938].", + "details": "96% of FA patients have biallelic GAA expansions in intron 1 (compared to compound heterozygous with another mutation type), in which the reference allele is conventionally 5-33 repeats [@genereviews:NBK1281]. Intermediate alleles (34-55) are associated with premutations, but may lead to disease as exact pathogenicity/penetrance thresholds have not been demarcated [@genereviews:NBK1281]. The expanded repeats can be interrupted either with GAAGAG, GAAGGA, or GAAGAAAA sequences, leading to differential phenotypes [@pmid:11748752]. Allele size is correlated with disease severity and inversely correlated to age of onset, and expansions can reach 1700 repeats [@pmid:8815938]. RP-PCR detects expansions <200 repeats while long range PCR detects expansions >200 repeats [@pmid:35595154]. Long-read sequencing has been used to size large alleles and determine sequence organization [@pmid:35595154].", "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1996 [@pmid:8596916]", @@ -2677,7 +2677,7 @@ "age_onset_max": 85.0, "typ_age_onset_min": 35.0, "typ_age_onset_max": 44.0, - "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. 
CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions, exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733].", + "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions, exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. 
PCR methods can detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blot analysis. [@genereviews:NBK1305]. long-read sequencing has been used to resolve interruptions and validate sizing [@pmid:41512049].", "mechanism": "GoF/LoF", "mechanism_detail": "While the primary pathogenic mechanism is gain of function of the protein product, pathogenesis is complex and multifactorial [@pmid:27940602]. Reduced SCN4B expression in striatal neurons has been implicated as a modifier of HD-associated phenotype severity, potentially contributing to dysfunction in motor associated striatal neuronal populations [@pmid:41959367].", "year": "1993 [@pmid:8458085]", @@ -3230,7 +3230,7 @@ "age_onset_max": 67.0, "typ_age_onset_min": 40.0, "typ_age_onset_max": 60.0, - "details": "Benign alleles range from 3-14 repeats and pathogenic alleles (650+ repeats) appear fully penetrant; the significance of intermediate alleles has yet to be elucidated [@pmid:25101480]. Interruptions documented: GGCTG, GGCCCTG, GGCCG, and GGCCTTG [@pmid:37051597].", + "details": "Benign alleles range from 3-14 repeats and pathogenic alleles (650+ repeats) appear fully penetrant; the significance of intermediate alleles has yet to be elucidated [@pmid:25101480]. Interruptions documented: GGCTG, GGCCCTG, GGCCG, and GGCCTTG [@pmid:37051597]. These expansions are often detected using RP-PCR with fragment analysis [@pmid:21683323]. Long read sequencing is used for accurate sizing of these alleles [@pmid:37051597].", "mechanism": "GoF", "mechanism_detail": "Toxic protein gain-of-function, RAN translation [@omim:614153].", "year": "2011 [@pmid:21683323]", @@ -3306,7 +3306,7 @@ "age_onset_max": 78.0, "typ_age_onset_min": 30.0, "typ_age_onset_max": 70.0, - "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. 
Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to parology between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19.", + "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked to Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to paralogy between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19. Short-read sequencing is not reliable for definitive sizing of large or complex expansions [@pmid:34034831]. 
RP-PCR can usually screen for expansions [@pmid:37371433], but long-read sequencing is the best method for determining size, structure, and methylation [@pmid:34774111].", "mechanism": "GoF", "mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. Proposed mechanisms include toxic uN2CpolyG/polyglycine aggregation, RNA pathogenicity, impaired autophagy, mitochondrial dysfunction, and innate immune activation [@pmid:42058219]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@pmid:39920690]. Tau pathology is evident, changes in p-tau levels and tau deposition have been reported [@pmid:41539185]. Expanded polyG proteins also induce nucleolar stress through interaction with NPM1 and rRNA. This disrupts ribosomal homeostasis and alters 3D chromatin organization through reduced CTCF/RAD21 expression [@pmid:41942455].", "year": "2019 [@pmid:31332380]", @@ -4077,7 +4077,7 @@ "age_onset_max": 76.0, "typ_age_onset_min": 36.0, "typ_age_onset_max": 52.0, - "details": "Disease is caused by an insertion of a pathogenic motif, although motif presence is variable and can expand up to 200 repeats without apparently causing a phenotype [@genereviews:NBK564656]. Pathogenic expansions (ranging from 400-2750 pathogenic motifs) may be flanked by other motifs [@genereviews:NBK564656]. For example, (AAAGG)10-25(AAGGG)exp(AAAGG)4-6 [@pmid:32851396]. Motif heterogeneity is common in unaffected individuals [@genereviews:NBK564656], and motif associations are described by Delforge et al [@pmid:38627134]. The pathogenic size threshold appears to differ for the AAAGG motif: AAAGG expansions >= 600 repeats have been observed in CANVAS patients (vs 400 with established pathogenic motif AAGGG), while ~100-380 AAAGG repeats were found in unaffected controls [@pmid:37450567]. 
Length appears to impact age of onset and disease severity, with particular impact from the smaller allele [@doi:10.1136/jnnp-2024-ABN.259]. Phenotypic spectrum may include Parkinsonism [@pmid:39833204], chronic cough [@pmid:39811557], idiopathic sensory neuropathy, small fiber neuropathy, and sensorimotor neuropathy [@pmid:41964406].", + "details": "Disease is caused by an insertion of a pathogenic motif, although motif presence is variable and can expand up to 200 repeats without apparently causing a phenotype [@genereviews:NBK564656]. Pathogenic expansions (ranging from 400-2750 pathogenic motifs) may be flanked by other motifs [@genereviews:NBK564656]. For example, (AAAGG)10-25(AAGGG)exp(AAAGG)4-6 [@pmid:32851396]. Motif heterogeneity is common in unaffected individuals [@genereviews:NBK564656], and motif associations are described by Delforge et al [@pmid:38627134]. The pathogenic size threshold appears to differ for the AAAGG motif: AAAGG expansions >= 600 repeats have been observed in CANVAS patients (vs 400 with established pathogenic motif AAGGG), while ~100-380 AAAGG repeats were found in unaffected controls [@pmid:37450567]. Length appears to impact age of onset and disease severity, with particular impact from the smaller allele [@doi:10.1136/jnnp-2024-ABN.259]. Phenotypic spectrum may include Parkinsonism [@pmid:39833204], chronic cough [@pmid:39811557], idiopathic sensory neuropathy, small fiber neuropathy, and sensorimotor neuropathy [@pmid:41964406]. expansions are suggested by failure of flanking PCR and a pathogenic RP-PCR sawtooth pattern, but biallelic confirmation and sizing rely on Southern blotting [@genereviews:NBK564656]. 
Because of the variable and complex motif structure, long read sequencing or optical genome mapping are useful for resolving this expansion [@pmid:37892228; @pmid:37450567].", "mechanism": "LoF", "mechanism_detail": "LoF; exact mechanism unknown [@pmid:38467784].", "year": "2019 [@pmid:31230722]", @@ -4635,7 +4635,7 @@ "age_onset_max": 62.0, "typ_age_onset_min": 19.0, "typ_age_onset_max": 48.0, - "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110].", + "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110]. SRS detects some expansions but fails to size them beyond 250 bp [@pmid:37906407]. 
PCR amplification may detect expansions of 66 or fewer [@genereviews:NBK1438].", "mechanism": "LoF/GoF", "mechanism_detail": "Polyglutamine expansion leading to transcriptional dysregulation [@pmid:35053321].", "year": "1999 [@pmid:10484774]", From 72e121a5437cb2450314b0352aff82bb1e5bf2b2 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 3 Jun 2026 14:50:38 -0600 Subject: [PATCH 06/29] Various spelling/grammar fixes throughout the file --- data/STRchive-loci.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 10ee56ff..02559d7f 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -355,7 +355,7 @@ "typ_age_onset_max": null, "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206].", "mechanism": "LoF", - "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. Apparent aggregation and mis-localisation of mutan protein, increased with expansion length [@genereviews:NBK51932].", + "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. 
Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", "year": "2002 [@pmid:11889467]", "location_in_gene": "Coding Exon 2, aa 144-155", "gene_strand": "-", @@ -479,7 +479,7 @@ "disease_description": "Spinocerebellar ataxia type 1 (SCA1) is a subtype of type I autosomal dominant cerebellar ataxia (ADCA type I) characterized by dysarthria, writing difficulties, limb ataxia, and commonly nystagmus and saccadic abnormalities [@mondo:0008119].", "hpo_terms": null, "prevalence": "1.5/100000", - "prevalence_details": "1-2/100,000. Cases have been reported worldwide, although prevalences varies by ancestry/ethnicity [@genereviews:NBK1184].", + "prevalence_details": "1-2/100,000. Cases have been reported worldwide, although prevalence varies by ancestry/ethnicity [@genereviews:NBK1184].", "age_onset": "Typical: 20-39 [@url:https://www.uptodate.com/contents/autosomal-dominant-spinocerebellar-ataxias]; Range: 6 [@pmid:3165612] - 63 [@pmid:8825276].", "age_onset_min": 6.0, "age_onset_max": 63.0, @@ -749,7 +749,7 @@ "age_onset_max": 65.0, "typ_age_onset_min": 4.0, "typ_age_onset_max": 48.0, - "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions in this locus. Expansions are usually detected by PCR fragment analysis or RP-PCR . Most normal and moderate pathogenic alleles can be sized exactly, but very large expansions may need long-read sequencing or southern blotting [geneReviews:NBK1256].", + "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions in this locus. 
Expansions are usually detected by PCR fragment analysis or RP-PCR. Most normal and moderate pathogenic alleles can be sized exactly, but very large expansions may need long-read sequencing or Southern blotting [@genereviews:NBK1256].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; toxic misfolded intermediated suspected [@genereviews:NBK1256; @pmid:18418675].", "year": "1996 [@pmid:8908515]", @@ -825,7 +825,7 @@ "age_onset_max": 76.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. 
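Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; geneReviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and southern blotting can approximate size [geneReviews:NBK1268].", + "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; @genereviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and Southern blotting can approximate size [@genereviews:NBK1268].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine/toxic gain-of-function [@omim:608768; @genereviews:NBK1268].", "year": "1999 [@pmid:10192387]", @@ -967,7 +967,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. 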
Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733].These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate sizing [@pmid:23566336], while long-read sequencing can provide more direct sizing and sequence characterization [@pmid:30126445]. ", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. 
Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate sizing [@pmid:23566336], while long-read sequencing can provide more direct sizing and sequence characterization [@pmid:30126445].", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167].", "year": "2011 [@pmid:21944778]", @@ -1024,7 +1024,7 @@ "disease": "Spinocerebellar ataxia type 6", "inheritance": ["AD"], "association_type": ["Mendelian"], - "disease_description": "Spinocerebellar ataxia type 6 (SCA6) is the most common subtype of autosomal dominant cerebellar ataxia type III (ADCA type III) characterized by late-onset and slowly progressive gait ataxia and other cerebellar signs such as impaired muscle coordination and nystagmus [@mondo:0008457]. Ao, et al. 
has proposed that this expansion may have effects on chronotype, differing by sex and menopausal status, as well as depresssion severity [@pmid:41358280].", + "disease_description": "Spinocerebellar ataxia type 6 (SCA6) is the most common subtype of autosomal dominant cerebellar ataxia type III (ADCA type III) characterized by late-onset and slowly progressive gait ataxia and other cerebellar signs such as impaired muscle coordination and nystagmus [@mondo:0008457]. Ao et al. have proposed that this expansion may have effects on chronotype, differing by sex and menopausal status, as well as depression severity [@pmid:41358280].", "hpo_terms": null, "prevalence": "2.65/100000", "prevalence_details": "13-15% of global SCA prevalence, estimated to be 0.02-31/100,000 [@genereviews:NBK1140; @pmid:29100084]: resultant estimate is 0.3-5/100,000. Found across ethnicities/ancestries, with population-dependent prevalence [@genereviews:NBK1140].", @@ -1035,7 +1035,7 @@ "typ_age_onset_max": 52.0, "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. 
Expansions are often detected using PCR fragment analysis [@pmid:35573049].", "mechanism": "GoF/LoF", - "mechanism_detail": "Polyglutamine expansions associated increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", + "mechanism_detail": "Polyglutamine expansions associated with increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", "year": "1997 [@pmid:8988170]", "location_in_gene": "Coding, Last Exon: 47 or 48", "gene_strand": "-", @@ -1927,7 +1927,7 @@ "disease": "Spinocerebellar ataxia 27B", "inheritance": ["AD"], "association_type": ["Mendelian", "Risk"], - "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy [@pmid:39349043; @pmid:42044943]. Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@pmid:39996128].", + "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy [@pmid:39349043; @pmid:42044943]. Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@pmid:39996128].", "hpo_terms": null, "prevalence": null, "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. 
Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, Italy, and Poland [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898; @pmid:42096001].", @@ -2677,7 +2677,7 @@ "age_onset_max": 85.0, "typ_age_onset_min": 35.0, "typ_age_onset_max": 44.0, - "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions, exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. PCR methods can detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blot analysis. [@genereviews:NBK1305]. 
long-read sequencing has been used to resolve interruptions and validate sizing [@pmid:41512049].", + "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. PCR methods can detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blot analysis [@genereviews:NBK1305]. Long-read sequencing has been used to resolve interruptions and validate sizing [@pmid:41512049].", "mechanism": "GoF/LoF", "mechanism_detail": "While the primary pathogenic mechanism is gain of function of the protein product, pathogenesis is complex and multifactorial [@pmid:27940602]. 
Reduced SCN4B expression in striatal neurons has been implicated as a modifier of HD-associated phenotype severity, potentially contributing to dysfunction in motor associated striatal neuronal populations [@pmid:41959367].", "year": "1993 [@pmid:8458085]", @@ -4071,7 +4071,7 @@ "disease_description": "Sensory disturbances, imbalance, oscillopsia, chronic dry cough, dysarthria and dysphagia [@pmid:38876750]; Late-onset ataxia, sensory neuropathy, vestibular areflexia syndrome [@pmid:39349043]. This expansion has been implicated in the genetic etiology of Parkinson's disease [@pmid:41177915].", "hpo_terms": null, "prevalence": null, - "prevalence_details": "Carrier frequency in European is 0.7-4% and in Chinese Han population is 2.24%; estimated prevalence of 1/20,000 to 1/625 [@genereviews:NBK564656]. Many cases are likely not diagnosed due to heterogeneous presentation [@pmid:39230846]. Observed in multiple ethnicities [@pmid:38876750]; patients diagnosed with European, Chinese Han, and Maori ancestry, as well as found in Japan, Canada, Brazil, the UK, Italy, Germany, and Australia [@genereviews:NBK564656].", + "prevalence_details": "Carrier frequency in Europeans is 0.7-4% and in Chinese Han population is 2.24%; estimated prevalence of 1/20,000 to 1/625 [@genereviews:NBK564656]. Many cases are likely not diagnosed due to heterogeneous presentation [@pmid:39230846]. Observed in multiple ethnicities [@pmid:38876750]; patients diagnosed with European, Chinese Han, and Maori ancestry, as well as found in Japan, Canada, Brazil, the UK, Italy, Germany, and Australia [@genereviews:NBK564656].", "age_onset": "Typical: 36-52; Range: 19-76 [@genereviews:NBK564656].", "age_onset_min": 19.0, "age_onset_max": 76.0, @@ -4361,7 +4361,7 @@ "age_onset_max": 9.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Expansion to 22-26 repeats or contraction to 8 repeats can cause disease, as reported in 3 families [@genereviews:NBK535148]. 
There is phenotypic and allelic overlap between XLID and PHPX, with the pathogenic thresold for XLID estimated at 26 motifs and the pathogenic threshold for PHPX estimated at 22 motifs [@pmid:15800844, @pmid:12428212].", + "details": "Expansion to 22-26 repeats or contraction to 8 repeats can cause disease, as reported in 3 families [@genereviews:NBK535148]. There is phenotypic and allelic overlap between XLID and PHPX, with the pathogenic threshold for XLID estimated at 26 motifs and the pathogenic threshold for PHPX estimated at 22 motifs [@pmid:15800844; @pmid:12428212].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to aggresome formation and impaired transcriptional activity [@pmid:17127446].", "year": "2002 [@pmid:12428212]", @@ -4560,7 +4560,7 @@ "disease": "Oculopharyngodistal myopathy", "inheritance": ["AD"], "association_type": ["Mendelian"], - "disease_description": "This is a newly proposed locus for OPDM, only havng been reported in one paper [@pmid:41959811]. Oculopharyngodistal myopathy (OPDM) is a rare, adult-onset hereditary muscle disease. People with OPDM present with progressive eye and throat (pharyngeal) problems and involvement of the muscles of the lower legs and arms. Symptoms may include eyelid drooping (ptosis), swallowing difficulty, hoarse and nasal voice, leg and arm weakness, as well as muscle wasting in the face and in the legs and arms. Many people have respiratory problems due to respiratory muscle weakness. In rare cases, there is also hearing loss, as well as severe weakness in muscles of the forearms and thighs. As the disease progresses, other muscles may be affected. A blood exam may show an increased creatine kinase level and an abnormal EMG [@mondo:0025193].", + "disease_description": "This is a newly proposed locus for OPDM, only having been reported in one paper [@pmid:41959811]. Oculopharyngodistal myopathy (OPDM) is a rare, adult-onset hereditary muscle disease.
People with OPDM present with progressive eye and throat (pharyngeal) problems and involvement of the muscles of the lower legs and arms. Symptoms may include eyelid drooping (ptosis), swallowing difficulty, hoarse and nasal voice, leg and arm weakness, as well as muscle wasting in the face and in the legs and arms. Many people have respiratory problems due to respiratory muscle weakness. In rare cases, there is also hearing loss, as well as severe weakness in muscles of the forearms and thighs. As the disease progresses, other muscles may be affected. A blood exam may show an increased creatine kinase level and an abnormal EMG [@mondo:0025193].", "hpo_terms": null, "prevalence": null, "prevalence_details": "Found in 3 families (7 total patients) of European ancestry [@pmid:41959811].", @@ -4635,7 +4635,7 @@ "age_onset_max": 62.0, "typ_age_onset_min": 19.0, "typ_age_onset_max": 48.0, - "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110]. SRS detects some expansions but fails to size them beyond 250 bp. [@pmid:37906407] . PCR amplification may detect expansions of 66 or fewer [@genereviews:NBK1438].", + "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. 
CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438; @pmid:35245110]. SRS detects some expansions but fails to size them beyond 250 bp [@pmid:37906407]. PCR amplification may detect expansions of 66 or fewer [@genereviews:NBK1438].", "mechanism": "LoF/GoF", "mechanism_detail": "Polyglutamine expansion leading to transcriptional dysregulation [@pmid:35053321].", "year": "1999 [@pmid:10484774]", From 3f7a06217de7beac2be1c0402bfb0d951b0e6a9b Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 3 Jun 2026 17:29:31 -0600 Subject: [PATCH 07/29] add polyGR activating the integrated stress response in drosophila --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 02559d7f..e79d316c 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -969,7 +969,7 @@ "typ_age_onset_max": 64.0, "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690].
Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733].These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate sizing [@pmid:23566336], while long-read sequencing can provide more direct sizing and sequence characterization [@pmid:30126445]. ", "mechanism": "Ambiguous", - "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167].", + "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. 
Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256].", "year": "2011 [@pmid:21944778]", "location_in_gene": "Intron 1 or 5' UTR depending on transcript", "gene_strand": "-", From 9a4c7e1d1dd7a9d6309650a3caaaefadb16fb776 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 4 Jun 2026 11:31:47 -0600 Subject: [PATCH 08/29] more diagnostic relevance more diagnostic relevance even more diagnostic relevance --- data/STRchive-loci.json | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index e79d316c..fee57424 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -23,7 +23,7 @@ "age_onset_max": 50.0, "typ_age_onset_min": 24.0, "typ_age_onset_max": 30.0, - "details": "Characterized in eight unrelated families which were used to establish benign (3-44) and pathogenic (118-694) ranges [@pmid:39068203].", + "details": "Characterized in eight unrelated families which were used to establish benign (3-44) and pathogenic (118-694) ranges [@pmid:39068203]. These expansions are usually detected with RP-PCR [@pmid:39068203]. srWGS has significantly underestimated repeat count, while long read sequencing has accurately resolved size [@pmid:39068203].", "mechanism": null, "mechanism_detail": "Potentially over-expression of transcripts [@pmid:39068203].", "year": "2023 [@pmid:39068203]", @@ -89,7 +89,7 @@ "age_onset_max": 10.0, "typ_age_onset_min": 2.0, "typ_age_onset_max": 10.0, - "details": "Allele ranges (benign:4-39; pathogenic: >200) inferred from The Human Gene Mutation Database [@genereviews:NBK535148]. Intermediate alleles correspond to a premutation [@pmid:23914978].
Non-canonical motifs include: CGG/CCT/GTG/CAG/CTG3 [@pmid:35245110; @pmid:34111553].", + "details": "Allele ranges (benign:4-39; pathogenic: >200) inferred from The Human Gene Mutation Database [@genereviews:NBK535148]. Intermediate alleles correspond to a premutation [@pmid:23914978]. Non-canonical motifs include: CGG/CCT/GTG/CAG/CTG3 [@pmid:35245110; @pmid:34111553]. RP-PCR can detect these expansions and size them to ~80 repeats [@pmid:34282157]. Southern blotting can size larger alleles and categorize methylation [@pmid:34282157].", "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1993 [@pmid:8334699]", @@ -155,7 +155,7 @@ "age_onset_max": 7.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Allele ranges established in study of 3 families; intermediate alleles likely premutations [@pmid:24763282]. Pathogenic threshold may be higher than 300 as this was the largest allele that could be accurately sized by the assay.", + "details": "Allele ranges established in study of 3 families; intermediate alleles likely premutations [@pmid:24763282]. Pathogenic threshold may be higher than 300 as this was the largest allele that could be accurately sized by the assay. srWGS may underestimate the size of large expansions. While standard PCR can detect small alleles, RP-PCR and Southern blotting are used to approximate size of large expansions [@pmid:24763282]. 
For exact sizing, long read sequencing has been shown to be effective [@pmid:39313615].", "mechanism": "LoF/methylation", "mechanism_detail": "Silencing of the FMR2 gene as a consequence of a CCG expansion located upstream of this gene [@malacard:KNS007].", "year": "2014 [@pmid:24763282]", @@ -287,7 +287,7 @@ "age_onset_max": 4.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, - "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206].", + "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:17668384].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. 
Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", "year": "2002 [@pmid:11889467]", @@ -353,7 +353,7 @@ "age_onset_max": 4.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206].", + "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:11889467].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. 
Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", "year": "2002 [@pmid:11889467]", @@ -901,7 +901,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 56.0, "typ_age_onset_max": 62.0, - "details": "This locus is a novel STR-containing insertion, not present in reference genome; the pathogenic threshold (110-760) is based on the pure repeat of the pathogenic motif within the insertion [@pmid:19878914].", + "details": "This locus is a novel STR-containing insertion, not present in reference genome; the pathogenic threshold (110-760) is based on the pure repeat of the pathogenic motif within the insertion [@pmid:19878914]. RP-PCR accurately detects this insertion [@pmid:22992774], while long read sequencing can resolve sizing and motif architecture [@pmid:36289212]. ", "mechanism": "GoF", "mechanism_detail": "RNA toxicity and gain of function leading to neurodegeneration [@pmid:36371266]. Role in heterochromatin or chromosomal structure theorized [@omim:117210].", "year": "2009 [@pmid:19878914]", @@ -1383,7 +1383,7 @@ "age_onset_max": 10.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "CGG repeat in exon 1 of CSNK1E. Longest reported expanded allele of an affected individual is 745, with an unaffected sibling with repeat length 980. Father had a repeat of 8 and mother of 131.", + "details": "CGG repeat in exon 1 of CSNK1E. Longest reported expanded allele of an affected individual is 745, with an unaffected sibling with repeat length 980. Father had a repeat of 8 and mother of 131. Whole exome sequencing does not detect expansions in this locus. They have instead been detected through methylation-outlier detection and confirmed with targeted long-read sequencing [@pmid:40751262].", "mechanism": "Unknown", "mechanism_detail": "Mechanism of this disease is largely unknown, but hypermethylation is observed. 
Expanded alleles exhibit hypermethylation and may mediate epigenetic silencing. Unaffected carriers have been observed, indicating variable expressivity or penetrance.", "year": "2025", @@ -1445,7 +1445,7 @@ "disease": "Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD)", "inheritance": ["AR"], "association_type": ["Mendelian"], - "disease_description": "Unverricht-Lundborg disease (ULD) is a rare progressive myoclonic epilepsy disorder characterized by action- and stimulus-sensitive myoclonus, and tonic-clonic seizures with ataxia, but with only a mild cognitive decline over time [@mondo:0009698].", + "disease_description": "Unverricht-Lundborg disease (ULD) is a rare progressive myoclonic epilepsy disorder characterized by action- and stimulus-sensitive myoclonus, and tonic-clonic seizures with ataxia, but with only a mild cognitive decline over time [@mondo:0009698]. Pathogenic expansions cannot be detected by srWGS. Conventional PCR can detect repeats in the normal range, while southern blots are used to detect and approximate size of expanded alleles [@genereviews:NBK1142].", "hpo_terms": null, "prevalence": null, "prevalence_details": "Worldwide prevalence unknown; Finland prevalence 2-4/100,000. Found across ethnicities/ancestries, with population-dependent prevalence; highest in Tunisia, Algeria, Morocco, and Finland [@genereviews:NBK1142].", @@ -1520,7 +1520,7 @@ "age_onset_max": 64.0, "typ_age_onset_min": 33.0, "typ_age_onset_max": 53.0, - "details": "Pathogenicity only associated with pathogenic motif >30 repeats, flanked by at least 58 repeats of reference motif on either side; reference repeat (AAAAT) can range from 1 to 400 repeats, although typically less than 30 [@genereviews:NBK541729]. 
The pathogenic motif is unstable, particularly when transmitted by the father [@genereviews:NBK541729].", + "details": "Pathogenicity only associated with pathogenic motif >30 repeats, flanked by at least 58 repeats of reference motif on either side; reference repeat (AAAAT) can range from 1 to 400 repeats, although typically less than 30 [@genereviews:NBK541729]. The pathogenic motif is unstable, particularly when transmitted by the father [@genereviews:NBK541729]. srWGS, exome sequencing, and RP-PCR cannot accurately detect this repeat [@genereviews:NBK541729]. Long range PCR combined with targeted sanger sequencing is the reliable method for detection and characterization [@genereviews:NBK541729].", "mechanism": "GoF", "mechanism_detail": "Toxic gain-of-function mechanism in protein, associated with alternative splicing, an RNA switch, and an upregulation of reelin-DAB1 signalling [@omim:615945; @pmid:30284037].", "year": "2017 [@pmid:28686858]", @@ -1596,7 +1596,7 @@ "age_onset_max": 3.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Repeat ranges reflect affected and unaffected individuals from a cohort study of 70 controls (6-23 repeats), unaffected carriers representing the intermediate alleles (139-206), and affected individuals (273-306) [@pmid:17236128]. It has been hypothesized that unmethylated expansions may correspond to movement-related phenotypes (chorea, dystonia, and ataxia) [@pmid:39854091].", + "details": "Repeat ranges reflect affected and unaffected individuals from a cohort study of 70 controls (6-23 repeats), unaffected carriers representing the intermediate alleles (139-206), and affected individuals (273-306) [@pmid:17236128]. It has been hypothesized that unmethylated expansions may correspond to movement-related phenotypes (chorea, dystonia, and ataxia) [@pmid:39854091]. srWGS has underestimated expansion size in this locus. 
RP-PCR and Southern blotting are used to detect expansions [@pmid:17236128], while long read sequencing can accurately size them [@pmid:39854091].", "mechanism": "LoF", "mechanism_detail": "Hypermethylation leading to decreased expression, although unmethylated expansion leads to increased expression [@omim:136630; @pmid:37248219].", "year": "2007 [@pmid:17236128]", @@ -1804,7 +1804,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, - "details": "Complex repeat of 18-20 nucleotides expands to cause disease: disease is found in individuals with 14-16 repeats [@pmid:24360810], while controls have typically 3-12 repeats with as low as 1 repeat [@genereviews:NBK535148; @gnomad:EIF4A3]. Significance of intermediate alleles is unknown [@pmid:29112243].", + "details": "Complex repeat of 18-20 nucleotides expands to cause disease: disease is found in individuals with 14-16 repeats [@pmid:24360810], while controls have typically 3-12 repeats with as low as 1 repeat [@genereviews:NBK535148; @gnomad:EIF4A3]. Significance of intermediate alleles is unknown [@pmid:29112243]. srWGS and exome sequencing do not reliably detect this expansion. Instead, targeted 5' UTR PCR + Sanger sequencing is the common detection methodology [@pmid:29112243; @pmid:24360810].", "mechanism": "LoF", "mechanism_detail": "LoF from a hypomorphic allele [@pmid:24360810].", "year": "2014 [@pmid:24360810]; syndrome described in 1992 [@pmid:1632438]", @@ -2210,7 +2210,7 @@ "age_onset_max": 70.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 34.0, - "details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282].
Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784]. One proband with ataxia and repeat size 11/112 had an asymptomatic father with 650 repeats and higher methylation [@pmid:41975469]", + "details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784]. One proband with ataxia and repeat size 11/112 had an asymptomatic father with 650 repeats and higher methylation [@pmid:41975469]. These expansions may not be reliably detected in srWGS or exome sequencing [@pmid:32413282]. Most repeats can be detected with RP-PCR, and long read sequencing accurately determines size and structure [@pmid:32413282].", "mechanism": "LoF/GoF?", "mechanism_detail": "Findings suggest that the mechanism is likely not LoF, but the mechanism is otherwise unknown [@pmid:41121761]. This expansion appears to be predominantly RAN translated into a toxic protein [@pmid:41121761]. This protein has been reported to impair cell proliferation, induce cytotoxicity and apoptosis in multiple cell lines, and caused phenotypic defects in a zebrafish model [@pmid:41121761].", "year": "2020 [@pmid:32413282]", @@ -2824,7 +2824,7 @@ "age_onset_max": 66.0, "typ_age_onset_min": 31.0, "typ_age_onset_max": 51.0, - "details": "Benign range (13-45) inferred from cohort data, but pathogenic range isn't yet fully understood [@genereviews:NBK535148]. 
In a cohort of 65 patients from 59 families, alleles ranged from 85-289 repeats, with an inverse relationship between size and age of onset [@pmid:34047774]. Inherited peripheral neuropathy (IPN) may be associated with shorter expansions [@pmid:39013564]. Interruptions seen: ACG, CCA [@pmid:35245110].", + "details": "Benign range (13-45) inferred from cohort data, but pathogenic range isn't yet fully understood [@genereviews:NBK535148]. In a cohort of 65 patients from 59 families, alleles ranged from 85-289 repeats, with an inverse relationship between size and age of onset [@pmid:34047774]. Inherited peripheral neuropathy (IPN) may be associated with shorter expansions [@pmid:39013564]. Interruptions seen: ACG, CCA [@pmid:35245110]. srWGS does not reliably detect large expansions in this locus [@pmid:40858832]. These repeats are most reliably detected using RP-PCR followed by long read sequencing [@pmid:39013564].", "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized [@omim:164310]; may involve RAN translation [@pmid:38467784]. Somatic mosicism and hypermethylation have also been reported [@pmid:41131788].", "year": "2019 [@pmid:31332380]", @@ -2890,7 +2890,7 @@ "age_onset_max": 50.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Healthy controls do not have pathogenic allele (TTTCA), but do have 9-20 benign motifs (TTTTA) [@genereviews:NBK535148]. Total allele size in probands spanned from 650-1035 repeats; an inverse relationship between allele size and age of onset was noted [@pmid:31664039, @pmid:40788430]. In one study it was proposed that pathogenicity only occurs when TTTCA is expanded [@pmid: 40788430].", + "details": "Healthy controls do not have pathogenic allele (TTTCA), but do have 9-20 benign motifs (TTTTA) [@genereviews:NBK535148]. Total allele size in probands spanned from 650-1035 repeats; an inverse relationship between allele size and age of onset was noted [@pmid:31664039; @pmid:40788430].
In one study it was proposed that pathogenicity only occurs when TTTCA is expanded [@pmid:40788430]. RP-PCR can detect the pathogenic TTTCA insertion motif but does not adequately resolve complex TTTTA/TTTCA architecture [@pmid:41268177]. Long range PCR followed by long read sequencing is able to size and determine structure [@pmid:40200849].", "mechanism": "Unknown", "mechanism_detail": "Noted as unknown in literature [@omim:613608].", "year": "2019 [@pmid:31664039]", @@ -3032,7 +3032,7 @@ "age_onset_max": 70.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif.", + "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%).
The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif. srWGS, exome sequencing, and sanger sequencing do not reliably detect these mutations as they are mainly in the GC-rich MUC1 VNTR [@genereviews:NBK153723]. Instead, they are commonly detected using a VNTR assay [@genereviews:NBK153723], or resolved with long read sequencing [@pmid:29520014].", "mechanism": "GoF", "mechanism_detail": "Toxic protein product accumulates in kidneys [@genereviews:NBK153723]", "year": "2013 [@pmid:23396133]", @@ -3372,7 +3372,7 @@ "age_onset_max": 40.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Benign range (3-16 repeats) established in 1000 controls, studied alongside pathogenic probands of up to 700 repeats [@pmid:31332380]. Pathogenicity occurs at repeats as short as 161 motifs [@pmid:38159879; @pmid:37923380], while intermediate alleles may correlate to milder phenotypes [@pmid:38159879]. Alt transcript in opposite direction: LOC642361.", + "details": "Benign range (3-16 repeats) established in 1000 controls, studied alongside pathogenic probands of up to 700 repeats [@pmid:31332380]. Pathogenicity occurs at repeats as short as 161 motifs [@pmid:38159879; @pmid:37923380], while intermediate alleles may correlate to milder phenotypes [@pmid:38159879]. Alt transcript in opposite direction: LOC642361. 
RP-PCR is effective at detecting these expansions [@pmid:39308795], while long read sequencing is used to resolve size and structure of repeats [@pmid:38159879]. ", "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized, overall mechanism unknown [@omim:618637; @pmid:36169768].", "year": "2019 [@pmid:31332380]", @@ -3438,7 +3438,7 @@ "age_onset_max": 79.0, "typ_age_onset_min": 40.0, "typ_age_onset_max": 59.0, - "details": "Disease is caused by a GCN polyalanine expansion in the first exon of PABPN1. Most known patients have (GCG)+, but GCN (any polyalanine) may be pathogenic [@genereviews:NBK1126]. This locus acts in a dominant manner for allele sizes ≥ 12 GCN motifs (90% of cases) and in a recessive manner for 11 GCN motifs, i.e. the genotype (GCN)11(GCN)11 (10% of cases). Additionally, disease is known to be more severe in cases of two expanded alleles. Age of onset is inverse to allele size, while penetrance and severity increase with allele size [@genereviews:NBK1126]. Mild, late-onset disease can occur in individuals with a (GCN)10(GCN)11 genotype, suggesting variable penetrance [@pmid:28011929]. The definition of this locus differs in the literature with prior work counting exact GCG motifs for a benign size of (GCG)6 [@pmid:9462747], while later resources count GCNs (any alanine codon), widening the region by 4 motifs to a benign size of (GCN)10 [@genereviews:NBK1126; @pmid:39349043]. STRchive is using the GCN definition.", + "details": "Disease is caused by a GCN polyalanine expansion in the first exon of PABPN1. Most known patients have (GCG)+, but GCN (any polyalanine) may be pathogenic [@genereviews:NBK1126]. This locus acts in a dominant manner for allele sizes ≥ 12 GCN motifs (90% of cases) and in a recessive manner for 11 GCN motifs, i.e. the genotype (GCN)11(GCN)11 (10% of cases). Additionally, disease is known to be more severe in cases of two expanded alleles. 
Age of onset is inverse to allele size, while penetrance and severity increase with allele size [@genereviews:NBK1126]. Mild, late-onset disease can occur in individuals with a (GCN)10(GCN)11 genotype, suggesting variable penetrance [@pmid:28011929]. The definition of this locus differs in the literature with prior work counting exact GCG motifs for a benign size of (GCG)6 [@pmid:9462747], while later resources count GCNs (any alanine codon), widening the region by 4 motifs to a benign size of (GCN)10 [@genereviews:NBK1126; @pmid:39349043]. STRchive is using the GCN definition. Flanking PCR with fragment analysis accurately detects this expansion [@pmid:27980005]. In heterozygous individuals, repeats are usually sized with Sanger sequencing. When there are biallelic expanded variants, NGS or fragment analysis should be used instead.", "mechanism": "GoF/LoF", "mechanism_detail": "Polyalanine expansions leading to cellular toxicity (loss of function) as well as abnormal aggregation and inefficient protein degradation, which may impact mRNA processing [@genereviews:NBK1126].", "year": "1998 [@pmid:9462747]", @@ -3504,7 +3504,7 @@ "age_onset_max": 36.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 2.0, - "details": "Alleles of 24 repeats (and sometimes 25 repeats) correspond to delayed disease onset and/or milder phenotype; alleles above benign range (9-20 repeats) and below the pathogenic range (26-33 repeats) have uncertain significance [@genereviews:NBK1427].", + "details": "Alleles of 24 repeats (and sometimes 25 repeats) correspond to delayed disease onset and/or milder phenotype; alleles above benign range (9-20 repeats) and below the pathogenic range (26-33 repeats) have uncertain significance [@genereviews:NBK1427]. These expansions may not be reliably detected in srWGS or exome sequencing. Fragment analysis is the standard detection method, while Sanger sequencing can determine the exact GCN repeat size. 
[@genereviews:NBK1427]", "mechanism": "LoF/GoF", "mechanism_detail": "Polyalanine expansion leading to loss or gain of function, dependent on altered protein product [@pmid:38467784; @genereviews:NBK1427]. Correlation between length and reduced transcriptional activity [@pmid:15888479].", "year": "2003 [@pmid:12640453]", @@ -3570,7 +3570,7 @@ "age_onset_max": 66.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "This is an expanded variable number tandem repeat (VNTR) in the PLIN4 gene, located in exon 3. This repeat consists of a 99 bp motif which encodes 33 amino-acids within the perilipin-4 protein [@pmid:32451610]. Expansions of this 99 bp motif leads to insertion of multiple imperfect 33–amino acid repeats. These repetitive sequences are thought to contribute to abnormal protein aggregation and dysregulated autophagy seen in affected muscle tissue [@omim:601846].", + "details": "This is an expanded variable number tandem repeat (VNTR) in the PLIN4 gene, located in exon 3. This repeat consists of a 99 bp motif which encodes 33 amino-acids within the perilipin-4 protein [@pmid:32451610]. Expansions of this 99 bp motif lead to insertion of multiple imperfect 33–amino acid repeats. These repetitive sequences are thought to contribute to abnormal protein aggregation and dysregulated autophagy seen in affected muscle tissue [@omim:601846]. srWGS and exome sequencing do not reliably detect this repeat, so long range PCR is used for detection while long read sequencing is needed to fully resolve size and structure [@pmid:32451610; @pmid:33811808].", "mechanism": "GoF", "mechanism_detail": "The present disease is characterized by dominantly inherited progressively increasing mobilization of aggrephagy at sites of progressive accumulation of a mutated protein, suggesting that the mutation is leading to aggregation, likely through misfolding, exceeding aggrephagic capacity. 
[@pmid:32451610]", "year": "2020 [@pmid:32451610]", @@ -3717,7 +3717,7 @@ "age_onset_max": 62.0, "typ_age_onset_min": 26.0, "typ_age_onset_max": 50.0, - "details": "Benign range is 6-32 repeats, intermediate range 40-49, and pathogenic range is 51-78 [@pmid:37906407]; intermediate alleles are associated with reduced penetrance [@pmid:11198281].", + "details": "Benign range is 6-32 repeats, intermediate range 40-49, and pathogenic range is 51-78 [@pmid:37906407]; intermediate alleles are associated with reduced penetrance [@pmid:11198281]. In this locus, RP-PCR generally detects expansions while PCR with fragment analysis approximates allele size. Large expansions may require confirmation with Southern blot [@pmid:35262663; @pmid:10581021].", "mechanism": "GoF", "mechanism_detail": "Polyalanine gain of function associated with RAN translation [@pmid:38467784].", "year": "1999 [@pmid:10581021]", From 89bfe4c024d25b30c18ce8238da9b1f6914144b1 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Fri, 5 Jun 2026 13:21:12 -0600 Subject: [PATCH 09/29] MODY8_CEL edits adding single base deletions to MODY_CEL Add pancreatitis presentation delete lipase disruption theory and add new details on protein misfolding for MODY8_CEL adding additional citation triangulating 3 mechanism papers --- data/STRchive-loci.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index fee57424..89ff8032 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1156,7 +1156,7 @@ "disease": "Maturity-Onset Diabetes of the Young Type 8", "inheritance": ["AD"], "association_type": ["Mendelian"], - "disease_description": "Maturity-onset diabetes of the young type 8 (MODY8) is characterized by onset of diabetes before age 25 years, with slowly progressive pancreatic exocrine dysfunction, fatty replacement of pancreatic parenchyma (lipomatosis), and development of pancreatic cysts [@omim:609812]. 
Other types of this disease have been associated with various genes and variant types. Comorbidity has been proposed between MODY and fecal elastase deficiency (FED).", + "disease_description": "Maturity-onset diabetes of the young type 8 (MODY8) is characterized by onset of diabetes before age 25 years, with slowly progressive pancreatic exocrine dysfunction, fatty replacement of pancreatic parenchyma (lipomatosis), and development of pancreatic cysts [@omim:609812]. Other types of this disease have been associated with various genes and variant types. In some CEL VNTR deletion carriers, chronic pancreatitis may precede diabetes, and one reported family had hereditary pancreatitis as the predominant phenotype [@pmid:34850019]. Comorbidity has been proposed between MODY and fecal elastase deficiency (FED).", "hpo_terms": null, "prevalence": null, "prevalence_details": "Found in individuals of Danish and Norwegian ancestry [@pmid:16369531; @pmid:19760265].", @@ -1165,9 +1165,9 @@ "age_onset_max": 17.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "The locus contains 17 imperfect 33 bp motifs, with a stretch of 7 perfect GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG motifs. Several pathogenic mutations have been proposed. A single basepair deletion causing a frameshift mutation [@pmid:16369531; @pmid:19760265]. Another is a 1bp deletion of (C)8 to (C)7 within the VNTR, causing a motif change (this is the pathogenic motif represented here). Also, a contraction that deletes one of the VNTR repeats may be pathogenic, with reduced penetrance, although evidence for this is sparse [@pmid:19760265]. Another study identified a c.2041_2042delinsCGG p.(Val681Argfs*6) mutation in the 12th motif (one of the imperfect motifs) [@pmid:39361122]. Several non-tandem repeat pathogenic MODY variants have also been reported in this gene. 
Given limited data and multiple proposed pathogenic variants, the normal and pathogenic ranges are currently difficult to define.", + "details": "The locus contains 17 imperfect 33 bp motifs, with a stretch of 7 perfect GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG motifs. Several pathogenic mutations have been proposed. The most supported pathogenic variants are single base deletions in the proximal VNTR, reported in repeat segments 1, 4, and 5 [@pmid:34850019]. One reported proximal VNTR deletion is a 1bp deletion of (C)8 to (C)7 within the VNTR, causing a motif change (this is the pathogenic motif represented here). Distal CEL VNTR single-base insertions, particularly INS9/INS10/INS12, have been reported as likely benign polymorphisms, while proximal insertion variants may have greater pathogenic potential [@pmid:38483348]. Also, a contraction that deletes one of the VNTR repeats may be pathogenic, with reduced penetrance, although evidence for this is sparse [@pmid:19760265]. Another study identified a c.2041_2042delinsCGG p.(Val681Argfs*6) mutation in the 12th motif (one of the imperfect motifs) [@pmid:39361122]. Several non-tandem repeat pathogenic MODY variants have also been reported in this gene. Given limited data and multiple proposed pathogenic variants, the normal and pathogenic ranges are currently difficult to define.", "mechanism": null, - "mechanism_detail": "Loss of function at the protein level may be part of the molecular mechanism. Research suggests that the mutations disrupt the C-terminal protein, leading to reduced stability of the mutant lipase in vitro [@pmid:16369531].", + "mechanism_detail": "Proximal CEL VNTR frameshift variants alter the C-terminal tandem-repeat domain and become pathogenic through protein misfolding and proteotoxic gain-of-function. 
Pathogenic proximal deletion variants show increased aggregation, reduced secretion, ER stress, and UPR activation, while enzymatic activity is largely preserved [@pmid:21784842; @pmid:27650499; @pmid:33862081]. Functional testing of CEL VNTR insertion variants showed that proximal insertions had greater aggregation and UPR effects [@pmid:38483348].", "year": "2005", "location_in_gene": "Exon 11", "gene_strand": "+", From 9ef64f80283a11463d8a08cd527385fc0bf65f71 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Mon, 8 Jun 2026 11:41:18 -0600 Subject: [PATCH 10/29] C9orf72 edits --- data/STRchive-loci.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 89ff8032..c4b01fd2 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -967,9 +967,9 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. 
Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated neurofilament light chain levels and reduced thalamic volume, consistent with findings observed across other loci [@pmid:41951733].These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate sizing [@pmid:23566336], while long-read sequencing can provide more direct sizing and sequence characterization [@pmid:30126445]. ", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. 
Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated NfL levels and reduced thalamic volume, also observed across other loci [@pmid:41951733]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", "mechanism": "Ambiguous", - "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]", + "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). 
The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]. C9orf72 repeat expansions are associated with reduced C9orf72 expression in multiple ALS tissues and altered splicing of the exon 1a isoform [@pmid:42145639]. Reduced C9orf72 expression has also been observed in peripheral blood immune cells from C9orf72-associated ALS, with C9-ALS showing distinct monocyte activation signatures. In ALS spinal cord, activated myeloid cells expressing complement, lipid-processing, and phagocytic genes occur in regions with motor neuron loss and TDP-43 pathology [@pmid:42135512].", "year": "2011 [@pmid:21944778]", "location_in_gene": "Intron 1 or 5' UTR depending on transcript", "gene_strand": "-", From 2ee5ad2dc01ae2474b74017dc2cf9a150de32ab0 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 9 Jun 2026 13:45:48 -0600 Subject: [PATCH 11/29] SCA27B_FGF14 updates --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index c4b01fd2..c40cda93 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1930,7 +1930,7 @@ "disease_description": "Late-onset ataxia, may have episodic onset, downbeat nystagmus, vertigo, dysarthria, visual disturbances, and neuropathy [@pmid:39349043; @pmid:42044943]. 
Involvement of the superior cerebellar peduncles is frequent and may aid in diagnostic efforts [@pmid:39996128].", "hpo_terms": null, "prevalence": null, - "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, Italy, and Poland [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898; @pmid:42096001].", + "prevalence_details": "Intermediate expansions 1-2% of population, but non-GAA-pure without relation to ataxia [@genereviews:NBK599589]. Found in multiple ethnicities [@pmid:38876750]; diagnosed patients in America, Brazil, Japan, Germany, Spain, Canada, France, Austria, Australia, Italy, and Poland [@genereviews:NBK599589; @pmid:38886208; @pmid:37267898; @pmid:42096001]. Prevalence is population dependent, ranging from 1.83 to 61% of different ataxia cohorts, with specific enrichment in French-Canadian populations [@pmid:36516086; @pmid:42090775].", "age_onset": "Typical: 42-70; Range: 21-87 [@genereviews:NBK599589; @pmid:39263992].", "age_onset_min": 21.0, "age_onset_max": 87.0, From 29131b117c302e09a2ddb48b65cf5b0641d6e8f5 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 9 Jun 2026 17:02:16 -0600 Subject: [PATCH 12/29] Triangulating C9orf72 NEFL info --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index c40cda93..cfdae564 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -967,7 +967,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. 
The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. Undiagnosed C9orf72 repeat expansion carriers exhibit elevated NfL levels and reduced thalamic volume, also observed across other loci [@pmid:41951733]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. 
Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. 
Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]. C9orf72 repeat expansions are associated with reduced C9orf72 expression in multiple ALS tissues and altered splicing of the exon 1a isoform [@pmid:42145639]. Reduced C9orf72 expression has also been observed in peripheral blood immune cells from C9orf72-associated ALS, with C9-ALS showing distinct monocyte activation signatures. 
In ALS spinal cord, activated myeloid cells expressing complement, lipid-processing, and phagocytic genes occur in regions with motor neuron loss and TDP-43 pathology [@pmid:42135512].", "year": "2011 [@pmid:21944778]", From ea3a8fe93fa41277561c0476dc57d7b215c52230 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 9 Jun 2026 18:00:08 -0600 Subject: [PATCH 13/29] adding TCTG motif info --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index cfdae564..7f846769 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1236,7 +1236,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 28.0, "typ_age_onset_max": 56.0, - "details": "Detailed overview of disease locus through 2024 by Rimoldi et al [@pmid:39643839]. ≤30 uninterrupted CCTG repeats or 11-26 CCTG repeats with GCTC/TCTG interruptions are considered benign; 27-29 repeats with interruptions have currently unknown significance, ~30-~54 repeats are considered premutations, ~55-74 repeats are premutations with possible reduced penetrance, and >74 repeat alleles are considered pathogenic [@genereviews:NBK1466]. Penetrance is age-dependent and approaches 100%. Locus structure is (TG)n(TCTG)n(CCTG)n. CCTG expansion causes DM2 but the other repeat units are also variable. Interruptions include GCTG/TCTG/GGCT [@pmid:35245110]. The effect of the (TCTG)n repeat remains to be determined, but it is potentially common in the repeat structure of this locus [@pmid:39703464]. Because these expansions can be very large and complex, bidirectional RP-PCR is used to detect expansions [geneReviews:NBK1466], while Southern blot can estimate size [@pmid:34234810]. Long read sequencing is used to resolve size and internal structure [@pmid:36018009].", + "details": "Detailed overview of disease locus through 2024 by Rimoldi et al [@pmid:39643839]. 
≤30 uninterrupted CCTG repeats or 11-26 CCTG repeats with GCTC/TCTG interruptions are considered benign; 27-29 repeats with interruptions have currently unknown significance, ~30-~54 repeats are considered premutations, ~55-74 repeats are premutations with possible reduced penetrance, and >74 repeat alleles are considered pathogenic [@genereviews:NBK1466]. Penetrance is age-dependent and approaches 100%. Locus structure is (TG)n(TCTG)n(CCTG)n. CCTG expansion causes DM2 but the other repeat units are also variable. Interruptions include GCTG/TCTG/GGCT [@pmid:35245110]. Many DM2 expansions include a downstream 3' (TCTG)n block after the main array. One cohort found this structure in 88% of DM2 patients [@pmid:39703464]. Bidirectional RP-PCR and Southern blotting are used for detection [@genereviews:NBK1466]. A downstream 3′ (TCTG)n block can cause false negative or unclear standard 3′ CCTG-primed results, so TCTG targeted QP-PCR or long read sequencing can resolve these cases [@pmid:36018009; @pmid:41937177].", "mechanism": "GoF", "mechanism_detail": "Aberrant splicing, RAN translation [@pmid:22140091; @pmid:38467784]. Proposed pathogenisis contributions include nucleolar stress, autophagy dysregulation, and stress granule formation [@pmid:42003432].", "year": "2001 [@pmid:11486088]", From 91ed00f14c712fad533dc54ed16a22d1dc6a8d11 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Tue, 9 Jun 2026 21:03:01 -0600 Subject: [PATCH 14/29] FGF14 presentation --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 7f846769..3fc1dd44 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1936,7 +1936,7 @@ "age_onset_max": 87.0, "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, - "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. 
The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. Finally, a complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. These expansions are not reliably detected by short read genome or exome sequencing [geneReviews:NBK599589]. long-range PCR and bidirectional RP-PCR are used for detecting expansions [geneReviews:NBK599589; @pmid:36516086]. Long read sequencing is used to determine repeat structure and purity [geneReviews:NBK599589; @pmid:36516086].", + "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. 
The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. A complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. Expansions can sometimes present as apparently sporadic adult-onset ataxia despite autosomal dominant inheritance [@pmid:42204984]. These expansions are not reliably detected by short read genome or exome sequencing [@genereviews:NBK599589]. Long-range PCR and bidirectional RP-PCR are used for detecting expansions [@genereviews:NBK599589; @pmid:36516086]. 
Long read sequencing is used to determine repeat structure and purity [@genereviews:NBK599589; @pmid:36516086].", "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", From 096e0c729edbf59179cc4cd159c6fc22dec62d18 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 10 Jun 2026 10:32:16 -0600 Subject: [PATCH 15/29] Update mechanism for DM1_DMPK --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 3fc1dd44..30049612 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1740,7 +1740,7 @@ "typ_age_onset_max": 30.0, "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849]. Flanking PCR detects alleles up to ~150 repeats while RP-PCR may detect missed alleles [@genereviews:NBK1165; @pmid:24795756]. Southern blotting can approximate the size of large expansions [@pmid:22643181] while long read sequencing resolves repeat size and structure [@pmid:41974889].", "mechanism": "GoF", - "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded-repeat RNA sequesters the muscleblind-like (MBNL) family of RNA-binding proteins as part of the disruption of pre-mRNA processing, contributing to cardiac phenotypes [@pmid:39932794]. 
Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", + "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded DMPK r(CUG)n RNA forms a hairpin containing periodic 1*1 U/U internal loops that engage/sequester MBNL family RNA-binding proteins, especially MBNL1 [@pmid:42182465], disrupting pre-mRNA processing and contributing to cardiac phenotypes [@pmid:39932794]. Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", "year": "1992 [@pmid:1310900]", "location_in_gene": "3' UTR", "gene_strand": "-", From b34e9e24e35e431a8193520f8c7d6c0e57795841 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 10 Jun 2026 11:10:16 -0600 Subject: [PATCH 16/29] Expanding age of onset --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 30049612..21adae36 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -3712,7 +3712,7 @@ "hpo_terms": null, "prevalence": null, "prevalence_details": "Frequent in India; rare in other populations [@pmid:34711523].", - "age_onset": "Typical: 26-50; Range: 8-56 [@omim:604326].", + "age_onset": "Typical: 26-50; Range: 8-62 [@omim:604326; @pmid:42105155].", "age_onset_min": 8.0, "age_onset_max": 62.0, "typ_age_onset_min": 26.0, From 87f07cf89114c8f69f141f46309aea26b514dc64 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 10 Jun 2026 12:52:02 -0600 Subject: [PATCH 17/29] adding 
missing references --- data/STRchive-loci.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 21adae36..482f831c 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1427,7 +1427,7 @@ "webstr_hg19": [], "locus_tags": [], "disease_tags": ["epilepsy"], - "references": [], + "references": ["pmid:40751262", "pmid 39107278"], "additional_literature": [] }, { @@ -1777,7 +1777,7 @@ "webstr_hg19": ["Expansion_DM1/DMPK"], "locus_tags": ["somatic_instability", "anticipation", "maternal_expansion", "length_affects_onset", "length_affects_phenotype", "length_affects_severity", "motif_affects_instability", "motif_affects_onset", "motif_affects_phenotype", "motif_affects_severity"], "disease_tags": ["myotonic_dystrophy"], - "references": ["genereviews:NBK1165", "pmid:38454488", "pmid:36169768", "pmid:39932794", "pmid:40259070", "pmid:39643839", "pmid:32851192", "pmid:39710066", "pmid:35741732", "pmid:39679849", "pmid:29100084", "pmid:31159885", "pmid:35483324", "pmid:1310900", "mondo:0008056", "pmid:29361396", "pmid:8810716", "pmid:27695335", "pmid:29871899", "pmid:37209486"], + "references": ["genereviews:NBK1165", "pmid:38454488", "pmid:36169768", "pmid:39932794", "pmid:40259070", "pmid:39643839", "pmid:32851192", "pmid:39710066", "pmid:35741732", "pmid:39679849", "pmid:29100084", "pmid:31159885", "pmid:35483324", "pmid:1310900", "mondo:0008056", "pmid:29361396", "pmid:8810716", "pmid:27695335", "pmid:29871899", "pmid:37209486", "pmid:41929128"], "additional_literature": ["pmid:41996006", "pmid:41974889", "pmid:41951733", "pmid:41946260", "pmid:41855125", "pmid:41848171", "pmid:41766784", "pmid:41762523", "pmid:41722569", "pmid:41710065", "pmid:41707138", "pmid:41672630", "pmid:41610137", "pmid:41533635", "pmid:41379996", "pmid:41260620", "pmid:41250834", "pmid:41226829", "pmid:41212113", "pmid:41161721", "pmid:41074692", "pmid:40903903", "pmid:40896579", "pmid:40879030", 
"pmid:40712995", "pmid:40606545", "pmid:40599975", "pmid:40417743", "pmid:40296143", "pmid:40113266", "pmid:40092662", "pmid:40004498", "pmid:39710066", "pmid:39679849", "pmid:39492694", "pmid:39433769", "pmid:39415708", "pmid:39391712", "pmid:39383229", "pmid:39278936", "pmid:39267217", "pmid:39273681", "pmid:39232665", "pmid:39180495", "pmid:39126705", "pmid:38709060", "pmid:38704930", "pmid:38490135", "pmid:38314057", "pmid:37829280", "pmid:37744174", "pmid:37645891", "pmid:37638448", "pmid:37521782", "pmid:37397246", "pmid:37373276", "pmid:37352653", "pmid:37200862", "pmid:37146135", "pmid:37143315", "pmid:36892629", "pmid:36778282", "pmid:36701310", "pmid:36627397", "pmid:36352383", "pmid:36230978", "pmid:36222125", "pmid:36099027", "pmid:36084803", "pmid:36011377", "pmid:35770133", "pmid:35767654", "pmid:35567413", "pmid:35328504", "pmid:35243403", "pmid:35182509", "pmid:34976437", "pmid:34915310", "pmid:34513303", "pmid:34472530", "pmid:34432028", "pmid:34386887", "pmid:34372915", "pmid:34371182", "pmid:34262431", "pmid:34114350", "pmid:34025359", "pmid:33682722", "pmid:33624941", "pmid:33575482", "pmid:33526774", "pmid:33497365", "pmid:33363709", "pmid:33362853", "pmid:33235377", "pmid:32929188", "pmid:32823742", "pmid:32717741", "pmid:32656337", "pmid:32607474", "pmid:32350131", "pmid:32203199", "pmid:32109384", "pmid:32063450", "pmid:31996899", "pmid:31873063", "pmid:31759551", "pmid:31649961", "pmid:31624084", "pmid:31570586", "pmid:31395669", "pmid:31334355", "pmid:31316546", "pmid:31253581", "pmid:31227653", "pmid:31220271", "pmid:31164682", "pmid:31027145", "pmid:30891637", "pmid:30700578", "pmid:30615214", "pmid:30546383", "pmid:30425655", "pmid:30304901", "pmid:30216892", "pmid:30140252", "pmid:29967337", "pmid:29947794", "pmid:29592894", "pmid:29551391", "pmid:29381654", "pmid:29334465", "pmid:29274549", "pmid:29246312", "pmid:29114849", "pmid:28942489", "pmid:28886202", "pmid:28810563", "pmid:28782311", "pmid:28623239", "pmid:28435090", 
"pmid:28363916", "pmid:28211918", "pmid:28129118", "pmid:28102759", "pmid:27854230", "pmid:27727437", "pmid:27358583", "pmid:27245480", "pmid:27222292", "pmid:26708183", "pmid:26640575", "pmid:26586700", "pmid:26498872", "pmid:26190529", "pmid:25958258", "pmid:25712547", "pmid:25655594", "pmid:25606394", "pmid:25307018", "pmid:25303993", "pmid:25168381", "pmid:24824895", "pmid:24795756", "pmid:24781112", "pmid:24715907", "pmid:24705798", "pmid:24455202", "pmid:24269018", "pmid:24196578", "pmid:24092878", "pmid:23811192", "pmid:23570879", "pmid:23308382", "pmid:26317000", "pmid:23263591", "pmid:23209425", "pmid:23183533", "pmid:23161457", "pmid:23159592", "pmid:23139243", "pmid:22643181", "pmid:22595968", "pmid:22459146", "pmid:22427994", "pmid:22078098", "pmid:22062891", "pmid:21971425", "pmid:21949239", "pmid:21511730", "pmid:21303839", "pmid:21245981", "pmid:21204798", "pmid:21103235", "pmid:20801043", "pmid:20635151", "pmid:20603324", "pmid:20346670", "pmid:20228473", "pmid:20179953", "pmid:20171614", "pmid:20074967", "pmid:19946639", "pmid:19715468", "pmid:19632331", "pmid:19516957", "pmid:19470458", "pmid:18798829", "pmid:18729234", "pmid:18611984", "pmid:18563724", "pmid:18561181", "pmid:18559347", "pmid:18299519", "pmid:18228241", "pmid:18213375", "pmid:17987120", "pmid:17950578", "pmid:17877752", "pmid:17728322", "pmid:17487865", "pmid:17158949", "pmid:17150182", "pmid:17145685", "pmid:17114933", "pmid:16978612", "pmid:16927100", "pmid:16716318", "pmid:16624843", "pmid:16401743", "pmid:16376058", "pmid:16193250", "pmid:16027111", "pmid:15972723", "pmid:15961406", "pmid:15750273", "pmid:15684391", "pmid:15576360", "pmid:15489504", "pmid:15462191", "pmid:15459182", "pmid:15336691", "pmid:15215218", "pmid:15114529", "pmid:15019706", "pmid:14734627", "pmid:14597103", "pmid:12970845", "pmid:12630069", "pmid:12614928", "pmid:12427866", "pmid:11978764", "pmid:11809728", "pmid:11793472", "pmid:11726559", "pmid:11686919", "pmid:11592825", "pmid:11590133", 
"pmid:11555624", "pmid:11526199", "pmid:11260612", "pmid:11124939", "pmid:11013451", "pmid:11001736", "pmid:10970838", "pmid:10958655", "pmid:10951446", "pmid:10909850", "pmid:10802668", "pmid:10802667", "pmid:10767343", "pmid:10699184", "pmid:10668800", "pmid:10480373", "pmid:10454725", "pmid:10435210", "pmid:10332037", "pmid:10332033", "pmid:9950368", "pmid:9887331", "pmid:9858828", "pmid:9668171", "pmid:9537423", "pmid:9402536", "pmid:9401353", "pmid:9371827", "pmid:9294109", "pmid:9241283", "pmid:9241282", "pmid:9207101", "pmid:8948631", "pmid:8923304", "pmid:8673131", "pmid:8659513", "pmid:8784809", "pmid:8595416", "pmid:7626046", "pmid:7590731", "pmid:7726160", "pmid:7896884", "pmid:8288237"] }, { @@ -2041,7 +2041,7 @@ "webstr_hg19": ["Expansion_FXS/FMR1"], "locus_tags": ["somatic_instability", "anticipation", "maternal_expansion", "length_affects_onset", "length_affects_penetrance", "length_affects_phenotype", "length_affects_severity", "motif_affects_instability"], "disease_tags": ["phenotypic_spectrum", "ataxia"], - "references": ["genereviews:NBK1384", "url:https://www.uptodate.com/contents/fragile-x-syndrome-clinical-features-and-diagnosis-in-children-and-adolescents", "pmid:17427188", "isbn:978-3-031-66932-3", "pmid:16205714", "pmid:36169768", "pmid:41507195", "pmid:32463542", "pmid:29868108", "pmid:41555826", "pmid:7987398", "pmid:24700618", "pmid:29100084", "pmid:39320553", "pmid:1605194", "pmid:1710175", "mondo:0010383", "mondo:0010706", "mondo:0010382"], + "references": ["genereviews:NBK1384", "url:https://www.uptodate.com/contents/fragile-x-syndrome-clinical-features-and-diagnosis-in-children-and-adolescents", "pmid:17427188", "isbn:978-3-031-66932-3", "pmid:16205714", "pmid:36169768", "pmid:41507195", "pmid:32463542", "pmid:29868108", "pmid:41555826", "pmid:7987398", "pmid:24700618", "pmid:29100084", "pmid:39320553", "pmid:1605194", "pmid:1710175", "mondo:0010383", "mondo:0010706", "mondo:0010382", "pmid:41929128", "pmid:29100084", 
"pmid:12805114", "pmid:35245110", "pmid:35053321"], "additional_literature": ["pmid:42041789", "pmid:42001465", "pmid:41952192", "pmid:41929501", "pmid:41917775", "pmid:41806827", "pmid:41792844", "pmid:41777701", "pmid:41762523", "pmid:41717020", "pmid:41672630", "pmid:41648852", "pmid:41557506", "pmid:41523206", "pmid:41514368", "pmid:41409170", "pmid:41386846", "pmid:41385812", "pmid:41372183", "pmid:41351347", "pmid:41278766", "pmid:41256123", "pmid:41167304", "pmid:41145158", "pmid:41120736", "pmid:41098569", "pmid:41074692", "pmid:41028987", "pmid:41015363", "pmid:40980401", "pmid:40940631", "pmid:40877251", "pmid:40869951", "pmid:40879637", "pmid:40778130", "pmid:40653294", "pmid:40600017", "pmid:40534679", "pmid:40488180", "pmid:40480633", "pmid:40459253", "pmid:40455869", "pmid:40418066", "pmid:40417743", "pmid:40296143", "pmid:40287634", "pmid:40244008", "pmid:40243429", "pmid:40243408", "pmid:40220918", "pmid:40166285", "pmid:40149430", "pmid:40141467", "pmid:40141297", "pmid:39945490", "pmid:39934227", "pmid:39839505", "pmid:39684429", "pmid:39654947", "pmid:39588919", "pmid:39574643", "pmid:39553953", "pmid:39492694", "pmid:39482338", "pmid:39488698", "pmid:39095619", "pmid:38997701", "pmid:38961870", "pmid:38946987", "pmid:38865241", "pmid:38772058", "pmid:38714961", "pmid:38522837", "pmid:38412259", "pmid:38307002", "pmid:38164622", "pmid:38162443", "pmid:38134876", "pmid:37970883", "pmid:37936174", "pmid:37906407", "pmid:37776526", "pmid:37745859", "pmid:37628570", "pmid:37583466", "pmid:37551886", "pmid:37551173", "pmid:37508562", "pmid:37364131", "pmid:37352983", "pmid:37347418", "pmid:37333274", "pmid:37209683", "pmid:37200782", "pmid:37146135", "pmid:37120588", "pmid:36882476", "pmid:36816716", "pmid:36250920", "pmid:36227727", "pmid:36012355", "pmid:35977823", "pmid:35948990", "pmid:35904811", "pmid:35729184", "pmid:35701103", "pmid:35681093", "pmid:35609145", "pmid:35182509", "pmid:35152460", "pmid:35129870", "pmid:35101584", "pmid:35072235", 
"pmid:35038595", "pmid:35026985", "pmid:34938155", "pmid:34926684", "pmid:34924936", "pmid:34880790", "pmid:34845661", "pmid:34828275", "pmid:34738199", "pmid:34690787", "pmid:34679478", "pmid:34646309", "pmid:34641814", "pmid:34641644", "pmid:34542254", "pmid:34456771", "pmid:34421690", "pmid:34372915", "pmid:34358321", "pmid:34321326", "pmid:34296199", "pmid:34276797", "pmid:34193467", "pmid:34153466", "pmid:34117786", "pmid:34111553", "pmid:34077515", "pmid:34054431", "pmid:34046842", "pmid:33998336", "pmid:33856019", "pmid:33854084", "pmid:33772546", "pmid:33709078", "pmid:33692361", "pmid:33642901", "pmid:33627639", "pmid:33585555", "pmid:33523882", "pmid:33497798", "pmid:33381520", "pmid:33374331", "pmid:33296661", "pmid:33195422", "pmid:33181255", "pmid:33151065", "pmid:33039683", "pmid:33008014", "pmid:33007370", "pmid:32787884", "pmid:32716213", "pmid:32695777", "pmid:32688058", "pmid:32589669", "pmid:32478017", "pmid:32446918", "pmid:32281281", "pmid:32258228", "pmid:32089525", "pmid:32066985", "pmid:32048109", "pmid:32012997", "pmid:31991700", "pmid:31989181", "pmid:31891607", "pmid:31887710", "pmid:31880363", "pmid:31866572", "pmid:31804632", "pmid:31733943", "pmid:31671347", "pmid:31665086", "pmid:31632248", "pmid:31586346", "pmid:31566610", "pmid:31512951", "pmid:31481131", "pmid:31468394", "pmid:31347257", "pmid:31336350", "pmid:31332380", "pmid:31299981", "pmid:31294106", "pmid:31264835", "pmid:31159589", "pmid:31096929", "pmid:31026518", "pmid:30984240", "pmid:30900173", "pmid:30847793", "pmid:30840878", "pmid:30832215", "pmid:30808398", "pmid:30665341", "pmid:30619448", "pmid:30606610", "pmid:30576349", "pmid:30567555", "pmid:30566867", "pmid:30538724", "pmid:30509972", "pmid:30396881", "pmid:30396281", "pmid:30312299", "pmid:30311737", "pmid:30211570", "pmid:30197656", "pmid:30173918", "pmid:30160796", "pmid:30158855", "pmid:30147707", "pmid:30030199", "pmid:29990673", "pmid:29981579", "pmid:29971092", "pmid:29880767", "pmid:29844802", 
"pmid:29766042", "pmid:29760651", "pmid:29379561", "pmid:29375310", "pmid:29316893", "pmid:29275276", "pmid:29267266", "pmid:29209628", "pmid:29188551", "pmid:28967713", "pmid:28895261", "pmid:28829283", "pmid:28815939", "pmid:28812997", "pmid:28697590", "pmid:28504725", "pmid:28454580", "pmid:28391068", "pmid:28369393", "pmid:28278294", "pmid:28233916", "pmid:28193118", "pmid:28173181", "pmid:28103472", "pmid:28065649", "pmid:28005950", "pmid:27883256", "pmid:27862088", "pmid:27841182", "pmid:27841172", "pmid:27840045", "pmid:27822316", "pmid:27816231", "pmid:27784894", "pmid:27646161", "pmid:27768763", "pmid:27761921", "pmid:27667322", "pmid:27616423", "pmid:27713816", "pmid:27708271", "pmid:27696642", "pmid:27696273", "pmid:27695335", "pmid:27540028", "pmid:27427765", "pmid:27375073", "pmid:27372099", "pmid:27355815", "pmid:27355445", "pmid:27335370", "pmid:27315125", "pmid:27294193", "pmid:27066582", "pmid:27042357", "pmid:27041225", "pmid:27001315", "pmid:26940792", "pmid:26825750", "pmid:26743003", "pmid:26716517", "pmid:26694146", "pmid:26554012", "pmid:26537920", "pmid:26463479", "pmid:26440889", "pmid:26420841", "pmid:26345686", "pmid:26322075", "pmid:26298472", "pmid:26194536", "pmid:26099177", "pmid:26029703", "pmid:25954027", "pmid:25953684", "pmid:25886163", "pmid:25875842", "pmid:25788698", "pmid:25776194", "pmid:25763861", "pmid:25726753", "pmid:25693964", "pmid:25689687", "pmid:25606365", "pmid:25436181", "pmid:25399540", "pmid:25366135", "pmid:25346430", "pmid:25290064", "pmid:25278957", "pmid:25250047", "pmid:25179629", "pmid:25171808", "pmid:25170346", "pmid:25147555", "pmid:25110527", "pmid:25093044", "pmid:25085749", "pmid:25013385", "pmid:24998620", "pmid:24963073", "pmid:24958193", "pmid:24938362", "pmid:24920338", "pmid:24912415", "pmid:24875778", "pmid:24858908", "pmid:24821701", "pmid:24816393", "pmid:24814676", "pmid:24787137", "pmid:24763282", "pmid:24743386", "pmid:24718368", "pmid:24715853", "pmid:24657592", "pmid:24654675", 
"pmid:24630283", "pmid:24591415", "pmid:24578575", "pmid:24463622", "pmid:24455203", "pmid:24448548", "pmid:24428240", "pmid:24424424", "pmid:24401315", "pmid:24352881", "pmid:24289922", "pmid:24261641", "pmid:24249225", "pmid:24177047", "pmid:24130133", "pmid:23949867", "pmid:23896050", "pmid:23792063", "pmid:23753897", "pmid:23731704", "pmid:23719910", "pmid:23683082", "pmid:23602499", "pmid:23574351", "pmid:23553633", "pmid:23497562", "pmid:23478018", "pmid:23464607", "pmid:23440729", "pmid:23250915", "pmid:23198693", "pmid:23148490", "pmid:23146966", "pmid:23015788", "pmid:22924671", "pmid:22918986", "pmid:22887750", "pmid:22796595", "pmid:22707411", "pmid:22612820", "pmid:22619118", "pmid:22581803", "pmid:22507827", "pmid:22498846", "pmid:22489017", "pmid:22466801", "pmid:22427040", "pmid:22393900", "pmid:22387066", "pmid:22311273", "pmid:22251309", "pmid:22241100", "pmid:22224633", "pmid:22223546", "pmid:22211843", "pmid:22177572", "pmid:22161987", "pmid:22149120", "pmid:22022567", "pmid:22004265", "pmid:21944929", "pmid:21808616", "pmid:21807882", "pmid:21775729", "pmid:21767618", "pmid:21720528", "pmid:21671049", "pmid:21646280", "pmid:21636656", "pmid:21617890", "pmid:21596781", "pmid:21572337", "pmid:21567456", "pmid:21499798", "pmid:21476992", "pmid:21445959", "pmid:21430544", "pmid:21389081", "pmid:21329465", "pmid:21270637", "pmid:21267007", "pmid:21254876", "pmid:21170301", "pmid:21051337", "pmid:20938029", "pmid:20935171", "pmid:20858229", "pmid:20801083", "pmid:20799337", "pmid:20736975", "pmid:20702130", "pmid:20616364", "pmid:20431035", "pmid:20425835", "pmid:20410144", "pmid:20364100", "pmid:20221430", "pmid:20213777", "pmid:20168238", "pmid:20118148", "pmid:20051238", "pmid:20011099", "pmid:20001115", "pmid:19927162", "pmid:19846466", "pmid:19804849", "pmid:19796183", "pmid:19778484", "pmid:19760650", "pmid:19710035", "pmid:19684044", "pmid:19562332", "pmid:19525339", "pmid:19514725", "pmid:19460939", "pmid:19460937", "pmid:19440516", 
"pmid:19404994", "pmid:19204162", "pmid:19097038", "pmid:19045956", "pmid:19026394", "pmid:19014369", "pmid:18565783", "pmid:18563710", "pmid:18553360", "pmid:18535897", "pmid:18472227", "pmid:18428348", "pmid:18413472", "pmid:18403614", "pmid:18384775", "pmid:18373410", "pmid:18357616", "pmid:18310361", "pmid:18281036", "pmid:18165971", "pmid:18165276", "pmid:18160412", "pmid:18057320", "pmid:17724287", "pmid:17698009", "pmid:17674408", "pmid:17635840", "pmid:17591512", "pmid:17516099", "pmid:17442505", "pmid:17322660", "pmid:17295053", "pmid:17279084", "pmid:17266074", "pmid:17152065", "pmid:17150213", "pmid:17089161", "pmid:17044853", "pmid:16780889", "pmid:16761284", "pmid:16361284", "pmid:16337617", "pmid:16258159", "pmid:16164596", "pmid:16047092", "pmid:15971024", "pmid:15947063", "pmid:15876460", "pmid:15811008", "pmid:15741991", "pmid:15659577", "pmid:15649335", "pmid:15629215", "pmid:15483045", "pmid:15377638", "pmid:15302914", "pmid:15068386", "pmid:14755444", "pmid:14735162", "pmid:14722156", "pmid:14560307", "pmid:14519687", "pmid:14517952", "pmid:12948442", "pmid:12905066", "pmid:12853612", "pmid:12681986", "pmid:12659659", "pmid:12634803", "pmid:12515381", "pmid:12379314", "pmid:12232854", "pmid:12160728", "pmid:12111644", "pmid:11992571", "pmid:11886710", "pmid:11807410", "pmid:11551101", "pmid:11499669", "pmid:11487573", "pmid:11410685", "pmid:11256870", "pmid:11229516", "pmid:11142761", "pmid:11142752", "pmid:11119302", "pmid:11097353", "pmid:11073538", "pmid:11070156", "pmid:11005143", "pmid:10995510", "pmid:10987654", "pmid:10941804", "pmid:10915764", "pmid:10870330", "pmid:10855793", "pmid:10780779", "pmid:10773084", "pmid:10710419", "pmid:10674158", "pmid:10631132", "pmid:10587583", "pmid:10567518", "pmid:10545610", "pmid:10521303", "pmid:10462618", "pmid:10447261", "pmid:10445321", "pmid:10424820", "pmid:10409756", "pmid:10369109", "pmid:10331601", "pmid:10331600", "pmid:10204857", "pmid:9916838", "pmid:9856500", "pmid:9811938", 
"pmid:9806479", "pmid:9792200", "pmid:9761677", "pmid:9738717", "pmid:9653650", "pmid:9624140", "pmid:9630071", "pmid:9604772", "pmid:9603608", "pmid:9529778", "pmid:9485421", "pmid:9514250", "pmid:9507388", "pmid:9415473", "pmid:9437788", "pmid:9399905", "pmid:9358013", "pmid:9341861", "pmid:9382110", "pmid:9299309", "pmid:9279752", "pmid:9254854", "pmid:9207038", "pmid:9201980", "pmid:9195158", "pmid:9640603", "pmid:9131013", "pmid:8798682", "pmid:8808600", "pmid:8792815", "pmid:8792813", "pmid:8844091", "pmid:8844089", "pmid:8844077", "pmid:8844068", "pmid:8844065", "pmid:8844064", "pmid:8755928", "pmid:8698331", "pmid:8826482", "pmid:8826479", "pmid:8725793", "pmid:8636996", "pmid:8673086", "pmid:8664297", "pmid:8644711", "pmid:8626781", "pmid:8872026", "pmid:8800930", "pmid:8519769", "pmid:8634688", "pmid:7499428", "pmid:8589687", "pmid:7581460", "pmid:8593539", "pmid:8579216", "pmid:8559749", "pmid:7541938", "pmid:7761473", "pmid:7758107", "pmid:7732383", "pmid:7783163", "pmid:8750357", "pmid:7825564", "pmid:7717734", "pmid:7881407", "pmid:7864047", "pmid:7927336", "pmid:7849707", "pmid:8023854", "pmid:8197163", "pmid:8162055", "pmid:8275089", "pmid:8244331", "pmid:8237919", "pmid:7902319", "pmid:7692601", "pmid:8242066", "pmid:8334699", "pmid:1642231", "pmid:1605199"] }, { From 5ef9f5fa69728f97c27e109def6d961b2d652abb Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 10 Jun 2026 13:04:31 -0600 Subject: [PATCH 18/29] Fixing space in reference --- data/STRchive-loci.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 482f831c..1be58096 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1374,7 +1374,7 @@ "disease": "Progressive Myoclonic Epilepsy and Developmental and Epileptic Encephalopathy", "inheritance": ["AR"], "association_type": ["Mendelian"], - "disease_description": "Progressive myoclonic epilepsy is a heterogeneous neurodegenerative disorder 
characterized by early-onset myoclonus, epilepsy, generalized tonic-clonic seizures, and progressive neurological deterioration [@pmid:40751262]. It has also been proposed that this locus is also associated with developmental and epileptic encephalopathies [@pmid: 39107278]. We hypothesize that the two diseases may make up an expressivity or phenotypic spectrum and/or that hypermethylation causes changes in onset and severity.", + "disease_description": "Progressive myoclonic epilepsy is a heterogeneous neurodegenerative disorder characterized by early-onset myoclonus, epilepsy, generalized tonic-clonic seizures, and progressive neurological deterioration [@pmid:40751262]. It has also been proposed that this locus is associated with developmental and epileptic encephalopathies [@pmid:39107278]. We hypothesize that the two diseases may make up an expressivity or phenotypic spectrum and/or that hypermethylation causes changes in onset and severity.", "hpo_terms": ["HP:0001336 Myoclonous", "HP:0001251 Ataxia", "HP:0002080 Intention Tremor", "HP:0007000 Morning Myoclonic Jerks", "HP:0001260 Dysarthia", "HP:0001249 Intellectual Disability", "HP:0002392 EEG with Polyspike Wave Complexes", "HP:0002070 Limb Ataxia", "HP:0000726 Dementia", "HP:0000992 Cutaneous Photosensitivity"], "prevalence": null, "prevalence_details": "EPM found in one Azerbaijani proband [@pmid:40751262] and DEE found in two additional patients [@pmid:39107278]. 
This expansion has been reported in unaffected individuals [@pmid:40751262].", @@ -1427,7 +1427,7 @@ "webstr_hg19": [], "locus_tags": [], "disease_tags": ["epilepsy"], - "references": ["pmid:40751262", "pmid 39107278"], + "references": ["pmid:40751262", "pmid:39107278"], "additional_literature": [] }, { From 4d5012bd4bd3068f12785dcec08e64bb9d6f721c Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Wed, 10 Jun 2026 19:11:32 +0000 Subject: [PATCH 19/29] Update data --- data/STRchive-citations.json | 148 ++++++++++++++++++++++++++++++++--- data/STRchive-loci.json | 2 +- 2 files changed, 138 insertions(+), 12 deletions(-) diff --git a/data/STRchive-citations.json b/data/STRchive-citations.json index 774053af..add0bca7 100644 --- a/data/STRchive-citations.json +++ b/data/STRchive-citations.json @@ -163302,21 +163302,130 @@ "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.malacards.org/card/KNS007']' timed out after 3 seconds" }, { - "id": "genereviews:NBK1384", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" + "id": "pmid:40751262", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/40751262", + "title": "A CGG Repeat Expansion in CSNK1E Associated with Progressive Myoclonic Epilepsy with Incomplete Penetrance.", + "type": "article-journal", + "doi": "10.1002/mds.30326", + "authors": [ + ["Fulya", "Ak\u00e7imen"], + ["Pilar", "Alvarez Jerez"], + ["Ulviyya", "Guliyeva"], + ["Jasmine", "Lee"], + ["Laksh", "Malik"], + ["Breeana", "Baker"], + ["Kamran", "Salayev"], + ["Sughra", "Guliyeva"], + ["Kimberley J", "Billingsley"], + ["Henry", "Houlden"], + ["Andrew B", "Singleton"], + ["Cornelis", "Blauwendraat"], + ["Sara", "Bandres-Ciga"], + ["Rauan", "Kaiyrzhanov"] + 
], + "publisher": "Movement disorders : official journal of the Movement Disorder Society", + "issn": "1531-8257", + "date": "2025-08-01", + "abstract": "Progressive myoclonic epilepsy is a heterogeneous neurodegenerative disorder characterized by early-onset myoclonus, epilepsy, generalized tonic-clonic seizures, and progressive neurological deterioration. Recently, a CGG repeat expansion and increased CSNK1E DNA methylation have been shown to be associated with developmental and epileptic encephalopathies.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40751262" }, { - "id": "genereviews:NBK1305", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" + "id": "pmid:39107278", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/39107278", + "title": "Diagnostic utility of DNA methylation analysis in genetically unsolved pediatric epilepsies and CHD2 episignature refinement.", + "type": "article-journal", + "doi": "10.1038/s41467-024-50159-6", + "authors": [ + ["Christy W", "LaFlamme"], + ["Cassandra", "Rastin"], + ["Soham", "Sengupta"], + ["Helen E", "Pennington"], + ["Sophie J", "Russ-Hall"], + ["Amy L", "Schneider"], + ["Emily S", "Bonkowski"], + ["Edith P", "Almanza Fuerte"], + ["Talia J", "Allan"], + ["Miranda Perez-Galey", "Zalusky"], + ["Joy", "Goffena"], + ["Sophia B", "Gibson"], + ["Denis M", "Nyaga"], + ["Nico", "Lieffering"], + ["Malavika", "Hebbar"], + ["Emily V", "Walker"], + ["Daniel", "Darnell"], + ["Scott R", "Olsen"], + ["Pandurang", "Kolekar"], + ["Mohamed Nadhir", "Djekidel"], + ["Wojciech", "Rosikiewicz"], + ["Haley", "McConkey"], + ["Jennifer", "Kerkhof"], + ["Michael A", "Levy"], + ["Raissa", "Relator"], + ["Dorit", "Lev"], + ["Tally", 
"Lerman-Sagie"], + ["Kristen L", "Park"], + ["Marielle", "Alders"], + ["Gerarda", "Cappuccio"], + ["Nicolas", "Chatron"], + ["Leigh", "Demain"], + ["David", "Genevieve"], + ["Gaetan", "Lesca"], + ["Tony", "Roscioli"], + ["Damien", "Sanlaville"], + ["Matthew L", "Tedder"], + ["Sachin", "Gupta"], + ["Elizabeth A", "Jones"], + ["Monika", "Weisz-Hubshman"], + ["Shamika", "Ketkar"], + ["Hongzheng", "Dai"], + ["Kim C", "Worley"], + ["Jill A", "Rosenfeld"], + ["Hsiao-Tuan", "Chao"], + ["Geoffrey", "Neale"], + ["Gemma L", "Carvill"], + ["Zhaoming", "Wang"], + ["Samuel F", "Berkovic"], + ["Lynette G", "Sadleir"], + ["Danny E", "Miller"], + ["Ingrid E", "Scheffer"], + ["Bekim", "Sadikovic"], + ["Heather C", "Mefford"] + ], + "publisher": "Nature communications", + "issn": "2041-1723", + "date": "2024-08-06", + "abstract": "Sequence-based genetic testing identifies causative variants in ~\u200950% of individuals with developmental and epileptic encephalopathies (DEEs). Aberrant changes in DNA methylation are implicated in various neurodevelopmental disorders but remain unstudied in DEEs. We interrogate the diagnostic utility of genome-wide DNA methylation array analysis on peripheral blood samples from 582 individuals with genetically unsolved DEEs. We identify rare differentially methylated regions (DMRs) and explanatory episignatures to uncover causative and candidate genetic etiologies in 12 individuals. Using long-read sequencing, we identify DNA variants underlying rare DMRs, including one balanced translocation, three CG-rich repeat expansions, and four copy number variants. We also identify pathogenic variants associated with episignatures. Finally, we refine the CHD2 episignature using an 850\u2009K methylation array and bisulfite sequencing to investigate potential insights into CHD2 pathophysiology. 
Our study demonstrates the diagnostic yield of genome-wide DNA methylation analysis to identify causal and candidate variants as 2% (12/582) for unsolved DEE cases.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39107278" }, { - "id": "isbn:978-3-031-66932-3", - "manubot_success": false, - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" + "id": "pmid:41929128", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/41929128", + "title": "Muscleblind-like proteins dimerize by forming disulfide bonds to regulate alternative splicing and pathogenic RNA foci formation.", + "type": "article-journal", + "doi": "10.64898/2026.03.24.714019", + "authors": [ + ["Luke A", "Knudson"], + ["Adam", "Kosti"], + ["Kathryn R", "Moss"], + ["Liang", "Shi"], + ["GiaLinh N", "Nguyen"], + ["Aleksandra", "Janusz-Kaminska"], + ["Eric X", "Zhou"], + ["Ryan P", "Hildebrandt"], + ["Eric T", "Wang"], + ["Gary J", "Bassell"] + ], + "publisher": "bioRxiv : the preprint server for biology", + "issn": "2692-8205", + "date": "2026-03-26", + "abstract": "Muscleblind-like (MBNL) RNA-binding proteins (RBPs) possess modular domains that mediate regulation of alternative splicing and RNA localization. Myotonic Dystrophy Type 1 is a CTG repeat expansion disorder where MBNL is sequestered into intranuclear RNA foci, impairing its function. Previous studies found that MBNL self-associates through its exon 7, but the nature of this interaction is not well understood. We identified a cysteine in MBNL1 exon 7 that enables dimerization through formation of an intermolecular disulfide bond. We likewise demonstrate that MBNL2 dimerizes by forming disulfide bonds between multiple cysteines in its carboxy-terminus. 
Nucleocytoplasmic fractionation revealed a greater proportion of MBNL1 dimer in the nucleus, suggesting a nuclear function for the MBNL1 dimer. We investigated a connection between MBNL1 dimerization and MBNL1-mediated regulation of alternative splicing. To accomplish this, we mutated the MBNL1 cysteine in question to alanine (C325A) and performed RNAseq. We uncovered novel splicing events sensitive to MBNL1 dimerization. We also found that MBNL1 C325A, when co-expressed with expanded CTG repeats, produces smaller, more numerous foci, suggesting a role for the MBNL1 dimer in maintaining foci integrity. These results provide insight into biological and pathological mechanisms of MBNL1 dimerization and suggest other RBPs might similarly dimerize to regulate function.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41929128" }, { "id": "pmid:25101480", @@ -163341,4 +163450,21 @@ "manubot_success": false, "link": "https://pubmed.ncbi.nlm.nih.gov/39666847", "note": "WARNING: Couldn't parse Manubot response: list index out of range" +}, +{ + "id": "genereviews:NBK1384", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" +}, +{ + "id": "genereviews:NBK1305", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" +}, +{ + "id": "isbn:978-3-031-66932-3", + "manubot_success": false, + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }] \ No newline at end of file diff --git a/data/STRchive-loci.json 
b/data/STRchive-loci.json index 1be58096..94801fe9 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -2041,7 +2041,7 @@ "webstr_hg19": ["Expansion_FXS/FMR1"], "locus_tags": ["somatic_instability", "anticipation", "maternal_expansion", "length_affects_onset", "length_affects_penetrance", "length_affects_phenotype", "length_affects_severity", "motif_affects_instability"], "disease_tags": ["phenotypic_spectrum", "ataxia"], - "references": ["genereviews:NBK1384", "url:https://www.uptodate.com/contents/fragile-x-syndrome-clinical-features-and-diagnosis-in-children-and-adolescents", "pmid:17427188", "isbn:978-3-031-66932-3", "pmid:16205714", "pmid:36169768", "pmid:41507195", "pmid:32463542", "pmid:29868108", "pmid:41555826", "pmid:7987398", "pmid:24700618", "pmid:29100084", "pmid:39320553", "pmid:1605194", "pmid:1710175", "mondo:0010383", "mondo:0010706", "mondo:0010382", "pmid:41929128", "pmid:29100084", "pmid:12805114", "pmid:35245110", "pmid:35053321"], + "references": ["genereviews:NBK1384", "url:https://www.uptodate.com/contents/fragile-x-syndrome-clinical-features-and-diagnosis-in-children-and-adolescents", "pmid:17427188", "isbn:978-3-031-66932-3", "pmid:16205714", "pmid:36169768", "pmid:41507195", "pmid:32463542", "pmid:29868108", "pmid:41555826", "pmid:7987398", "pmid:24700618", "pmid:29100084", "pmid:39320553", "pmid:1605194", "pmid:1710175", "mondo:0010383", "mondo:0010706", "mondo:0010382", "pmid:41929128", "pmid:12805114", "pmid:35245110", "pmid:35053321"], "additional_literature": ["pmid:42041789", "pmid:42001465", "pmid:41952192", "pmid:41929501", "pmid:41917775", "pmid:41806827", "pmid:41792844", "pmid:41777701", "pmid:41762523", "pmid:41717020", "pmid:41672630", "pmid:41648852", "pmid:41557506", "pmid:41523206", "pmid:41514368", "pmid:41409170", "pmid:41386846", "pmid:41385812", "pmid:41372183", "pmid:41351347", "pmid:41278766", "pmid:41256123", "pmid:41167304", "pmid:41145158", "pmid:41120736", "pmid:41098569", "pmid:41074692", 
"pmid:41028987", "pmid:41015363", "pmid:40980401", "pmid:40940631", "pmid:40877251", "pmid:40869951", "pmid:40879637", "pmid:40778130", "pmid:40653294", "pmid:40600017", "pmid:40534679", "pmid:40488180", "pmid:40480633", "pmid:40459253", "pmid:40455869", "pmid:40418066", "pmid:40417743", "pmid:40296143", "pmid:40287634", "pmid:40244008", "pmid:40243429", "pmid:40243408", "pmid:40220918", "pmid:40166285", "pmid:40149430", "pmid:40141467", "pmid:40141297", "pmid:39945490", "pmid:39934227", "pmid:39839505", "pmid:39684429", "pmid:39654947", "pmid:39588919", "pmid:39574643", "pmid:39553953", "pmid:39492694", "pmid:39482338", "pmid:39488698", "pmid:39095619", "pmid:38997701", "pmid:38961870", "pmid:38946987", "pmid:38865241", "pmid:38772058", "pmid:38714961", "pmid:38522837", "pmid:38412259", "pmid:38307002", "pmid:38164622", "pmid:38162443", "pmid:38134876", "pmid:37970883", "pmid:37936174", "pmid:37906407", "pmid:37776526", "pmid:37745859", "pmid:37628570", "pmid:37583466", "pmid:37551886", "pmid:37551173", "pmid:37508562", "pmid:37364131", "pmid:37352983", "pmid:37347418", "pmid:37333274", "pmid:37209683", "pmid:37200782", "pmid:37146135", "pmid:37120588", "pmid:36882476", "pmid:36816716", "pmid:36250920", "pmid:36227727", "pmid:36012355", "pmid:35977823", "pmid:35948990", "pmid:35904811", "pmid:35729184", "pmid:35701103", "pmid:35681093", "pmid:35609145", "pmid:35182509", "pmid:35152460", "pmid:35129870", "pmid:35101584", "pmid:35072235", "pmid:35038595", "pmid:35026985", "pmid:34938155", "pmid:34926684", "pmid:34924936", "pmid:34880790", "pmid:34845661", "pmid:34828275", "pmid:34738199", "pmid:34690787", "pmid:34679478", "pmid:34646309", "pmid:34641814", "pmid:34641644", "pmid:34542254", "pmid:34456771", "pmid:34421690", "pmid:34372915", "pmid:34358321", "pmid:34321326", "pmid:34296199", "pmid:34276797", "pmid:34193467", "pmid:34153466", "pmid:34117786", "pmid:34111553", "pmid:34077515", "pmid:34054431", "pmid:34046842", "pmid:33998336", "pmid:33856019", 
"pmid:33854084", "pmid:33772546", "pmid:33709078", "pmid:33692361", "pmid:33642901", "pmid:33627639", "pmid:33585555", "pmid:33523882", "pmid:33497798", "pmid:33381520", "pmid:33374331", "pmid:33296661", "pmid:33195422", "pmid:33181255", "pmid:33151065", "pmid:33039683", "pmid:33008014", "pmid:33007370", "pmid:32787884", "pmid:32716213", "pmid:32695777", "pmid:32688058", "pmid:32589669", "pmid:32478017", "pmid:32446918", "pmid:32281281", "pmid:32258228", "pmid:32089525", "pmid:32066985", "pmid:32048109", "pmid:32012997", "pmid:31991700", "pmid:31989181", "pmid:31891607", "pmid:31887710", "pmid:31880363", "pmid:31866572", "pmid:31804632", "pmid:31733943", "pmid:31671347", "pmid:31665086", "pmid:31632248", "pmid:31586346", "pmid:31566610", "pmid:31512951", "pmid:31481131", "pmid:31468394", "pmid:31347257", "pmid:31336350", "pmid:31332380", "pmid:31299981", "pmid:31294106", "pmid:31264835", "pmid:31159589", "pmid:31096929", "pmid:31026518", "pmid:30984240", "pmid:30900173", "pmid:30847793", "pmid:30840878", "pmid:30832215", "pmid:30808398", "pmid:30665341", "pmid:30619448", "pmid:30606610", "pmid:30576349", "pmid:30567555", "pmid:30566867", "pmid:30538724", "pmid:30509972", "pmid:30396881", "pmid:30396281", "pmid:30312299", "pmid:30311737", "pmid:30211570", "pmid:30197656", "pmid:30173918", "pmid:30160796", "pmid:30158855", "pmid:30147707", "pmid:30030199", "pmid:29990673", "pmid:29981579", "pmid:29971092", "pmid:29880767", "pmid:29844802", "pmid:29766042", "pmid:29760651", "pmid:29379561", "pmid:29375310", "pmid:29316893", "pmid:29275276", "pmid:29267266", "pmid:29209628", "pmid:29188551", "pmid:28967713", "pmid:28895261", "pmid:28829283", "pmid:28815939", "pmid:28812997", "pmid:28697590", "pmid:28504725", "pmid:28454580", "pmid:28391068", "pmid:28369393", "pmid:28278294", "pmid:28233916", "pmid:28193118", "pmid:28173181", "pmid:28103472", "pmid:28065649", "pmid:28005950", "pmid:27883256", "pmid:27862088", "pmid:27841182", "pmid:27841172", "pmid:27840045", 
"pmid:27822316", "pmid:27816231", "pmid:27784894", "pmid:27646161", "pmid:27768763", "pmid:27761921", "pmid:27667322", "pmid:27616423", "pmid:27713816", "pmid:27708271", "pmid:27696642", "pmid:27696273", "pmid:27695335", "pmid:27540028", "pmid:27427765", "pmid:27375073", "pmid:27372099", "pmid:27355815", "pmid:27355445", "pmid:27335370", "pmid:27315125", "pmid:27294193", "pmid:27066582", "pmid:27042357", "pmid:27041225", "pmid:27001315", "pmid:26940792", "pmid:26825750", "pmid:26743003", "pmid:26716517", "pmid:26694146", "pmid:26554012", "pmid:26537920", "pmid:26463479", "pmid:26440889", "pmid:26420841", "pmid:26345686", "pmid:26322075", "pmid:26298472", "pmid:26194536", "pmid:26099177", "pmid:26029703", "pmid:25954027", "pmid:25953684", "pmid:25886163", "pmid:25875842", "pmid:25788698", "pmid:25776194", "pmid:25763861", "pmid:25726753", "pmid:25693964", "pmid:25689687", "pmid:25606365", "pmid:25436181", "pmid:25399540", "pmid:25366135", "pmid:25346430", "pmid:25290064", "pmid:25278957", "pmid:25250047", "pmid:25179629", "pmid:25171808", "pmid:25170346", "pmid:25147555", "pmid:25110527", "pmid:25093044", "pmid:25085749", "pmid:25013385", "pmid:24998620", "pmid:24963073", "pmid:24958193", "pmid:24938362", "pmid:24920338", "pmid:24912415", "pmid:24875778", "pmid:24858908", "pmid:24821701", "pmid:24816393", "pmid:24814676", "pmid:24787137", "pmid:24763282", "pmid:24743386", "pmid:24718368", "pmid:24715853", "pmid:24657592", "pmid:24654675", "pmid:24630283", "pmid:24591415", "pmid:24578575", "pmid:24463622", "pmid:24455203", "pmid:24448548", "pmid:24428240", "pmid:24424424", "pmid:24401315", "pmid:24352881", "pmid:24289922", "pmid:24261641", "pmid:24249225", "pmid:24177047", "pmid:24130133", "pmid:23949867", "pmid:23896050", "pmid:23792063", "pmid:23753897", "pmid:23731704", "pmid:23719910", "pmid:23683082", "pmid:23602499", "pmid:23574351", "pmid:23553633", "pmid:23497562", "pmid:23478018", "pmid:23464607", "pmid:23440729", "pmid:23250915", "pmid:23198693", 
"pmid:23148490", "pmid:23146966", "pmid:23015788", "pmid:22924671", "pmid:22918986", "pmid:22887750", "pmid:22796595", "pmid:22707411", "pmid:22612820", "pmid:22619118", "pmid:22581803", "pmid:22507827", "pmid:22498846", "pmid:22489017", "pmid:22466801", "pmid:22427040", "pmid:22393900", "pmid:22387066", "pmid:22311273", "pmid:22251309", "pmid:22241100", "pmid:22224633", "pmid:22223546", "pmid:22211843", "pmid:22177572", "pmid:22161987", "pmid:22149120", "pmid:22022567", "pmid:22004265", "pmid:21944929", "pmid:21808616", "pmid:21807882", "pmid:21775729", "pmid:21767618", "pmid:21720528", "pmid:21671049", "pmid:21646280", "pmid:21636656", "pmid:21617890", "pmid:21596781", "pmid:21572337", "pmid:21567456", "pmid:21499798", "pmid:21476992", "pmid:21445959", "pmid:21430544", "pmid:21389081", "pmid:21329465", "pmid:21270637", "pmid:21267007", "pmid:21254876", "pmid:21170301", "pmid:21051337", "pmid:20938029", "pmid:20935171", "pmid:20858229", "pmid:20801083", "pmid:20799337", "pmid:20736975", "pmid:20702130", "pmid:20616364", "pmid:20431035", "pmid:20425835", "pmid:20410144", "pmid:20364100", "pmid:20221430", "pmid:20213777", "pmid:20168238", "pmid:20118148", "pmid:20051238", "pmid:20011099", "pmid:20001115", "pmid:19927162", "pmid:19846466", "pmid:19804849", "pmid:19796183", "pmid:19778484", "pmid:19760650", "pmid:19710035", "pmid:19684044", "pmid:19562332", "pmid:19525339", "pmid:19514725", "pmid:19460939", "pmid:19460937", "pmid:19440516", "pmid:19404994", "pmid:19204162", "pmid:19097038", "pmid:19045956", "pmid:19026394", "pmid:19014369", "pmid:18565783", "pmid:18563710", "pmid:18553360", "pmid:18535897", "pmid:18472227", "pmid:18428348", "pmid:18413472", "pmid:18403614", "pmid:18384775", "pmid:18373410", "pmid:18357616", "pmid:18310361", "pmid:18281036", "pmid:18165971", "pmid:18165276", "pmid:18160412", "pmid:18057320", "pmid:17724287", "pmid:17698009", "pmid:17674408", "pmid:17635840", "pmid:17591512", "pmid:17516099", "pmid:17442505", "pmid:17322660", 
"pmid:17295053", "pmid:17279084", "pmid:17266074", "pmid:17152065", "pmid:17150213", "pmid:17089161", "pmid:17044853", "pmid:16780889", "pmid:16761284", "pmid:16361284", "pmid:16337617", "pmid:16258159", "pmid:16164596", "pmid:16047092", "pmid:15971024", "pmid:15947063", "pmid:15876460", "pmid:15811008", "pmid:15741991", "pmid:15659577", "pmid:15649335", "pmid:15629215", "pmid:15483045", "pmid:15377638", "pmid:15302914", "pmid:15068386", "pmid:14755444", "pmid:14735162", "pmid:14722156", "pmid:14560307", "pmid:14519687", "pmid:14517952", "pmid:12948442", "pmid:12905066", "pmid:12853612", "pmid:12681986", "pmid:12659659", "pmid:12634803", "pmid:12515381", "pmid:12379314", "pmid:12232854", "pmid:12160728", "pmid:12111644", "pmid:11992571", "pmid:11886710", "pmid:11807410", "pmid:11551101", "pmid:11499669", "pmid:11487573", "pmid:11410685", "pmid:11256870", "pmid:11229516", "pmid:11142761", "pmid:11142752", "pmid:11119302", "pmid:11097353", "pmid:11073538", "pmid:11070156", "pmid:11005143", "pmid:10995510", "pmid:10987654", "pmid:10941804", "pmid:10915764", "pmid:10870330", "pmid:10855793", "pmid:10780779", "pmid:10773084", "pmid:10710419", "pmid:10674158", "pmid:10631132", "pmid:10587583", "pmid:10567518", "pmid:10545610", "pmid:10521303", "pmid:10462618", "pmid:10447261", "pmid:10445321", "pmid:10424820", "pmid:10409756", "pmid:10369109", "pmid:10331601", "pmid:10331600", "pmid:10204857", "pmid:9916838", "pmid:9856500", "pmid:9811938", "pmid:9806479", "pmid:9792200", "pmid:9761677", "pmid:9738717", "pmid:9653650", "pmid:9624140", "pmid:9630071", "pmid:9604772", "pmid:9603608", "pmid:9529778", "pmid:9485421", "pmid:9514250", "pmid:9507388", "pmid:9415473", "pmid:9437788", "pmid:9399905", "pmid:9358013", "pmid:9341861", "pmid:9382110", "pmid:9299309", "pmid:9279752", "pmid:9254854", "pmid:9207038", "pmid:9201980", "pmid:9195158", "pmid:9640603", "pmid:9131013", "pmid:8798682", "pmid:8808600", "pmid:8792815", "pmid:8792813", "pmid:8844091", "pmid:8844089", 
"pmid:8844077", "pmid:8844068", "pmid:8844065", "pmid:8844064", "pmid:8755928", "pmid:8698331", "pmid:8826482", "pmid:8826479", "pmid:8725793", "pmid:8636996", "pmid:8673086", "pmid:8664297", "pmid:8644711", "pmid:8626781", "pmid:8872026", "pmid:8800930", "pmid:8519769", "pmid:8634688", "pmid:7499428", "pmid:8589687", "pmid:7581460", "pmid:8593539", "pmid:8579216", "pmid:8559749", "pmid:7541938", "pmid:7761473", "pmid:7758107", "pmid:7732383", "pmid:7783163", "pmid:8750357", "pmid:7825564", "pmid:7717734", "pmid:7881407", "pmid:7864047", "pmid:7927336", "pmid:7849707", "pmid:8023854", "pmid:8197163", "pmid:8162055", "pmid:8275089", "pmid:8244331", "pmid:8237919", "pmid:7902319", "pmid:7692601", "pmid:8242066", "pmid:8334699", "pmid:1642231", "pmid:1605199"] }, { From f637fb4dc52330b0b2d9343300f1e9e9b970c4c1 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Wed, 10 Jun 2026 15:34:22 -0600 Subject: [PATCH 20/29] add potential subclinical neuropathy --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 94801fe9..0cbc624d 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -3297,7 +3297,7 @@ "disease": "Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6", "inheritance": ["AD"], "association_type": ["Mendelian"], - "disease_description": "Neuronal intranuclear inclusion disease (NIID) is a very rare multisystem neurodegenerative disorder characterized by the presence of eosinophilic intranuclear inclusions in neuronal and glial cells, and neuronal loss [@mondo:0011327]. Often presents with gastrointestinal symptoms, including chronic refractory nausea, which can precede neurologic manifestations by decades in one documented case [pmid:41929501]. 
Renal, bladder, and other visceral organ involvement have been reported and may occasionally precede neurological symptoms [@pmid:42058219; @pmid:42005169]. Due to overlapping phenotypes and the shared locus, it is unclear whether these four diseases are comorbid, synonymous, or entirely separate.", + "disease_description": "Neuronal intranuclear inclusion disease (NIID) is a very rare multisystem neurodegenerative disorder characterized by the presence of eosinophilic intranuclear inclusions in neuronal and glial cells, and neuronal loss [@mondo:0011327]. Often presents with gastrointestinal symptoms, including chronic refractory nausea, which can precede neurologic manifestations by decades in one documented case [@pmid:41929501]. Renal, bladder, and other visceral organ involvement have been reported and may occasionally precede neurological symptoms [@pmid:42058219; @pmid:42005169]. Subclinical peripheral neuropathy has been reported in NOTCH2NLC-related NIID [@pmid:42001002]. Due to overlapping phenotypes and the shared locus, it is unclear whether these four diseases are comorbid, synonymous, or entirely separate.", "hpo_terms": [], "prevalence": null, "prevalence_details": ">400 patients reported in literature [@pmid:37371433]. 
Found in individuals of East Asian ancestry [@pmid:38876750].", From a2a7955914c6883aaeb0ef694d024f8a79d0e969 Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:41:27 +0000 Subject: [PATCH 21/29] Update data --- data/STRchive-citations.json | 274 +++++++++++++++++------------------ 1 file changed, 137 insertions(+), 137 deletions(-) diff --git a/data/STRchive-citations.json b/data/STRchive-citations.json index add0bca7..cc8b95bd 100644 --- a/data/STRchive-citations.json +++ b/data/STRchive-citations.json @@ -163061,6 +163061,132 @@ "language": "en", "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41285770" }, +{ + "id": "pmid:40751262", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/40751262", + "title": "A CGG Repeat Expansion in CSNK1E Associated with Progressive Myoclonic Epilepsy with Incomplete Penetrance.", + "type": "article-journal", + "doi": "10.1002/mds.30326", + "authors": [ + ["Fulya", "Ak\u00e7imen"], + ["Pilar", "Alvarez Jerez"], + ["Ulviyya", "Guliyeva"], + ["Jasmine", "Lee"], + ["Laksh", "Malik"], + ["Breeana", "Baker"], + ["Kamran", "Salayev"], + ["Sughra", "Guliyeva"], + ["Kimberley J", "Billingsley"], + ["Henry", "Houlden"], + ["Andrew B", "Singleton"], + ["Cornelis", "Blauwendraat"], + ["Sara", "Bandres-Ciga"], + ["Rauan", "Kaiyrzhanov"] + ], + "publisher": "Movement disorders : official journal of the Movement Disorder Society", + "issn": "1531-8257", + "date": "2025-08-01", + "abstract": "Progressive myoclonic epilepsy is a heterogeneous neurodegenerative disorder characterized by early-onset myoclonus, epilepsy, generalized tonic-clonic seizures, and progressive neurological deterioration. 
Recently, a CGG repeat expansion and increased CSNK1E DNA methylation have been shown to be associated with developmental and epileptic encephalopathies.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40751262" +}, +{ + "id": "pmid:39107278", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/39107278", + "title": "Diagnostic utility of DNA methylation analysis in genetically unsolved pediatric epilepsies and CHD2 episignature refinement.", + "type": "article-journal", + "doi": "10.1038/s41467-024-50159-6", + "authors": [ + ["Christy W", "LaFlamme"], + ["Cassandra", "Rastin"], + ["Soham", "Sengupta"], + ["Helen E", "Pennington"], + ["Sophie J", "Russ-Hall"], + ["Amy L", "Schneider"], + ["Emily S", "Bonkowski"], + ["Edith P", "Almanza Fuerte"], + ["Talia J", "Allan"], + ["Miranda Perez-Galey", "Zalusky"], + ["Joy", "Goffena"], + ["Sophia B", "Gibson"], + ["Denis M", "Nyaga"], + ["Nico", "Lieffering"], + ["Malavika", "Hebbar"], + ["Emily V", "Walker"], + ["Daniel", "Darnell"], + ["Scott R", "Olsen"], + ["Pandurang", "Kolekar"], + ["Mohamed Nadhir", "Djekidel"], + ["Wojciech", "Rosikiewicz"], + ["Haley", "McConkey"], + ["Jennifer", "Kerkhof"], + ["Michael A", "Levy"], + ["Raissa", "Relator"], + ["Dorit", "Lev"], + ["Tally", "Lerman-Sagie"], + ["Kristen L", "Park"], + ["Marielle", "Alders"], + ["Gerarda", "Cappuccio"], + ["Nicolas", "Chatron"], + ["Leigh", "Demain"], + ["David", "Genevieve"], + ["Gaetan", "Lesca"], + ["Tony", "Roscioli"], + ["Damien", "Sanlaville"], + ["Matthew L", "Tedder"], + ["Sachin", "Gupta"], + ["Elizabeth A", "Jones"], + ["Monika", "Weisz-Hubshman"], + ["Shamika", "Ketkar"], + ["Hongzheng", "Dai"], + ["Kim C", "Worley"], + ["Jill A", "Rosenfeld"], + ["Hsiao-Tuan", "Chao"], + ["Geoffrey", "Neale"], + ["Gemma L", "Carvill"], + ["Zhaoming", "Wang"], + ["Samuel F", "Berkovic"], + ["Lynette G", "Sadleir"], + ["Danny E", "Miller"], 
+ ["Ingrid E", "Scheffer"], + ["Bekim", "Sadikovic"], + ["Heather C", "Mefford"] + ], + "publisher": "Nature communications", + "issn": "2041-1723", + "date": "2024-08-06", + "abstract": "Sequence-based genetic testing identifies causative variants in ~\u200950% of individuals with developmental and epileptic encephalopathies (DEEs). Aberrant changes in DNA methylation are implicated in various neurodevelopmental disorders but remain unstudied in DEEs. We interrogate the diagnostic utility of genome-wide DNA methylation array analysis on peripheral blood samples from 582 individuals with genetically unsolved DEEs. We identify rare differentially methylated regions (DMRs) and explanatory episignatures to uncover causative and candidate genetic etiologies in 12 individuals. Using long-read sequencing, we identify DNA variants underlying rare DMRs, including one balanced translocation, three CG-rich repeat expansions, and four copy number variants. We also identify pathogenic variants associated with episignatures. Finally, we refine the CHD2 episignature using an 850\u2009K methylation array and bisulfite sequencing to investigate potential insights into CHD2 pathophysiology. 
Our study demonstrates the diagnostic yield of genome-wide DNA methylation analysis to identify causal and candidate variants as 2% (12/582) for unsolved DEE cases.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39107278" +}, +{ + "id": "pmid:41929128", + "manubot_success": true, + "link": "https://www.ncbi.nlm.nih.gov/pubmed/41929128", + "title": "Muscleblind-like proteins dimerize by forming disulfide bonds to regulate alternative splicing and pathogenic RNA foci formation.", + "type": "article-journal", + "doi": "10.64898/2026.03.24.714019", + "authors": [ + ["Luke A", "Knudson"], + ["Adam", "Kosti"], + ["Kathryn R", "Moss"], + ["Liang", "Shi"], + ["GiaLinh N", "Nguyen"], + ["Aleksandra", "Janusz-Kaminska"], + ["Eric X", "Zhou"], + ["Ryan P", "Hildebrandt"], + ["Eric T", "Wang"], + ["Gary J", "Bassell"] + ], + "publisher": "bioRxiv : the preprint server for biology", + "issn": "2692-8205", + "date": "2026-03-26", + "abstract": "Muscleblind-like (MBNL) RNA-binding proteins (RBPs) possess modular domains that mediate regulation of alternative splicing and RNA localization. Myotonic Dystrophy Type 1 is a CTG repeat expansion disorder where MBNL is sequestered into intranuclear RNA foci, impairing its function. Previous studies found that MBNL self-associates through its exon 7, but the nature of this interaction is not well understood. We identified a cysteine in MBNL1 exon 7 that enables dimerization through formation of an intermolecular disulfide bond. We likewise demonstrate that MBNL2 dimerizes by forming disulfide bonds between multiple cysteines in its carboxy-terminus. Nucleocytoplasmic fractionation revealed a greater proportion of MBNL1 dimer in the nucleus, suggesting a nuclear function for the MBNL1 dimer. We investigated a connection between MBNL1 dimerization and MBNL1-mediated regulation of alternative splicing. 
To accomplish this, we mutated the MBNL1 cysteine in question to alanine (C325A) and performed RNAseq. We uncovered novel splicing events sensitive to MBNL1 dimerization. We also found that MBNL1 C325A, when co-expressed with expanded CTG repeats, produces smaller, more numerous foci, suggesting a role for the MBNL1 dimer in maintaining foci integrity. These results provide insight into biological and pathological mechanisms of MBNL1 dimerization and suggest other RBPs might similarly dimerize to regulate function.", + "language": "en", + "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41929128" +}, { "id": "omim:309548", "manubot_success": false, @@ -163302,130 +163428,21 @@ "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.malacards.org/card/KNS007']' timed out after 3 seconds" }, { - "id": "pmid:40751262", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/40751262", - "title": "A CGG Repeat Expansion in CSNK1E Associated with Progressive Myoclonic Epilepsy with Incomplete Penetrance.", - "type": "article-journal", - "doi": "10.1002/mds.30326", - "authors": [ - ["Fulya", "Ak\u00e7imen"], - ["Pilar", "Alvarez Jerez"], - ["Ulviyya", "Guliyeva"], - ["Jasmine", "Lee"], - ["Laksh", "Malik"], - ["Breeana", "Baker"], - ["Kamran", "Salayev"], - ["Sughra", "Guliyeva"], - ["Kimberley J", "Billingsley"], - ["Henry", "Houlden"], - ["Andrew B", "Singleton"], - ["Cornelis", "Blauwendraat"], - ["Sara", "Bandres-Ciga"], - ["Rauan", "Kaiyrzhanov"] - ], - "publisher": "Movement disorders : official journal of the Movement Disorder Society", - "issn": "1531-8257", - "date": "2025-08-01", - "abstract": "Progressive myoclonic epilepsy is a heterogeneous neurodegenerative disorder characterized by early-onset myoclonus, epilepsy, generalized tonic-clonic seizures, and progressive neurological deterioration. 
Recently, a CGG repeat expansion and increased CSNK1E DNA methylation have been shown to be associated with developmental and epileptic encephalopathies.", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:40751262" + "id": "genereviews:NBK1384", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" }, { - "id": "pmid:39107278", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/39107278", - "title": "Diagnostic utility of DNA methylation analysis in genetically unsolved pediatric epilepsies and CHD2 episignature refinement.", - "type": "article-journal", - "doi": "10.1038/s41467-024-50159-6", - "authors": [ - ["Christy W", "LaFlamme"], - ["Cassandra", "Rastin"], - ["Soham", "Sengupta"], - ["Helen E", "Pennington"], - ["Sophie J", "Russ-Hall"], - ["Amy L", "Schneider"], - ["Emily S", "Bonkowski"], - ["Edith P", "Almanza Fuerte"], - ["Talia J", "Allan"], - ["Miranda Perez-Galey", "Zalusky"], - ["Joy", "Goffena"], - ["Sophia B", "Gibson"], - ["Denis M", "Nyaga"], - ["Nico", "Lieffering"], - ["Malavika", "Hebbar"], - ["Emily V", "Walker"], - ["Daniel", "Darnell"], - ["Scott R", "Olsen"], - ["Pandurang", "Kolekar"], - ["Mohamed Nadhir", "Djekidel"], - ["Wojciech", "Rosikiewicz"], - ["Haley", "McConkey"], - ["Jennifer", "Kerkhof"], - ["Michael A", "Levy"], - ["Raissa", "Relator"], - ["Dorit", "Lev"], - ["Tally", "Lerman-Sagie"], - ["Kristen L", "Park"], - ["Marielle", "Alders"], - ["Gerarda", "Cappuccio"], - ["Nicolas", "Chatron"], - ["Leigh", "Demain"], - ["David", "Genevieve"], - ["Gaetan", "Lesca"], - ["Tony", "Roscioli"], - ["Damien", "Sanlaville"], - ["Matthew L", "Tedder"], - ["Sachin", "Gupta"], - ["Elizabeth A", "Jones"], - ["Monika", "Weisz-Hubshman"], - 
["Shamika", "Ketkar"], - ["Hongzheng", "Dai"], - ["Kim C", "Worley"], - ["Jill A", "Rosenfeld"], - ["Hsiao-Tuan", "Chao"], - ["Geoffrey", "Neale"], - ["Gemma L", "Carvill"], - ["Zhaoming", "Wang"], - ["Samuel F", "Berkovic"], - ["Lynette G", "Sadleir"], - ["Danny E", "Miller"], - ["Ingrid E", "Scheffer"], - ["Bekim", "Sadikovic"], - ["Heather C", "Mefford"] - ], - "publisher": "Nature communications", - "issn": "2041-1723", - "date": "2024-08-06", - "abstract": "Sequence-based genetic testing identifies causative variants in ~\u200950% of individuals with developmental and epileptic encephalopathies (DEEs). Aberrant changes in DNA methylation are implicated in various neurodevelopmental disorders but remain unstudied in DEEs. We interrogate the diagnostic utility of genome-wide DNA methylation array analysis on peripheral blood samples from 582 individuals with genetically unsolved DEEs. We identify rare differentially methylated regions (DMRs) and explanatory episignatures to uncover causative and candidate genetic etiologies in 12 individuals. Using long-read sequencing, we identify DNA variants underlying rare DMRs, including one balanced translocation, three CG-rich repeat expansions, and four copy number variants. We also identify pathogenic variants associated with episignatures. Finally, we refine the CHD2 episignature using an 850\u2009K methylation array and bisulfite sequencing to investigate potential insights into CHD2 pathophysiology. 
Our study demonstrates the diagnostic yield of genome-wide DNA methylation analysis to identify causal and candidate variants as 2% (12/582) for unsolved DEE cases.", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:39107278" + "id": "genereviews:NBK1305", + "manubot_success": false, + "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" }, { - "id": "pmid:41929128", - "manubot_success": true, - "link": "https://www.ncbi.nlm.nih.gov/pubmed/41929128", - "title": "Muscleblind-like proteins dimerize by forming disulfide bonds to regulate alternative splicing and pathogenic RNA foci formation.", - "type": "article-journal", - "doi": "10.64898/2026.03.24.714019", - "authors": [ - ["Luke A", "Knudson"], - ["Adam", "Kosti"], - ["Kathryn R", "Moss"], - ["Liang", "Shi"], - ["GiaLinh N", "Nguyen"], - ["Aleksandra", "Janusz-Kaminska"], - ["Eric X", "Zhou"], - ["Ryan P", "Hildebrandt"], - ["Eric T", "Wang"], - ["Gary J", "Bassell"] - ], - "publisher": "bioRxiv : the preprint server for biology", - "issn": "2692-8205", - "date": "2026-03-26", - "abstract": "Muscleblind-like (MBNL) RNA-binding proteins (RBPs) possess modular domains that mediate regulation of alternative splicing and RNA localization. Myotonic Dystrophy Type 1 is a CTG repeat expansion disorder where MBNL is sequestered into intranuclear RNA foci, impairing its function. Previous studies found that MBNL self-associates through its exon 7, but the nature of this interaction is not well understood. We identified a cysteine in MBNL1 exon 7 that enables dimerization through formation of an intermolecular disulfide bond. We likewise demonstrate that MBNL2 dimerizes by forming disulfide bonds between multiple cysteines in its carboxy-terminus. 
Nucleocytoplasmic fractionation revealed a greater proportion of MBNL1 dimer in the nucleus, suggesting a nuclear function for the MBNL1 dimer. We investigated a connection between MBNL1 dimerization and MBNL1-mediated regulation of alternative splicing. To accomplish this, we mutated the MBNL1 cysteine in question to alanine (C325A) and performed RNAseq. We uncovered novel splicing events sensitive to MBNL1 dimerization. We also found that MBNL1 C325A, when co-expressed with expanded CTG repeats, produces smaller, more numerous foci, suggesting a role for the MBNL1 dimer in maintaining foci integrity. These results provide insight into biological and pathological mechanisms of MBNL1 dimerization and suggest other RBPs might similarly dimerize to regulate function.", - "language": "en", - "note": "This CSL Item was generated by Manubot v0.6.1 from its persistent identifier (standard_id).\nstandard_id: pubmed:41929128" + "id": "isbn:978-3-031-66932-3", + "manubot_success": false, + "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }, { "id": "pmid:25101480", @@ -163450,21 +163467,4 @@ "manubot_success": false, "link": "https://pubmed.ncbi.nlm.nih.gov/39666847", "note": "WARNING: Couldn't parse Manubot response: list index out of range" -}, -{ - "id": "genereviews:NBK1384", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1384", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1384']' timed out after 3 seconds" -}, -{ - "id": "genereviews:NBK1305", - "manubot_success": false, - "link": "https://www.ncbi.nlm.nih.gov/books/NBK1305", - "note": "WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'url:https://www.ncbi.nlm.nih.gov/books/NBK1305']' timed out after 3 seconds" -}, -{ - "id": "isbn:978-3-031-66932-3", - "manubot_success": false, - "note": 
"WARNING: Manubot could not generate citation: Command '['manubot', 'cite', 'isbn:978-3-031-66932-3']' timed out after 3 seconds" }] \ No newline at end of file From 1f698c84c3e091e2c89af5b2dd0dd259d0e46859 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 10:55:36 -0600 Subject: [PATCH 22/29] add C9orf72 promoter hypermethylation --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 0cbc624d..1618d564 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -967,7 +967,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. 
C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. 
Methylation appears to increase with expansion length and age [@pmid:39709476], and C9orf72 promoter hypermethylation has been observed in expansion carriers [@pmid:42222887]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]. 
C9orf72 repeat expansions are associated with reduced C9orf72 expression in multiple ALS tissues and altered splicing of the exon 1a isoform [@pmid:42145639]. Reduced C9orf72 expression has also been observed in peripheral blood immune cells from C9orf72-associated ALS, with C9-ALS showing distinct monocyte activation signatures. In ALS spinal cord, activated myeloid cells expressing complement, lipid-processing, and phagocytic genes occur in regions with motor neuron loss and TDP-43 pathology [@pmid:42135512].", "year": "2011 [@pmid:21944778]", From 268f8620c2393085a7a10fd1cca9388835a80581 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 11:41:18 -0600 Subject: [PATCH 23/29] change mechanism to GoF proteotoxic GoF is supported by the literature --- data/STRchive-loci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 1618d564..46ae91d0 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -1166,7 +1166,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "The locus contains 17 imperfect 33 bp motifs, with a stretch of 7 perfect GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG motifs. Several pathogenic mutations have been proposed. The most supported pathogenic variants are single base deletions in the proximal VNTR, reported in repeat segments 1, 4, and 5 [@pmid:34850019]. One reported proximal VNTR deletion is a 1bp deletion of (C)8 to (C)7 within the VNTR, causing a motif change (this is the pathogenic motif represented here). Distal CEL VNTR single-base insertions, particularly INS9/INS10/INS12, have been reported as likely benign polymorphisms, while proximal insertion variants may have greater pathogenic potential [@pmid:38483348]. Also, a contraction that deletes one of the VNTR repeats may be pathogenic, with reduced penetrance, although evidence for this is sparse [@pmid:19760265]. 
Another study identified a c.2041_2042delinsCGG p.(Val681Argfs*6) mutation in the 12th motif (one of the imperfect motifs) [@pmid:39361122]. Several non-tandem repeat pathogenic MODY variants have also been reported in this gene. Given limited data and multiple proposed pathogenic variants, the normal and pathogenic ranges are currently difficult to define.", - "mechanism": null, + "mechanism": "GoF", "mechanism_detail": "Proximal CEL VNTR frameshift variants alter the C-terminal tandem-repeat domain and become pathogenic through protein misfolding and proteotoxic gain-of-function. Pathogenic proximal deletion variants show increased aggregation, reduced secretion, ER stress, and UPR activation, while enzymatic activity is largely preserved [@pmid:21784842; @pmid:27650499; @pmid:33862081]. Functional testing of CEL VNTR insertion variants showed that proximal insertions had greater aggregation and UPR effects [@pmid:38483348].", "year": "2005", "location_in_gene": "Exon 11", From 8c6ac33818e3f072680e3b1cad922dc822b6f860 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 13:28:42 -0600 Subject: [PATCH 24/29] Editing diagnostic Relevance to be more consice/standardized --- data/STRchive-loci.json | 74 ++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 46ae91d0..9702e25f 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -23,7 +23,7 @@ "age_onset_max": 50.0, "typ_age_onset_min": 24.0, "typ_age_onset_max": 30.0, - "details": "Characterized in eight unrelated families which were used to establish benign (3-44) and pathogenic (118-694) ranges [@pmid:39068203]. These expansions are usually detected with RP-PCR [@pmid:39068203]. srWGS has significantly underestimated repeat count, while long read sequencing has accurately resolved size [@pmid:39068203]. 
", + "details": "Characterized in eight unrelated families which were used to establish benign (3-44) and pathogenic (118-694) ranges [@pmid:39068203]. RP-PCR is typically used for detection [@pmid:39068203]. srWGS has significantly underestimated repeat count, while long read sequencing has accurately resolved size [@pmid:39068203].", "mechanism": null, "mechanism_detail": "Potentially over-expression of transcripts [@pmid:39068203].", "year": "2023 [@pmid:39068203]", @@ -89,7 +89,7 @@ "age_onset_max": 10.0, "typ_age_onset_min": 2.0, "typ_age_onset_max": 10.0, - "details": "Allele ranges (benign:4-39; pathogenic: >200) inferred from The Human Gene Mutation Database [@genereviews:NBK535148]. Intermediate alleles correspond to a premutation [@pmid:23914978]. Non-canonical motifs include: CGG/CCT/GTG/CAG/CTG3 [@pmid:35245110; @pmid:34111553]. RP-PCR can detect these expansions and size them to ~80 repeats [@pmid:34282157]. Southern blotting can size larger alleles and categorize methylation [@pmid:34282157].", + "details": "Allele ranges (benign:4-39; pathogenic: >200) inferred from The Human Gene Mutation Database [@genereviews:NBK535148]. Intermediate alleles correspond to a premutation [@pmid:23914978]. Non-canonical motifs include: CGG/CCT/GTG/CAG/CTG3 [@pmid:35245110; @pmid:34111553]. RP-PCR can detect these expansions and size alleles to ~80 repeats, while Southern blotting sizes full expansions and detects methylation [@pmid:34282157].", "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1993 [@pmid:8334699]", @@ -155,7 +155,7 @@ "age_onset_max": 7.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Allele ranges established in study of 3 families; intermediate alleles likely premutations [@pmid:24763282]. Pathogenic threshold may be higher than 300 as this was the largest allele that could be accurately sized by the assay. 
srWGS may underestimate the size of large expansions. While standard PCR can detect small alleles, RP-PCR and Southern blotting are used to approximate size of large expansions [@pmid:24763282]. For exact sizing, long read sequencing has been shown to be effective [@pmid:39313615].", + "details": "Allele ranges established in study of 3 families; intermediate alleles likely premutations [@pmid:24763282]. Pathogenic threshold may be higher than 300 as this was the largest allele that could be accurately sized by the assay. Standard PCR detects small alleles, while RP-PCR and Southern blotting detect large expansions. srWGS may underestimate the size of large expansions and exact sizing requires long read sequencing [@pmid:24763282; @pmid:39313615].", "mechanism": "LoF/methylation", "mechanism_detail": "Silencing of the FMR2 gene as a consequence of a CCG expansion located upstream of this gene [@malacard:KNS007].", "year": "2014 [@pmid:24763282]", @@ -221,7 +221,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. 
Although this expansion can be detected in srWGS screens [@pmid:36797998], sizing needs to be validated with standard PCR fragment analysis or RP-PCR [geneReviews:NBK1333].", + "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. Although srWGS screens can detect this expansion [@pmid:36797998], sizing needs to be validated with standard PCR fragment analysis or RP-PCR [@genereviews:NBK1333].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine alters protein conformation leading to gain-of-function neurodegeneration [@pmid:29398703; @pmid:36169768]. Transcriptional dysregulation, axonal transport disruption, and mitochondrial dysfunction also play causative roles in the neurodegeneration [@pmid:22609045].", "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", @@ -419,7 +419,7 @@ "age_onset_max": 72.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 40.0, - "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491].
CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. These repeats are typically sized with fragment analysis and/or RP-PCR, especially on repeat-expansion panels. Routine PCR works well for moderate alleles, but very large or juvenile-onset expansions may exceed exact sizing limits [@genereviews:NBK1491]. Expansions are usually sized by PCR fragment analysis, often with RP-PCR as a reflex [geneReviews:NBK1184]. Standard fragment analysis gives total repeat length but does not resolve CAT interruptions, which typically requires targeted sequencing methods, such as Sanger sequencing [@pmid:34635619].", + "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. PCR fragment analysis detects most moderate alleles, but Southern blotting or RP-PCR may be needed for large expansions or apparent homozygosity [@genereviews:NBK1491].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansions leading to gain of function [@genereviews:NBK1491].", "year": "1994 [@pmid:7842016]", @@ -485,7 +485,7 @@ "age_onset_max": 63.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 39.0, - "details": "Penetrance is dependent on sequence purity in addition to expansion length: pure repeats are pathogenic at 39 repeats [@pmid:37906407], while CAT interruptions [@pmid:35245110] can lead to reduced penetrance at comparable lengths [@genereviews:NBK1184]. 
Regardless, intermediate alleles are considered premutations which may lead to disease upon transmission [@genereviews:NBK1184]. CAA interruptions have also been reported, but not linked to any phenotypic consequences [@pmid:23935513].", + "details": "Penetrance is dependent on sequence purity in addition to expansion length: pure repeats are pathogenic at 39 repeats [@pmid:37906407], while CAT interruptions [@pmid:35245110] can lead to reduced penetrance at comparable lengths [@genereviews:NBK1184]. Regardless, intermediate alleles are considered premutations which may lead to disease upon transmission [@genereviews:NBK1184]. CAA interruptions have also been reported, but not linked to any phenotypic consequences [@pmid:23935513]. PCR fragment analysis, often with reflex RP-PCR, is commonly used for sizing [@genereviews:NBK1184]. Standard fragment analysis does not resolve CAT interruptions, which require targeted analysis like Sanger sequencing [@pmid:34635619].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansion leading to toxic gain of function with eventual misregulation-based loss of function/dominant negative [@genereviews:NBK1184; @pmid:35573049].", "year": "1993 [@pmid:8358429]", @@ -551,7 +551,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 12.0, "typ_age_onset_max": 48.0, - "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420].
Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]. This expansion is commonly detected with RP-PCR and fragment analysis [@pmid:32160188]. Pathogenicity depends heavily on interruptions, so long-read sequencing approaches are useful to fully resolve structure [@pmid:26295943; @pmid:32160188].", + "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]. RP-PCR with fragment analysis is commonly used for detection. 
Pathogenicity depends heavily on interruptions, so long read sequencing approaches are useful for full structure resolution [@pmid:26295943; @pmid:32160188].", "mechanism": "GoF", "mechanism_detail": "Transdominant mechanism theorized [@pmid:38467784].", "year": "2000 [@pmid:11017075]", @@ -617,7 +617,7 @@ "age_onset_max": 86.0, "typ_age_onset_min": 30.0, "typ_age_onset_max": 39.0, - "details": "Full penetrance of single alleles occurs at ~35 repeats [@genereviews:NBK1275; @pmid:37906407] and pathogenic expansions have been documented as large as 500 repeats [@pmid:12116207]. 33-34 length repeats are associated with reduced penetrance and later onset (age >50 years) [@genereviews:NBK1275]. Homozygous 31 repeat alleles may lead to recessive disease [@pmid:30533529], while a single 29-32 repeat is associated with increased ALS risk [@genereviews:NBK1275; @pmid:25285812; @pmid:32954321]. There is some evidence that all CAG-repeat expansions in ATXN2 may be a risk factor for ALS, regardless of length and interruptions [@pmid:39956874]. CAA interruptions have been observed which appear to stabilize the allele in transmission [@genereviews:NBK1275]. This expansion is commonly detected using RP-PCR with fragment analysis while southern blots are used to approximate size over 100 repeats [@geneReviews:NBK1275].", + "details": "Full penetrance of single alleles occurs at ~35 repeats [@genereviews:NBK1275; @pmid:37906407] and pathogenic expansions have been documented as large as 500 repeats [@pmid:12116207]. 33-34 length repeats are associated with reduced penetrance and later onset (age >50 years) [@genereviews:NBK1275]. Homozygous 31 repeat alleles may lead to recessive disease [@pmid:30533529], while a single 29-32 repeat is associated with increased ALS risk [@genereviews:NBK1275; @pmid:25285812; @pmid:32954321]. There is some evidence that all CAG-repeat expansions in ATXN2 may be a risk factor for ALS, regardless of length and interruptions [@pmid:39956874]. 
CAA interruptions have been observed which appear to stabilize the allele in transmission [@genereviews:NBK1275]. RP-PCR with fragment analysis is commonly used for detection, while southern blotting approximates size over 100 repeats [@geneReviews:NBK1275].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine cytoplasmic aggregates leading to cellular apoptosis; RAN translation implicated [@genereviews:NBK1275].", "year": "1996 [@pmid:8896556]", @@ -683,7 +683,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 49.0, - "details": "Benign alleles range from 11-44 repeats [@pmid:37906407], with intermediate alleles (45-59) associated with incomplete penetrance and non-classic phenotypes [@genereviews:NBK1196]. The threshold between incomplete and full penetrance is unclear, but presumed to occur at ~60 repeats [@genereviews:NBK1196; @pmid:37906407]. The interruption CAA has been observed [@pmid:35245110]; AAG is present in hg38 reference sequence. The APOE ε4 allele appears to act as a disease modifier [@pmid:39731318]; GLS expansions may also function as disease modifiers [@pmid:39699045].These expansions are commonly sized by PCR fragment analysis or RP-PCR [geneReviews:NBK1196]. For very large expansions or interruptions, long read targeted sequencing has been used [@pmid:40890629]", + "details": "Benign alleles range from 11-44 repeats [@pmid:37906407], with intermediate alleles (45-59) associated with incomplete penetrance and non-classic phenotypes [@genereviews:NBK1196]. The threshold between incomplete and full penetrance is unclear, but presumed to occur at ~60 repeats [@genereviews:NBK1196; @pmid:37906407]. The interruption CAA has been observed [@pmid:35245110]; AAG is present in hg38 reference sequence. 
The APOE ε4 allele appears to act as a disease modifier [@pmid:39731318]; GLS expansions may also function as disease modifiers [@pmid:39699045]. PCR fragment analysis or RP-PCR commonly detect expansions, but apparent homozygosity may require Southern blotting [@geneReviews:NBK1196]. Long read sequencing can characterize full structure [@pmid:40890629].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; aggregated and mislocalized proteins in neurons [@pmid:36169768; @genereviews:NBK1196].", "year": "1994 [@pmid:7874163]", @@ -749,7 +749,7 @@ "age_onset_max": 65.0, "typ_age_onset_min": 4.0, "typ_age_onset_max": 48.0, - "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions in this locus. Expansions are usually detected by PCR fragment analysis or RP-PCR. Most normal and moderate pathogenic alleles can be sized exactly, but very large expansions may need long-read sequencing or southern blotting [geneReviews:NBK1256].", + "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions at this locus.
PCR fragment analysis or RP-PCR detects expansions and sizes most normal or moderate pathogenic alleles, while very large expansions may require Southern blotting or long read sequencing [@geneReviews:NBK1256].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; toxic misfolded intermediated suspected [@genereviews:NBK1256; @pmid:18418675].", "year": "1996 [@pmid:8908515]", @@ -825,7 +825,7 @@ "age_onset_max": 76.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; geneReviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and southern blotting can approximate size [geneReviews:NBK1268].", + "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. 
Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; Interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; @geneReviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and southern blotting approximate size [@geneReviews:NBK1268].", "mechanism": "GoF", "mechanism_detail": "Polyglutamine/toxic gain-of-function [@omim:608768; @genereviews:NBK1268].", "year": "1999 [@pmid:10192387]", @@ -901,7 +901,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 56.0, "typ_age_onset_max": 62.0, - "details": "This locus is a novel STR-containing insertion, not present in reference genome; the pathogenic threshold (110-760) is based on the pure repeat of the pathogenic motif within the insertion [@pmid:19878914]. RP-PCR accurately detects this insertion [@pmid:22992774], while long read sequencing can resolve sizing and motif architecture [@pmid:36289212]. ", + "details": "This locus is a novel STR-containing insertion, not present in reference genome; the pathogenic threshold (110-760) is based on the pure repeat of the pathogenic motif within the insertion [@pmid:19878914]. 
RP-PCR accurately detects this insertion [@pmid:22992774], while long read sequencing can resolve sizing and motif architecture [@pmid:36289212].", "mechanism": "GoF", "mechanism_detail": "RNA toxicity and gain of function leading to neurodegeneration [@pmid:36371266]. Role in heterochromatin or chromosomal structure theorized [@omim:117210].", "year": "2009 [@pmid:19878914]", @@ -967,7 +967,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476], and C9orf72 promoter hypermethylation has been observed in expansion carriers [@pmid:42222887]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. 
C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. In the NYGC ALS Consortium WGS cohort, pathogenic C9orf72 repeat expansions were the most frequent ALS-associated mutation. Among classical ALS cases, they represented 9% of ALS cases [@pmid:42145639]. These expansions are usually detected with bidirectional RP-PCR [@pmid:21944778]. Southern blot approximates size [@pmid:23566336], while long-read sequencing can provide more precise information on size and structure [@pmid:30126445].", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. 
Methylation appears to increase with expansion length and age [@pmid:39709476], and C9orf72 promoter hypermethylation has been observed in expansion carriers [@pmid:42222887]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. Bidirectional RP-PCR is typically used for detection [@pmid:21944778]. Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate size [@pmid:23566336], while long read sequencing provides direct sizing and sequence characterization [@pmid:30126445].", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]. C9orf72 repeat expansions are associated with reduced C9orf72 expression in multiple ALS tissues and altered splicing of the exon 1a isoform [@pmid:42145639]. 
Reduced C9orf72 expression has also been observed in peripheral blood immune cells from C9orf72-associated ALS, with C9-ALS showing distinct monocyte activation signatures. In ALS spinal cord, activated myeloid cells expressing complement, lipid-processing, and phagocytic genes occur in regions with motor neuron loss and TDP-43 pathology [@pmid:42135512].", "year": "2011 [@pmid:21944778]", @@ -1033,7 +1033,7 @@ "age_onset_max": 73.0, "typ_age_onset_min": 43.0, "typ_age_onset_max": 52.0, - "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. Expansions are often detected using PCR fragment analysis [@pmid:35573049].", + "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. 
Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated NEFL levels before clinical diagnosis, also seen in other loci [@pmid:41951733]. Expansions are often detected by PCR fragment analysis [@pmid:35573049].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansions associated with increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", "year": "1997 [@pmid:8988170]", @@ -1383,7 +1383,7 @@ "age_onset_max": 10.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "CGG repeat in exon 1 of CSNK1E. Longest reported expanded allele of an affected individual is 745, with an unaffected sibling with repeat length 980. Father had a repeat of 8 and mother of 131. Whole exome sequencing does not detect expansions in this locus. They have instead been detected through methylation-outlier detection and confirmed with targeted long-read sequencing [@pmid:40751262].", + "details": "CGG repeat in exon 1 of CSNK1E. Longest reported expanded allele of an affected individual is 745, with an unaffected sibling with repeat length 980. Father had a repeat of 8 and mother of 131. Exome sequencing does not detect expansions in this locus. Reported cases were identified through methylation outlier detection and confirmed by targeted long read sequencing [@pmid:40751262].", "mechanism": "Unknown", "mechanism_detail": "Mechanism of this disease is largely unknown, but hypermethylation is observed. Expanded alleles exhibit hypermethylation and may mediate epigenetic silencing. 
Unaffected carriers have been observed, indicating variable expressivity or penetrance.", "year": "2025", @@ -1445,7 +1445,7 @@ "disease": "Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD)", "inheritance": ["AR"], "association_type": ["Mendelian"], - "disease_description": "Unverricht-Lundborg disease (ULD) is a rare progressive myoclonic epilepsy disorder characterized by action- and stimulus-sensitive myoclonus, and tonic-clonic seizures with ataxia, but with only a mild cognitive decline over time [@mondo:0009698]. Pathogenic expansions cannot be detected by srWGS. Conventional PCR can detect repeats in the normal range, while southern blots are used to detect and approximate size of expanded alleles [@genereviews:NBK1142].", + "disease_description": "Unverricht-Lundborg disease (ULD) is a rare progressive myoclonic epilepsy disorder characterized by action- and stimulus-sensitive myoclonus, and tonic-clonic seizures with ataxia, but with only a mild cognitive decline over time [@mondo:0009698].", "hpo_terms": null, "prevalence": null, "prevalence_details": "Worldwide prevalence unknown; Finland prevalence 2-4/100,000. Found across ethnicities/ancestries, with population-dependent prevalence; highest in Tunisia, Algeria, Morocco, and Finland [@genereviews:NBK1142].", @@ -1454,7 +1454,7 @@ "age_onset_max": 18.0, "typ_age_onset_min": 6.0, "typ_age_onset_max": 15.0, - "details": "Affected individuals have an unstable 12-nucleotide (dodecomer) repeat expansion. Alleles containing 2-3 motifs are considered benign, while alleles with 30-125 repeats are fully penetrant [@pmid:18325013]. Alleles in the range 12-17 repeats have been observed, however the individuals carrying them have not undergone clinical evaluation. Alleles in the range 4-11 and 18-29 repeats have not been reported to date.", + "details": "Affected individuals have an unstable 12-nucleotide (dodecomer) repeat expansion. 
Alleles containing 2-3 motifs are considered benign, while alleles with 30-125 repeats are fully penetrant [@pmid:18325013]. Alleles in the range 12-17 repeats have been observed, however the individuals carrying them have not undergone clinical evaluation. Alleles in the range 4-11 and 18-29 repeats have not been reported to date. srWGS cannot detect pathogenic expansions. Conventional PCR can detect normal range alleles, while Southern blotting detects and approximates expanded allele size [@genereviews:NBK1142].", "mechanism": "LoF", "mechanism_detail": "The repeat expanison causes significantly reduced expression of cystatin-B protein [@genereviews:NBK1142].", "year": "1997 [@pmid:9126745]", @@ -1520,7 +1520,7 @@ "age_onset_max": 64.0, "typ_age_onset_min": 33.0, "typ_age_onset_max": 53.0, - "details": "Pathogenicity only associated with pathogenic motif >30 repeats, flanked by at least 58 repeats of reference motif on either side; reference repeat (AAAAT) can range from 1 to 400 repeats, although typically less than 30 [@genereviews:NBK541729]. The pathogenic motif is unstable, particularly when transmitted by the father [@genereviews:NBK541729]. srWGS, exome sequencing, and RP-PCR cannot accurately detect this repeat [@genereviews:NBK541729]. Long range PCR combined with targeted sanger sequencing is the reliable method for detection and characterization [@genereviews:NBK541729].", + "details": "Pathogenicity only associated with pathogenic motif >30 repeats, flanked by at least 58 repeats of reference motif on either side; reference repeat (AAAAT) can range from 1 to 400 repeats, although typically less than 30 [@genereviews:NBK541729]. The pathogenic motif is unstable, particularly when transmitted by the father [@genereviews:NBK541729]. 
srWGS, exome sequencing, and RP-PCR do not accurately detect this repeat, but long range PCR with targeted Sanger sequencing is reliable for detection and characterization [@genereviews:NBK541729].", "mechanism": "GoF", "mechanism_detail": "Toxic gain-of-function mechanism in protein, associated with alternative splicing, an RNA switch, and an upregulation of reelin-DAB1 signalling [@omim:615945; @pmid:30284037].", "year": "2017 [@pmid:28686858]", @@ -1596,7 +1596,7 @@ "age_onset_max": 3.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Repeat ranges reflect affected and unaffected individuals from a cohort study of 70 controls (6-23 repeats), unaffected carriers representing the intermediate alleles (139-206), and affected individuals (273-306) [@pmid:17236128]. It has been hypothesized that unmethylated expansions may correspond to movement-related phenotypes (chorea, dystonia, and ataxia) [@pmid:39854091]. srWGS has underestimated expansion size in this locus. RP-PCR and southern blotting is used to detect expansions [@pmid:17236128], while long read sequencing can accurately size them [@pmid:39854091]", + "details": "Repeat ranges reflect affected and unaffected individuals from a cohort study of 70 controls (6-23 repeats), unaffected carriers representing the intermediate alleles (139-206), and affected individuals (273-306) [@pmid:17236128]. It has been hypothesized that unmethylated expansions may correspond to movement-related phenotypes (chorea, dystonia, and ataxia) [@pmid:39854091]. srWGS can underestimate expansion size. 
RP-PCR and Southern blotting detect expansions [@pmid:17236128], while long read sequencing accurately sizes them [@pmid:39854091].", "mechanism": "LoF", "mechanism_detail": "Hypermethylation leading to decreased expression, although unmethylated expansion leads to increased expression [@omim:136630; @pmid:37248219].", "year": "2007 [@pmid:17236128]", @@ -1738,7 +1738,7 @@ "age_onset_max": 74.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 30.0, - "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849]. Flanking PCR detects alleles up to ~150 repeats while RP-PCR may detect missed alleles [@genereviews:NBK1165; @pmid:24795756]. Southern blotting can approximate the size of large expansions [@pmid:22643181] while long read sequencing resolves repeat size and structure [@pmid:41974889].", + "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. 
In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849]. Flanking PCR detects alleles up to ~150 repeats, while RP-PCR may detect missed expanded alleles [@genereviews:NBK1165; @pmid:24795756]. Southern blotting approximates the size of large expansions [@pmid:22643181], while long read sequencing resolves repeat size and structure [@pmid:41974889].", "mechanism": "GoF", "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded DMPK r(CUG)n RNA forms a hairpin containing periodic 1*1 U/U internal loops that engage/sequester MBNL family RNA-binding proteins, especially MBNL1 [@pmid:42182465], disrupting pre mRNA processing and contributing to cardiac phenotypes [@pmid:39932794]. Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", "year": "1992 [@pmid:1310900]", @@ -1804,7 +1804,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, - "details": "Complex repeat of 18-20 nucleotides expands to cause disease: disease is found in individuals with 14-16 repeats [@pmid:24360810], while controls have typically 3-12 repeats with as low as 1 repeat [@genereviews:NBK535148; @gnomad:EIF4A3]. Significance of intermediate alleles is unknown [@pmid:29112243]. srWGS and exon sequencing do not reliably detect this expansion. 
Instead, targeted 5' UTR PCR + Sanger sequencing is the common detection methodology [@pmid:29112243; @pmid:24360810].", + "details": "Complex repeat of 18-20 nucleotides expands to cause disease: disease is found in individuals with 14-16 repeats [@pmid:24360810], while controls have typically 3-12 repeats with as low as 1 repeat [@genereviews:NBK535148; @gnomad:EIF4A3]. Significance of intermediate alleles is unknown [@pmid:29112243]. srWGS and exon sequencing do not reliably detect this expansion. Targeted 5′ UTR PCR with Sanger sequencing is the common detection methodology [@pmid:29112243; @pmid:24360810].", "mechanism": "LoF", "mechanism_detail": "LoF from a hypomorphic allele [@pmid:24360810].", "year": "2014 [@pmid:24360810]; syndrome described in 1992 [@pmid:1632438]", @@ -1936,7 +1936,7 @@ "age_onset_max": 87.0, "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, - "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. 
Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. A complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. Expansions can sometimes present as apparently sporadic adult-onset ataxia despite autosomal dominant inheritance [@pmid:42204984]. These expansions are not reliably detected by short read genome or exome sequencing [geneReviews:NBK599589]. long-range PCR and bidirectional RP-PCR are used for detecting expansions [geneReviews:NBK599589; @pmid:36516086]. Long read sequencing is used to determine repeat structure and purity [geneReviews:NBK599589; @pmid:36516086].", + "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. 
Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. A complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. Expansions can sometimes present as apparently sporadic adult-onset ataxia despite autosomal dominant inheritance [@pmid:42204984]. Short read genome or exome sequencing does not reliably detect these expansions [@genereviews:NBK599589]. Long range PCR and bidirectional RP-PCR are used for detection, while long read sequencing can determine repeat structure and purity [@genereviews:NBK599589; @pmid:36516086].", "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", @@ -2002,7 +2002,7 @@ "age_onset_max": 78.0, "typ_age_onset_min": 1.0, "typ_age_onset_max": 65.0, - "details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported showing episodic memory deficits, similar to those seen in AD [@pmid:41555826]. AGG interruptions are frequently reported in all associated diseases and appear to stabilize alleles; the length of the longest pure stretch predicts repeat instability [@pmid:7987398]. Elevated POI risk was observed starting at 36 repeats, increasing continuously with repeat length [@pmid:42001465]. Modern PCR techniques detect virtually all sizes of FMR1 expansions, while RP-PCR detects AGG interspersions. 
Southern blot analysis approximates size and indicates methylation status [GeneReviews: NBK1384]. Long-read sequencing is ideal for full characterization of repeat size, interruptions, methylation, and mosaicism. [@pmid:29868108; @pmid:31740840].", + "details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported showing episodic memory deficits, similar to those seen in AD [@pmid:41555826]. AGG interruptions are frequently reported in all associated diseases and appear to stabilize alleles; the length of the longest pure stretch predicts repeat instability [@pmid:7987398]. Elevated POI risk was observed starting at 36 repeats, increasing continuously with repeat length [@pmid:42001465]. Modern PCR techniques detect virtually all FMR1 expansion sizes, while RP-PCR detects AGG interspersions. Southern blotting approximates size and indicates methylation status [@genereviews:NBK1384]. Long read sequencing best characterizes repeat size, interruptions, methylation, and mosaicism [@pmid:29868108; @pmid:31740840].", "mechanism": "LoF/GoF", "mechanism_detail": "Loss of function via transcriptional silencing in FXS, RNA gain of function in FXTAS/FXPOI [@pmid:16205714; @pmid:36169768]. PRKGG appears to modulate neurotoxicity [@pmid:41507195].", "year": "1992 [@pmid:1605194]; causative gene discovered in 1991 [@pmid:1710175]", @@ -2134,7 +2134,7 @@ "age_onset_max": 80.0, "typ_age_onset_min": 10.0, "typ_age_onset_max": 15.0, - "details": "96% of FA patients have biallelic GAA expansions in intron 1 (compared to compound heterozygous with another mutation type), in which the reference allele is conventionally 5-33 repeats [@genereviews:NBK1281]. 
Intermediate alleles (34-55) are associated with premutations, but may lead to disease as exact pathogenicity/penetrance thresholds have not been demarcated [@genereviews:NBK1281]. The expanded repeats can interrupted either with GAAGAG, GAAGGA, or GAAGAAAA sequences, leading to differential phenotypes [@pmid:11748752]. Allele size is correlated with disease severity and inversely correlated to age of onset, and expansions can reach 1700 repeats [@pmid:8815938]. RP-PCR detects expansions <200 repeats while long range PCR detects expansions >200 repeats[@pmid:35595154]. Long-read sequencing has been used to size large alleles and determine sequence organization [@pmid:35595154].", + "details": "96% of FA patients have biallelic GAA expansions in intron 1 (compared to compound heterozygous with another mutation type), in which the reference allele is conventionally 5-33 repeats [@genereviews:NBK1281]. Intermediate alleles (34-55) are associated with premutations, but may lead to disease as exact pathogenicity/penetrance thresholds have not been demarcated [@genereviews:NBK1281]. The expanded repeats can be interrupted either with GAAGAG, GAAGGA, or GAAGAAAA sequences, leading to differential phenotypes [@pmid:11748752]. Allele size is correlated with disease severity and inversely correlated to age of onset, and expansions can reach 1700 repeats [@pmid:8815938]. RP-PCR and long range PCR can detect expansions [@pmid:35595154]. Long read sequencing sizes large alleles and resolves sequence organization [@pmid:35595154].", "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1996 [@pmid:8596916]", @@ -2210,7 +2210,7 @@ "age_onset_max": 70.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 34.0, - "details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. 
Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784]. One proband with ataxia and repeat size 11/112 had an asymptomatic father with 650 repeats and higher methylation [@pmid:41975469]. These expansions may not be reliably detected in srWGS or exome sequencing [@pmid:32413282]. Most repeats can be detected with RP-PCR, and long read sequencing accurately determines size and structure [@pmid:32413282].", + "details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784]. One proband with ataxia and repeat size 11/112 had an asymptomatic father with 650 repeats and higher methylation [@pmid:41975469]. srWGS or exome sequencing do not reliably detect these expansions [@pmid:32413282]. RP-PCR detects most repeats, while long read sequencing accurately resolves size and structure [@pmid:32413282].", "mechanism": "LoF/GoF?", "mechanism_detail": "Findings suggest that the mechanism is likely not LoF, but the mechanism is otherwise unknown [@pmid:41121761]. This expansion appears to be predominantly RAN translated into a toxic protein [@pmid:41121761]. 
This protein has been reported to impair cell proliferation, induce cytotoxicity and apoptosis in multiple cell lines, and caused phenotypic defects in a zebrafish model [@pmid:41121761].", "year": "2020 [@pmid:32413282]", @@ -2677,7 +2677,7 @@ "age_onset_max": 85.0, "typ_age_onset_min": 35.0, "typ_age_onset_max": 44.0, - "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions, exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. PCR methods can detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blot analysis [@genereviews:NBK1305]. 
long-read sequencing has been used to resolve interruptions and validate sizing [@pmid:41512049].", + "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. Undiagnosed carriers of premutation and pathogenic HTT expansions exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. PCR methods detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blotting [@genereviews:NBK1305]. Long read sequencing can resolve interruptions and validate sizing [@pmid:41512049].", "mechanism": "GoF/LoF", "mechanism_detail": "While the primary pathogenic mechanism is gain of function of the protein product, pathogenesis is complex and multifactorial [@pmid:27940602]. 
Reduced SCN4B expression in striatal neurons has been implicated as a modifier of HD-associated phenotype severity, potentially contributing to dysfunction in motor associated striatal neuronal populations [@pmid:41959367].", "year": "1993 [@pmid:8458085]", @@ -2824,7 +2824,7 @@ "age_onset_max": 66.0, "typ_age_onset_min": 31.0, "typ_age_onset_max": 51.0, - "details": "Benign range (13-45) inferred from cohort data, but pathogenic range isn't yet fully understood [@genereviews:NBK535148]. In a cohort of 65 patients from 59 families, alleles ranged from 85-289 repeats, with an inverse relationship between size and age of onset [@pmid:34047774]. Inherited peripheral neuropathy (IPN) may be associated with shorter expansions [@pmid:39013564]. Interruptions seen: ACG, CCA [@pmid:35245110]. srWGS does not reliably detect large expansions in this locus [@pmid:40858832]. These repeats are most reliably detected using RP-PCR followed by long read sequencing [@pmid:39013564]", + "details": "Benign range (13-45) inferred from cohort data, but pathogenic range isn't yet fully understood [@genereviews:NBK535148]. In a cohort of 65 patients from 59 families, alleles ranged from 85-289 repeats, with an inverse relationship between size and age of onset [@pmid:34047774]. Inherited peripheral neuropathy (IPN) may be associated with shorter expansions [@pmid:39013564]. Interruptions seen: ACG, CCA [@pmid:35245110]. srWGS does not reliably detect large expansions in this locus [@pmid:40858832]. RP-PCR followed by long read sequencing is the most reliable characterization approach [@pmid:39013564].", "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized [@omim:164310]; may involve RAN translation [@pmid:38467784]. 
Somatic mosicism and hypermethylation have also been reported [@pmid:41131788].", "year": "2019 [@pmid:31332380]", @@ -3032,7 +3032,7 @@ "age_onset_max": 70.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif. srWGS, exome sequencing, and sanger sequencing do not reliably detect these mutations as they are mainly in the GC-rich MUC1 VNTR [@genereviews:NBK153723]. Instead, they are commonly detected using a VNTR assay [@genereviews:NBK153723], or resolved with long read sequencing [@pmid:29520014].", + "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. 
This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif. Exome sequencing, srWGS, and Sanger sequencing do not reliably detect these variants [@genereviews:NBK153723]. Instead, they are commonly detected by a VNTR assay, or resolved with long read sequencing [@genereviews:NBK153723; @pmid:29520014].", "mechanism": "GoF", "mechanism_detail": "Toxic protein product accumulates in kidneys [@genereviews:NBK153723]", "year": "2013 [@pmid:23396133]", @@ -3230,7 +3230,7 @@ "age_onset_max": 67.0, "typ_age_onset_min": 40.0, "typ_age_onset_max": 60.0, - "details": "Benign alleles range from 3-14 repeats and pathogenic alleles (650+ repeats) appear fully penetrant; the significance of intermediate alleles has yet to be elucidated [@pmid:25101480]. Interruptions documented: GGCTG, GGCCCTG, GGCCG, and GGCCTTG [@pmid:37051597]. These expansions are often detected using RP-PCR with fragment analysis [@pmid:21683323]. Long read sequencing is used for accurate sizing of these alleles [@pmid:37051597].", + "details": "Benign alleles range from 3-14 repeats and pathogenic alleles (650+ repeats) appear fully penetrant; the significance of intermediate alleles has yet to be elucidated [@pmid:25101480]. Interruptions documented: GGCTG, GGCCCTG, GGCCG, and GGCCTTG [@pmid:37051597]. RP-PCR with fragment analysis usually detects these expansions [@pmid:21683323]. 
Long read sequencing is used for accurate allele sizing [@pmid:37051597].", "mechanism": "GoF", "mechanism_detail": "Toxic protein gain-of-function, RAN translation [@omim:614153].", "year": "2011 [@pmid:21683323]", @@ -3306,7 +3306,7 @@ "age_onset_max": 78.0, "typ_age_onset_min": 30.0, "typ_age_onset_max": 70.0, - "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to parology between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19. Short-read sequencing is not reliable for definitive sizing of large or complex expansions [@pmid:34034831]. RP-PCR can usually screen for expansions [@pmid:37371433], but long-read sequencing is the best method for determining size, structure, and methylation [@pmid:34774111].", + "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. 
Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to paralogy between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19. Short read sequencing is unreliable for definitive sizing of large or complex expansions [@pmid:34034831]. RP-PCR can screen for expansions [@pmid:37371433], but long-read sequencing best resolves size, structure, and methylation [@pmid:34774111].", "mechanism": "GoF", "mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. Proposed mechanisms include toxic uN2CpolyG/polyglycine aggregation, RNA pathogenicity, impaired autophagy, mitochondrial dysfunction, and innate immune activation [@pmid:42058219]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@pmid:39920690]. Tau pathology is evident, changes in p-tau levels and tau deposition have been reported [@pmid:41539185]. Expanded polyG proteins also induce nucleolar stress through interaction with NPM1 and rRNA. This disrupts ribosomal homeostasis and alters 3D chromatin organization through reduced CTCF/RAD21 expression [@pmid:41942455].", "year": "2019 [@pmid:31332380]", @@ -3372,7 +3372,7 @@ "age_onset_max": 40.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Benign range (3-16 repeats) established in 1000 controls, studied alongside pathogenic probands of up to 700 repeats [@pmid:31332380]. Pathogenicity occurs at repeats as short as 161 motifs [@pmid:38159879; @pmid:37923380], while intermediate alleles may correlate to milder phenotypes [@pmid:38159879]. Alt transcript in opposite direction: LOC642361.
RP-PCR is effective at detecting these expansions [@pmid:39308795], while long read sequencing is used to resolve size and structure of repeats [@pmid:38159879]. ", + "details": "Benign range (3-16 repeats) established in 1000 controls, studied alongside pathogenic probands of up to 700 repeats [@pmid:31332380]. Pathogenicity occurs at repeats as short as 161 motifs [@pmid:38159879; @pmid:37923380], while intermediate alleles may correlate to milder phenotypes [@pmid:38159879]. Alt transcript in opposite direction: LOC642361. RP-PCR effectively detects these expansions [@pmid:39308795], while long read sequencing resolves size and structure [@pmid:38159879].", "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized, overall mechanism unknown [@omim:618637; @pmid:36169768].", "year": "2019 [@pmid:31332380]", @@ -3438,7 +3438,7 @@ "age_onset_max": 79.0, "typ_age_onset_min": 40.0, "typ_age_onset_max": 59.0, - "details": "Disease is caused by a GCN polyalanine expansion in the first exon of PABPN1. Most known patients have (GCG)+, but GCN (any polyalanine) may be pathogenic [@genereviews:NBK1126]. This locus acts in a dominant manner for allele sizes ≥ 12 GCN motifs (90% of cases) and in a recessive manner for 11 GCN motifs, i.e. the genotype (GCN)11(GCN)11 (10% of cases). Additionally, disease is known to be more severe in cases of two expanded alleles. Age of onset is inverse to allele size, while penetrance and severity increase with allele size [@genereviews:NBK1126]. Mild, late-onset disease can occur in individuals with a (GCN)10(GCN)11 genotype, suggesting variable penetrance [@pmid:28011929]. The definition of this locus differs in the literature with prior work counting exact GCG motifs for a benign size of (GCG)6 [@pmid:9462747], while later resources count GCNs (any alanine codon), widening the region by 4 motifs to a benign size of (GCN)10 [@genereviews:NBK1126; @pmid:39349043]. STRchive is using the GCN definition.
Flanking PCR with fragment analysis accurately detects this expansion [@pmid:27980005]. In heterozygotic individuals, repeats are usually sized with sanger sequencing. When there are biallelic expanded variants, NGS or fragment analysis should be used instead.", + "details": "Disease is caused by a GCN polyalanine expansion in the first exon of PABPN1. Most known patients have (GCG)+, but GCN (any polyalanine) may be pathogenic [@genereviews:NBK1126]. This locus acts in a dominant manner for allele sizes ≥ 12 GCN motifs (90% of cases) and in a recessive manner for 11 GCN motifs, i.e. the genotype (GCN)11(GCN)11 (10% of cases). Additionally, disease is known to be more severe in cases of two expanded alleles. Age of onset is inverse to allele size, while penetrance and severity increase with allele size [@genereviews:NBK1126]. Mild, late-onset disease can occur in individuals with a (GCN)10(GCN)11 genotype, suggesting variable penetrance [@pmid:28011929]. The definition of this locus differs in the literature with prior work counting exact GCG motifs for a benign size of (GCG)6 [@pmid:9462747], while later resources count GCNs (any alanine codon), widening the region by 4 motifs to a benign size of (GCN)10 [@genereviews:NBK1126; @pmid:39349043]. STRchive is using the GCN definition. Flanking PCR with fragment analysis accurately detects expansions [@pmid:27980005]. Heterozygous expansions are usually sized by Sanger sequencing. 
Biallelic expanded variants are assessed using NGS or fragment analysis.", "mechanism": "GoF/LoF", "mechanism_detail": "Polyalanine expansions leading to cellular toxicity (loss of function) as well as abnormal aggregation and inefficient protein degradation, which may impact mRNA processing [@genereviews:NBK1126].", "year": "1998 [@pmid:9462747]", @@ -3504,7 +3504,7 @@ "age_onset_max": 36.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 2.0, - "details": "Alleles of 24 repeats (and sometimes 25 repeats) correspond to delayed disease onset and/or milder phenotype; alleles above benign range (9-20 repeats) and below the pathogenic range (26-33 repeats) have uncertain significance [@genereviews:NBK1427]. These expansions may not be reliably detected in srWGS or exome sequencing. Fragment analysis is the standard detection method, while sanger sequencing can determine the exact GCN repeat size. [@genereviews:NBK1427]", + "details": "Alleles of 24 repeats (and sometimes 25 repeats) correspond to delayed disease onset and/or milder phenotype; alleles above benign range (9-20 repeats) and below the pathogenic range (26-33 repeats) have uncertain significance [@genereviews:NBK1427]. srWGS or exome sequencing do not reliably detect these expansions. Fragment analysis is the standard detection method, while Sanger sequencing determines the exact repeat size. [@genereviews:NBK1427]", "mechanism": "LoF/GoF", "mechanism_detail": "Polyalanine expansion leading to loss or gain of function, dependent on altered protein product [@pmid:38467784; @genereviews:NBK1427]. Correlation between length and reduced transcriptional activity [@pmid:15888479].", "year": "2003 [@pmid:12640453]", @@ -3570,7 +3570,7 @@ "age_onset_max": 66.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "This is an expanded variable number tandem repeat (VNTR) in the PLIN4 gene, located in exon 3. 
This repeat consists of a 99 bp motif which encodes 33 amino-acids within the perilipin-4 protein [@pmid:32451610]. Expansions of this 99 bp motif leads to insertion of multiple imperfect 33–amino acid repeats. These repetitive sequences are thought to contribute to abnormal protein aggregation and dysregulated autophagy seen in affected muscle tissue [@omim:601846]. srWGS and exome sequencing do not reliably detect this repeat, so long range PCR is used for detection while long read sequence is needed to fully resolve size and structure [@pmid:32451610; @pmid:33811808].", + "details": "This is an expanded variable number tandem repeat (VNTR) in the PLIN4 gene, located in exon 3. This repeat consists of a 99 bp motif which encodes 33 amino-acids within the perilipin-4 protein [@pmid:32451610]. Expansions of this 99 bp motif leads to insertion of multiple imperfect 33–amino acid repeats. These repetitive sequences are thought to contribute to abnormal protein aggregation and dysregulated autophagy seen in affected muscle tissue [@omim:601846]. srWGS or exome sequencing do not reliably detect this repeat. Long range PCR is used for detection while long read sequencing is needed to fully resolve size and structure [@pmid:32451610; @pmid:33811808].", "mechanism": "GoF", "mechanism_detail": "The present disease is characterized by dominantly inherited progressively increasing mobilization of aggrephagy at sites of progressive accumulation of a mutated protein, suggesting that the mutation is leading to aggregation, likely through misfolding, exceeding aggrephagic capacity. [@pmid:32451610]", "year": "2020 [@pmid:32451610]", @@ -3717,7 +3717,7 @@ "age_onset_max": 62.0, "typ_age_onset_min": 26.0, "typ_age_onset_max": 50.0, - "details": "Benign range is 6-32 repeats, intermediate range 40-49, and pathogenic range is 51-78 [@pmid:37906407]; intermediate alleles are associated with reduced penetrance [@pmid:11198281].
In this locus, RP-PCR generally detects expansions while PCR with fragment analysis approximates allele size. Large expansions may require confirmation with southern blot [@pmid:35262663; @pmid:10581021].", + "details": "Benign range is 6-32 repeats, intermediate range 40-49, and pathogenic range is 51-78 [@pmid:37906407]; intermediate alleles are associated with reduced penetrance [@pmid:11198281]. RP-PCR generally detects expansions, while PCR fragment analysis approximates allele size. Large expansions may require southern blot confirmation [@pmid:35262663; @pmid:10581021].", "mechanism": "GoF", "mechanism_detail": "Polyalanine gain of function associated with RAN translation [@pmid:38467784].", "year": "1999 [@pmid:10581021]", @@ -4077,7 +4077,7 @@ "age_onset_max": 76.0, "typ_age_onset_min": 36.0, "typ_age_onset_max": 52.0, - "details": "Disease is caused by an insertion of a pathogenic motif, although motif presence is variable and can expand up to 200 repeats without apparently causing a phenotype [@genereviews:NBK564656]. Pathogenic expansions (ranging from 400-2750 pathogenic motifs) may be flanked by other motifs [@genereviews:NBK564656]. For example, (AAAGG)10-25(AAGGG)exp(AAAGG)4-6 [@pmid:32851396]. Motif heterogeneity is common in unaffected individuals [@genereviews:NBK564656], and motif associations are described by Delforge et al [@pmid:38627134]. The pathogenic size threshold appears to differ for the AAAGG motif: AAAGG expansions >= 600 repeats have been observed in CANVAS patients (vs 400 with established pathogenic motif AAGGG), while ~100-380 AAAGG repeats were found in unaffected controls [@pmid:37450567]. Length appears to impact age of onset and disease severity, with particular impact from the smaller allele [@doi:10.1136/jnnp-2024-ABN.259]. Phenotypic spectrum may include Parkinsonism [@pmid:39833204], chronic cough [@pmid:39811557], idiopathic sensory neuropathy, small fiber neuropathy, and sensorimotor neuropathy [@pmid:41964406]. 
expansions are suggested by failure of flanking PCR and a pathogenic RP-PCR sawtooth pattern, but biallelic confirmation and sizing rely on Southern blotting [@genereviews:NBK564656]. Because of the variable and complex motif structure, long read sequencing or optical genome mapping are useful resolving this expansion [@pmid:37892228; @pmid:37450567].", + "details": "Disease is caused by an insertion of a pathogenic motif, although motif presence is variable and can expand up to 200 repeats without apparently causing a phenotype [@genereviews:NBK564656]. Pathogenic expansions (ranging from 400-2750 pathogenic motifs) may be flanked by other motifs [@genereviews:NBK564656]. For example, (AAAGG)10-25(AAGGG)exp(AAAGG)4-6 [@pmid:32851396]. Motif heterogeneity is common in unaffected individuals [@genereviews:NBK564656], and motif associations are described by Delforge et al [@pmid:38627134]. The pathogenic size threshold appears to differ for the AAAGG motif: AAAGG expansions >= 600 repeats have been observed in CANVAS patients (vs 400 with established pathogenic motif AAGGG), while ~100-380 AAAGG repeats were found in unaffected controls [@pmid:37450567]. Length appears to impact age of onset and disease severity, with particular impact from the smaller allele [@doi:10.1136/jnnp-2024-ABN.259]. Phenotypic spectrum may include Parkinsonism [@pmid:39833204], chronic cough [@pmid:39811557], idiopathic sensory neuropathy, small fiber neuropathy, and sensorimotor neuropathy [@pmid:41964406]. Expansions are suggested by flanking PCR failure and a pathogenic RP-PCR sawtooth pattern, but biallelic confirmation and sizing rely on Southern blotting [@genereviews:NBK564656].
Long read sequencing or optical genome mapping are useful for resolving this variable, complex motif structure [@pmid:37892228; @pmid:37450567].", "mechanism": "LoF", "mechanism_detail": "LoF; exact mechanism unknown [@pmid:38467784].", "year": "2019 [@pmid:31230722]", @@ -4635,7 +4635,7 @@ "age_onset_max": 62.0, "typ_age_onset_min": 19.0, "typ_age_onset_max": 48.0, - "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110]. SRS detects some expansions but fails to size them beyond 250 bp. [@pmid:37906407]. PCR amplification may detect expansions of 66 or fewer [@genereviews:NBK1438].", + "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110]. Short-read sequencing can detect some expansions but cannot size alleles beyond 250 bp.
PCR amplification may detect expansions of ≤66 repeats [@pmid:37906407; @genereviews:NBK1438].", "mechanism": "LoF/GoF", "mechanism_detail": "Polyglutamine expansion leading to transcriptional dysregulation [@pmid:35053321].", "year": "1999 [@pmid:10484774]", From 4a16161b5af4b5f9c347625d4f03be72c42a68e2 Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 15:57:18 -0600 Subject: [PATCH 25/29] more diagnostic relevance --- data/STRchive-loci.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 9702e25f..76878e58 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -2276,7 +2276,7 @@ "age_onset_max": 4.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Pathogenic range from 3 unrelated probands; benign range inferred from mutation data [@genereviews:NBK535148]. Disease cases can be caused by homozygosity or compund heterozygotes [@omim:618412].", + "details": "Pathogenic range from 3 unrelated probands; benign range inferred from mutation data [@genereviews:NBK535148]. Disease cases can be caused by homozygosity or compound heterozygotes [@omim:618412]. Exome sequencing cannot detect these expansions. srWGS has flagged expanded alleles while RP-PCR has confirmed them. Complex alleles may require OGM or long read sequencing to resolve size and structure [@pmid:30970188; @pmid:35913761].", "mechanism": "LoF", "mechanism_detail": "Change in histone modification decreases transcription [@omim:618412].", "year": "2019 [@pmid:30970188]", @@ -2413,7 +2413,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423].", + "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423].
PCR amplification of tract I, followed by fragment analysis or Sanger sequencing, can detect and size these alleles. Standard NGS may miss expanded repeats [@genereviews:NBK1423].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2004 [@pmid:15385446]", @@ -2479,7 +2479,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423].", + "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423]. PCR amplification of tract II, followed by fragment analysis or Sanger sequencing, can detect and size these alleles. Standard NGS may miss expanded repeats [@genereviews:NBK1423].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2003 [@pmid:12676922]", @@ -2545,7 +2545,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Anticipation does not occur, and expansions appear fully penetrant; it is unknown if contractions also lead to phenotypic variation [@genereviews:NBK1423].", + "details": "Anticipation does not occur, and expansions appear fully penetrant; it is unknown if contractions also lead to phenotypic variation [@genereviews:NBK1423]. PCR amplification of tract III, followed by fragment analysis or Sanger sequencing, can detect and size these alleles.
Standard NGS may miss expanded repeats [@genereviews:NBK1423].", "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2000 [@pmid:10839976]", @@ -2611,7 +2611,7 @@ "age_onset_max": 0.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "Benign alleles are highly conserved to be 15 repeats, with disease observed in individuals with 22-23 repeats [@pmid:8614804; @pmid:22406499] as well as in individuals with 8-11 repeats [@genereviews:NBK535148].", + "details": "Benign alleles are highly conserved to be 15 repeats, with disease observed in individuals with 22-23 repeats [@pmid:8614804; @pmid:22406499] as well as in individuals with 8-11 repeats [@genereviews:NBK535148]. Targeted PCR across exon 1 has detected expansions, and subcloning with Sanger sequencing has characterized them [@pmid:9223304].", "mechanism": "GoF", "mechanism_detail": "Polyalanine expansion leading to GoF [@pmid:38467784]", "year": "1996 [@pmid:8614804]", From ff797810f2d31e567ed95f14bc17bcc586cb134d Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 16:53:07 -0600 Subject: [PATCH 26/29] Add detection section to STRchive loci schema Added detection section with methods for detecting and sizing expansions. --- data/STRchive-loci.schema.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/data/STRchive-loci.schema.json b/data/STRchive-loci.schema.json index a24788ba..d081327c 100644 --- a/data/STRchive-loci.schema.json +++ b/data/STRchive-loci.schema.json @@ -232,6 +232,17 @@ "in_text_citations": true, "multiline": true }, + "detection": { + "section": "Locus", + "title": "Detection", + "description": "Known methods used to detect and size expansions, with citations in the format e.g. [@doi:12345; @pmid:12345].
This could include information on which methods accurately detect, size, and resolve loci, as well as methods known to be inaccurate.", + "examples": [ + "RP-PCR has detected expansions, while long read sequencing fully resolves size and structure [@pmid:12345]" + ], + "type": ["string", "null"], + "in_text_citations": true, + "multiline": true + }, "mechanism": { "section": "Locus", "title": "Mechanism", From 2f1b2471b8d9a4523319a8adef4e278c1adc802c Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 16:57:08 -0600 Subject: [PATCH 27/29] Add detection section to site --- site/src/pages/loci/[id].astro | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/site/src/pages/loci/[id].astro b/site/src/pages/loci/[id].astro index 598cf2e4..01a60a16 100644 --- a/site/src/pages/loci/[id].astro +++ b/site/src/pages/loci/[id].astro @@ -298,6 +298,16 @@ const curation = curations.find((curation) => curation.Locus_ID === id); ) } + { + locus.detection && ( +
+
Detection
+ + + +
+ ) + } { (locus.mechanism || locus.mechanism_detail) && ( From 8c7a4ef1951381c51d4eca851d2567a22567eedb Mon Sep 17 00:00:00 2001 From: gaberbz <182678422+gaberbz@users.noreply.github.com> Date: Thu, 11 Jun 2026 23:05:59 +0000 Subject: [PATCH 28/29] Update data --- data/STRchive-loci.json | 80 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 76878e58..1053486f 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -24,6 +24,7 @@ "typ_age_onset_min": 24.0, "typ_age_onset_max": 30.0, "details": "Characterized in eight unrelated families which were used to establish benign (3-44) and pathogenic (118-694) ranges [@pmid:39068203]. RP-PCR is typically used for detection [@pmid:39068203]. srWGS has significantly underestimated repeat count, while long read sequencing has accurately resolved size [@pmid:39068203].", + "detection": null, "mechanism": null, "mechanism_detail": "Potentially over-expression of transcripts [@pmid:39068203].", "year": "2023 [@pmid:39068203]", @@ -90,6 +91,7 @@ "typ_age_onset_min": 2.0, "typ_age_onset_max": 10.0, "details": "Allele ranges (benign:4-39; pathogenic: >200) inferred from The Human Gene Mutation Database [@genereviews:NBK535148]. Intermediate alleles correspond to a premutation [@pmid:23914978]. Non-canonical motifs include: CGG/CCT/GTG/CAG/CTG3 [@pmid:35245110; @pmid:34111553]. RP-PCR can detect these expansions and size alleles to ~80 repeats, while Southern blotting sizes full expansions and detects methylation [@pmid:34282157].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1993 [@pmid:8334699]", @@ -156,6 +158,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Allele ranges established in study of 3 families; intermediate alleles likely premutations [@pmid:24763282]. 
Pathogenic threshold may be higher than 300 as this was the largest allele that could be accurately sized by the assay. Standard PCR detects small alleles, while RP-PCR and Southern blotting detect large expansions. srWGS may underestimate the size of large expansions and exact sizing requires long read sequencing [@pmid:24763282; @pmid:39313615].", + "detection": null, "mechanism": "LoF/methylation", "mechanism_detail": "Silencing of the FMR2 gene as a consequence of a CCG expansion located upstream of this gene [@malacard:KNS007].", "year": "2014 [@pmid:24763282]", @@ -222,6 +225,7 @@ "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. Although srWGS screens can detect this expansion[@pmid:36797998], sizing needs to be validated with standard PCR fragment analysis or RP-PCR [geneReviews:NBK1333].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine alters protein conformation leading to gain-of-function neurodegeneration [@pmid:29398703; @pmid:36169768]. 
Transcriptional dysregulation, axonal transport disruption, and mitochondrial dysfunction also play causative roles in the neurodegeneration [@pmid:22609045].", "year": "1991 [@pmid:2062380]; the first triplet disease to be discovered [@pmid:15313856]", @@ -288,6 +292,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:17668384].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", "year": "2002 [@pmid:11889467]", @@ -354,6 +359,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. 
Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:11889467].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", "year": "2002 [@pmid:11889467]", @@ -420,6 +426,7 @@ "typ_age_onset_min": 20.0, "typ_age_onset_max": 40.0, "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. PCR fragment analysis detects most moderate alleles, but Southern blotting or RP-PCR may be needed for large expansions or apparent homozygosity [@genereviews:NBK1491].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansions leading to gain of function [@genereviews:NBK1491].", "year": "1994 [@pmid:7842016]", @@ -486,6 +493,7 @@ "typ_age_onset_min": 20.0, "typ_age_onset_max": 39.0, "details": "Penetrance is dependent on sequence purity in addition to expansion length: pure repeats are pathogenic at 39 repeats [@pmid:37906407], while CAT interruptions [@pmid:35245110] can lead to reduced penetrance at comparable lengths [@genereviews:NBK1184]. Regardless, intermediate alleles are considered premutations which may lead to disease upon transmission [@genereviews:NBK1184]. CAA interruptions have also been reported, but not linked to any phenotypic consequences [@pmid:23935513]. 
PCR fragment analysis, often with reflex RP-PCR, is commonly used for sizing [geneReviews:NBK1184]. Standard fragment analysis does not resolve CAT interruptions, which require targeted analysis like Sanger sequencing [@pmid:34635619].", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansion leading to toxic gain of function with eventual misregulation-based loss of function/dominant negative [@genereviews:NBK1184; @pmid:35573049].", "year": "1993 [@pmid:8358429]", @@ -552,6 +560,7 @@ "typ_age_onset_min": 12.0, "typ_age_onset_max": 48.0, "details": "Unaffected individuals are usually (82%) compound heterozygotes in the benign range [@genereviews:NBK1175]. Intermediate alleles show reduced penetrance, and exact distinction between intermediate and the lower end of the pathogenic range is unclear [@genereviews:NBK1175]. Expansions are frequently interrupted by ATCCT, ATCCC, ATTCC, ATTTCT, ATATTCT, or ATTCTTCT; interruptions of ATTGT, TTTCT, ATTTTCT, ATTCTCT, GTTTCT, CTTCT, and ATTCTAT have been noted [@pmid:36199580] as has the interruption ATGCT [@pmid:19234597]. The ATCCT interruption motif is associated with a higher prevalence of epileptic seizures [@pmid:24318420]. Different motif patterns and mixed motif ratios may influence age of onset and anticipation [@pmid:41229449]. One study suggests that alleles with completely pure ATTCT expansions are non-pathogenic, and that repeat interruptions such as ATTCC, are necessary to cause SCA10 [@pmid:36092952]. RP-PCR with fragment analysis is commonly used for detection. 
Pathogenicity depends heavily on interruptions, so long read sequencing approaches are useful for full structure resolution [@pmid:26295943; @pmid:32160188].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Transdominant mechanism theorized [@pmid:38467784].", "year": "2000 [@pmid:11017075]", @@ -618,6 +627,7 @@ "typ_age_onset_min": 30.0, "typ_age_onset_max": 39.0, "details": "Full penetrance of single alleles occurs at ~35 repeats [@genereviews:NBK1275; @pmid:37906407] and pathogenic expansions have been documented as large as 500 repeats [@pmid:12116207]. 33-34 length repeats are associated with reduced penetrance and later onset (age >50 years) [@genereviews:NBK1275]. Homozygous 31 repeat alleles may lead to recessive disease [@pmid:30533529], while a single 29-32 repeat is associated with increased ALS risk [@genereviews:NBK1275; @pmid:25285812; @pmid:32954321]. There is some evidence that all CAG-repeat expansions in ATXN2 may be a risk factor for ALS, regardless of length and interruptions [@pmid:39956874]. CAA interruptions have been observed which appear to stabilize the allele in transmission [@genereviews:NBK1275]. RP-PCR with fragment analysis is commonly used for detection, while southern blotting approximates size over 100 repeats [@geneReviews:NBK1275].", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine cytoplasmic aggregates leading to cellular apoptosis; RAN translation implicated [@genereviews:NBK1275].", "year": "1996 [@pmid:8896556]", @@ -684,6 +694,7 @@ "typ_age_onset_min": 10.0, "typ_age_onset_max": 49.0, "details": "Benign alleles range from 11-44 repeats [@pmid:37906407], with intermediate alleles (45-59) associated with incomplete penetrance and non-classic phenotypes [@genereviews:NBK1196]. The threshold between incomplete and full penetrance is unclear, but presumed to occur at ~60 repeats [@genereviews:NBK1196; @pmid:37906407]. 
The interruption CAA has been observed [@pmid:35245110]; AAG is present in hg38 reference sequence. The APOE ε4 allele appears to act as a disease modifier [@pmid:39731318]; GLS expansions may also function as disease modifiers [@pmid:39699045]. PCR fragment analysis or RP-PCR commonly detect expansions, but apparent homozygosity may require Southern blotting [@genereviews:NBK1196]. Long read sequencing can characterize full structure [@pmid:40890629].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; aggregated and mislocalized proteins in neurons [@pmid:36169768; @genereviews:NBK1196].", "year": "1994 [@pmid:7874163]", @@ -750,6 +761,7 @@ "typ_age_onset_min": 4.0, "typ_age_onset_max": 48.0, "details": "Benign alleles range from 4-27 [@pmid:37906407], with intermediate alleles ranging from premutations (28-33) to reduced penetrance (34-36) [@genereviews:NBK1256]. Interruptions observed include CAA [@pmid:35245110]. srWGS cannot accurately detect repeat expansions at this locus. PCR fragment analysis or RP-PCR detects expansions and sizes most normal or moderate pathogenic alleles, while very large expansions may require Southern blotting or long read sequencing [@genereviews:NBK1256].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function; toxic misfolded intermediates suspected [@genereviews:NBK1256; @pmid:18418675].", "year": "1996 [@pmid:8908515]", @@ -826,6 +838,7 @@ "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, "details": "Two genes span the CTG/CAG repeat and are expressed in opposite directions: ATXN8, a nearly pure polyglutamine repeat protein in the CAG direction, and ATXN8OS, which is transcribed to a noncoding CUG repeat RNA [@pmid:16804541]. 
Reduced penetrance is found in alleles of all sizes, although penetrance appears higher at 71+ repeats and repeats at 50-70 appear less likely to result in disease [@genereviews:NBK1268; @pmid:20373340]. Roda et al. suggested that the ATXN8 or ATXN8OS gene should not be evaluated in isolation as a candidate gene for spinocerebellar degenerative disease [@pmid:28451643]. CCG/CGG interruptions in high-penetrance SCA8 families increase RAN translation and protein toxicity [@pmid:34632710]; interruptions in CTG/CAG expansion by 1 or more CCG/CGG, CTA/TAG, CTC/GAG, CCA/TGG, or CTT/AAG trinucleotides have been observed in full-penetrance repeats [@pmid:16804541; @genereviews:NBK1268]. Short read genome sequencing can underestimate expansion size [@pmid:40015980; @genereviews:NBK1268]. RP-PCR detects large expansions while long read sequencing and Southern blotting approximate size [@genereviews:NBK1268].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine/toxic gain-of-function [@omim:608768; @genereviews:NBK1268].", "year": "1999 [@pmid:10192387]", @@ -902,6 +915,7 @@ "typ_age_onset_min": 56.0, "typ_age_onset_max": 62.0, "details": "This locus is a novel STR-containing insertion, not present in reference genome; the pathogenic threshold (110-760) is based on the pure repeat of the pathogenic motif within the insertion [@pmid:19878914]. RP-PCR accurately detects this insertion [@pmid:22992774], while long read sequencing can resolve sizing and motif architecture [@pmid:36289212].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "RNA toxicity and gain of function leading to neurodegeneration [@pmid:36371266]. Role in heterochromatin or chromosomal structure theorized [@omim:117210].", "year": "2009 [@pmid:19878914]", @@ -968,6 +982,7 @@ "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. 
The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72][@pmid:41951733]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Somatic C9orf72 repeat expansions may emerge de novo in CNS tissue from alleles below the pathogenic range, potentially contributing to sporadic ALS/FTD [@pmid:41986690]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476], and C9orf72 promoter hypermethylation has been observed in expansion carriers [@pmid:42222887]. ALS caused by repeat expansions in C9orf72 generally has an earlier disease onset and faster progression than other ALS presentations [@pmid:39226712]. C9orf72 expansions have been associated with reduced thalamic volume in undiagnosed carriers, and plasma neurofilament light chain has an approximately linear association with repeat count and motor neuron disease risk [@pmid:41951733; @pmid:42095061]. Pathogenic C9orf72 repeat expansions have also been identified in ambiguous late onset behavioral or psychiatric like presentations [@pmid:42158267]. Bidirectional RP-PCR is typically used for detection [@pmid:21944778]. 
Large pathogenic expansions are difficult to size exactly by PCR, so Southern blot provides approximate size [@pmid:23566336], while long read sequencing provides direct sizing and sequence characterization [@pmid:30126445].", + "detection": null, "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Additional mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167]. Drosophila model-system evidence suggests that RAN translated poly(GR) may contribute to toxicity by activating the integrated stress response through eIF2α phosphorylation and promoting stress granule accumulation [@pmid:42087256]. C9orf72 repeat expansions are associated with reduced C9orf72 expression in multiple ALS tissues and altered splicing of the exon 1a isoform [@pmid:42145639]. Reduced C9orf72 expression has also been observed in peripheral blood immune cells from C9orf72-associated ALS, with C9-ALS showing distinct monocyte activation signatures. In ALS spinal cord, activated myeloid cells expressing complement, lipid-processing, and phagocytic genes occur in regions with motor neuron loss and TDP-43 pathology [@pmid:42135512].", "year": "2011 [@pmid:21944778]", @@ -1034,6 +1049,7 @@ "typ_age_onset_min": 43.0, "typ_age_onset_max": 52.0, "details": "The intermediate range (19-20 motifs) [@pmid:39996131; @genereviews:NBK1140] can be associated with a premutation, reduced penetrance, atypical phenotype, or a disease state when homozygous [@genereviews:NBK1140]. 
When the longer allele is > 22 motifs, the short allele does not play a role in pathogenicity/age of onset, but expansions of 21-22 motifs have age of onset influenced by the smaller allele [@pmid:39996131]. For individuals with a longest allele of 19-20, the presence of a second allele of 19-20 likely increases the risk of developing SCA6 [@pmid:39996131]. Undiagnosed carriers of pathogenic range repeats in CACNA1A exhibit significant cerebellar volume loss and elevated NEFL levels before clinical diagnosis, also seen in other loci [@pmid:41951733]. Expansions are often detected by PCR fragment analysis [@pmid:35573049].", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansions associated with increased expression of altered product leading to impaired gene binding and transcription factor function as well as cellular toxicity [@genereviews:NBK1140].", "year": "1997 [@pmid:8988170]", @@ -1100,6 +1116,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "70% of individuals have 11 repeats [@pmid:7603564], but pathogenic expansion can span hundreds of motifs [@pmid:10767345]. The CGG repeat expansion can lead to a fragile site and subsequent deletion of 11q (shown in 2 cases) but total causality is unclear; intermediate alleles are associated with a premutation [@pmid:19267933].", + "detection": null, "mechanism": "Hypermethylation", "mechanism_detail": "DNA hypermethylation/11q deletion in sporadic cases [@pmid:38467784].", "year": "1995 [@pmid:7603564]", @@ -1166,6 +1183,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "The locus contains 17 imperfect 33 bp motifs, with a stretch of 7 perfect GGCCCCCCCCGTGCCGCCCACGGGTGACTCCGG motifs. Several pathogenic mutations have been proposed. The most supported pathogenic variants are single base deletions in the proximal VNTR, reported in repeat segments 1, 4, and 5 [@pmid:34850019]. 
One reported proximal VNTR deletion is a 1bp deletion of (C)8 to (C)7 within the VNTR, causing a motif change (this is the pathogenic motif represented here). Distal CEL VNTR single-base insertions, particularly INS9/INS10/INS12, have been reported as likely benign polymorphisms, while proximal insertion variants may have greater pathogenic potential [@pmid:38483348]. Also, a contraction that deletes one of the VNTR repeats may be pathogenic, with reduced penetrance, although evidence for this is sparse [@pmid:19760265]. Another study identified a c.2041_2042delinsCGG p.(Val681Argfs*6) mutation in the 12th motif (one of the imperfect motifs) [@pmid:39361122]. Several non-tandem repeat pathogenic MODY variants have also been reported in this gene. Given limited data and multiple proposed pathogenic variants, the normal and pathogenic ranges are currently difficult to define.", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Proximal CEL VNTR frameshift variants alter the C-terminal tandem-repeat domain and become pathogenic through protein misfolding and proteotoxic gain-of-function. Pathogenic proximal deletion variants show increased aggregation, reduced secretion, ER stress, and UPR activation, while enzymatic activity is largely preserved [@pmid:21784842; @pmid:27650499; @pmid:33862081]. Functional testing of CEL VNTR insertion variants showed that proximal insertions had greater aggregation and UPR effects [@pmid:38483348].", "year": "2005", @@ -1237,6 +1255,7 @@ "typ_age_onset_min": 28.0, "typ_age_onset_max": 56.0, "details": "Detailed overview of disease locus through 2024 by Rimoldi et al [@pmid:39643839]. 
≤30 uninterrupted CCTG repeats or 11-26 CCTG repeats with GCTC/TCTG interruptions are considered benign; 27-29 repeats with interruptions have currently unknown significance, ~30-~54 repeats are considered premutations, ~55-74 repeats are premutations with possible reduced penetrance, and >74 repeat alleles are considered pathogenic [@genereviews:NBK1466]. Penetrance is age-dependent and approaches 100%. Locus structure is (TG)n(TCTG)n(CCTG)n. CCTG expansion causes DM2 but the other repeat units are also variable. Interruptions include GCTG/TCTG/GGCT [@pmid:35245110]. Many DM2 expansions include a downstream 3' (TCTG)n block after the main array. One cohort found this structure in 88% of DM2 patients [@pmid:39703464]. Bidirectional RP-PCR and Southern blotting are used for detection [@genereviews:NBK1466]. A downstream 3′ (TCTG)n block can cause false negative or unclear standard 3′ CCTG-primed results, so TCTG targeted QP-PCR or long read sequencing can resolve these cases [@pmid:36018009; @pmid:41937177].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Aberrant splicing, RAN translation [@pmid:22140091; @pmid:38467784]. Proposed pathogenesis contributions include nucleolar stress, autophagy dysregulation, and stress granule formation [@pmid:42003432].", "year": "2001 [@pmid:11486088]", @@ -1318,6 +1337,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Both expansions to (GTC)6-7 and contractions to (GTC)4 are associated with disease [@genereviews:NBK1487].", + "detection": null, "mechanism": "LoF/GoF?", "mechanism_detail": "Poly-aspartic acid expansions, domain dependent [@pmid:29530484]; may involve misfolding but still unestablished [@genereviews:NBK1123].", "year": "1999 [@pmid:9887340]", @@ -1384,6 +1404,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "CGG repeat in exon 1 of CSNK1E. Longest reported expanded allele of an affected individual is 745, with an unaffected sibling with repeat length 980. 
Father had a repeat of 8 and mother of 131. Exome sequencing does not detect expansions in this locus. Reported cases were identified through methylation outlier detection and confirmed by targeted long read sequencing [@pmid:40751262].", + "detection": null, "mechanism": "Unknown", "mechanism_detail": "Mechanism of this disease is largely unknown, but hypermethylation is observed. Expanded alleles exhibit hypermethylation and may mediate epigenetic silencing. Unaffected carriers have been observed, indicating variable expressivity or penetrance.", "year": "2025", @@ -1455,6 +1476,7 @@ "typ_age_onset_min": 6.0, "typ_age_onset_max": 15.0, "details": "Affected individuals have an unstable 12-nucleotide (dodecamer) repeat expansion. Alleles containing 2-3 motifs are considered benign, while alleles with 30-125 repeats are fully penetrant [@pmid:18325013]. Alleles in the range 12-17 repeats have been observed, however the individuals carrying them have not undergone clinical evaluation. Alleles in the range 4-11 and 18-29 repeats have not been reported to date. srWGS cannot detect pathogenic expansions. Conventional PCR can detect normal range alleles, while Southern blotting detects and approximates expanded allele size [@genereviews:NBK1142].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "The repeat expansion causes significantly reduced expression of cystatin-B protein [@genereviews:NBK1142].", "year": "1997 [@pmid:9126745]", @@ -1521,6 +1543,7 @@ "typ_age_onset_min": 33.0, "typ_age_onset_max": 53.0, "details": "Pathogenicity only associated with pathogenic motif >30 repeats, flanked by at least 58 repeats of reference motif on either side; reference repeat (AAAAT) can range from 1 to 400 repeats, although typically less than 30 [@genereviews:NBK541729]. The pathogenic motif is unstable, particularly when transmitted by the father [@genereviews:NBK541729]. 
srWGS, exome sequencing, and RP-PCR do not accurately detect this repeat, but long range PCR with targeted Sanger sequencing is reliable for detection and characterization [@genereviews:NBK541729].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Toxic gain-of-function mechanism in protein, associated with alternative splicing, an RNA switch, and an upregulation of reelin-DAB1 signalling [@omim:615945; @pmid:30284037].", "year": "2017 [@pmid:28686858]", @@ -1597,6 +1620,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Repeat ranges reflect affected and unaffected individuals from a cohort study of 70 controls (6-23 repeats), unaffected carriers representing the intermediate alleles (139-206), and affected individuals (273-306) [@pmid:17236128]. It has been hypothesized that unmethylated expansions may correspond to movement-related phenotypes (chorea, dystonia, and ataxia) [@pmid:39854091]. srWGS can underestimate expansion size. RP-PCR and southern blotting detect expansions [@pmid:17236128], while long read sequencing accurately sizes them [@pmid:39854091]", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Hypermethylation leading to decreased expression, although unmethylated expansion leads to increased expression [@omim:136630; @pmid:37248219].", "year": "2007 [@pmid:17236128]", @@ -1663,6 +1687,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "There is conflicting evidence for the association between this repeat expansion and Duchenne muscular dystrophy. The association was reported in a single family, from which the benign and pathogenic ranges were inferred from affected and unaffected family members [@pmid:27417533]. 
The population frequency of the proposed pathogenic allele is much higher than expected for a highly penetrant early-onset condition.", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Functional defect in dystrophin/dystroglycan [@pmid:16969582].", "year": "2016 [@pmid:27417533]", @@ -1739,6 +1764,7 @@ "typ_age_onset_min": 10.0, "typ_age_onset_max": 30.0, "details": "Overview of disease locus through 2024, including largest pathogenic allele of 4,000, described in Rimoldi et al review [@pmid:39643839]. Intermediate alleles (35-49) associated with premutation [@genereviews:NBK1165]. 3%-8% of DM1 expansions contain interrupting variant repeats such as CCG and CGG, associated with later onset and milder phenotype; the variant repeat GCGGCA has also been reported [@pmid:32851192; @pmid:39710066]. In another study, interruptions of the CTG repeat with CCG, GGC, CTC or CAG motifs are estimated to occur in 3-11% of DM1 patients [@pmid:35741732]. Expansions within gene ZNF850 may function as DM1 modifiers [@pmid:39679849]. Flanking PCR detects alleles up to ~150 repeats, while RP-PCR may detect missed expanded alleles [@genereviews:NBK1165; @pmid:24795756]. Southern blotting approximates the size of large expansions [@pmid:22643181], while long read sequencing resolves repeat size and structure [@pmid:41974889].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "RNA gain-of-function: RNA gelation leading to misregulation of alternative splicing [@pmid:36169768]. Expanded DMPK r(CUG)n RNA forms a hairpin containing periodic 1*1 U/U internal loops that engage/sequester MBNL family RNA-binding proteins, especially MBNL1 [@pmid:42182465], disrupting pre mRNA processing and contributing to cardiac phenotypes [@pmid:39932794]. Loss of MBNL proteins has been linked to mis-splicing of Autism spectrum-risk genes such as SCN2A, ANK2, and SHANK2, possibly leading to Autism-related traits [@pmid:40259070]. 
Evidence suggests that disulfide bond-dependent MBNL1/MBNL2 dimerization maintains toxic RNA foci [@pmid:41929128].", "year": "1992 [@pmid:1310900]", @@ -1805,6 +1831,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, "details": "Complex repeat of 18-20 nucleotides expands to cause disease: disease is found in individuals with 14-16 repeats [@pmid:24360810], while controls have typically 3-12 repeats with as low as 1 repeat [@genereviews:NBK535148; @gnomad:EIF4A3]. Significance of intermediate alleles is unknown [@pmid:29112243]. srWGS and exon sequencing do not reliably detect this expansion. Targeted 5′ UTR PCR with Sanger sequencing is the common detection methodology [@pmid:29112243; @pmid:24360810].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "LoF from a hypomorphic allele [@pmid:24360810].", "year": "2014 [@pmid:24360810]; syndrome described in 1992 [@pmid:1632438]", @@ -1871,6 +1898,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign range (<50) inferred from cohort data, but exact upper bound was not reported. Two affected patients had repeat lengths of 194 and 198, with an unaffected parent with a repeat length of 158 [@pmid:39868092]. The unaffected parent makes the inheritance pattern uncertain, but it appears to be autosomal dominant.", + "detection": null, "mechanism": null, "mechanism_detail": "Accumulation of toxic RAN proteins is a proposed mechanism [@pmid:38585781]", "year": "2026 [@pmid:39868092]", @@ -1937,6 +1965,7 @@ "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, "details": "Higher repeat size has been associated with earlier age of onset [@pmid:39263992], but this correlation is not significant in all cohorts [@pmid:42096001]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. 
However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effects of interruptions on penetrance and onset have been demonstrated in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Interruptions of GAG, GAAGGA, GAAGAAAGAA, GAAAAGAAGAAGGAAGAAGGAA, GAAAAGAAGAAGGAA, and GCAGAAGAAGAAGAA have been reported [@pmid:40379261]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may increase ataxia susceptibility in combination with other factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. A complex (TTC/TGC) ≥300 repeat expansion has been associated as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530]. Expansions can sometimes present as apparently sporadic adult-onset ataxia despite autosomal dominant inheritance [@pmid:42204984]. Short read genome or exome sequencing do not reliably detect these expansions [@genereviews:NBK599589]. Long range PCR and bidirectional RP-PCR are used for detection, while long read sequencing can determine repeat structure and purity [@genereviews:NBK599589; @pmid:36516086].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", @@ -2003,6 +2032,7 @@ "typ_age_onset_min": 1.0, "typ_age_onset_max": 65.0, "details": "Intermediate or 'gray zone' alleles occur at 45-54 repeats and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. 
FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported showing episodic memory deficits, similar to those seen in AD [@pmid:41555826]. AGG interruptions are frequently reported in all associated diseases and appear to stabilize alleles; the length of the longest pure stretch predicts repeat instability [@pmid:7987398]. Elevated POI risk was observed starting at 36 repeats, increasing continuously with repeat length [@pmid:42001465]. Modern PCR techniques detect virtually all FMR1 expansion sizes, while RP-PCR detects AGG interspersions. Southern blotting approximates size and indicates methylation status [@genereviews:NBK1384]. Long read sequencing best characterizes repeat size, interruptions, methylation, and mosaicism [@pmid:29868108; @pmid:31740840].", + "detection": null, "mechanism": "LoF/GoF", "mechanism_detail": "Loss of function via transcriptional silencing in FXS, RNA gain of function in FXTAS/FXPOI [@pmid:16205714; @pmid:36169768]. PRKGG appears to modulate neurotoxicity [@pmid:41507195].", "year": "1992 [@pmid:1605194]; causative gene discovered in 1991 [@pmid:1710175]", @@ -2069,6 +2099,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, "details": "14 repeats appears highly constrained in humans: homozygous expansions from 14 polyalanines to 19 leads to disease, which can be limited to isolated palpebral defects [@pmid:15591279]. Heterozygous expansions to 24 polyalanines also lead to disease [@pmid:15591279]. 
Locus start can differ between catalogs, which can affect genotyping.", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "Polyalanine expansion leads to haploinsufficiency, likely due to decreased protein availability due to mislocalization following nuclear inclusion [@genereviews:NBK1441; @pmid:15591279]", "year": "2003 [@pmid:12529855]", @@ -2135,6 +2166,7 @@ "typ_age_onset_min": 10.0, "typ_age_onset_max": 15.0, "details": "96% of FA patients have biallelic GAA expansions in intron 1 (compared to compound heterozygous with another mutation type), in which the reference allele is conventionally 5-33 repeats [@genereviews:NBK1281]. Intermediate alleles (34-55) are associated with premutations, but may lead to disease as exact pathogenicity/penetrance thresholds have not been demarcated [@genereviews:NBK1281]. The expanded repeats can be interrupted either with GAAGAG, GAAGGA, or GAAGAAAA sequences, leading to differential phenotypes [@pmid:11748752]. Allele size is correlated with disease severity and inversely correlated to age of onset, and expansions can reach 1700 repeats [@pmid:8815938]. RP-PCR and long range PCR can detect expansions [@pmid:35595154]. Long read sequencing sizes large alleles and resolves sequence organization [@pmid:35595154].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Loss of function via transcriptional silencing [@pmid:16205714; @pmid:36169768].", "year": "1996 [@pmid:8596916]", @@ -2211,6 +2243,7 @@ "typ_age_onset_min": 20.0, "typ_age_onset_max": 34.0, "details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. 
Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784]. One proband with ataxia and repeat size 11/112 had an asymptomatic father with 650 repeats and higher methylation [@pmid:41975469]. srWGS or exome sequencing do not reliably detect these expansions [@pmid:32413282]. RP-PCR detects most repeats, while long read sequencing accurately resolves size and structure [@pmid:32413282].", + "detection": null, "mechanism": "LoF/GoF?", "mechanism_detail": "Findings suggest that the mechanism is likely not LoF, but the mechanism is otherwise unknown [@pmid:41121761]. This expansion appears to be predominantly RAN translated into a toxic protein [@pmid:41121761]. This protein has been reported to impair cell proliferation, induce cytotoxicity and apoptosis in multiple cell lines, and caused phenotypic defects in a zebrafish model [@pmid:41121761].", "year": "2020 [@pmid:32413282]", @@ -2277,6 +2310,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Pathogenic range from 3 unrelated probands; benign range inferred from mutation data [@genereviews:NBK535148]. Disease cases can be caused by homozygosity or compound heterozygotes [@omim:618412]. Exome sequencing cannot detect these expansions. srWGS has flagged expanded alleles while RP-PCR has confirmed them. Complex alleles may require OGM or long read sequencing to resolve size and structure [@pmid:30970188; @pmid:35913761].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Change in histone modification decreases transcription [@omim:618412].", "year": "2019 [@pmid:30970188]", @@ -2343,6 +2377,7 @@ "typ_age_onset_min": 34.0, "typ_age_onset_max": 55.0, "details": "Variation in repeat length, motif length, and motif sequence, with long CT-dimer expansions strongly associated with aFTLD-U risk. CCTT and CCCTCT motif expansions have been observed in unaffected individuals. 
CCCCT repeats were present in one aFTLD-U case. Proposed risk-associated expansions are typically >450 bp with >80% CT content and/or contain >190 CT dimers, though unaffected carriers have also been observed. Although the functional consequence of this repeat remains unknown, its presence in nearly 60% of aFTLD-U cases points to a major role in disease pathogenesis [@pmid:41820575].", + "detection": null, "mechanism": "Unknown [@pmid:41820575].", "mechanism_detail": null, "year": "2026 [@pmid:41820575]", @@ -2414,6 +2449,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423]. PCR amplification of tract I, followed by fragment analysis or Sanger sequencing can detect and size these alleles. Standard NGS may miss expanded repeats [@genereviews:NBK1423].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2004 [@pmid:15385446]", @@ -2480,6 +2516,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Anticipation does not occur, and expansions appear fully penetrant [@genereviews:NBK1423]. PCR amplification of tract II, followed by fragment analysis or Sanger sequencing can detect and size these alleles. Standard NGS may miss expanded repeats [@genereviews:NBK1423]", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2003 [@pmid:12676922]", @@ -2546,6 +2583,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Anticipation does not occur, and expansions appear fully penetrant; it is unknown if contractions also lead to phenotypic variation [@genereviews:NBK1423]. PCR amplification of tract III, followed by fragment analysis or Sanger sequencing can detect and size these alleles. 
Standard NGS may miss expanded repeats [@genereviews:NBK1423].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to haploinsufficiency [@genereviews:NBK1423]", "year": "2000 [@pmid:10839976]", @@ -2612,6 +2650,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign alleles are highly conserved to be 15 repeats, with disease observed in individuals with 22-23 repeats [@pmid:8614804; @pmid:22406499] as well as in individuals with 8-11 repeats [@genereviews:NBK535148]. Targeted PCR across exon 1 has detected expansions, and subcloning with Sanger sequencing has characterized them [@pmid:9223304].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyalanine expansion leading to GoF [@pmid:38467784]", "year": "1996 [@pmid:8614804]", @@ -2678,6 +2717,7 @@ "typ_age_onset_min": 35.0, "typ_age_onset_max": 44.0, "details": "27-35 motifs are unstable/premutations, while 36-39 motifs are associated with reduced penetrance and mild phenotypes [@pmid:39572770], and alleles over 40 repeats are typically fully penetrant [@genereviews:NBK1305]. >60 motifs associated with onset age <20 years [@genereviews:NBK1305]. Only CAG expansions are considered pathogenic, but interruptions impact pathogenicity (CAA) [@pmid:35245110; @pmid:39673793]. Only fathers with premutations are considered at risk of transmitting pathogenic alleles [@pmid:19507258]. CAG repeat size 21-35 may continuously modulate brain structure and psychiatric disease risk in an age-dependent manner [@pmid:39572770] [@doi:https://doi.org/10.64898/2026.05.08.26352223]. Somatic expansion of HTT CAG repeats in vulnerable tissues is proposed to contribute to age-dependent onset and neurodegeneration, with greater repeat instability associated with earlier disease onset [@pmid:41926793; @pmid:39824182]. 
Undiagnosed carriers of premutation and pathogenic HTT expansions exhibit reduced striatal brain volumes and elevated neurofilament light chain levels before clinical diagnosis, consistent with findings observed across other loci [@pmid:41951733]. PCR methods detect expansions up to ~115 repeats, but very large expansions may require TP-PCR or Southern blotting [@genereviews:NBK1305]. Long read sequencing can resolve interruptions and validate sizing [@pmid:41512049].", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "While the primary pathogenic mechanism is gain of function of the protein product, pathogenesis is complex and multifactorial [@pmid:27940602]. Reduced SCN4B expression in striatal neurons has been implicated as a modifier of HD-associated phenotype severity, potentially contributing to dysfunction in motor associated striatal neuronal populations [@pmid:41959367].", "year": "1993 [@pmid:8458085]", @@ -2759,6 +2799,7 @@ "typ_age_onset_min": 30.0, "typ_age_onset_max": 52.0, "details": "Intermediate alleles (29-39) may either be premutations or associated with reduced penetrance; the longest pathogenic expansion (40+ motifs) to date is 60 repeats [@genereviews:NBK1529]", + "detection": null, "mechanism": "LoF/GoF", "mechanism_detail": "Non-mutually exclusive mechanisms include loss of function from RNA sequestration and gain of function from toxic transcripts and increased protein expression [@genereviews:NBK1529]", "year": "2001 [@pmid:11694876]", @@ -2825,6 +2866,7 @@ "typ_age_onset_min": 31.0, "typ_age_onset_max": 51.0, "details": "Benign range (13-45) inferred from cohort data, but pathogenic range isn't yet fully understood [@genereviews:NBK535148]. In a cohort of 65 patients from 59 families, alleles ranged from 85-289 repeats, with an inverse relationship between size and age of onset [@pmid:34047774]. Inherited peripheral neuropathy (IPN) may be associated with shorter expansions [@pmid:39013564]. 
Interruptions seen: ACG, CCA [@pmid:35245110]. srWGS does not reliably detect large expansions in this locus [@pmid:40858832]. RP-PCR followed by long read sequencing is the most reliable characterization approach [@pmid:39013564].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized [@omim:164310]; may involve RAN translation [@pmid:38467784]. Somatic mosicism and hypermethylation have also been reported [@pmid:41131788].", "year": "2019 [@pmid:31332380]", @@ -2891,6 +2933,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Healthy controls do not have pathogenic allele (TTTCA), but do have 9-20 benign motifs (TTTTA) [@genereviews:NBK535148]. Total allele size in probands spanned from 650-1035 repeats; an inverse relationship between allele size and age of onset was noted [@pmid:31664039, @pmid:40788430]. In one study it was proposed that pathogenicity only occurs when TTTCA is expanded [@pmid: 40788430]. RP-PCR can detect the pathogenic TTTCA insertion motif but does not adequately resolve complex TTTTA/TTTCA architecture [@pmid:41268177]. 
Long range PCR followed by long read sequencing is able to size and determine structure [@pmid:40200849].", + "detection": null, "mechanism": "Unknown", "mechanism_detail": "Noted as unknown in literature [@omim:613608].", "year": "2019 [@pmid:31664039]", @@ -2967,6 +3010,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Deletion of one repeat (4 to 3 repeats) found in 10 unrelated families with unsolved disease [@pmid:38714869]; a heterozygous T-to-G transversion in the third repeat can also lead to disease [@pmid:38714868].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Activates a thyroid-specific enhancer [@pmid:38714868; @pmid:38714869].", "year": "2024 [@pmid:38714869]", @@ -3033,6 +3077,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Disease is caused by the single base expansion of a heptanucleotide (7) cytosine homopolymer tract (i.e. from (C)7 to (C)8 ) within one copy of a coding VNTR, resulting in a frameshift mutation. This VNTR has a 60 bp motif, varying in length and sequence composition. This motif ranges in copy number from 20-125 (~1.5-5 kb) and is GC-rich (>80%). The specific copy of the VNTR motif involved varies by family but is consistent within a family [@genereviews:NBK535148]. This locus is particularly difficult to genotype [@pmid:23396133; @pmid:39781475]. Gamaarachchi et al. observed 20 unique VNTR haplotypes which ranged in size from 40–83 copies, with no unrelated individuals sharing the same haplotype. Unique haplotypes implied frequent independent origins of the dupC variant [@pmid:41285770]. NOTE: Disease is caused by a 7 to 8 C homopolymer expansion within the main motif which we represent here as a change in motif. Exome sequencing, srWGS, and Sanger sequencing do not reliably detect these variants [@genereviews:NBK153723]. 
Instead, they are commonly detected by a VNTR assay, or resolved with long read sequencing [@genereviews:NBK153723; @pmid:29520014].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Toxic protein product accumulates in kidneys [@genereviews:NBK153723]", "year": "2013 [@pmid:23396133]", @@ -3099,6 +3144,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign range (2-7) alleles established by 484 control alleles and validated with orthogonal databases, while single proband had expansion of ~200 repeats inherited from mother via uniparental disomy [@pmid:39455596]. While the repeat expansion is newly reported, other variants in the NAXE gene have previously been associated with mitochondrial encephalopathy.", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Reduced NAXE expression from expansion in promoter; hypermethylation was detected at and downstream of the repeat sequence in the proband as well as the maternal copy of the expanded allele, which was not present in the maternal normal range allele nor in the controls [@pmid:39455596]", "year": "2024 [@pmid:39455596]", @@ -3165,6 +3211,7 @@ "typ_age_onset_min": 44.0, "typ_age_onset_max": 60.0, "details": "Allelic ranges taken from STRipy based on primary literature [@stripy:NIPA1]. Currently proposed as a modifier for ALS [@pmid:31286297]. Note: the motif for this locus is CGG in hg38 and T2T reference genomes, while in hg19, the motif is the reverse complement CCG because it is on the negative strand. GCA, GCT, and GCC interruptions have been reported [@pmid:40585427].", + "detection": null, "mechanism": null, "mechanism_detail": null, "year": "2019 [@pmid:30342764]", @@ -3231,6 +3278,7 @@ "typ_age_onset_min": 40.0, "typ_age_onset_max": 60.0, "details": "Benign alleles range from 3-14 repeats and pathogenic alleles (650+ repeats) appear fully penetrant; the significance of intermediate alleles has yet to be elucidated [@pmid:25101480]. 
Interruptions documented: GGCTG, GGCCCTG, GGCCG, and GGCCTTG [@pmid:37051597].RP-PCR with fragment analysis usually detect these expansions[@pmid:21683323]. Long read sequencing is used for accurate allele sizing [@pmid:37051597].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Toxic protein gain-of-function, RAN translation [@omim:614153].", "year": "2011 [@pmid:21683323]", @@ -3307,6 +3355,7 @@ "typ_age_onset_min": 30.0, "typ_age_onset_max": 70.0, "details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to parology between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19. Short read sequencing is unreliable for definitive sizing of large or complex expansions [@pmid:34034831]. RP-PCR can screen for expansions [@pmid:37371433], but long-read sequencing best resolves size, structure, and methylation [@pmid:34774111].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. Proposed mechanisms include toxic uN2CpolyG/polyglycine aggregation, RNA pathogenicity, impaired autophagy, mitochondrial dysfunction, and innate immune activation [@pmid:42058219]. 
The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@pmid:39920690]. Tau pathology is evident, changes in p-tau levels and tau deposition have been reported [@pmid:41539185]. Expanded polyG proteins also induce nucleolar stress through interaction with NPM1 and rRNA. This disrupts ribosomal homeostasis and alters 3D chromatin organization through reduced CTCF/RAD21 expression [@pmid:41942455].", "year": "2019 [@pmid:31332380]", @@ -3373,6 +3422,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign range (3-16 repeats) established in 1000 controls, studied alongside pathogenic probands of up to 700 repeats [@pmid:31332380]. Pathogenicity occurs at repeats as short as 161 motifs [@pmid:38159879; @pmid:37923380], while intermediate alleles may correlate to milder phenotypes [@pmid:38159879]. Alt transcript in opposite direction: LOC642361.RP-PCR effectively detects these expansions [@pmid:39308795], while long read sequencing resolves size and structure [@pmid:38159879].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA mediated toxicity hypothesized, overall mechanism unknown [@omim:618637; @pmid:36169768].", "year": "2019 [@pmid:31332380]", @@ -3439,6 +3489,7 @@ "typ_age_onset_min": 40.0, "typ_age_onset_max": 59.0, "details": "Disease is caused by a GCN polyalanine expansion in the first exon of PABPN1. Most known patients have (GCG)+, but GCN (any polyalanine) may be pathogenic [@genereviews:NBK1126]. This locus acts in a dominant manner for allele sizes ≥ 12 GCN motifs (90% of cases) and in a recessive manner for 11 GCN motifs, i.e. the genotype (GCN)11(GCN)11 (10% of cases). Additionally, disease is known to be more severe in cases of two expanded alleles. Age of onset is inverse to allele size, while penetrance and severity increase with allele size [@genereviews:NBK1126]. 
Mild, late-onset disease can occur in individuals with a (GCN)10(GCN)11 genotype, suggesting variable penetrance [@pmid:28011929]. The definition of this locus differs in the literature with prior work counting exact GCG motifs for a benign size of (GCG)6 [@pmid:9462747], while later resources count GCNs (any alanine codon), widening the region by 4 motifs to a benign size of (GCN)10 [@genereviews:NBK1126; @pmid:39349043]. STRchive is using the GCN definition. Flanking PCR with fragment analysis accurately detects expansions [@pmid:27980005]. Heterozygous expansions are usually sized by Sanger sequencing. Biallelic expanded variants are assessed using NGS or fragment analysis.", + "detection": null, "mechanism": "GoF/LoF", "mechanism_detail": "Polyalanine expansions leading to cellular toxicity (loss of function) as well as abnormal aggregation and inefficient protein degradation, which may impact mRNA processing [@genereviews:NBK1126].", "year": "1998 [@pmid:9462747]", @@ -3505,6 +3556,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 2.0, "details": "Alleles of 24 repeats (and sometimes 25 repeats) correspond to delayed disease onset and/or milder phenotype; alleles above benign range (9-20 repeats) and below the pathogenic range (26-33 repeats) have uncertain significance [@genereviews:NBK1427]. srWGS or exome sequencing do not reliably detect these expansions. Fragment analysis is the standard detection method, while Sanger sequencing determines the exact repeat size. [@genereviews:NBK1427]", + "detection": null, "mechanism": "LoF/GoF", "mechanism_detail": "Polyalanine expansion leading to loss or gain of function, dependent on altered protein product [@pmid:38467784; @genereviews:NBK1427]. 
Correlation between length and reduced transcriptional activity [@pmid:15888479].", "year": "2003 [@pmid:12640453]", @@ -3571,6 +3623,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "This is an expanded variable number tandem repeat (VNTR) in the PLIN4 gene, located in exon 3. This repeat consists of a 99 bp motif which encodes 33 amino-acids within the perilipin-4 protein [@pmid:32451610]. Expansions of this 99 bp motif leads to insertion of multiple imperfect 33–amino acid repeats. These repetitive sequences are thought to contribute to abnormal protein aggregation and dysregulated autophagy seen in affected muscle tissue [@omim:601846]. srWGS or exome sequencing do not reliably detect this repeat. Long range PCR is used for detection while long read sequence is needed to fully resolve size and structure [@pmid:32451610], @pmid:33811808].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "The present disease is characterized by dominantly inherited progressively increasing mobilization of aggrephagy at sites of progressive accumulation of a mutated protein, suggesting that the mutation is leading to aggregation, likely through misfolding, exceeding aggrephagic capacity. [@pmid:32451610]", "year": "2020 [@pmid:32451610]", @@ -3637,6 +3690,7 @@ "typ_age_onset_min": 57.0, "typ_age_onset_max": 59.0, "details": "There is conflicting evidence for the association between this repeat expansion and Parkinson's risk [@pmid:20399836; @pmid:10196696; @pmid:22963882], as well as overall disease significance. May be predisposing factor in earlier age of onset in FRDA patients [@pmid:19043662].", + "detection": null, "mechanism": null, "mechanism_detail": null, "year": null, @@ -3718,6 +3772,7 @@ "typ_age_onset_min": 26.0, "typ_age_onset_max": 50.0, "details": "Benign range is 6-32 repeats, intermediate range 40-49, and pathogenic range is 51-78 [@pmid:37906407]; intermediate alleles are associated with reduced penetrance [@pmid:11198281]. 
RP-PCR generally detects expansions, while PCR fragment analysis approximates allele size. Large expansions may require southern blot confirmation [@pmid:35262663; @pmid:10581021].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyalanine gain of function associated with RAN translation [@pmid:38467784].", "year": "1999 [@pmid:10581021]", @@ -3784,6 +3839,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Pathogenic expansion found in 2 families, from whom pathogenic range (18-19) is inferred [@pmid:26005867]. Benign range (7-14) inferred from Human Gene Mutation Database [@genereviews:NBK535148].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Mutations abrogate the histone-modifying potential of PRDM12, consistent with a loss of function mechanism [@omim:616488].", "year": "2015 [@pmid:26005867]", @@ -3850,6 +3906,7 @@ "typ_age_onset_min": 50.0, "typ_age_onset_max": 60.0, "details": "Normal PRNP alleles have one nonapeptide followed by four octapeptide tandem repeat sequences, each of which comprises the amino acids: Pro-(His/Gln)-Gly-Gly-Gly-(-/Trp)-Gly-Gln; any additional repeat leads to pathogenicity, with the largest repeat observed 16 motifs [@genereviews:NBK1229]. Insertion length may correspond to phenotype, such as CJD versus frontotemporal dementia [@pmid:36977684].", + "detection": null, "mechanism": "LoF?", "mechanism_detail": "Loss of function hypothesized [@pmid:38467784]", "year": "1991 [@pmid:1683708]", @@ -3926,6 +3983,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "TTTTA repeat expansions and TTTCA repeat insertions in intron 4 of the RAI1 gene that co-segregated with disease status in a single large family from Mali [@pmid:37994247]. Ten affected individuals were studied. Both TTTTA and TTTCA motifs were observed in all eight of the affected individuals with spanning reads, with allele sizes in the range: (TTTTA)278-773(TTTCA)9-334. 
A single individual was observed with additional motifs and interruptions in one allele with the structure: (TTTTA)exp(GGGGT)ins(GGGAT)ins(TTTCA)ins. TTTCA repeats were absent in 200 Malian controls, who had alleles in the range: (TTTCA)16-20. Reviewed in [@pmid:38876750]. It is uncertain if expansions at both the TTTTA and TTTCA motifs, or only the TTTCA motif are required for pathogenicity. The pathogenic range in STRchive is for the TTTCA motif only. Note: locus is partially deleted in T2T reference genome.", + "detection": null, "mechanism": "Unknown", "mechanism_detail": "Expression isn't changed [@pmid:7994247].", "year": "2024 [@pmid:37994247]", @@ -4002,6 +4060,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "The locus structure is (TTTTA)exp(TTTCA)exp(TTTTA)n, where only the TTTCA is specific to affected individuals as a pathogenic insertion [@pmid:29507423; @pmid:30351492]. Pathogenic expansions range from 60 to thousands of repeats [@pmid:29507423; @pmid:30351492]. Interruptions seen: TATTA, TTTTTA [@pmid:35245110].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA toxicity hypothesized [@pmid:29507423].", "year": "2018 [@pmid:29507423]", @@ -4078,6 +4137,7 @@ "typ_age_onset_min": 36.0, "typ_age_onset_max": 52.0, "details": "Disease is caused by an insertion of a pathogenic motif, although motif presence is variable and can expand up to 200 repeats without apparently causing a phenotype [@genereviews:NBK564656]. Pathogenic expansions (ranging from 400-2750 pathogenic motifs) may be flanked by other motifs [@genereviews:NBK564656]. For example, (AAAGG)10-25(AAGGG)exp(AAAGG)4-6 [@pmid:32851396]. Motif heterogeneity is common in unaffected individuals [@genereviews:NBK564656], and motif associations are described by Delforge et al [@pmid:38627134]. 
The pathogenic size threshold appears to differ for the AAAGG motif: AAAGG expansions >= 600 repeats have been observed in CANVAS patients (vs 400 with established pathogenic motif AAGGG), while ~100-380 AAAGG repeats were found in unaffected controls [@pmid:37450567]. Length appears to impact age of onset and disease severity, with particular impact from the smaller allele [@doi:10.1136/jnnp-2024-ABN.259]. Phenotypic spectrum may include Parkinsonism [@pmid:39833204], chronic cough [@pmid:39811557], idiopathic sensory neuropathy, small fiber neuropathy, and sensorimotor neuropathy [@pmid:41964406].Expansions are suggested by flanking PCR failure and a pathogenic RP-PCR sawtooth pattern, but biallelic confirmation and sizing rely on Southern blotting [@genereviews:NBK564656]. Long read sequencing or optical genome mapping are useful resolving this variable, complex motif structure [@pmid:37892228; @pmid:37450567].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "LoF; exact mechanism unknown [@pmid:38467784].", "year": "2019 [@pmid:31230722]", @@ -4154,6 +4214,7 @@ "typ_age_onset_min": 18.0, "typ_age_onset_max": 30.0, "details": "Benign alleles have been documented to have 6-16 repeats [@pmid:37864208], while pathogenic repeats range from 120 to 197 repeats; there is no apparent relationship between allele size and age of onset [@pmid:37864208; @pmid:35148830]. Intermediate alleles amy be associated with incomplete penetrance, or milder phenotypes [@pmid:35148830]. 
AGG, TGG, and CGT interruptions observed [@pmid:35148830; @pmid:35700120].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to RNA-mediated gain-of-function mechanism [@pmid:38467784].", "year": "2022 [@pmid:35148830]", @@ -4220,6 +4281,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign range (4-17 repeats) established from gnomAD and primary literature; pathogenic ranges (20-27) reflect two clinical cases to date [@gnomad:RUNX2; @pmid:26220009]. Intermediate alleles (i.e, 18 repeats; 19 not reported) appear to not be associated with disease [@pmid:20560987; @pmid:26220009]. The gene RUNX2 was previously called CBFA1, as reflected in some of the literature [@pmid:9182765].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansion leading to haploinsufficiency [@pmid:26220009].", "year": "Causation identified in 2015 [@pmid:26220009]; clinical association found in 1997 [@pmid:9182765]", @@ -4286,6 +4348,7 @@ "typ_age_onset_min": 21.0, "typ_age_onset_max": 39.0, "details": "Novel, pathogenic alleles include expansions of TTTTAn + TTTCAn, but only the TTTCA insertion is specific to affected individuals and associated with symptom age of onset [@pmid:39569876]; pathogenic expansions range from 105 to 3860 repeats [@omim:601068; @pmid:29507423]", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA mediated gain of function proposed [@omim:601068; @pmid:38467784].", "year": "2018 [@pmid:29507423]", @@ -4362,6 +4425,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Expansion to 22-26 repeats or contraction to 8 repeats can cause disease, as reported in 3 families [@genereviews:NBK535148]. 
There is phenotypic and allelic overlap between XLID and PHPX, with the pathogenic threshold for XLID estimated at 26 motifs and the pathogenic threshold for PHPX estimated at 22 motifs [@pmid:15800844, @pmid:12428212].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions leading to aggresome formation and impaired transcriptional activity [@pmid:17127446].", "year": "2002 [@pmid:12428212]", @@ -4428,6 +4492,7 @@ "typ_age_onset_min": 12.0, "typ_age_onset_max": 30.0, "details": "Disease caused by novel insertion of pathogenic motif (not found in any controls); size of pathogenic motif observed in two probands ranged from ~274-558 repeats, while the expanded reference motif ranged from 340-390 [@pmid:31664034].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA toxicity theorized [@pmid:31664034; @pmid:38467784].", "year": "2019 [@pmid:31664034]", @@ -4504,6 +4569,7 @@ "typ_age_onset_min": 39.7, "typ_age_onset_max": 39.7, "details": "Strong inverse relationship between age of onset and insertion length, which varied between 35-52 repeats [@pmid:29229810].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Altered splicing with intron retention, haploinsufficiency [@pmid:38876750].", "year": "2017 [@pmid:29229810]", @@ -4570,6 +4636,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Benign range (0-60) estimated from population data and pathogenic range (83-148) gathered from 7 patients from 3 unrelated families [@pmid:41959811]. The hg38 coordinates reported in the paper (chr6:13328476-13328603) [@pmid:41959811] contain multiple annotated TRs an interruptions, so the true coordinates are likely more narrow.", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Gain of Function apparent, but mechanism is unknown. 
Methylation and RAN translation have been oberseved [@pmid:41959811].", "year": "2026 [@pmid:41959811]", @@ -4636,6 +4703,7 @@ "typ_age_onset_min": 19.0, "typ_age_onset_max": 48.0, "details": "Benign range is 25-40 repeats, pathogenic range is 49+ repeats (largest to date 66 motifs, with mild correlation between size and age of onset), and intermediate alleles (41-48 repeats) are associated with reduced penetrance and potentially milder phenotypes [@genereviews:NBK1438]. Huntington's disease like phenotype [@pmid:12805114]. CAA CAG CAA interruption is seen in all alleles stably transmitted across generations [@genereviews:NBK1438;@pmid:35245110]. Short-read sequencing can detect some expansions but cannot size alleles beyond 250 bp. PCR amplification may detect expansions of ≤66 repeats [@pmid:37906407; @geneReviews].", + "detection": null, "mechanism": "LoF/GoF", "mechanism_detail": "Polyglutamine expansion leading to transcriptional dysregulation [@pmid:35053321].", "year": "1999 [@pmid:10484774]", @@ -4702,6 +4770,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Found in one Turkish individual with Tetralogy of Fallot who had 25 repeats rather than 15 [@genereviews:NBK535148].", + "detection": null, "mechanism": "LoF/GoF", "mechanism_detail": "Polyalanine expansion, leading to cytoplasmic aggregation [@omim:187500; @pmid:19948535].", "year": "2010 [@pmid:19948535]", @@ -4768,6 +4837,7 @@ "typ_age_onset_min": 40.0, "typ_age_onset_max": 59.0, "details": "Most controls have <40 repeats while majority of patients have >50 repeats; penetrance is <100%, as unaffected individuals have been documented with >80 repeats and alleles of affected individuals range from 12-2600 [@genereviews:NBK535148; @pmid:25168903]. 
Expansions are causative in approximately 70% of disease cases [@genereviews:NBK535148].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Sequestration of MBNL1 in RNA foci, similar to the mechanism underlying myotonic dystrophy-1 [@pmid:25593321]. Variation in RAN translation [@pmid:38467784].", "year": "2012 [@pmid:23185296]", @@ -4834,6 +4904,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Expansion (45-100 repeats) found in affected individuals from 2 families and not in 500 controls (benign range: 20-38 repeats) [@pmid:37148549]. Longer alleles were associated with earlier age of onset. For example, an individual with 100 repeats had age of onset at 4 years. CAA interruptions can reduce toxicity [@pmid:37148549; @pmid:37148549].", + "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansion leading to gain of function toxicity [@pmid:37148549; @pmid:38467784].", "year": "2023 [@pmid:37148549]", @@ -4900,6 +4971,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Novel, reported pathogenic alleles: (TTTTA)22 (TTTCA)exp (TTTTA)exp, but only the TTTCA is specific to affected individuals (size: 1100 repeats). Non-pathogenic reference TTTTA repeat was expanded in nine healthy subjects 40-120 repeats and in two individuals was potentially even longer [@pmid:29507423].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA toxicity hypothesized [@pmid:29507423; @pmid:38467784].", "year": "2018 [@pmid:29507423]", @@ -4976,6 +5048,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "A study has identified an intronic GATGGT hexanucleotide tandem repeat in the TYMS gene. Both parents were found to be heterozygous carriers of the expansion, suggesting a recessive inheritance pattern. 
Evidence is limited, only a single family with monozygotic twins have been reoprted and no change in expression of the gene has been observed [@pmid:40589716].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Proposed mechanism involves repeat expansions in non-coding regions of the gene, reducing expression in melanocytes or keratinocytes, leading to a disruption in nucleotide balance in DNA repair and hyperpigmentation. Missense mutations disrupt nucleotide metabolsim, resulting in loss-of-function and genome instability [@pmid:40589716].", "year": "2025 [@pmid:40589716]", @@ -5042,6 +5115,7 @@ "typ_age_onset_min": 1.0, "typ_age_onset_max": 3.0, "details": "Any deviation from 2 motifs is thought to be pathogenic [@genereviews:NBK535148].", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Loss of function [@pmid:38467784].", "year": "2021 [@pmid:33559681]", @@ -5108,6 +5182,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, "details": "Benign range (0-20) taken from primary literatre of unaffected individuals and gnomAD data [@pmid:30554721; @gnomad:XYLT1]. Minimum repeat size to cause disease thought to range between 72 and 110 repeats [@pmid:30554721]. Repeat is within a 238bp sequence which is missing from hg38 but present in T2T-CHM13", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Methylation [@pmid:30554721].", "year": "2019 [@pmid:30554721]", @@ -5189,6 +5264,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Novel insertion of ~1000 repeats observed to cause disease [@pmid:31539032].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "RNA toxicity hypothesized [@pmid:31539032].", "year": "2019 [@pmid:31539032]", @@ -5265,6 +5341,7 @@ "typ_age_onset_min": 37.0, "typ_age_onset_max": 56.0, "details": "Disease-causing expansions range from 46 repeats [@pmid:38973251] to 74 repeats [@pmid:38035881]. 
Possible anticipation in disease [@pmid:38197134; @pmid:38035881]; intermediate alleles may correspond to premutations [@pmid:38973251]. Most unaffected individuals had 21 motifs, but benign alleles range from 14-26 repeats [@pmid:38035881].", + "detection": null, "mechanism": "GoF?", "mechanism_detail": "Potential RNA-mediated gain of function mechanism theorized [@pmid:38467784]", "year": "2023 [@pmid:38035881]", @@ -5331,6 +5408,7 @@ "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, "details": "The benign allele of 15 repeats expands to 25 repeats to cause disease [@genereviews:NBK51932], although the expansion can potentially present with a mild phenotype [@doi:10.1016/j.gimo.2024.101607]", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansion interfering with DNA binding and transcriptional activation [@pmid:19177455; @pmid:15590697].", "year": "2001 [@pmid:11285244]", @@ -5397,6 +5475,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": "Frequently genotyped as (NGC)*, although locus structure has been reported as (GCC)8(GCT)(GCC) [@pmid:38467784]. 1 patient with VACTERL died at birth with 12 repeats [@pmid:20452998]. 8 patients with X-linked oculo-auriculo-vertebral spectrum (OAVS) had 11 repeats (intermediate allele size); 1 individual in OAVS cohort with 12 repeats; likely phenotypic spectrum [@pmid:32639022].", + "detection": null, "mechanism": "Unknown", "mechanism_detail": "Polyalanine expansion with unknown mechanism [@pmid:17581576].", "year": "2010 [@pmid:20452998]", @@ -5463,6 +5542,7 @@ "typ_age_onset_min": null, "typ_age_onset_max": null, "details": " 176 controls were used to establish the benign range (5-22 repeats), whereas a singular proband was identified with ~450 repeats [@pmid:25196122]. 
The observed intermediate alleles were presumed to function as premutations, with variable amounts of methylation [@pmid:25196122]", + "detection": null, "mechanism": "LoF", "mechanism_detail": "Methylation, evidence of transcriptional misregulation [@omim:616181; @pmid:25196122].", "year": "2014 [@pmid:25196122]", From b62cca852446d423cdc4409c58bbdd7757e1525b Mon Sep 17 00:00:00 2001 From: Gabriel Zinser Date: Thu, 11 Jun 2026 19:34:30 -0600 Subject: [PATCH 29/29] Tesing detection field --- data/STRchive-loci.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index 1053486f..9149d8c7 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -224,7 +224,7 @@ "age_onset_max": 83.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 49.0, - "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. Although srWGS screens can detect this expansion[@pmid:36797998], sizing needs to be validated with standard PCR fragment analysis or RP-PCR [geneReviews:NBK1333].", + "details": "Intermediate alleles indicate reduced penetrance [@genereviews:NBK1333]. 
Expansions larger than the pathogenic threshold in the AR gene should be evaluated carefully. Interruptions have not been observed in patient cases; it has been proposed that longer alleles with interruptions may not be pathogenic [@pmid:24041967]. Non-canonical motif CAA observed [@pmid:35245110]. Expansions are also detected ten-fold more often in a general population than would be expected by disease prevalence [@pmid:36797998]. Clinical evaluation and phenotypic matching may be necessary to determine diagnosis even in the presence of a pure expanded allele. It has been proposed that contractions may play a role in disease [@pmid:10398229]. Disease may be subclinical in females [@pmid:34922802], and can be clinically heterogeneous even within the same family [@pmid:20184516]. Although srWGS screens can detect this expansion [@pmid:36797998], sizing is generally validated with standard PCR fragment analysis or RP-PCR [@genereviews:NBK1333].", "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine alters protein conformation leading to gain-of-function neurodegeneration [@pmid:29398703; @pmid:36169768]. Transcriptional dysregulation, axonal transport disruption, and mitochondrial dysfunction also play causative roles in the neurodegeneration [@pmid:22609045].", @@ -291,7 +291,7 @@ "age_onset_max": 4.0, "typ_age_onset_min": 0.0, "typ_age_onset_max": 0.0, - "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206].
Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:17668384].", + "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. Because these are small coding expansions, they have been sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:17668384].", "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", @@ -358,7 +358,7 @@ "age_onset_max": 4.0, "typ_age_onset_min": null, "typ_age_onset_max": null, - "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. 
Because these are small coding expansions, they are sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:11889467].", + "details": "ARX expansions [@genereviews:NBK535148] result in a phenotypic spectrum of conditions including Partington syndrome [@omim:309510], Early Infantile Epileptic Encephalopathy [@omim:308350], Agenesis of Corpus Callosum with Abnormal Genitalia [@omim:300004], and X-Linked Lissencephaly with Ambiguous Genitalia [@omim:300215], described in the literature [@pmid:26029707; @pmid:20506206]. Because these are small coding expansions, they have been sized using targeted exon 2 PCR with fragment analysis or targeted Sanger sequencing [@pmid:11889467].", "detection": null, "mechanism": "LoF", "mechanism_detail": "Polyalanine expansions lead to reduction in protein product through unclear mechanism [@pmid:36169768; @pmid:38467784]. Apparent aggregation and mis-localisation of mutant protein, increased with expansion length [@genereviews:NBK51932].", @@ -425,7 +425,7 @@ "age_onset_max": 72.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 40.0, - "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. PCR fragment analysis detects most moderate alleles, but Southern blotting or RP-PCR may be needed for large expansions or apparent homozygosity [@genereviews:NBK1491].", + "details": "Pathogenic expansions (48-93) are fully penetrant with the exception of one documented case of 51 repeats; intermediate alleles (36-47) are associated with a milder phenotype and can expand upon transmission [@genereviews:NBK1491]. 
CAA interruptions have been observed without known clinical association [@pmid:35245110]. Length of the repeat is inversely associated with age of onset and severe epilepsy phenotype [@pmid:41147955]. PCR fragment analysis has detected most moderate alleles, but Southern blotting or RP-PCR may be needed for large expansions or apparent homozygosity [@genereviews:NBK1491].", "detection": null, "mechanism": "GoF", "mechanism_detail": "Polyglutamine expansions leading to gain of function [@genereviews:NBK1491].", @@ -492,8 +492,8 @@ "age_onset_max": 63.0, "typ_age_onset_min": 20.0, "typ_age_onset_max": 39.0, - "details": "Penetrance is dependent on sequence purity in addition to expansion length: pure repeats are pathogenic at 39 repeats [@pmid:37906407], while CAT interruptions [@pmid:35245110] can lead to reduced penetrance at comparable lengths [@genereviews:NBK1184]. Regardless, intermediate alleles are considered premutations which may lead to disease upon transmission [@genereviews:NBK1184]. CAA interruptions have also been reported, but not linked to any phenotypic consequences [@pmid:23935513]. PCR fragment analysis, often with reflex RP-PCR, is commonly used for sizing [geneReviews:NBK1184]. Standard fragment analysis does not resolve CAT interruptions, which require targeted analysis like Sanger sequencing [@pmid:34635619].", - "detection": null, + "details": "Penetrance is dependent on sequence purity in addition to expansion length: pure repeats are pathogenic at 39 repeats [@pmid:37906407], while CAT interruptions [@pmid:35245110] can lead to reduced penetrance at comparable lengths [@genereviews:NBK1184]. Regardless, intermediate alleles are considered premutations which may lead to disease upon transmission [@genereviews:NBK1184]. CAA interruptions have also been reported, but not linked to any phenotypic consequences [@pmid:23935513].", + "detection": "PCR fragment analysis, often with reflex RP-PCR, is commonly used for sizing [@genereviews:NBK1184]. 
Standard fragment analysis does not resolve CAT interruptions, which require targeted analysis like Sanger sequencing [@pmid:34635619].", "mechanism": "GoF/LoF", "mechanism_detail": "Polyglutamine expansion leading to toxic gain of function with eventual misregulation-based loss of function/dominant negative [@genereviews:NBK1184; @pmid:35573049].", "year": "1993 [@pmid:8358429]",