CCBR · kelly-sovacool · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.github/instructions/python.instructions.md b/.github/instructions/python.instructions.md
@@ -0,0 +1,14 @@
+---
+name: "Python Standards"
+description: "Coding conventions for Python files"
+applyTo: "**/*.py"
+---
+
+# Python coding standards
+
+- Follow the PEP 8 style guide.
+- Use 4 spaces for indentation.
+- Write docstrings for public functions in Google style, using quartodoc interlinks when applicable.
+- Use at most one `return` statement per function, and place it on the last line of the function.
+- Do not use `continue` or `break` in loops.
+- Do not use bare `except` clauses; always specify the expected exception type.
diff --git a/.github/workflows/auto-format.yml b/.github/workflows/auto-format.yml
@@ -32,11 +32,10 @@ jobs:
           owner: ${{ github.repository_owner }}
 
       - uses: actions/checkout@v6
-        if: github.event_name == 'pull_request'
         with:
           fetch-depth: 0
           token: ${{ steps.generate-token.outputs.token }}
-          ref: ${{ github.head_ref && github.event_name == 'pull_request' || github.ref_name }}
+          ref: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}
 
       - name: git config
         run: |
@@ -61,6 +60,7 @@ jobs:
         uses: pre-commit/action@v3.0.1
         continue-on-error: true
       - name: commit & push
+        if: ${{ github.event_name == 'pull_request' }}
         run: |
           git config --global user.name "github-actions[bot]"
           git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"

diff --git a/scripts/filter_fastq_by_readids_highmem.py b/scripts/filter_fastq_by_readids_highmem.py
@@ -5,6 +5,7 @@
 
 
 def get_sname(s):
+    """Return the first whitespace-delimited token of ``s.name``."""
     sname = s.name
     sname = sname.split()[0]
     return sname

diff --git a/scripts/filter_fastq_by_readids_highmem_pe.py b/scripts/filter_fastq_by_readids_highmem_pe.py
@@ -5,12 +5,14 @@
 
 
 def get_sname(s):
+    """Return the first whitespace-delimited token of ``s.name``."""
     sname = s.name
     sname = sname.split()[0]
     return sname
 
 
 def fixoutfilename(f):
+    """Return the normalized output filename."""
     outfqfilename = f
     dummy = outfqfilename.strip().split(".")
     if dummy[-1] == "gz":

diff --git a/src/ccbr_tools/GSEA/deg2gs.py b/src/ccbr_tools/GSEA/deg2gs.py
@@ -38,6 +38,7 @@
 ####################################
 def filter_by_p(x, nhits, pvalue, qvalue):
     # Filter the data to nhits, pvalue, qvalue
+    """Filter rows by p-value and q-value cutoffs, keeping up to nhits rows."""
     x.sort_values(by=["p"], inplace=True)
     x = x[(x["p"] <= pvalue) & (x["q"] <= qvalue)]
     if x.shape[0] > nhits:
@@ -54,6 +55,7 @@ def filter_by_p(x, nhits, pvalue, qvalue):
 
 def main():
     # Usage statement
+    """Run the CLI."""
     parseStr = "Reads RNASeq differential expression output files\n\
 and outputs a prioritized list of genes for use in GSEA or ToppFun.\n\
 Will filter by both p and fdr values, and export up to nhits values.\n\

diff --git a/src/ccbr_tools/GSEA/multitext2excel.py b/src/ccbr_tools/GSEA/multitext2excel.py
@@ -55,6 +55,7 @@
 
 def main():
     # Usage statement
+    """Run the CLI."""
     parseStr = (
         'Reads a list of files and imports them each into a separate tab in one Excel spreadsheet.\n\n\
     Usage:\n\

diff --git a/src/ccbr_tools/GSEA/ncbr_huse.py b/src/ccbr_tools/GSEA/ncbr_huse.py
@@ -33,6 +33,7 @@
 def run_cmd(theCommand, fn, dorun):
     # Run the given command, x is a list of command parameters
     # if dorun = false, just send out the notification of the next python step
+    """Echo and log a command, then run it when ``dorun`` is true."""
     print(" ".join(theCommand) + "\n")
     fn.write(" ".join(theCommand) + "\n")
     fn.flush()
@@ -46,6 +47,7 @@ def run_cmd(theCommand, fn, dorun):
 def run_os_cmd(theCommand, fn, dorun):
     # Run the given command, x is a list of command parameters
     # if dorun = false, just send out the notification of the next python step
+    """Run an operating-system command."""
     theCommandStr = " ".join(theCommand)
     print(theCommandStr + "\n")
     fn.write(theCommandStr + "\n")
@@ -60,6 +62,7 @@ def run_os_cmd(theCommand, fn, dorun):
 def un_gzip(fname, logfn):
     # If rerunning and the previous step has already been compressed,
     # need to uncompress it before you can rerun the command
+    """Decompress a gzipped file."""
     gzip_name = fname + ".gz"
     if (not os.path.isfile(fname)) and os.path.isfile(gzip_name):
         ungz_cmd = ["gunzip", gzip_name]
@@ -70,6 +73,7 @@ def un_gzip(fname, logfn):
 # Read ~/.my.cnf and connect to an SQL database
 #
 def con_db(host_name, db_name, port_number):
+    """Connect to the database."""
     if MySQLdb is None:
         raise ModuleNotFoundError(
             "MySQLdb is required for database connections. Install mysqlclient."
@@ -96,6 +100,7 @@ def con_db(host_name, db_name, port_number):
 # Print updates to screen and log file
 #
 def send_update(updateStr, log=None, quiet=False):
+    """Send a status update."""
     if not quiet:
         print(updateStr)
 
@@ -108,6 +113,7 @@ def send_update(updateStr, log=None, quiet=False):
 # Log error message and exit
 #
 def err_out(errMsg, log=None):
+    """Exit with an error message."""
     if log is not None:
         log.write(errMsg)
 
@@ -118,11 +124,13 @@ def err_out(errMsg, log=None):
 # Pause for user to be ready to continue, use contkey=None to get any input
 #
 def pause_for_input(txt, contkey="y", quitkey="q", log=None):
+    """Pause until the user provides a valid response."""
     # tally the number of tries
     answer_cnt = 0
+    result = None
 
     # loop for the user to enter input, give them a few tries
-    while True:
+    while result is None:
         # wait for the input
         answer = input(txt)
 
@@ -132,11 +140,11 @@ def pause_for_input(txt, contkey="y", quitkey="q", log=None):
 
         # if none, just return the input
         if contkey is None:
-            return answer
+            result = answer
 
         # if there is a contkey, then be sure it is correctly typed
         elif answer == contkey:
-            return answer
+            result = answer
 
         else:
             # give them additional help and increment the answer count
@@ -156,12 +164,14 @@ def pause_for_input(txt, contkey="y", quitkey="q", log=None):
                 )
 
             answer_cnt = answer_cnt + 1
+    return result
 
 
 #
 # Count sequences in a fasta file
 #
 def fasta_count(fastaFile):
+    """Count FASTA records in a file."""
     seqcount = 0
     for line in open(fastaFile, "r"):
         if re.match(">", line):
@@ -173,6 +183,7 @@ def fasta_count(fastaFile):
 # List sequences in a fasta file
 #
 def fasta_list(fastaFile):
+    """List FASTA record identifiers."""
     seqs = []
     for line in open(fastaFile, "r"):
         if re.match(">", line):

diff --git a/src/ccbr_tools/__main__.py b/src/ccbr_tools/__main__.py
@@ -212,6 +212,7 @@ def install(tool_name, version_tag, run, branch_tag, software_type, hpc):
 
 
 def main():
+    """Run the CLI."""
     cli(prog_name="ccbr_tools")
 
 

diff --git a/src/ccbr_tools/gb2gtf.py b/src/ccbr_tools/gb2gtf.py
@@ -14,6 +14,7 @@
 
 
 def main():
+    """Run the CLI."""
     if check_args(sys.argv):
         gb2gtf(sys.argv)
 
@@ -25,6 +26,7 @@ def main():
 
 
 def check_args(args):
+    """Validate the CLI arguments."""
     valid_usage = True
     if len(args) < 2 or "-h" in args or "--help" in args:
         print(usage_msg)
@@ -34,6 +36,7 @@ def check_args(args):
 
 def gb2gtf(args):
     # get all sequence records for the specified genbank file
+    """Convert a GenBank file to GTF output."""
     recs = [rec for rec in SeqIO.parse(args[1], "genbank")]
 
     # print the number of sequence records that were extracted
@@ -127,10 +130,6 @@ def gb2gtf(args):
                     gffstring[8] = y
                     print("\t".join(gffstring) + ";")
 
-            #            print(j,part)
-            else:
-                continue
-
             # else:
 
     #        print(l.start)

diff --git a/src/ccbr_tools/github.py b/src/ccbr_tools/github.py
@@ -60,11 +60,12 @@ def get_user_info(user_login):
               If the user is not found, returns a minimal dict with 'login' and 'name' set to None.
     """
     url = f"https://api.github.com/users/{user_login}"
+    user_info = {"login": user_login, "name": None}
     try:
-        return get_url_json(url)
+        user_info = get_url_json(url)
     except ConnectionError as e:
         warnings.warn(f"Could not retrieve user info for {user_login}. {str(e)}")
-        return {"login": user_login, "name": None}
+    return user_info
 
 
 def get_contrib_html(contrib, img_attr="{width=100px height=100px}"):

diff --git a/src/ccbr_tools/homologfinder/hf.py b/src/ccbr_tools/homologfinder/hf.py
@@ -94,6 +94,7 @@ def collect_args():
 
 
 def process_genelist(gl, lookup):
+    """Expand a gene list with homologs from the lookup table."""
     result = []
     for g in gl:
         if g in lookup:
@@ -102,6 +103,7 @@ def process_genelist(gl, lookup):
 
 
 def process_args(args, lookup):
+    """Process CLI arguments into a gene list."""
     if args.gene:
         r = process_genelist([args.gene], lookup)
     if args.genelist:
@@ -116,11 +118,13 @@ def process_args(args, lookup):
 
 
 def print_results(result):
+    """Print the homolog finder results."""
     for g in result:
         print(g)
 
 
 def read_lookup():
+    """Read the homolog lookup table."""
     lookup = dict()
     lookup_filepath = (
         importlib.resources.files(__package__) / "human_mouse_homolog_lookup.txt"
@@ -135,6 +139,7 @@ def read_lookup():
 def create_homolog_table(
     rpt_file=importlib.resources.files(__package__) / "HOM_MouseHumanSequence.rpt",
 ):
+    """Create the homolog lookup table."""
     cols = ["DB Class Key", "Common Organism Name", "Symbol"]
     df = pd.read_csv(rpt_file, usecols=cols, sep="\t")
     # human-mouse homologs file --> HOM_MouseHumanSequence.rpt
@@ -146,9 +151,10 @@ def create_homolog_table(
             lookup[row["DB Class Key"]] = dict()
             lookup[row["DB Class Key"]]["mouse, laboratory"] = list()
             lookup[row["DB Class Key"]]["human"] = list()
-        if row["Common Organism Name"] not in lookup[row["DB Class Key"]]:
-            continue
-        lookup[row["DB Class Key"]][row["Common Organism Name"]].append(row["Symbol"])
+        if row["Common Organism Name"] in lookup[row["DB Class Key"]]:
+            lookup[row["DB Class Key"]][row["Common Organism Name"]].append(
+                row["Symbol"]
+            )
     for k, v in lookup.items():
         # print(",".join(v["mouse, laboratory"]),",".join(v["human"]),sep="\t")
         for gene_symbol in v["mouse, laboratory"]:
@@ -165,10 +171,12 @@ def create_homolog_table(
 
 
 def hf(args):
+    """Run the homolog finder lookup."""
     return process_args(args, read_lookup())
 
 
 def main():
+    """Run the CLI."""
     args = collect_args()
     results = hf(args)
     print_results(results)

diff --git a/src/ccbr_tools/hooks/__main__.py b/src/ccbr_tools/hooks/__main__.py
@@ -33,6 +33,7 @@ def cli():
 
 
 def main():
+    """Run the CLI."""
     cli(prog_name="ccbr-hooks")
 
 

diff --git a/src/ccbr_tools/hooks/detect_absolute_paths.py b/src/ccbr_tools/hooks/detect_absolute_paths.py
@@ -124,9 +124,11 @@ def raise_error_if_abs_paths_detected(files, ignored_patterns=None):
             for idx in range(1, len(parts)):
                 match_targets.append(pathlib.Path(*parts[idx:]))
 
-            if any(spec.match_file(target.as_posix()) for target in match_targets):
-                continue
-            filtered_files.append(file)
+            is_ignored = any(
+                spec.match_file(target.as_posix()) for target in match_targets
+            )
+            if not is_ignored:
+                filtered_files.append(file)
         files = filtered_files
 
     if any([file_contains_absolute_path(file) for file in files]):
@@ -138,16 +140,13 @@ def load_ignored_paths(ignored_paths_file):
     Load ignored file paths/patterns from a file, one per line.
     Supports gitignore-style wildcards.
     """
-    if not ignored_paths_file:
-        return []
-
     patterns = []
-    with open(ignored_paths_file, "r") as f:
-        for line in f:
-            stripped = line.strip()
-            if not stripped or stripped.startswith("#"):
-                continue
-            patterns.append(stripped)
+    if ignored_paths_file:
+        with open(ignored_paths_file, "r") as f:
+            for line in f:
+                stripped = line.strip()
+                if stripped and not stripped.startswith("#"):
+                    patterns.append(stripped)
 
     return patterns
Original file line number	Diff line number	Diff line change
Expand Up		@@ -212,6 +212,7 @@ def install(tool_name, version_tag, run, branch_tag, software_type, hpc):


		def main():
		"""Run the CLI."""
		cli(prog_name="ccbr_tools")


Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,6 +33,7 @@ def cli():


		def main():
		"""Run the CLI."""
		cli(prog_name="ccbr-hooks")


Expand Down