Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/instructions/python.instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
name: "Python Standards"
description: "Coding conventions for Python files"
applyTo: "**/*.py"
---

# Python coding standards

- Follow the PEP 8 style guide.
- Use 4 spaces for indentation.
- Write docstrings for public functions in Google style, using quartodoc interlinks when applicable.
- A function that contains a `return` statement should have exactly one, placed as the last line of the function.
- Do not use `continue` or `break` in loops.
- Do not use bare `except` clauses; always specify the expected exception type.
4 changes: 2 additions & 2 deletions .github/workflows/auto-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@ jobs:
owner: ${{ github.repository_owner }}

- uses: actions/checkout@v6
if: github.event_name == 'pull_request'
with:
fetch-depth: 0
token: ${{ steps.generate-token.outputs.token }}
ref: ${{ github.head_ref && github.event_name == 'pull_request' || github.ref_name }}
ref: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}

- name: git config
run: |
Expand All @@ -61,6 +60,7 @@ jobs:
uses: pre-commit/action@v3.0.1
continue-on-error: true
- name: commit & push
if: ${{ github.event_name == 'pull_request' }}
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
Expand Down
1 change: 1 addition & 0 deletions scripts/filter_fastq_by_readids_highmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


def get_sname(s):
"""Return the first whitespace-delimited token of ``s.name``.

Args:
    s: Object exposing a string ``name`` attribute.

Returns:
    The leading portion of ``s.name`` up to the first run of whitespace.

Raises:
    IndexError: If ``s.name`` is empty or contains only whitespace.
"""
# NOTE(review): callers presumably pass a sequence/read record whose name is
# an ID followed by a description -- confirm against the call sites.
sname = s.name
# split() with no arguments splits on any whitespace run; index 0 is the leading token.
sname = sname.split()[0]
return sname
Expand Down
2 changes: 2 additions & 0 deletions scripts/filter_fastq_by_readids_highmem_pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@


def get_sname(s):
"""Return the first whitespace-delimited token of ``s.name``.

Args:
    s: Object exposing a string ``name`` attribute.

Returns:
    The leading portion of ``s.name`` up to the first run of whitespace.

Raises:
    IndexError: If ``s.name`` is empty or contains only whitespace.
"""
# NOTE(review): callers presumably pass a sequence/read record whose name is
# an ID followed by a description -- confirm against the call sites.
sname = s.name
# split() with no arguments splits on any whitespace run; index 0 is the leading token.
sname = sname.split()[0]
return sname


def fixoutfilename(f):
"""Return the normalized output filename."""
outfqfilename = f
dummy = outfqfilename.strip().split(".")
if dummy[-1] == "gz":
Expand Down
2 changes: 2 additions & 0 deletions src/ccbr_tools/GSEA/deg2gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
####################################
def filter_by_p(x, nhits, pvalue, qvalue):
# Filter the data to nhits, pvalue, qvalue
"""Filter rows by p-value."""
x.sort_values(by=["p"], inplace=True)
x = x[(x["p"] <= pvalue) & (x["q"] <= qvalue)]
if x.shape[0] > nhits:
Expand All @@ -54,6 +55,7 @@ def filter_by_p(x, nhits, pvalue, qvalue):

def main():
# Usage statement
"""Run the CLI."""
parseStr = "Reads RNASeq differential expression output files\n\
and outputs a prioritized list of genes for use in GSEA or ToppFun.\n\
Will filter by both p and fdr values, and export up to nhits values.\n\
Expand Down
1 change: 1 addition & 0 deletions src/ccbr_tools/GSEA/multitext2excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@

def main():
# Usage statement
"""Run the CLI."""
parseStr = (
'Reads a list of files and imports them each into a separate tab in one Excel spreadsheet.\n\n\
Usage:\n\
Expand Down
17 changes: 14 additions & 3 deletions src/ccbr_tools/GSEA/ncbr_huse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
def run_cmd(theCommand, fn, dorun):
# Run the given command; theCommand is a list of command parameters
# if dorun = false, just send out the notification of the next python step
"""Run a shell command and capture its output."""
print(" ".join(theCommand) + "\n")
fn.write(" ".join(theCommand) + "\n")
fn.flush()
Expand All @@ -46,6 +47,7 @@ def run_cmd(theCommand, fn, dorun):
def run_os_cmd(theCommand, fn, dorun):
# Run the given command; theCommand is a list of command parameters
# if dorun = false, just send out the notification of the next python step
"""Run an operating-system command."""
theCommandStr = " ".join(theCommand)
print(theCommandStr + "\n")
fn.write(theCommandStr + "\n")
Expand All @@ -60,6 +62,7 @@ def run_os_cmd(theCommand, fn, dorun):
def un_gzip(fname, logfn):
# If rerunning and the previous step has already been compressed,
# need to uncompress it before you can rerun the command
"""Decompress a gzipped file."""
gzip_name = fname + ".gz"
if (not os.path.isfile(fname)) and os.path.isfile(gzip_name):
ungz_cmd = ["gunzip", gzip_name]
Expand All @@ -70,6 +73,7 @@ def un_gzip(fname, logfn):
# Read ~/.my.cnf and connect to an SQL database
#
def con_db(host_name, db_name, port_number):
"""Connect to the database."""
if MySQLdb is None:
raise ModuleNotFoundError(
"MySQLdb is required for database connections. Install mysqlclient."
Expand All @@ -96,6 +100,7 @@ def con_db(host_name, db_name, port_number):
# Print updates to screen and log file
#
def send_update(updateStr, log=None, quiet=False):
"""Send a status update."""
if not quiet:
print(updateStr)

Expand All @@ -108,6 +113,7 @@ def send_update(updateStr, log=None, quiet=False):
# Log error message and exit
#
def err_out(errMsg, log=None):
"""Exit with an error message."""
if log is not None:
log.write(errMsg)

Expand All @@ -118,11 +124,13 @@ def err_out(errMsg, log=None):
# Pause for user to be ready to continue, use contkey=None to get any input
#
def pause_for_input(txt, contkey="y", quitkey="q", log=None):
"""Pause until the user provides a valid response."""
# tally the number of tries
answer_cnt = 0
result = None

# loop for the user to enter input, give them a few tries
while True:
while result is None:
# wait for the input
answer = input(txt)

Expand All @@ -132,11 +140,11 @@ def pause_for_input(txt, contkey="y", quitkey="q", log=None):

# if none, just return the input
if contkey is None:
return answer
result = answer

# if there is a contkey, then be sure it is correctly typed
elif answer == contkey:
return answer
result = answer

else:
# give them additional help and increment the answer count
Expand All @@ -156,12 +164,14 @@ def pause_for_input(txt, contkey="y", quitkey="q", log=None):
)

answer_cnt = answer_cnt + 1
return result


#
# Count sequences in a fasta file
#
def fasta_count(fastaFile):
"""Count FASTA records in a file."""
seqcount = 0
for line in open(fastaFile, "r"):
if re.match(">", line):
Expand All @@ -173,6 +183,7 @@ def fasta_count(fastaFile):
# List sequences in a fasta file
#
def fasta_list(fastaFile):
"""List FASTA record identifiers."""
seqs = []
for line in open(fastaFile, "r"):
if re.match(">", line):
Expand Down
1 change: 1 addition & 0 deletions src/ccbr_tools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def install(tool_name, version_tag, run, branch_tag, software_type, hpc):


def main():
"""Entry point: call ``cli`` with ``prog_name="ccbr_tools"``.

``cli`` is defined outside the portion of the file visible here.
"""
cli(prog_name="ccbr_tools")


Expand Down
7 changes: 3 additions & 4 deletions src/ccbr_tools/gb2gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


def main():
"""Run the CLI."""
if check_args(sys.argv):
gb2gtf(sys.argv)

Expand All @@ -25,6 +26,7 @@ def main():


def check_args(args):
"""Validate the CLI arguments."""
valid_usage = True
if len(args) < 2 or "-h" in args or "--help" in args:
print(usage_msg)
Expand All @@ -34,6 +36,7 @@ def check_args(args):

def gb2gtf(args):
# get all sequence records for the specified genbank file
"""Convert a GenBank file to GTF output."""
recs = [rec for rec in SeqIO.parse(args[1], "genbank")]

# print the number of sequence records that were extracted
Expand Down Expand Up @@ -127,10 +130,6 @@ def gb2gtf(args):
gffstring[8] = y
print("\t".join(gffstring) + ";")

# print(j,part)
else:
continue

# else:

# print(l.start)
Expand Down
5 changes: 3 additions & 2 deletions src/ccbr_tools/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ def get_user_info(user_login):
If the user is not found, returns a minimal dict with 'login' and 'name' set to None.
"""
url = f"https://api.github.com/users/{user_login}"
user_info = {"login": user_login, "name": None}
try:
return get_url_json(url)
user_info = get_url_json(url)
except ConnectionError as e:
warnings.warn(f"Could not retrieve user info for {user_login}. {str(e)}")
return {"login": user_login, "name": None}
return user_info


def get_contrib_html(contrib, img_attr="{width=100px height=100px}"):
Expand Down
14 changes: 11 additions & 3 deletions src/ccbr_tools/homologfinder/hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def collect_args():


def process_genelist(gl, lookup):
"""Expand a gene list with homologs from the lookup table."""
result = []
for g in gl:
if g in lookup:
Expand All @@ -102,6 +103,7 @@ def process_genelist(gl, lookup):


def process_args(args, lookup):
"""Process CLI arguments into a gene list."""
if args.gene:
r = process_genelist([args.gene], lookup)
if args.genelist:
Expand All @@ -116,11 +118,13 @@ def process_args(args, lookup):


def print_results(result):
"""Print each item of ``result`` on its own line.

Args:
    result: Iterable of printable items.

Returns:
    None. Output is written to standard output via ``print``.
"""
for g in result:
print(g)


def read_lookup():
"""Read the homolog lookup table."""
lookup = dict()
lookup_filepath = (
importlib.resources.files(__package__) / "human_mouse_homolog_lookup.txt"
Expand All @@ -135,6 +139,7 @@ def read_lookup():
def create_homolog_table(
rpt_file=importlib.resources.files(__package__) / "HOM_MouseHumanSequence.rpt",
):
"""Create the homolog lookup table."""
cols = ["DB Class Key", "Common Organism Name", "Symbol"]
df = pd.read_csv(rpt_file, usecols=cols, sep="\t")
# human-mouse homologs file --> HOM_MouseHumanSequence.rpt
Expand All @@ -146,9 +151,10 @@ def create_homolog_table(
lookup[row["DB Class Key"]] = dict()
lookup[row["DB Class Key"]]["mouse, laboratory"] = list()
lookup[row["DB Class Key"]]["human"] = list()
if row["Common Organism Name"] not in lookup[row["DB Class Key"]]:
continue
lookup[row["DB Class Key"]][row["Common Organism Name"]].append(row["Symbol"])
if row["Common Organism Name"] in lookup[row["DB Class Key"]]:
lookup[row["DB Class Key"]][row["Common Organism Name"]].append(
row["Symbol"]
)
for k, v in lookup.items():
# print(",".join(v["mouse, laboratory"]),",".join(v["human"]),sep="\t")
for gene_symbol in v["mouse, laboratory"]:
Expand All @@ -165,10 +171,12 @@ def create_homolog_table(


def hf(args):
"""Return ``process_args(args, lookup)`` using a freshly loaded lookup table.

The lookup table is obtained by calling ``read_lookup()`` on every
invocation; nothing is cached in this function.

Args:
    args: Passed through unchanged to ``process_args``; ``main`` supplies
        the value returned by ``collect_args()``.

Returns:
    Whatever ``process_args`` returns for ``args`` and the lookup table.
"""
return process_args(args, read_lookup())


def main():
"""Run the CLI."""
args = collect_args()
results = hf(args)
print_results(results)
Expand Down
1 change: 1 addition & 0 deletions src/ccbr_tools/hooks/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def cli():


def main():
"""Entry point: call ``cli`` with ``prog_name="ccbr-hooks"``."""
cli(prog_name="ccbr-hooks")


Expand Down
23 changes: 11 additions & 12 deletions src/ccbr_tools/hooks/detect_absolute_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,11 @@ def raise_error_if_abs_paths_detected(files, ignored_patterns=None):
for idx in range(1, len(parts)):
match_targets.append(pathlib.Path(*parts[idx:]))

if any(spec.match_file(target.as_posix()) for target in match_targets):
continue
filtered_files.append(file)
is_ignored = any(
spec.match_file(target.as_posix()) for target in match_targets
)
if not is_ignored:
filtered_files.append(file)
files = filtered_files

if any([file_contains_absolute_path(file) for file in files]):
Expand All @@ -138,16 +140,13 @@ def load_ignored_paths(ignored_paths_file):
Load ignored file paths/patterns from a file, one per line.
Supports gitignore-style wildcards.
"""
if not ignored_paths_file:
return []

patterns = []
with open(ignored_paths_file, "r") as f:
for line in f:
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
patterns.append(stripped)
if ignored_paths_file:
with open(ignored_paths_file, "r") as f:
for line in f:
stripped = line.strip()
if stripped and not stripped.startswith("#"):
patterns.append(stripped)

return patterns

Expand Down
Loading
Loading