Skip to content

Commit 68b0a50

Browse files
committed
Add append_primers script
1 parent 2e1656f commit 68b0a50

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed

Diff for: micall/main.py

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
"micall/monitor/micall_watcher.py",
9090
"micall/tcr/igblast.py",
9191
"micall/utils/fasta_to_fastq.py",
92+
"micall/utils/append_primers.py",
9293
]
9394

9495

Diff for: micall/utils/append_primers.py

+152
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Append configurable primer sequences to every FASTA sequence
5+
in an input file.
6+
"""
7+
8+
import argparse
9+
import sys
10+
import logging
11+
from pathlib import Path
12+
from typing import Sequence
13+
14+
from Bio import SeqIO
15+
from Bio.Seq import Seq
16+
from Bio.SeqRecord import SeqRecord
17+
18+
logging.basicConfig(
19+
level=logging.INFO,
20+
format="%(asctime)s - %(levelname)s - %(message)s",
21+
datefmt="%Y-%m-%d %H:%M:%S",
22+
)
23+
logger = logging.getLogger(__name__)
24+
25+
# Default no-mixture primer sequences.
26+
DEFAULT_FORWARD_PRIMER = "GCGCCCGAACAGGGACCTGAAAGCGAAAG"
27+
DEFAULT_REVERSE_PRIMER = "TAAGCCTCAATAAAGCTTGCCTTGAGTGC"
28+
29+
30+
class UserError(ValueError):
31+
def __init__(self, fmt: str, *fmt_args: object):
32+
self.fmt = fmt
33+
self.fmt_args = fmt_args
34+
self.code = 1
35+
36+
37+
def append_primers_to_record(record: SeqRecord,
38+
fwd_primer: str,
39+
rev_primer: str) -> SeqRecord:
40+
41+
"""
42+
Create a new SeqRecord with primer sequences appended. The new sequence
43+
is the forward primer + original sequence + reverse primer.
44+
"""
45+
46+
new_seq = Seq(fwd_primer + str(record.seq) + rev_primer)
47+
new_record = SeqRecord(
48+
new_seq,
49+
id=record.id,
50+
name=record.name,
51+
description=record.description
52+
)
53+
54+
return new_record
55+
56+
57+
def parse_arguments(argv: Sequence[str]) -> argparse.Namespace:
58+
parser = argparse.ArgumentParser(
59+
description="Append configurable primer sequences to every "
60+
"FASTA sequence in an input file."
61+
)
62+
parser.add_argument("input_fasta", type=Path,
63+
help="Path to the input FASTA file")
64+
parser.add_argument("output_fasta", type=Path,
65+
help="Path to the output FASTA file")
66+
parser.add_argument("--forward-primer", default=DEFAULT_FORWARD_PRIMER,
67+
help="The forward primer to prepend to each sequence.")
68+
parser.add_argument("--reverse-primer", default=DEFAULT_REVERSE_PRIMER,
69+
help="The reverse primer to append to each sequence.")
70+
71+
verbosity_group = parser.add_mutually_exclusive_group()
72+
verbosity_group.add_argument('--verbose', action='store_true',
73+
help='Increase output verbosity.')
74+
verbosity_group.add_argument('--no-verbose', action='store_true',
75+
help='Normal output verbosity.', default=True)
76+
verbosity_group.add_argument('--debug', action='store_true',
77+
help='Maximum output verbosity.')
78+
verbosity_group.add_argument('--quiet', action='store_true',
79+
help='Minimize output verbosity.')
80+
81+
return parser.parse_args(argv)
82+
83+
84+
def configure_logging(args: argparse.Namespace) -> None:
85+
if args.quiet:
86+
logger.setLevel(logging.ERROR)
87+
elif args.debug:
88+
logger.setLevel(logging.DEBUG)
89+
elif args.verbose:
90+
logger.setLevel(logging.INFO)
91+
else:
92+
logger.setLevel(logging.WARNING)
93+
94+
95+
def append_primers(input: Path,
96+
output: Path,
97+
forward_primer: str,
98+
reverse_primer: str,
99+
) -> None:
100+
101+
"""
102+
Main routine that processes the input FASTA file,
103+
appends primers to each sequence,
104+
and writes the results to an output file.
105+
"""
106+
107+
if not input.exists() or not input.is_file():
108+
raise UserError("Input FASTA file does not exist: %r.", str(input))
109+
110+
logger.info("Processing FASTA file: %s", input)
111+
records = []
112+
for record in SeqIO.parse(str(input), "fasta"):
113+
new_record = append_primers_to_record(
114+
record, forward_primer, reverse_primer)
115+
records.append(new_record)
116+
117+
logger.info("Primer appending complete. Writing to output file: %r",
118+
str(output))
119+
count = SeqIO.write(records, str(output), "fasta")
120+
logger.info("Wrote %d modified FASTA records to %r.",
121+
count, str(output))
122+
123+
124+
def main(argv: Sequence[str]) -> int:
125+
args = parse_arguments(argv)
126+
configure_logging(args)
127+
append_primers(input=args.input_fasta,
128+
output=args.output_fasta,
129+
forward_primer=args.forward_primer,
130+
reverse_primer=args.reverse_primer,
131+
)
132+
return 0
133+
134+
135+
def entry() -> None:
136+
try:
137+
rc = main(sys.argv[1:])
138+
logger.debug("Done.")
139+
except BrokenPipeError:
140+
logger.debug("Broken pipe.")
141+
rc = 1
142+
except KeyboardInterrupt:
143+
logger.debug("Interrupted.")
144+
rc = 1
145+
except UserError as e:
146+
logger.fatal(e.fmt, *e.fmt_args)
147+
rc = e.code
148+
149+
sys.exit(rc)
150+
151+
152+
if __name__ == "__main__": entry() # noqa

0 commit comments

Comments
 (0)