Skip to content

Commit

Permalink
fix: count reads in fastq without zcat
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Feb 15, 2024
1 parent e78e7ea commit 680e56a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 18 deletions.
22 changes: 5 additions & 17 deletions hicstuff/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,7 @@ def check_is_fasta(in_file):

def check_fastq_entries(in_file):
"""
Check how many reads are in the input fastq. Requires zcat.
Check how many reads are in the input fastq.
Parameters
----------
Expand All @@ -1450,24 +1450,12 @@ def check_fastq_entries(in_file):

with open(in_file, 'rb') as f:
is_gzip = f.read(2) == b'\x1f\x8b'

if is_gzip:
n_lines = sp.run(
"zcat < {f} | wc -l".format(f = in_file),
stdout=sp.PIPE,
stderr=sp.PIPE,
shell = True,
encoding = 'utf-8'
).stdout[:-2].split(" ")[0]
with gzip.open(in_file, "rt") as input_fastq:
n_lines = sum(1 for line in input_fastq)
else:
n_lines = sp.run(
"wc -l {f}".format(f = in_file),
stdout=sp.PIPE,
stderr=sp.PIPE,
shell = True,
encoding = 'utf-8'
).stdout[:-2].split(" ")[0]

with open(in_file, "r") as input_fastq:
n_lines = sum(1 for line in input_fastq)
n_reads = int(n_lines)/4
return n_reads

Expand Down
3 changes: 2 additions & 1 deletion hicstuff/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ def _out_file(fname):
pairs_idx = input1

# Perform genome alignment
nreads_input1 = 0
if start_stage == 0:

# Check number of reads in both fastqs
Expand All @@ -895,7 +896,7 @@ def _out_file(fname):
if (nreads_input1 != nreads_input2):
logger.error("Fastq files do not have the same number of reads.")
else:
logger.info("{n} reads found in each fastq file.".format(n = nreads_input1))
logger.info("{n} reads found in each fastq file.".format(n = int(nreads_input1)))

# Define mapping choice (default normal):
if mapping == "normal":
Expand Down

0 comments on commit 680e56a

Please sign in to comment.