Skip to content

Commit 3e608a5

Browse files
authored
Allow for sequencing lanes (#90)
1 parent ff1f323 commit 3e608a5

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
# 2.0.0
4+
5+
* Allow multiple sequencing lanes for MTB data
6+
37
# 1.9.0 2021-06-28
48

59
* Provides a new ETL routine written in Java, which will replace all Jython scripts at some point [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)

drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@
108108

109109
# Regex matching the RNAseq sample file naming specification
110110
RNASEQ_REG = re.compile(r'.*tumor_rna.[1,2]{1}.fastq.gz')
111+
# Update from 2021-07-06: Support lanes, indicated by a three-digit number
112+
RNASEQ_REG_LANES = re.compile(r'.*tumor_rna_[0-9]{3}.[1,2]{1}.fastq.gz')
111113

112114
# Path to the openBIS properties file
113115
PROPERTIES = '/etc/openbis.properties'
@@ -214,7 +216,7 @@ def process(transaction):
214216
for in_file in file_list:
215217
if in_file.endswith('origlabfilename') or in_file.endswith('sha256sum') or 'source_dropbox.txt' in in_file:
216218
continue
217-
if RNASEQ_REG.findall(in_file):
219+
if RNASEQ_REG.findall(in_file) or RNASEQ_REG_LANES.findall(in_file):
218220
rna_seq_files.append(in_file)
219221
elif 'fastq' in in_file:
220222
if 'normal' in in_file:
@@ -253,8 +255,10 @@ def execute_vcf_registration(vcf_files, transaction):
253255

254256

255257
def execute_fastq_registration(fastqs_normal, fastqs_tumor, transaction):
256-
if len(fastqs_tumor) != 2 or len(fastqs_normal) != 2:
258+
if len(fastqs_tumor) < 2 or len(fastqs_normal) < 2:
257259
raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset was not complete. Please check.")
260+
elif len(fastqs_tumor) != len(fastqs_normal):
261+
raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset dont have the same number of files. Are all lanes provided?")
258262
else:
259263
proc_fastq(fastqs_tumor, transaction)
260264
proc_fastq(fastqs_normal, transaction)
@@ -296,7 +300,8 @@ def register_rnaseq(rna_seq_files, transaction):
296300
the reason for the failure.
297301
"""
298302
print(mtbutils.log_stardate('Registering incoming MTB RNAseq data {}'.format(rna_seq_files)))
299-
assert len(rna_seq_files) == 2
303+
# Check that the dataset files are paired-end and complete
304+
assert len(rna_seq_files) % 2 == 0
300305
file1 = os.path.basename(rna_seq_files[0])
301306
file2 = os.path.basename(rna_seq_files[1])
302307
assert len(set(QCODE_REG.findall(file1))) == 1
@@ -448,7 +453,7 @@ def proc_fastq(fastq_file, transaction):
448453
"""Register fastq as dataset in openBIS"""
449454

450455
# Check whether file pairs are present (paired-end data!)
451-
if len(fastq_file) != 2:
456+
if len(fastq_file) % 2 != 0:
452457
raise mtbutils.MTBdropboxerror('Expecting paired end reads files, found only {}'
453458
.format(len(fastq_file)))
454459
qbiccode_f1 = QCODE_REG.findall(os.path.basename(fastq_file[0]))

0 commit comments

Comments
 (0)