|
108 | 108 |
|
109 | 109 | # Regex matching the RNAseq sample file naming specification
|
110 | 110 | RNASEQ_REG = re.compile(r'.*tumor_rna.[1,2]{1}.fastq.gz')
|
| 111 | +# Update from 2021-07-06: Support lanes, indicated with a three-digit number |
| 112 | +RNASEQ_REG_LANES = re.compile(r'.*tumor_rna_[0-9]{3}.[1,2]{1}.fastq.gz') |
111 | 113 |
|
112 | 114 | # Path to the openBIS properties file
|
113 | 115 | PROPERTIES = '/etc/openbis.properties'
|
@@ -214,7 +216,7 @@ def process(transaction):
|
214 | 216 | for in_file in file_list:
|
215 | 217 | if in_file.endswith('origlabfilename') or in_file.endswith('sha256sum') or 'source_dropbox.txt' in in_file:
|
216 | 218 | continue
|
217 |
| - if RNASEQ_REG.findall(in_file): |
| 219 | + if RNASEQ_REG.findall(in_file) or RNASEQ_REG_LANES.findall(in_file): |
218 | 220 | rna_seq_files.append(in_file)
|
219 | 221 | elif 'fastq' in in_file:
|
220 | 222 | if 'normal' in in_file:
|
@@ -253,8 +255,10 @@ def execute_vcf_registration(vcf_files, transaction):
|
253 | 255 |
|
254 | 256 |
|
255 | 257 | def execute_fastq_registration(fastqs_normal, fastqs_tumor, transaction):
|
256 |
| - if len(fastqs_tumor) != 2 or len(fastqs_normal) != 2: |
| 258 | + if len(fastqs_tumor) < 2 or len(fastqs_normal) < 2: |
257 | 259 | raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset was not complete. Please check.")
|
| 260 | + elif len(fastqs_tumor) != len(fastqs_normal): |
| 261 | + raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset dont have the same number of files. Are all lanes provided?") |
258 | 262 | else:
|
259 | 263 | proc_fastq(fastqs_tumor, transaction)
|
260 | 264 | proc_fastq(fastqs_normal, transaction)
|
@@ -296,7 +300,8 @@ def register_rnaseq(rna_seq_files, transaction):
|
296 | 300 | the reason for the failure.
|
297 | 301 | """
|
298 | 302 | print(mtbutils.log_stardate('Registering incoming MTB RNAseq data {}'.format(rna_seq_files)))
|
299 |
| - assert len(rna_seq_files) == 2 |
| 303 | + # Check that the dataset files are paired-end and complete |
| 304 | + assert len(rna_seq_files) % 2 == 0 |
300 | 305 | file1 = os.path.basename(rna_seq_files[0])
|
301 | 306 | file2 = os.path.basename(rna_seq_files[1])
|
302 | 307 | assert len(set(QCODE_REG.findall(file1))) == 1
|
@@ -448,7 +453,7 @@ def proc_fastq(fastq_file, transaction):
|
448 | 453 | """Register fastq as dataset in openBIS"""
|
449 | 454 |
|
450 | 455 | # Check whether file pairs are present (paired-end data!)
|
451 |
| - if len(fastq_file) != 2: |
| 456 | + if len(fastq_file) % 2 != 0: |
452 | 457 | raise mtbutils.MTBdropboxerror('Expecting paired end reads files, found only {}'
|
453 | 458 | .format(len(fastq_file)))
|
454 | 459 | qbiccode_f1 = QCODE_REG.findall(os.path.basename(fastq_file[0]))
|
|
0 commit comments