|
108 | 108 |
|
109 | 109 | # Regex matching the RNAseq sample file naming specification |
110 | 110 | RNASEQ_REG = re.compile(r'.*tumor_rna.[1,2]{1}.fastq.gz') |
| 111 | +# Update from 2021-07-06: Support lanes, indicated with a three-digit number |
| 112 | +RNASEQ_REG_LANES = re.compile(r'.*tumor_rna_[0-9]{3}.[1,2]{1}.fastq.gz') |
111 | 113 |
|
112 | 114 | # Path to the openBIS properties file |
113 | 115 | PROPERTIES = '/etc/openbis.properties' |
@@ -214,7 +216,7 @@ def process(transaction): |
214 | 216 | for in_file in file_list: |
215 | 217 | if in_file.endswith('origlabfilename') or in_file.endswith('sha256sum') or 'source_dropbox.txt' in in_file: |
216 | 218 | continue |
217 | | - if RNASEQ_REG.findall(in_file): |
| 219 | + if RNASEQ_REG.findall(in_file) or RNASEQ_REG_LANES.findall(in_file): |
218 | 220 | rna_seq_files.append(in_file) |
219 | 221 | elif 'fastq' in in_file: |
220 | 222 | if 'normal' in in_file: |
@@ -253,8 +255,10 @@ def execute_vcf_registration(vcf_files, transaction): |
253 | 255 |
|
254 | 256 |
|
255 | 257 | def execute_fastq_registration(fastqs_normal, fastqs_tumor, transaction): |
256 | | - if len(fastqs_tumor) != 2 or len(fastqs_normal) != 2: |
| 258 | + if len(fastqs_tumor) < 2 or len(fastqs_normal) < 2: |
257 | 259 | raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset was not complete. Please check.") |
| 260 | + elif len(fastqs_tumor) != len(fastqs_normal): |
| 261 | + raise mtbutils.MTBdropboxerror("Tumor/normal fastq dataset dont have the same number of files. Are all lanes provided?") |
258 | 262 | else: |
259 | 263 | proc_fastq(fastqs_tumor, transaction) |
260 | 264 | proc_fastq(fastqs_normal, transaction) |
@@ -296,7 +300,8 @@ def register_rnaseq(rna_seq_files, transaction): |
296 | 300 | the reason for the failure. |
297 | 301 | """ |
298 | 302 | print(mtbutils.log_stardate('Registering incoming MTB RNAseq data {}'.format(rna_seq_files))) |
299 | | - assert len(rna_seq_files) == 2 |
| 303 | + # Check if dataset files are paired-end and complete |
| 304 | + assert len(rna_seq_files) % 2 == 0 |
300 | 305 | file1 = os.path.basename(rna_seq_files[0]) |
301 | 306 | file2 = os.path.basename(rna_seq_files[1]) |
302 | 307 | assert len(set(QCODE_REG.findall(file1))) == 1 |
@@ -448,7 +453,7 @@ def proc_fastq(fastq_file, transaction): |
448 | 453 | """Register fastq as dataset in openBIS""" |
449 | 454 |
|
450 | 455 | # Check whether file pairs are present (paired-end data!) |
451 | | - if len(fastq_file) != 2: |
| 456 | + if len(fastq_file) % 2 != 0: |
452 | 457 | raise mtbutils.MTBdropboxerror('Expecting paired end reads files, found only {}' |
453 | 458 | .format(len(fastq_file))) |
454 | 459 | qbiccode_f1 = QCODE_REG.findall(os.path.basename(fastq_file[0])) |
|
0 commit comments