108
108
109
109
# Regexes matching the RNAseq sample file naming specification.
# The read number is exactly '1' or '2'. (The former class '[1,2]' also
# accepted a literal comma.) The dots of the '.fastq.gz' suffix are escaped so
# they only match real dots. The single character between 'tumor_rna' (or the
# lane number) and the read number is deliberately left as "any character" so
# that every separator accepted so far keeps being accepted.
RNASEQ_REG = re.compile(r'.*tumor_rna.[12]\.fastq\.gz')
# Update from 2021-07-06: Support lanes, indicated with a three-digit number
RNASEQ_REG_LANES = re.compile(r'.*tumor_rna_[0-9]{3}.[12]\.fastq\.gz')

# Path to the openBIS properties file
PROPERTIES = '/etc/openbis.properties'
@@ -214,7 +216,7 @@ def process(transaction):
214
216
for in_file in file_list :
215
217
if in_file .endswith ('origlabfilename' ) or in_file .endswith ('sha256sum' ) or 'source_dropbox.txt' in in_file :
216
218
continue
217
- if RNASEQ_REG .findall (in_file ):
219
+ if RNASEQ_REG .findall (in_file ) or RNASEQ_REG_LANES . findall ( in_file ) :
218
220
rna_seq_files .append (in_file )
219
221
elif 'fastq' in in_file :
220
222
if 'normal' in in_file :
@@ -253,8 +255,10 @@ def execute_vcf_registration(vcf_files, transaction):
253
255
254
256
255
257
def execute_fastq_registration (fastqs_normal , fastqs_tumor , transaction ):
256
- if len (fastqs_tumor ) != 2 or len (fastqs_normal ) != 2 :
258
+ if len (fastqs_tumor ) < 2 or len (fastqs_normal ) < 2 :
257
259
raise mtbutils .MTBdropboxerror ("Tumor/normal fastq dataset was not complete. Please check." )
260
+ elif len (fastqs_tumor ) != len (fastqs_normal ):
261
+ raise mtbutils .MTBdropboxerror ("Tumor/normal fastq dataset dont have the same number of files. Are all lanes provided?" )
258
262
else :
259
263
proc_fastq (fastqs_tumor , transaction )
260
264
proc_fastq (fastqs_normal , transaction )
@@ -271,14 +275,19 @@ def get_last_exp_id(experiments):
271
275
return exp_ids [- 1 ]
272
276
273
277
274
def getNextFreeBarcode(projectcode, numberOfBarcodes, transaction, space):
    """Return the next barcode of a project that is not yet taken in openBIS.

    Candidate barcodes are generated by incrementing ``numberOfBarcodes`` and
    composing ``projectcode`` + zero-padded three-digit number + uppercase
    letter + the value returned by ``checksum.checksum`` for that code. Each
    candidate is looked up via ``transaction.getSampleForUpdate`` under
    ``/<space>/<barcode>``; the first candidate for which the lookup returns
    a falsy value is returned.

    projectcode      -- project code the barcode starts with
    numberOfBarcodes -- running count to start from; the first candidate
                        uses ``numberOfBarcodes + 1``
    transaction      -- object providing ``getSampleForUpdate(identifier)``
                        (presumably the openBIS dropbox transaction, which
                        returns None for unknown samples -- confirm)
    space            -- space name used to build the sample identifier

    Raises IndexError when the running count reaches 26 * 999, because no
    further letter is available in ``string.ascii_uppercase``.
    """
    letters = string.ascii_uppercase
    while True:
        numberOfBarcodes += 1
        # Floor division is required here: the quotient is used as a sequence
        # index, and '/' yields a float (TypeError) under true division.
        currentLetter = letters[numberOfBarcodes // 999]
        # NOTE(review): multiples of 999 yield the number part '000' and the
        # number 999 is never produced -- confirm this matches the intended
        # barcode scheme before changing it.
        currentNumber = numberOfBarcodes % 999
        code = projectcode + str(currentNumber).zfill(3) + currentLetter
        newSampleCode = code + checksum.checksum(code)
        existingSample = transaction.getSampleForUpdate(
            "/{space}/{sample}".format(space=space, sample=newSampleCode))
        if not existingSample:
            # No sample registered under this identifier: the code is free.
            return newSampleCode
282
291
283
292
284
293
def register_rnaseq (rna_seq_files , transaction ):
@@ -296,7 +305,8 @@ def register_rnaseq(rna_seq_files, transaction):
296
305
the reason for the failure.
297
306
"""
298
307
print (mtbutils .log_stardate ('Registering incoming MTB RNAseq data {}' .format (rna_seq_files )))
299
- assert len (rna_seq_files ) == 2
308
+ # Check if dataset files are paired end and complete
309
+ assert len (rna_seq_files ) % 2 == 0
300
310
file1 = os .path .basename (rna_seq_files [0 ])
301
311
file2 = os .path .basename (rna_seq_files [1 ])
302
312
assert len (set (QCODE_REG .findall (file1 ))) == 1
@@ -315,7 +325,10 @@ def register_rnaseq(rna_seq_files, transaction):
315
325
sc .addSubCriteria (SearchSubCriteria .createExperimentCriteria (pc ))
316
326
result = search_service .searchForSamples (sc )
317
327
print ("Found {} samples for project {} in space {}." .format (len (result ), project , space ))
318
- new_rna_sample_barcode = getNextFreeBarcode (project , numberOfBarcodes = len (result ))
328
+ new_rna_sample_barcode = getNextFreeBarcode (project ,
329
+ numberOfBarcodes = len (result ),
330
+ transaction = transaction ,
331
+ space = space )
319
332
320
333
# Now get the parent sample id (tumor sample, type: BIOLOGICAL_SAMPLE)
321
334
tumor_dna_sample = getsample (dna_barcode , transaction )
@@ -448,7 +461,7 @@ def proc_fastq(fastq_file, transaction):
448
461
"""Register fastq as dataset in openBIS"""
449
462
450
463
# Check, if there are file pairs present (paired-end data!)
451
- if len (fastq_file ) != 2 :
464
+ if len (fastq_file ) % 2 != 0 :
452
465
raise mtbutils .MTBdropboxerror ('Expecting paired end reads files, found only {}'
453
466
.format (len (fastq_file )))
454
467
qbiccode_f1 = QCODE_REG .findall (os .path .basename (fastq_file [0 ]))
0 commit comments