Skip to content

Commit e93f6f8

Browse files
authored
Add ETL logic for MTB project data outside of the routine QUK17 project (#89)
This PR adds an ETL routine for MTB project data that belong to a common research context. It is distinct from the ETL routine for routine MTB data registration, whose data are also submitted to CentraXX. Co-authored-by: wow-such-code <[email protected]>
1 parent 3e608a5 commit e93f6f8

File tree

5 files changed

+823
-8
lines changed

5 files changed

+823
-8
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
# 2.0.0
44

5+
* Provides new ETL for MTB project data that are not supposed to be stored in QUK17 [(#89)](https://github.com/qbicsoftware/etl-scripts/pull/89)
56
* Allow multiple sequencing lanes for MTB data
67

78
# 1.9.0 2021-06-28

drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -275,14 +275,19 @@ def get_last_exp_id(experiments):
275275
return exp_ids[-1]
276276

277277

278-
def getNextFreeBarcode(projectcode, numberOfBarcodes, transaction, space):
    """Return the next sample barcode of a project that is not registered yet.

    Starting at ``numberOfBarcodes + 1``, candidate barcodes are generated
    one after another until ``transaction.getSampleForUpdate`` reports no
    sample for ``/<space>/<barcode>``.

    A candidate is built as ``projectcode`` + three-digit, zero-padded
    number + one uppercase letter + the value returned by
    ``checksum.checksum`` for that prefix.

    :param projectcode: project code used as barcode prefix
    :param numberOfBarcodes: number of barcodes already in use; the search
        starts with the following one
    :param transaction: object providing ``getSampleForUpdate(identifier)``
        (presumably the openBIS dropbox transaction -- confirm with caller)
    :param space: space code used to build the sample identifier
    :return: the first generated barcode for which no sample was found
    :raises IndexError: if the running number grows beyond what the 26
        letters can encode
    """
    letters = string.ascii_uppercase
    while True:
        numberOfBarcodes += 1
        # Floor division: the result must be an int to index ``letters``.
        # ``/`` only behaves like this under Python 2 integer semantics.
        currentLetter = letters[numberOfBarcodes // 999]
        # NOTE(review): a multiple of 999 yields the number part "000" --
        # confirm that this is an accepted barcode.
        currentNumber = numberOfBarcodes % 999
        code = projectcode + str(currentNumber).zfill(3) + currentLetter
        newSampleCode = code + checksum.checksum(code)
        identifier = "/{space}/{sample}".format(space=space, sample=newSampleCode)
        # No sample under this identifier means the barcode is still free.
        if transaction.getSampleForUpdate(identifier) is None:
            return newSampleCode
286291

287292

288293
def register_rnaseq(rna_seq_files, transaction):
@@ -320,7 +325,10 @@ def register_rnaseq(rna_seq_files, transaction):
320325
sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
321326
result = search_service.searchForSamples(sc)
322327
print("Found {} samples for project {} in space {}.".format(len(result), project, space))
323-
new_rna_sample_barcode = getNextFreeBarcode(project, numberOfBarcodes=len(result))
328+
new_rna_sample_barcode = getNextFreeBarcode(project,
329+
numberOfBarcodes=len(result),
330+
transaction=transaction,
331+
space=space)
324332

325333
# Now get the parent sample id (tumor sample, type: BIOLOGICAL_SAMPLE)
326334
tumor_dna_sample = getsample(dna_barcode, transaction)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
""" A utility class, holding helper functions
2+
for the main dropbox """
3+
4+
import datetime
5+
import subprocess as sp
6+
7+
# Absolute path of the mtbconverter executable that is launched below.
MTB_CONVERTER_PATH = '/home/qeana10/bin/miniconda/bin/mtbconverter'


def mtbconverter(cmds):
    """Run the mtbconverter executable and return its exit code.

    :param cmds: list of command line arguments that is appended to
        ``MTB_CONVERTER_PATH``
    :return: the return code reported by ``subprocess.call``
    """
    argv = [MTB_CONVERTER_PATH] + cmds
    return sp.call(argv)
14+
15+
def log_stardate(msg):
    """Return ``msg`` as a log line stamped with the current time.

    The line has the form ``<ISO timestamp> [mtbconverter]: <msg>``.
    Nothing is printed here; the formatted string is handed back to the
    caller.

    :param msg: the message to embed
    :return: the formatted log line
    """
    timestamp = datetime.datetime.now().isoformat()
    return '{} [{}]: {}'.format(timestamp, 'mtbconverter', msg)
19+
20+
class MTBdropboxerror(Exception):
    """Generic exception type for this dropbox; adds nothing to Exception."""
    pass
22+
23+
class Counter():
    """Hands out consecutive integer ids, starting at 1."""

    def __init__(self):
        # Value that the next call to newId() will return.
        self.counter = 1

    def newId(self):
        """Return the current id and advance the counter by one."""
        issued = self.counter
        self.counter = issued + 1
        return issued
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#
2+
# Drop box for registering a fastq file as a data set
3+
#
4+
# Variables:
5+
# incoming-root-dir
6+
# Path to the directory which contains incoming directories for drop boxes.
7+
incoming-dir = ${incoming-root-dir}/QBiC-register-mtb-projects-data
8+
incoming-data-completeness-condition = marker-file
9+
top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2
10+
script-path = register-mtb-projects-dropbox.py
11+
storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor

0 commit comments

Comments
 (0)