Skip to content

Commit 798a1e1

Browse files
Merge pull request #42 from qbicsoftware/feature/nanopore_unclassified
register unclassified pooling data at experiment level
2 parents 7dd9a9e + 4ff27d5 commit 798a1e1

File tree

1 file changed

+42
-21
lines changed

1 file changed

+42
-21
lines changed

drop-boxes/register-nanopore-dropbox/register-nanopore.py

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,15 @@ def createExperimentFromMeasurement(transaction, currentPath, space, project, me
157157
if measurement.getAdapter():
158158
runExperiment.setPropertyValue("Q_SEQUENCING_ADAPTER", measurement.getAdapter())
159159
# handle measured samples
160-
unclassifiedMap = measurement.getUnclassifiedData()
161160
for barcode in rawDataPerSample.keySet():
162161
datamap = rawDataPerSample.get(barcode)
163162
newLogFolder = createLogFolder(currentPath)
164163
# 3.) Aggregate all log files into a dedicated log folder per measurement
165164
copyLogFilesTo(measurement.getLogFiles(), currentPath, newLogFolder)
166-
createSampleWithData(transaction, space, barcode, datamap, unclassifiedMap, runExperiment, currentPath, newLogFolder)
165+
createSampleWithData(transaction, space, barcode, datamap, runExperiment, currentPath, newLogFolder)
166+
unclassifiedMap = measurement.getUnclassifiedData()
167+
if len(unclassifiedMap) > 0:
168+
registerUnclassifiedData(transaction, unclassifiedMap, runExperiment, currentPath, measurement.getFlowcellId())
167169

168170
# fills the global dictionary containing all checksums for paths from the global checksum file
169171
def fillChecksumMap(checksumFilePath):
@@ -190,8 +192,39 @@ def createChecksumFileForFolder(incomingPath, folderPath):
190192
f.write(value+' *'+key+'\n')
191193
return checksumFilePath
192194

195+
# prepares unclassified data folder (e.g. unclassified fast5_pass) including checksums and moves folder to target destination folder
def prepareUnclassifiedData(transaction, unclassifiedDataObject, currentPath, destinationPath):
    """ Creates the checksum file for one unclassified data folder and moves the folder to its destination.

    transaction             -- registration transaction; its incoming path is handed to the checksum creation
    unclassifiedDataObject  -- data folder object providing getRelativePath(); a missing (None/falsy) object is skipped
    currentPath             -- path of the currently handled measurement; the relative path of the data object
                               is resolved against the parent directory of this path
    destinationPath         -- path the unclassified folder is renamed to

    Returns None. Errors raised by the checksum creation or by os.rename are not caught here.
    """
    # a missing entry (e.g. a map lookup that returned None) is skipped instead of failing on getRelativePath()
    if not unclassifiedDataObject:
        return
    incomingPath = transaction.getIncoming().getAbsolutePath()
    relativePath = unclassifiedDataObject.getRelativePath()
    # the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
    unclassifiedSourcePath = os.path.join(os.path.dirname(currentPath), relativePath)
    # the checksum file has to exist before the folder is moved; the returned file path is not needed here
    createChecksumFileForFolder(incomingPath, unclassifiedSourcePath)
    # we move the unclassified object to its destination (e.g. the unclassified fast5 top folder)
    os.rename(unclassifiedSourcePath, destinationPath)
204+
205+
# attaches unclassified data to the run experiment without sample
def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, currentPath, flowcellBarcode):
    """ Registers the unclassified fastq and fast5 folders as data sets of the run experiment.

    transaction          -- registration transaction used to create the data sets and move the folders
    unclassifiedDataMap  -- map queried with the keys "fastqfail", "fastqpass", "fast5fail" and "fast5pass"
    runExperiment        -- experiment both data sets are attached to
    currentPath          -- path under which the two top folders are created
    flowcellBarcode      -- prefix of the top folder names
    """
    fastqTop = os.path.join(currentPath, flowcellBarcode + "_unclassified_fastq")
    fast5Top = os.path.join(currentPath, flowcellBarcode + "_unclassified_fast5")
    for topFolder in (fastqTop, fast5Top):
        os.makedirs(topFolder)

    # create checksum files and move unclassified folders to their top folder
    folderLayout = (
        ("fastqfail", fastqTop, "fastq_fail"),
        ("fastqpass", fastqTop, "fastq_pass"),
        ("fast5fail", fast5Top, "fast5_fail"),
        ("fast5pass", fast5Top, "fast5_pass"),
    )
    for mapKey, topFolder, subFolderName in folderLayout:
        target = os.path.join(topFolder, subFolderName)
        prepareUnclassifiedData(transaction, unclassifiedDataMap.get(mapKey), currentPath, target)

    # one data set per data type, both linked to the experiment only (no sample)
    dataSetsWithFolder = [
        (transaction.createNewDataSet(NANOPORE_FAST5_CODE), fast5Top),
        (transaction.createNewDataSet(NANOPORE_FASTQ_CODE), fastqTop),
    ]
    for dataSet, _ in dataSetsWithFolder:
        dataSet.setExperiment(runExperiment)
    for dataSet, topFolder in dataSetsWithFolder:
        transaction.moveFile(topFolder, dataSet)
225+
193226
# moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset
194-
def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, unclassifiedDataObject, suffix):
227+
def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, suffix):
195228
name = dataObject.getName()
196229
relativePath = dataObject.getRelativePath()
197230
# the source path of the currently handled data object (e.g. fast5_fail folder)
@@ -200,15 +233,8 @@ def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, un
200233
# destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled
201234
destination = os.path.join(destinationPath, name + "_" + suffix)
202235
os.rename(sourcePath, destination)
203-
# if unclassified data exists, create relevant checksums and add them with the data to the expected (barcoded) data folder
204-
if unclassifiedDataObject:
205-
relativePath = unclassifiedDataObject.getRelativePath()
206-
# the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
207-
unclassifiedSourcePath = os.path.join(os.path.dirname(currentPath), relativePath)
208-
unclassifiedChecksumFile = createChecksumFileForFolder(incomingPath, unclassifiedSourcePath)
209-
shutil.copytree(unclassifiedSourcePath, os.path.join(destination,"unclassified"))
210-
211-
def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSample, unclassifiedDataMap, openbisExperiment, currentPath, absLogPath):
236+
237+
def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSample, openbisExperiment, currentPath, absLogPath):
212238
""" Aggregates all measurement related files and registers them in openBIS.
213239
214240
The Map mapWithDataForSample contains all DataFolders created for one sample code:
@@ -236,24 +262,19 @@ def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSam
236262
topFolderFastq = os.path.join(currentPath, parentSampleCode+"_fastq")
237263
os.makedirs(topFolderFastq)
238264

239-
unclassifiedFastqFail = unclassifiedDataMap.get("fastqfail")
240-
unclassifiedFastqPass = unclassifiedDataMap.get("fastqpass")
241-
unclassifiedFast5Fail = unclassifiedDataMap.get("fast5fail")
242-
unclassifiedFast5Pass = unclassifiedDataMap.get("fast5pass")
243-
244265
fastqFail = mapWithDataForSample.get("fastqfail")
245-
prepareDataFolder(incomingPath, currentPath, topFolderFastq, fastqFail, unclassifiedFastqFail, "fail")
266+
prepareDataFolder(incomingPath, currentPath, topFolderFastq, fastqFail, "fail")
246267
fastqPass = mapWithDataForSample.get("fastqpass")
247-
prepareDataFolder(incomingPath, currentPath, topFolderFastq, fastqPass, unclassifiedFastqPass, "pass")
268+
prepareDataFolder(incomingPath, currentPath, topFolderFastq, fastqPass, "pass")
248269

249270
# Aggregate the folders fast5fail and fast5pass under a common folder "<sample code>_fast5"
250271
topFolderFast5 = os.path.join(currentPath, parentSampleCode+"_fast5")
251272
os.makedirs(topFolderFast5)
252273

253274
fast5Fail = mapWithDataForSample.get("fast5fail")
254-
prepareDataFolder(incomingPath, currentPath, topFolderFast5, fast5Fail, unclassifiedFast5Fail, "fail")
275+
prepareDataFolder(incomingPath, currentPath, topFolderFast5, fast5Fail, "fail")
255276
fast5Pass = mapWithDataForSample.get("fast5pass")
256-
prepareDataFolder(incomingPath, currentPath, topFolderFast5, fast5Pass, unclassifiedFast5Pass, "pass")
277+
prepareDataFolder(incomingPath, currentPath, topFolderFast5, fast5Pass, "pass")
257278

258279
fast5DataSet = transaction.createNewDataSet(NANOPORE_FAST5_CODE)
259280
fastQDataSet = transaction.createNewDataSet(NANOPORE_FASTQ_CODE)

0 commit comments

Comments
 (0)