@@ -157,13 +157,15 @@ def createExperimentFromMeasurement(transaction, currentPath, space, project, me
157
157
if measurement .getAdapter ():
158
158
runExperiment .setPropertyValue ("Q_SEQUENCING_ADAPTER" , measurement .getAdapter ())
159
159
# handle measured samples
160
- unclassifiedMap = measurement .getUnclassifiedData ()
161
160
for barcode in rawDataPerSample .keySet ():
162
161
datamap = rawDataPerSample .get (barcode )
163
162
newLogFolder = createLogFolder (currentPath )
164
163
# 3.) Aggregate all log files into an own log folder per measurement
165
164
copyLogFilesTo (measurement .getLogFiles (), currentPath , newLogFolder )
166
- createSampleWithData (transaction , space , barcode , datamap , unclassifiedMap , runExperiment , currentPath , newLogFolder )
165
+ createSampleWithData (transaction , space , barcode , datamap , runExperiment , currentPath , newLogFolder )
166
+ unclassifiedMap = measurement .getUnclassifiedData ()
167
+ if len (unclassifiedMap ) > 0 :
168
+ registerUnclassifiedData (transaction , unclassifiedMap , runExperiment , currentPath , measurement .getFlowcellId ())
167
169
168
170
# fills the global dictionary containing all checksums for paths from the global checksum file
169
171
def fillChecksumMap (checksumFilePath ):
@@ -190,8 +192,39 @@ def createChecksumFileForFolder(incomingPath, folderPath):
190
192
f .write (value + ' *' + key + '\n ' )
191
193
return checksumFilePath
192
194
195
def prepareUnclassifiedData(transaction, unclassifiedDataObject, currentPath, destinationPath):
    """Create checksums for one unclassified data folder and move it to destinationPath.

    Args:
        transaction: registration transaction; only getIncoming().getAbsolutePath()
            is used here, to obtain the incoming path for checksum creation.
        unclassifiedDataObject: object exposing getRelativePath() for the
            unclassified folder (e.g. unclassified fast5_pass). May be None when
            the measurement has no data of this kind; the call is then a no-op.
        currentPath: path whose parent directory is the base that the data
            object's relative path is resolved against.
        destinationPath: final location of the folder. Its parent directory must
            already exist, because os.rename does not create intermediate folders.

    Raises:
        OSError: if the folder cannot be renamed to destinationPath.
    """
    # Callers pass the result of a map lookup, which is None for absent entries;
    # skip those instead of failing with an AttributeError further down.
    if unclassifiedDataObject is None:
        return

    incomingPath = transaction.getIncoming().getAbsolutePath()
    relativePath = unclassifiedDataObject.getRelativePath()
    # the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
    unclassifiedSourcePath = os.path.join(os.path.dirname(currentPath), relativePath)
    # Called only for its side effect of writing the checksum file; the returned
    # checksum file path is not needed here. The checksums have to be created
    # before the folder is moved away from its source location.
    createChecksumFileForFolder(incomingPath, unclassifiedSourcePath)
    # we move the unclassified object to its destination (e.g. the unclassified fast5 top folder)
    os.rename(unclassifiedSourcePath, destinationPath)
204
+
205
def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, currentPath, flowcellBarcode):
    """Attach unclassified data to the run experiment without a sample.

    Creates the top folders "<flowcellBarcode>_unclassified_fastq" and
    "<flowcellBarcode>_unclassified_fast5" below currentPath, moves the available
    unclassified pass/fail folders into them (including checksum creation), and
    registers each top folder as its own data set on runExperiment.

    Args:
        transaction: registration transaction used to create the data sets and
            to move the top folders into them.
        unclassifiedDataMap: map with the optional keys "fastqfail", "fastqpass",
            "fast5fail" and "fast5pass"; absent entries are skipped.
        runExperiment: experiment that both data sets are attached to.
        currentPath: folder in which the two top folders are created.
        flowcellBarcode: prefix used for the top folder names.

    Raises:
        OSError: if a top folder already exists or a folder cannot be moved.
    """
    topFolderFastq = os.path.join(currentPath, flowcellBarcode + "_unclassified_fastq")
    topFolderFast5 = os.path.join(currentPath, flowcellBarcode + "_unclassified_fast5")
    os.makedirs(topFolderFastq)
    os.makedirs(topFolderFast5)

    # (key in the unclassified map, top folder, folder name inside the top folder)
    folderLayout = (
        ("fastqfail", topFolderFastq, "fastq_fail"),
        ("fastqpass", topFolderFastq, "fastq_pass"),
        ("fast5fail", topFolderFast5, "fast5_fail"),
        ("fast5pass", topFolderFast5, "fast5_pass"),
    )
    # create checksum files and move unclassified folders to their top folder
    for mapKey, topFolder, folderName in folderLayout:
        unclassifiedDataObject = unclassifiedDataMap.get(mapKey)
        # get() yields None for an absent key; a measurement does not have to
        # provide all four kinds of unclassified data.
        if unclassifiedDataObject is None:
            continue
        prepareUnclassifiedData(transaction, unclassifiedDataObject, currentPath, os.path.join(topFolder, folderName))

    fast5DataSet = transaction.createNewDataSet(NANOPORE_FAST5_CODE)
    fastQDataSet = transaction.createNewDataSet(NANOPORE_FASTQ_CODE)
    fast5DataSet.setExperiment(runExperiment)
    fastQDataSet.setExperiment(runExperiment)
    transaction.moveFile(topFolderFast5, fast5DataSet)
    transaction.moveFile(topFolderFastq, fastQDataSet)
225
+
193
226
# moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset
194
- def prepareDataFolder (incomingPath , currentPath , destinationPath , dataObject , unclassifiedDataObject , suffix ):
227
+ def prepareDataFolder (incomingPath , currentPath , destinationPath , dataObject , suffix ):
195
228
name = dataObject .getName ()
196
229
relativePath = dataObject .getRelativePath ()
197
230
# the source path of the currently handled data object (e.g. fast5_fail folder)
@@ -200,15 +233,8 @@ def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, un
200
233
# destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled
201
234
destination = os .path .join (destinationPath , name + "_" + suffix )
202
235
os .rename (sourcePath , destination )
203
- # if unclassified data exists, create relevant checksums and add them with the data to the expected (barcoded) data folder
204
- if unclassifiedDataObject :
205
- relativePath = unclassifiedDataObject .getRelativePath ()
206
- # the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
207
- unclassifiedSourcePath = os .path .join (os .path .dirname (currentPath ), relativePath )
208
- unclassifiedChecksumFile = createChecksumFileForFolder (incomingPath , unclassifiedSourcePath )
209
- shutil .copytree (unclassifiedSourcePath , os .path .join (destination ,"unclassified" ))
210
-
211
- def createSampleWithData (transaction , space , parentSampleCode , mapWithDataForSample , unclassifiedDataMap , openbisExperiment , currentPath , absLogPath ):
236
+
237
+ def createSampleWithData (transaction , space , parentSampleCode , mapWithDataForSample , openbisExperiment , currentPath , absLogPath ):
212
238
""" Aggregates all measurement related files and registers them in openBIS.
213
239
214
240
The Map mapWithDataForSample contains all DataFolders created for one sample code:
@@ -236,24 +262,19 @@ def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSam
236
262
topFolderFastq = os .path .join (currentPath , parentSampleCode + "_fastq" )
237
263
os .makedirs (topFolderFastq )
238
264
239
- unclassifiedFastqFail = unclassifiedDataMap .get ("fastqfail" )
240
- unclassifiedFastqPass = unclassifiedDataMap .get ("fastqpass" )
241
- unclassifiedFast5Fail = unclassifiedDataMap .get ("fast5fail" )
242
- unclassifiedFast5Pass = unclassifiedDataMap .get ("fast5pass" )
243
-
244
265
fastqFail = mapWithDataForSample .get ("fastqfail" )
245
- prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqFail , unclassifiedFastqFail , "fail" )
266
+ prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqFail , "fail" )
246
267
fastqPass = mapWithDataForSample .get ("fastqpass" )
247
- prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqPass , unclassifiedFastqPass , "pass" )
268
+ prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqPass , "pass" )
248
269
249
270
# Aggregate the folders fast5fail and fast5pass under a common folder "<sample code>_fast5"
250
271
topFolderFast5 = os .path .join (currentPath , parentSampleCode + "_fast5" )
251
272
os .makedirs (topFolderFast5 )
252
273
253
274
fast5Fail = mapWithDataForSample .get ("fast5fail" )
254
- prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Fail , unclassifiedFast5Fail , "fail" )
275
+ prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Fail , "fail" )
255
276
fast5Pass = mapWithDataForSample .get ("fast5pass" )
256
- prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Pass , unclassifiedFast5Pass , "pass" )
277
+ prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Pass , "pass" )
257
278
258
279
fast5DataSet = transaction .createNewDataSet (NANOPORE_FAST5_CODE )
259
280
fastQDataSet = transaction .createNewDataSet (NANOPORE_FASTQ_CODE )
0 commit comments