@@ -157,13 +157,15 @@ def createExperimentFromMeasurement(transaction, currentPath, space, project, me
157
157
if measurement .getAdapter ():
158
158
runExperiment .setPropertyValue ("Q_SEQUENCING_ADAPTER" , measurement .getAdapter ())
159
159
# handle measured samples
160
- unclassifiedMap = measurement .getUnclassifiedData ()
161
160
for barcode in rawDataPerSample .keySet ():
162
161
datamap = rawDataPerSample .get (barcode )
163
162
newLogFolder = createLogFolder (currentPath )
164
163
# 3.) Aggregate all log files into an own log folder per measurement
165
164
copyLogFilesTo (measurement .getLogFiles (), currentPath , newLogFolder )
166
- createSampleWithData (transaction , space , barcode , datamap , unclassifiedMap , runExperiment , currentPath , newLogFolder )
165
+ createSampleWithData (transaction , space , barcode , datamap , runExperiment , currentPath , newLogFolder )
166
+ unclassifiedMap = measurement .getUnclassifiedData ()
167
+ if len (unclassifiedMap ) > 0 :
168
+ registerUnclassifiedData (transaction , unclassifiedMap , runExperiment , currentPath , measurement .getFlowcellId ())
167
169
168
170
# fills the global dictionary containing all checksums for paths from the global checksum file
169
171
def fillChecksumMap (checksumFilePath ):
@@ -190,8 +192,40 @@ def createChecksumFileForFolder(incomingPath, folderPath):
190
192
f .write (value + ' *' + key + '\n ' )
191
193
return checksumFilePath
192
194
195
+ # prepares unclassified data folder (e.g. unclassified fast5_pass) including checksums and moves folder to target destination folder
196
def prepareUnclassifiedData(transaction, unclassifiedDataObject, currentPath, destinationPath):
    """Create checksums for one unclassified data folder and move it to its destination.

    Args:
        transaction: registration transaction; only used to resolve the
            absolute path of the incoming folder.
        unclassifiedDataObject: object describing the unclassified folder
            (e.g. unclassified fast5_pass); must provide getRelativePath()
            and getName(). May be None when the category is absent, in
            which case nothing is done.
        currentPath: path of the currently handled measurement folder; the
            relative path of the data object is resolved against its parent.
        destinationPath: existing folder that will receive the data folder.

    Returns:
        None.
    """
    # The previous implementation only handled unclassified data when the
    # object was present; keep that contract instead of failing on None.
    if unclassifiedDataObject is None:
        return

    incomingPath = transaction.getIncoming().getAbsolutePath()
    relativePath = unclassifiedDataObject.getRelativePath()
    destination = os.path.join(destinationPath, unclassifiedDataObject.getName())
    # the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
    unclassifiedSourcePath = os.path.join(os.path.dirname(currentPath), relativePath)
    # called for its side effect of creating the checksum file; the returned
    # checksum file path is not needed here
    createChecksumFileForFolder(incomingPath, unclassifiedSourcePath)
    # we move the unclassified object to its destination (e.g. the unclassified fast5 top folder)
    os.rename(unclassifiedSourcePath, destination)
205
+
206
+ # attaches unclassified data to the run experiment without sample
207
def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, currentPath, flowcellBarcode):
    """Attach unclassified data to the run experiment without a sample.

    Two top folders, "<flowcellBarcode>_unclassified_fastq" and
    "<flowcellBarcode>_unclassified_fast5", are created below currentPath.
    The unclassified pass/fail folders found in unclassifiedDataMap are
    checksummed and moved into them, and each top folder is then registered
    as its own data set on runExperiment.

    Args:
        transaction: registration transaction used to create the data sets
            and to move the folders into them.
        unclassifiedDataMap: map with the keys "fastqfail", "fastqpass",
            "fast5fail" and "fast5pass" pointing to the data objects.
        runExperiment: experiment the new data sets are attached to.
        currentPath: path of the currently handled measurement folder.
        flowcellBarcode: identifier used as prefix of the top folder names.
    """
    topFolderFastq = os.path.join(currentPath, flowcellBarcode + "_unclassified_fastq")
    topFolderFast5 = os.path.join(currentPath, flowcellBarcode + "_unclassified_fast5")
    os.makedirs(topFolderFastq)
    os.makedirs(topFolderFast5)

    # create checksum files and move unclassified folders to their top folder
    # (same order as before: fastq fail/pass first, then fast5 fail/pass)
    targets = (
        ("fastqfail", topFolderFastq),
        ("fastqpass", topFolderFastq),
        ("fast5fail", topFolderFast5),
        ("fast5pass", topFolderFast5),
    )
    for key, topFolder in targets:
        dataObject = unclassifiedDataMap.get(key)
        # a category missing from the map yields None; skip it instead of
        # failing the whole registration with an AttributeError
        if dataObject is None:
            continue
        prepareUnclassifiedData(transaction, dataObject, currentPath, topFolder)

    fast5DataSet = transaction.createNewDataSet(NANOPORE_FAST5_CODE)
    fastQDataSet = transaction.createNewDataSet(NANOPORE_FASTQ_CODE)
    fast5DataSet.setExperiment(runExperiment)
    fastQDataSet.setExperiment(runExperiment)
    transaction.moveFile(topFolderFast5, fast5DataSet)
    transaction.moveFile(topFolderFastq, fastQDataSet)
226
+
193
227
# moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset
194
- def prepareDataFolder (incomingPath , currentPath , destinationPath , dataObject , unclassifiedDataObject , suffix ):
228
+ def prepareDataFolder (incomingPath , currentPath , destinationPath , dataObject , suffix ):
195
229
name = dataObject .getName ()
196
230
relativePath = dataObject .getRelativePath ()
197
231
# the source path of the currently handled data object (e.g. fast5_fail folder)
@@ -200,15 +234,8 @@ def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, un
200
234
# destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled
201
235
destination = os .path .join (destinationPath , name + "_" + suffix )
202
236
os .rename (sourcePath , destination )
203
- # if unclassified data exists, create relevant checksums and add them with the data to the expected (barcoded) data folder
204
- if unclassifiedDataObject :
205
- relativePath = unclassifiedDataObject .getRelativePath ()
206
- # the source path of the currently handled data object (e.g. unclassified fast5_fail folder)
207
- unclassifiedSourcePath = os .path .join (os .path .dirname (currentPath ), relativePath )
208
- unclassifiedChecksumFile = createChecksumFileForFolder (incomingPath , unclassifiedSourcePath )
209
- shutil .copytree (unclassifiedSourcePath , os .path .join (destination ,"unclassified" ))
210
-
211
- def createSampleWithData (transaction , space , parentSampleCode , mapWithDataForSample , unclassifiedDataMap , openbisExperiment , currentPath , absLogPath ):
237
+
238
+ def createSampleWithData (transaction , space , parentSampleCode , mapWithDataForSample , openbisExperiment , currentPath , absLogPath ):
212
239
""" Aggregates all measurement related files and registers them in openBIS.
213
240
214
241
The Map mapWithDataForSample contains all DataFolders created for one sample code:
@@ -236,24 +263,19 @@ def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSam
236
263
topFolderFastq = os .path .join (currentPath , parentSampleCode + "_fastq" )
237
264
os .makedirs (topFolderFastq )
238
265
239
- unclassifiedFastqFail = unclassifiedDataMap .get ("fastqfail" )
240
- unclassifiedFastqPass = unclassifiedDataMap .get ("fastqpass" )
241
- unclassifiedFast5Fail = unclassifiedDataMap .get ("fast5fail" )
242
- unclassifiedFast5Pass = unclassifiedDataMap .get ("fast5pass" )
243
-
244
266
fastqFail = mapWithDataForSample .get ("fastqfail" )
245
- prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqFail , unclassifiedFastqFail , "fail" )
267
+ prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqFail , "fail" )
246
268
fastqPass = mapWithDataForSample .get ("fastqpass" )
247
- prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqPass , unclassifiedFastqPass , "pass" )
269
+ prepareDataFolder (incomingPath , currentPath , topFolderFastq , fastqPass , "pass" )
248
270
249
271
# Aggregate the folders fast5fail and fast5pass under a common folder "<sample code>_fast5"
250
272
topFolderFast5 = os .path .join (currentPath , parentSampleCode + "_fast5" )
251
273
os .makedirs (topFolderFast5 )
252
274
253
275
fast5Fail = mapWithDataForSample .get ("fast5fail" )
254
- prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Fail , unclassifiedFast5Fail , "fail" )
276
+ prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Fail , "fail" )
255
277
fast5Pass = mapWithDataForSample .get ("fast5pass" )
256
- prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Pass , unclassifiedFast5Pass , "pass" )
278
+ prepareDataFolder (incomingPath , currentPath , topFolderFast5 , fast5Pass , "pass" )
257
279
258
280
fast5DataSet = transaction .createNewDataSet (NANOPORE_FAST5_CODE )
259
281
fastQDataSet = transaction .createNewDataSet (NANOPORE_FASTQ_CODE )
0 commit comments