@@ -63,20 +63,31 @@ def parse_arguments(args=None):
     return parser
 
 
-def filter_scores_target_list(pScoresDictionary, pTargetList=None, pTargetIntervalTree=None, pTargetFile=None):
+def filter_scores_target_list(pScoresDictionary, pTargetFType, pTargetPosDict, pTargetList=None, pTargetIntervalTree=None, pTargetFile=None):
 
     accepted_scores = {}
     same_target_dict = {}
     target_regions_intervaltree = None
-    if pTargetList is not None:
-
+    # newly added
+    if pTargetFType == 'hdf5':
         # read hdf content for this specific combination
         targetFileHDF5Object = h5py.File(pTargetFile, 'r')
         target_object = targetFileHDF5Object['/'.join(pTargetList)]
         chromosome = target_object.get('chromosome')[()].decode("utf-8")
         start_list = list(target_object['start_list'][:])
         end_list = list(target_object['end_list'][:])
         targetFileHDF5Object.close()
+    elif pTargetFType == 'bed4':
+        chromosome = pTargetPosDict[pTargetList[-1]]['chromosome']
+        start_list = pTargetPosDict[pTargetList[-1]]['start_list']
+        end_list = pTargetPosDict[pTargetList[-1]]['end_list']
+    elif pTargetFType == 'bed3':
+        target_regions_intervaltree = pTargetIntervalTree
+    else:
+        log.error('No target list given.')
+        raise Exception('No target list given.')
+
+    if pTargetList is not None:
         chromosome = [chromosome] * len(start_list)
 
         target_regions = list(zip(chromosome, start_list, end_list))
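
A note on the data shape the new `bed4` branch relies on: `pTargetPosDict` is keyed by the target name taken from `pTargetList[-1]`, and each entry holds one chromosome name plus parallel start/end lists, since the shared code path still does `chromosome = [chromosome] * len(start_list)`. A minimal illustration of that assumed layout — the key names come from the diff, while the gene name and coordinates below are invented:

```python
# Illustration only -- not code from this PR.
pTargetPosDict = {
    'GeneA': {                               # hypothetical target name (BED column 4)
        'chromosome': 'chr1',                # single name, replicated per interval later
        'start_list': [1000000, 1005000],
        'end_list': [1004999, 1009999],
    }
}

pTargetList = ['matrix1', 'chr1', 'GeneA']   # hypothetical; only the last element is used
entry = pTargetPosDict[pTargetList[-1]]      # -> coordinate lists for 'GeneA'
target_regions = list(zip([entry['chromosome']] * len(entry['start_list']),
                          entry['start_list'], entry['end_list']))
```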
@@ -85,12 +96,6 @@ def filter_scores_target_list(pScoresDictionary, pTargetList=None, pTargetInterv
 
         hicmatrix = hm.hiCMatrix()
         target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(target_regions)[0]
-    elif pTargetIntervalTree is not None:
-        target_regions_intervaltree = pTargetIntervalTree
-
-    else:
-        log.error('No target list given.')
-        raise Exception('No target list given.')
 
     for key in pScoresDictionary:
         chromosome = pScoresDictionary[key][0]
@@ -193,12 +198,12 @@ def writeAggregateHDF(pOutFileName, pOutfileNamesList, pAcceptedScoresList, pArg
     aggregateFileH5Object.close()
 
 
-def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pViewpointObj, pQueue=None, pOneTarget=False):
+def run_target_list_compilation(pInteractionFilesList, pTargetList, pTargetFType, pTargetPosDict, pArgs, pViewpointObj, pQueue=None, pOneTarget=False):
     outfile_names_list = []
     accepted_scores_list = []
     target_regions_intervaltree = None
     try:
-        if pOneTarget == True:
+        if pTargetFType == 'bed3':
             try:
                 target_regions = utilities.readBed(pTargetList)
             except Exception as exp:
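
`utilities.readBed`, used in the `bed3` path above, is not part of this diff. Assuming it simply returns the BED3 intervals as `(chromosome, start, end)` tuples that `intervalListToIntervalTree` can consume, a stand-in would look roughly like the sketch below; HiCExplorer's actual implementation may differ.

```python
def read_bed3_sketch(p_bed_path):
    """Hypothetical stand-in for utilities.readBed (not the real implementation):
    parse a 3-column BED file into (chromosome, start, end) tuples."""
    regions = []
    with open(p_bed_path) as bed:
        for line in bed:
            if line.startswith('#') or not line.strip():
                continue
            chrom, start, end = line.strip().split('\t')[:3]
            regions.append((chrom, int(start), int(end)))
    return regions
```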
@@ -211,14 +216,13 @@ def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pView
             outfile_names_list_intern = []
             accepted_scores_list_intern = []
             for sample in interactionFile:
-
                 interaction_data, interaction_file_data, _ = pViewpointObj.readInteractionFile(pArgs.interactionFile, sample)
                 if pOneTarget == True:
                     target_file = None
                 else:
                     target_file = pTargetList[i]
 
-                accepted_scores = filter_scores_target_list(interaction_file_data, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree, pTargetFile=pArgs.targetFile)
+                accepted_scores = filter_scores_target_list(interaction_file_data, pTargetFType, pTargetPosDict, pTargetList=target_file, pTargetIntervalTree=target_regions_intervaltree, pTargetFile=pArgs.targetFile)
 
                 outfile_names_list_intern.append(sample)
                 accepted_scores_list_intern.append(accepted_scores)
@@ -238,7 +242,7 @@ def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pView
     return
 
 
-def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs, pViewpointObj):
+def call_multi_core(pInteractionFilesList, pTargetFileList, pTargetFType, pTargetPosDict, pFunctionName, pArgs, pViewpointObj):
     if len(pInteractionFilesList) < pArgs.threads:
         pArgs.threads = len(pInteractionFilesList)
     outfile_names_list = [None] * pArgs.threads
@@ -272,6 +276,8 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
         process[i] = Process(target=pFunctionName, kwargs=dict(
             pInteractionFilesList=interactionFileListThread,
             pTargetList=targetFileListThread,
+            pTargetFType=pTargetFType,
+            pTargetPosDict=pTargetPosDict,
             pArgs=pArgs,
             pViewpointObj=pViewpointObj,
             pQueue=queue[i],
@@ -318,16 +324,32 @@ def main(args=None):
 
     targetList = []
     present_genes = {}
+    target_ftype = ''
+    targetPosDict = None
     # read hdf file
     interactionFileHDF5Object = h5py.File(args.interactionFile, 'r')
     keys_interactionFile = list(interactionFileHDF5Object.keys())
 
     if h5py.is_hdf5(args.targetFile):
-
         targetDict, present_genes = viewpointObj.readTargetHDFFile(args.targetFile)
+        target_ftype = 'hdf5'
 
     else:
-        targetList = [args.targetFile]
+        with open(args.targetFile) as file:
+            for line in file.readlines():
+                if line.startswith('#'):
+                    continue
+                _line = line.strip().split('\t')
+                break
+        if len(_line) == 4:
+            log.info('Targets BED contains 4 columns, aggregating on column 4')
+            target_ftype = 'bed4'
+            present_genes, targetDict, targetPosDict = utilities.readTargetBed(args.targetFile)
+        elif len(_line) == 3:
+            targetList = [args.targetFile]
+            target_ftype = 'bed3'
+        else:
+            log.error('BED of targets list must have 3 or 4 columns')
 
 
     if len(keys_interactionFile) > 1:
@@ -346,7 +368,7 @@ def main(args=None):
                     geneList2 = sorted(list(matrix_obj2[chromosome2].keys()))
 
                     for gene1, gene2 in zip(geneList1, geneList2):
-                        if h5py.is_hdf5(args.targetFile):
+                        if target_ftype != 'bed3':
                             if gene1 in present_genes[sample][sample2]:
                                 interactionDict[gene1] = [[sample, chromosome1, gene1], [sample2, chromosome2, gene2]]
                         else:
@@ -356,7 +378,7 @@ def main(args=None):
 
     interactionFileHDF5Object.close()
 
-    if h5py.is_hdf5(args.targetFile):
+    if target_ftype != 'bed3':
         key_outer_matrix = present_genes.keys()
         for keys_outer in key_outer_matrix:
             keys_inner_matrix = present_genes[keys_outer].keys()
@@ -365,5 +387,5 @@ def main(args=None):
                     interactionList.append(interactionDict[gene])
                     targetList.append(targetDict[gene])
 
-    outfile_names_list, accepted_scores_list = call_multi_core(interactionList, targetList, run_target_list_compilation, args, viewpointObj)
+    outfile_names_list, accepted_scores_list = call_multi_core(interactionList, targetList, target_ftype, targetPosDict, run_target_list_compilation, args, viewpointObj)
     writeAggregateHDF(args.outFileName, outfile_names_list, accepted_scores_list, args)
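
From the user's perspective, the change means the target file may now be a 4-column BED (aggregated on column 4) in addition to the HDF5 target file and the 3-column BED handled before. Assuming the long option names match the `args.*` attributes used above (an assumption, not verified against the parser in this diff), a call through the module's `main()` could look like:

```python
# Hypothetical invocation; option names are inferred from
# args.interactionFile / args.targetFile / args.outFileName / args.threads.
main([
    '--interactionFile', 'interactions.hdf5',   # per-viewpoint interaction HDF5
    '--targetFile', 'targets_4col.bed',         # new: 4-column BED, grouped on column 4
    '--outFileName', 'aggregate_target.hdf5',
    '--threads', '4',
])
```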