22
22
import deeprank2 .features
23
23
from deeprank2 .domain .aminoacidlist import convert_aa_nomenclature
24
24
from deeprank2 .features import components , conservation , contact
25
- from deeprank2 .molstruct .residue import Residue , SingleResidueVariant
25
+ from deeprank2 .molstruct .residue import SingleResidueVariant
26
26
from deeprank2 .utils .buildgraph import get_contact_atoms , get_structure , get_surrounding_residues
27
27
from deeprank2 .utils .graph import Graph
28
28
from deeprank2 .utils .grid import Augmentation , GridSettings , MapMethod
@@ -265,12 +265,11 @@ def _build_helper(self) -> Graph:
265
265
structure = self ._load_structure ()
266
266
267
267
# find the variant residue and its surroundings
268
- variant_residue : Residue = None
269
268
for residue in structure .get_chain (self .variant_chain_id ).residues :
270
269
if residue .number == self .variant_residue_number and residue .insertion_code == self .insertion_code :
271
270
variant_residue = residue
272
271
break
273
- if variant_residue is None :
272
+ else : # if break is not reached
274
273
msg = f"Residue not found in { self .pdb_path } : { self .variant_chain_id } { self .residue_id } "
275
274
raise ValueError (msg )
276
275
self .variant = SingleResidueVariant (variant_residue , self .variant_amino_acid )
@@ -354,19 +353,12 @@ def _build_helper(self) -> Graph:
354
353
raise ValueError (msg )
355
354
356
355
# build the graph
357
- if self .resolution == "atom" :
358
- graph = Graph .build_graph (
359
- contact_atoms ,
360
- self .get_query_id (),
361
- self .max_edge_length ,
362
- )
363
- elif self .resolution == "residue" :
364
- residues_selected = list ({atom .residue for atom in contact_atoms })
365
- graph = Graph .build_graph (
366
- residues_selected ,
367
- self .get_query_id (),
368
- self .max_edge_length ,
369
- )
356
+ nodes = contact_atoms if self .resolution == "atom" else list ({atom .residue for atom in contact_atoms })
357
+ graph = Graph .build_graph (
358
+ nodes = nodes ,
359
+ graph_id = self .get_query_id (),
360
+ max_edge_length = self .max_edge_length ,
361
+ )
370
362
371
363
graph .center = np .mean ([atom .position for atom in contact_atoms ], axis = 0 )
372
364
structure = contact_atoms [0 ].residue .chain .model
@@ -453,7 +445,7 @@ def __iter__(self) -> Iterator[Query]:
453
445
def __len__ (self ) -> int :
454
446
return len (self ._queries )
455
447
456
- def _process_one_query (self , query : Query ) -> None :
448
+ def _process_one_query (self , query : Query , log_error_traceback : bool = False ) -> None :
457
449
"""Only one process may access an hdf5 file at a time."""
458
450
try :
459
451
output_path = f"{ self ._prefix } -{ os .getpid ()} .hdf5"
@@ -479,10 +471,12 @@ def _process_one_query(self, query: Query) -> None:
479
471
480
472
except (ValueError , AttributeError , KeyError , TimeoutError ) as e :
481
473
_log .warning (
482
- f"\n Graph/Query with ID { query .get_query_id ()} ran into an Exception ({ e .__class__ .__name__ } : { e } ),"
483
- " and it has not been written to the hdf5 file. More details below:" ,
474
+ f"Graph/Query with ID { query .get_query_id ()} ran into an Exception and was not written to the hdf5 file.\n "
475
+ f"Exception found: { e .__class__ .__name__ } : { e } .\n "
476
+ "You may proceed with your analysis, but this query will be ignored.\n " ,
484
477
)
485
- _log .exception (e )
478
+ if log_error_traceback :
479
+ _log .exception (f"----Full error traceback:----\n { e } " )
486
480
487
481
def process (
488
482
self ,
@@ -493,6 +487,7 @@ def process(
493
487
grid_settings : GridSettings | None = None ,
494
488
grid_map_method : MapMethod | None = None ,
495
489
grid_augmentation_count : int = 0 ,
490
+ log_error_traceback : bool = False ,
496
491
) -> list [str ]:
497
492
"""Render queries into graphs (and optionally grids).
498
493
@@ -510,6 +505,8 @@ def process(
510
505
grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None.
511
506
grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None.
512
507
grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0.
508
+ log_error_traceback: if True, logs the full error traceback in case a query fails. Otherwise only the error message is logged.
509
+ Defaults to False.
513
510
514
511
Returns:
515
512
The list of paths of the generated HDF5 files.
@@ -536,7 +533,7 @@ def process(
536
533
self ._grid_augmentation_count = grid_augmentation_count
537
534
538
535
_log .info (f"Creating pool function to process { len (self )} queries..." )
539
- pool_function = partial (self ._process_one_query )
536
+ pool_function = partial (self ._process_one_query , log_error_traceback = log_error_traceback )
540
537
with Pool (self ._cpu_count ) as pool :
541
538
_log .info ("Starting pooling...\n " )
542
539
pool .map (pool_function , self .queries )
@@ -551,6 +548,24 @@ def process(
551
548
os .remove (output_path )
552
549
return glob (f"{ prefix } .hdf5" )
553
550
551
+ n_processed = 0
552
+ for hdf5file in output_paths :
553
+ with h5py .File (hdf5file , "r" ) as hdf5 :
554
+ # List of all graphs in hdf5, each graph representing
555
+ # a SRV and its surrounding environment
556
+ n_processed += len (list (hdf5 .keys ()))
557
+
558
+ if not n_processed :
559
+ msg = "No queries have been processed."
560
+ raise ValueError (msg )
561
+ if n_processed != len (self .queries ):
562
+ _log .warning (
563
+ f"Not all queries have been processed. You can proceed with the analysis of { n_processed } /{ len (self .queries )} queries.\n "
564
+ "Set `log_error_traceback` to True for advanced troubleshooting." ,
565
+ )
566
+ else :
567
+ _log .info (f"{ n_processed } queries have been processed." )
568
+
554
569
return output_paths
555
570
556
571
def _set_feature_modules (self , feature_modules : list [ModuleType , str ] | ModuleType | str ) -> list [str ]:
0 commit comments