 import deeprank2.features
 from deeprank2.domain.aminoacidlist import convert_aa_nomenclature
 from deeprank2.features import components, conservation, contact
-from deeprank2.molstruct.residue import Residue, SingleResidueVariant
+from deeprank2.molstruct.residue import SingleResidueVariant
 from deeprank2.utils.buildgraph import get_contact_atoms, get_structure, get_surrounding_residues
 from deeprank2.utils.graph import Graph
 from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod
@@ -265,12 +265,11 @@ def _build_helper(self) -> Graph:
         structure = self._load_structure()
 
         # find the variant residue and its surroundings
-        variant_residue: Residue = None
         for residue in structure.get_chain(self.variant_chain_id).residues:
             if residue.number == self.variant_residue_number and residue.insertion_code == self.insertion_code:
                 variant_residue = residue
                 break
-        if variant_residue is None:
+        else:  # if break is not reached
             msg = f"Residue not found in {self.pdb_path}: {self.variant_chain_id} {self.residue_id}"
             raise ValueError(msg)
         self.variant = SingleResidueVariant(variant_residue, self.variant_amino_acid)
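
The sentinel variable is gone in favor of Python's `for ... else` clause: the `else` body runs only when the loop finishes without hitting `break`, so the "not found" error is raised exactly when no matching residue was seen. A minimal, self-contained sketch of the same control flow (the names below are illustrative, not deeprank2 code):

```python
def find_residue_number(residue_numbers: list[int], target: int) -> int:
    """Return the matching residue number, using the for/else idiom."""
    for number in residue_numbers:
        if number == target:
            found = number
            break  # skips the else clause below
    else:  # only reached if the loop completed without break
        msg = f"Residue {target} not found"
        raise ValueError(msg)
    return found


print(find_residue_number([10, 25, 42], 25))  # 25
# find_residue_number([10, 25, 42], 99) raises ValueError
```
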
@@ -354,19 +353,12 @@ def _build_helper(self) -> Graph:
             raise ValueError(msg)
 
         # build the graph
-        if self.resolution == "atom":
-            graph = Graph.build_graph(
-                contact_atoms,
-                self.get_query_id(),
-                self.max_edge_length,
-            )
-        elif self.resolution == "residue":
-            residues_selected = list({atom.residue for atom in contact_atoms})
-            graph = Graph.build_graph(
-                residues_selected,
-                self.get_query_id(),
-                self.max_edge_length,
-            )
+        nodes = contact_atoms if self.resolution == "atom" else list({atom.residue for atom in contact_atoms})
+        graph = Graph.build_graph(
+            nodes=nodes,
+            graph_id=self.get_query_id(),
+            max_edge_length=self.max_edge_length,
+        )
 
         graph.center = np.mean([atom.position for atom in contact_atoms], axis=0)
         structure = contact_atoms[0].residue.chain.model
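
The atom/residue branches collapse into a single call: the node list is either the contact atoms themselves or, at residue resolution, the deduplicated set of their parent residues (a set comprehension, so order is not preserved). A toy sketch of that deduplication step, using made-up stand-ins for deeprank2's Atom and Residue classes:

```python
from dataclasses import dataclass


@dataclass(frozen=True)  # frozen dataclasses are hashable, so they can live in a set
class Residue:
    number: int


@dataclass(frozen=True)
class Atom:
    name: str
    residue: Residue


contact_atoms = [Atom("CA", Residue(1)), Atom("CB", Residue(1)), Atom("CA", Residue(2))]

resolution = "residue"  # or "atom"
nodes = contact_atoms if resolution == "atom" else list({atom.residue for atom in contact_atoms})
print(nodes)  # the two unique residues, in arbitrary order
```
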
@@ -453,7 +445,7 @@ def __iter__(self) -> Iterator[Query]:
     def __len__(self) -> int:
         return len(self._queries)
 
-    def _process_one_query(self, query: Query) -> None:
+    def _process_one_query(self, query: Query, log_error_traceback: bool = False) -> None:
         """Only one process may access an hdf5 file at a time."""
         try:
             output_path = f"{self._prefix}-{os.getpid()}.hdf5"
@@ -479,10 +471,12 @@ def _process_one_query(self, query: Query) -> None:
 
         except (ValueError, AttributeError, KeyError, TimeoutError) as e:
             _log.warning(
-                f"\nGraph/Query with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),"
-                " and it has not been written to the hdf5 file. More details below:",
+                f"Graph/Query with ID {query.get_query_id()} ran into an Exception and was not written to the hdf5 file.\n"
+                f"Exception found: {e.__class__.__name__}: {e}.\n"
+                "You may proceed with your analysis, but this query will be ignored.\n",
            )
-            _log.exception(e)
+            if log_error_traceback:
+                _log.exception(f"----Full error traceback:----\n{e}")
 
     def process(
         self,
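
The full traceback becomes opt-in: `_log.warning` keeps the short message, and `_log.exception`, which appends the current traceback when called inside an `except` block, only fires when `log_error_traceback` is set. A standalone sketch of the same pattern using the standard `logging` module (function and logger names here are invented):

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("query-demo")


def run_query(data: dict, log_error_traceback: bool = False) -> None:
    try:
        data["missing_key"]  # raises KeyError for an empty dict
    except (ValueError, KeyError) as e:
        log.warning(f"Query failed ({e.__class__.__name__}: {e}); it will be skipped.")
        if log_error_traceback:
            # logging's exception() logs at ERROR level and appends the traceback
            log.exception(f"----Full error traceback:----\n{e}")


run_query({})                            # short warning only
run_query({}, log_error_traceback=True)  # warning plus full traceback
```
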
@@ -493,6 +487,7 @@ def process(
         grid_settings: GridSettings | None = None,
         grid_map_method: MapMethod | None = None,
         grid_augmentation_count: int = 0,
+        log_error_traceback: bool = False,
     ) -> list[str]:
         """Render queries into graphs (and optionally grids).
 
@@ -510,6 +505,8 @@ def process(
             grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None.
             grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None.
             grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0.
+            log_error_traceback: If True, the full error traceback is logged when a query fails; otherwise only the error message is logged.
+                Defaults to False.
 
         Returns:
             The list of paths of the generated HDF5 files.
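
For callers, the new behavior is just an extra keyword argument on `process()`. A hedged usage sketch, assuming `queries` is an already-populated `QueryCollection`; apart from `log_error_traceback`, the argument shown (`prefix`) is inferred from the surrounding diff rather than documented here:

```python
# Hypothetical usage: `queries` is a QueryCollection already filled with Query objects.
output_paths = queries.process(
    prefix="processed-queries",   # prefix of the generated .hdf5 files
    log_error_traceback=True,     # log the full traceback for any failing query
)
print(output_paths)  # list of generated HDF5 file paths
```
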
@@ -536,7 +533,7 @@ def process(
         self._grid_augmentation_count = grid_augmentation_count
 
         _log.info(f"Creating pool function to process {len(self)} queries...")
-        pool_function = partial(self._process_one_query)
+        pool_function = partial(self._process_one_query, log_error_traceback=log_error_traceback)
         with Pool(self._cpu_count) as pool:
             _log.info("Starting pooling...\n")
             pool.map(pool_function, self.queries)
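
`Pool.map` passes exactly one positional argument per item, so the extra flag is bound up front with `functools.partial`. A minimal sketch of that pattern, independent of deeprank2:

```python
from functools import partial
from multiprocessing import Pool


def process_item(item: int, verbose: bool = False) -> int:
    if verbose:
        print(f"processing {item}")
    return item * item


if __name__ == "__main__":
    # Bind the keyword argument so Pool.map can call the function with a single argument.
    pool_function = partial(process_item, verbose=True)
    with Pool(2) as pool:
        results = pool.map(pool_function, [1, 2, 3, 4])
    print(results)  # [1, 4, 9, 16]
```
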
@@ -551,6 +548,24 @@ def process(
                 os.remove(output_path)
             return glob(f"{prefix}.hdf5")
 
+        n_processed = 0
+        for hdf5file in output_paths:
+            with h5py.File(hdf5file, "r") as hdf5:
+                # List of all graphs in hdf5, each graph representing
+                # a SRV and its surrounding environment
+                n_processed += len(list(hdf5.keys()))
+
+        if not n_processed:
+            msg = "No queries have been processed."
+            raise ValueError(msg)
+        if n_processed != len(self.queries):
+            _log.warning(
+                f"Not all queries have been processed. You can proceed with the analysis of {n_processed}/{len(self.queries)} queries.\n"
+                "Set `log_error_traceback` to True for advanced troubleshooting.",
+            )
+        else:
+            _log.info(f"{n_processed} queries have been processed.")
+
         return output_paths
 
     def _set_feature_modules(self, feature_modules: list[ModuleType, str] | ModuleType | str) -> list[str]:
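
The new check counts the top-level HDF5 groups (one per processed query) across all output files and compares the total with the number of submitted queries, warning when some queries were dropped. A small, self-contained sketch of the counting step; file and group names here are invented for illustration:

```python
import h5py

# Create two toy HDF5 files, each holding a few top-level groups (one group per "query").
for path, group_names in [("demo-1.hdf5", ["query-a", "query-b"]), ("demo-2.hdf5", ["query-c"])]:
    with h5py.File(path, "w") as hdf5:
        for name in group_names:
            hdf5.create_group(name)

output_paths = ["demo-1.hdf5", "demo-2.hdf5"]
n_processed = 0
for hdf5file in output_paths:
    with h5py.File(hdf5file, "r") as hdf5:
        n_processed += len(list(hdf5.keys()))  # top-level groups = processed queries

print(n_processed)  # 3
```
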