Commit

Merge branch 'dev' into 170_release_workflow_dbodor
gcroci2 authored Sep 6, 2024
2 parents 4e2964c + 592672a commit a90c325
Showing 12 changed files with 1,006 additions and 943 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/notebooks.yml
@@ -54,9 +54,11 @@ jobs:
- name: Download the data for the tutorials
shell: bash -l {0}
run: |
-wget https://zenodo.org/records/8349335/files/data_raw.zip
+wget https://zenodo.org/records/13709906/files/data_raw.zip
unzip data_raw.zip -d data_raw
mv data_raw tutorials
+echo listing files in data_raw:
+ls tutorials/data_raw
- name: Run tutorial notebooks
run: pytest --nbmake tutorials
59 changes: 59 additions & 0 deletions .ruff.toml
@@ -0,0 +1,59 @@
+target-version = "py310"
+output-format = "concise"
+line-length = 159
+
+[lint]
+select = ["ALL"]
+pydocstyle.convention = "google" # docstring settings
+ignore = [
+# Unrealistic for this code base
+"PTH", # flake8-use-pathlib
+"N", # naming conventions
+"PLR0912", # Too many branches,
+"PLR0913", # Too many arguments in function definition
+"D102", # Missing docstring in public method
+# Unwanted
+"FBT", # Using boolean arguments
+"ANN101", # Missing type annotation for `self` in method
+"ANN102", # Missing type annotation for `cls` in classmethod
+"ANN204", # Missing return type annotation for special (dunder) method
+"B028", # No explicit `stacklevel` keyword argument found in warning
+"S105", # Possible hardcoded password
+"S311", # insecure random generators
+"PT011", # pytest-raises-too-broad
+"SIM108", # Use ternary operator
+# Unwanted docstrings
+"D100", # Missing module docstring
+"D104", # Missing public package docstring
+"D105", # Missing docstring in magic method
+"D107", # Missing docstring in `__init__`
+]
+
+# Autofix settings
+fixable = ["ALL"]
+unfixable = ["F401"] # unused imports (should not disappear while editing)
+extend-safe-fixes = [
+"D415", # First line should end with a period, question mark, or exclamation point
+"D300", # Use triple double quotes `"""`
+"D200", # One-line docstring should fit on one line
+"TCH", # Format type checking only imports
+"ISC001", # Implicitly concatenated strings on a single line
+"EM", # Exception message variables
+"RUF013", # Implicit Optional
+"B006", # Mutable default argument
+]
+
+isort.known-first-party = ["deeprank2"]
+
+[lint.per-file-ignores]
+"tests/*" = [
+"S101", # Use of `assert` detected
+"PLR2004", # Magic value used in comparison
+"D101", # Missing class docstring
+"D102", # Missing docstring in public method
+"D103", # Missing docstring in public function
+"SLF001", # private member access
+]
+"docs/*" = ["ALL"]
+"tests/perf/*" = ["T201"] # Use of print statements
+"*.ipynb" = ["T201", "E402", "D103"]
43 changes: 33 additions & 10 deletions Dockerfile
@@ -1,28 +1,51 @@
# Pull base image
-FROM --platform=linux/x86_64 condaforge/miniforge3:23.3.1-1
+FROM --platform=linux/amd64 ubuntu:22.04

+ARG MINIFORGE_NAME=Miniforge3
+ARG MINIFORGE_VERSION=24.3.0-0
+ARG TARGETPLATFORM
+
+ENV CONDA_DIR=/opt/conda
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=${CONDA_DIR}/bin:${PATH}
+
+RUN \
+## Install apt dependencies
+apt-get update && \
+apt-get install --no-install-recommends --yes \
+wget bzip2 unzip ca-certificates \
+git && \
+## Download and install Miniforge
+wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-x86_64.sh -O /tmp/miniforge.sh && \
+/bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
+rm /tmp/miniforge.sh && \
+echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
+echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
+
# Add files
-ADD ./tutorials /home/deeprank2/tutorials
ADD ./env/deeprank2.yml /home/deeprank2

RUN \
# Install dependencies and package
-apt update -y && \
-apt install unzip -y && \
-## GCC
-apt install -y gcc && \
## Create the environment and install the dependencies
mamba env create -f /home/deeprank2/deeprank2.yml && \
+conda install -n deeprank2 conda-forge::gcc && \
## Activate the environment and install pip packages
-/opt/conda/bin/conda run -n deeprank2 pip install deeprank2 && \
+conda run -n deeprank2 pip install deeprank2 && \
## Activate the environment automatically when entering the container
echo "source activate deeprank2" >~/.bashrc && \
# Get the data for running the tutorials
if [ -d "/home/deeprank2/tutorials/data_raw" ]; then rm -Rf /home/deeprank2/tutorials/data_raw; fi && \
if [ -d "/home/deeprank2/tutorials/data_processed" ]; then rm -Rf /home/deeprank2/tutorials/data_processed; fi && \
wget https://zenodo.org/records/8349335/files/data_raw.zip && \
unzip data_raw.zip -d data_raw && \
-mv data_raw /home/deeprank2/tutorials
+mv data_raw /home/deeprank2/tutorials && \
+apt-get clean && \
+rm -rf /var/lib/apt/lists/* && \
+conda clean --tarballs --index-cache --packages --yes && \
+find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
+find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
+conda clean --force-pkgs-dirs --all --yes
+
+ADD ./tutorials /home/deeprank2/tutorials

ENV PATH /opt/conda/envs/deeprank2/bin:$PATH

4 changes: 2 additions & 2 deletions deeprank2/dataset.py
@@ -112,7 +112,7 @@ def _check_and_inherit_train( # noqa: C901
for key in data["features_transform"].values():
if key["transform"] is None:
continue
key["transform"] = eval(key["transform"]) # noqa: S307, PGH001
key["transform"] = eval(key["transform"]) # noqa: S307
except pickle.UnpicklingError as e:
msg = "The path provided to `train_source` is not a valid DeepRank2 pre-trained model."
raise ValueError(msg) from e
@@ -277,7 +277,7 @@ def _filter_targets(self, grp: h5py.Group) -> bool:
for operator_string in [">", "<", "==", "<=", ">=", "!="]:
operation = operation.replace(operator_string, f"{target_value}" + operator_string)

-if not eval(operation): # noqa: S307, PGH001
+if not eval(operation): # noqa: S307
return False

elif target_condition is not None:
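To see what the `eval` calls above undo, here is a minimal sketch (assumed usage of the `features_transform` dict that deeprank2 datasets accept; not code from this commit): transforms are supplied as callables, stored as their source strings inside a pre-trained model, and rebuilt into callables when the training settings are inherited.

# Minimal sketch -- assumed shapes, not code from this commit
import numpy as np

# A features_transform dict as passed to a deeprank2 dataset: each feature
# maps to a dict whose "transform" entry is a callable (or None).
features_transform = {
    "bsa": {"transform": lambda x: np.log(x + 1), "standardize": True},
}

# A pre-trained model stores each transform as its source string; inheriting
# the training settings turns the string back into a callable, which is what
# eval(key["transform"]) above does.
stored = {"transform": "lambda x: np.log(x + 1)"}
stored["transform"] = eval(stored["transform"])  # noqa: S307
assert stored["transform"](0) == 0.0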
57 changes: 36 additions & 21 deletions deeprank2/query.py
@@ -22,7 +22,7 @@
import deeprank2.features
from deeprank2.domain.aminoacidlist import convert_aa_nomenclature
from deeprank2.features import components, conservation, contact
-from deeprank2.molstruct.residue import Residue, SingleResidueVariant
+from deeprank2.molstruct.residue import SingleResidueVariant
from deeprank2.utils.buildgraph import get_contact_atoms, get_structure, get_surrounding_residues
from deeprank2.utils.graph import Graph
from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod
@@ -265,12 +265,11 @@ def _build_helper(self) -> Graph:
structure = self._load_structure()

# find the variant residue and its surroundings
-variant_residue: Residue = None
for residue in structure.get_chain(self.variant_chain_id).residues:
if residue.number == self.variant_residue_number and residue.insertion_code == self.insertion_code:
variant_residue = residue
break
-if variant_residue is None:
+else: # if break is not reached
msg = f"Residue not found in {self.pdb_path}: {self.variant_chain_id} {self.residue_id}"
raise ValueError(msg)
self.variant = SingleResidueVariant(variant_residue, self.variant_amino_acid)
@@ -354,19 +353,12 @@ def _build_helper(self) -> Graph:
raise ValueError(msg)

# build the graph
if self.resolution == "atom":
graph = Graph.build_graph(
contact_atoms,
self.get_query_id(),
self.max_edge_length,
)
elif self.resolution == "residue":
residues_selected = list({atom.residue for atom in contact_atoms})
graph = Graph.build_graph(
residues_selected,
self.get_query_id(),
self.max_edge_length,
)
nodes = contact_atoms if self.resolution == "atom" else list({atom.residue for atom in contact_atoms})
graph = Graph.build_graph(
nodes=nodes,
graph_id=self.get_query_id(),
max_edge_length=self.max_edge_length,
)

graph.center = np.mean([atom.position for atom in contact_atoms], axis=0)
structure = contact_atoms[0].residue.chain.model
@@ -453,7 +445,7 @@ def __iter__(self) -> Iterator[Query]:
def __len__(self) -> int:
return len(self._queries)

-def _process_one_query(self, query: Query) -> None:
+def _process_one_query(self, query: Query, log_error_traceback: bool = False) -> None:
"""Only one process may access an hdf5 file at a time."""
try:
output_path = f"{self._prefix}-{os.getpid()}.hdf5"
@@ -479,10 +471,12 @@ def _process_one_query(self, query: Query) -> None:

except (ValueError, AttributeError, KeyError, TimeoutError) as e:
_log.warning(
f"\nGraph/Query with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),"
" and it has not been written to the hdf5 file. More details below:",
f"Graph/Query with ID {query.get_query_id()} ran into an Exception and was not written to the hdf5 file.\n"
f"Exception found: {e.__class__.__name__}: {e}.\n"
"You may proceed with your analysis, but this query will be ignored.\n",
)
_log.exception(e)
if log_error_traceback:
_log.exception(f"----Full error traceback:----\n{e}")

def process(
self,
@@ -493,6 +487,7 @@ def process(
grid_settings: GridSettings | None = None,
grid_map_method: MapMethod | None = None,
grid_augmentation_count: int = 0,
+log_error_traceback: bool = False,
) -> list[str]:
"""Render queries into graphs (and optionally grids).
@@ -510,6 +505,8 @@ def process(
grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None.
grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None.
grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0.
+log_error_traceback: If True, logs the full error traceback in case a query fails. Otherwise, only the error message is logged.
+Defaults to False.
Returns:
The list of paths of the generated HDF5 files.
@@ -536,7 +533,7 @@ def process(
self._grid_augmentation_count = grid_augmentation_count

_log.info(f"Creating pool function to process {len(self)} queries...")
-pool_function = partial(self._process_one_query)
+pool_function = partial(self._process_one_query, log_error_traceback=log_error_traceback)
with Pool(self._cpu_count) as pool:
_log.info("Starting pooling...\n")
pool.map(pool_function, self.queries)
@@ -551,6 +548,24 @@ def process(
os.remove(output_path)
return glob(f"{prefix}.hdf5")

+n_processed = 0
+for hdf5file in output_paths:
+with h5py.File(hdf5file, "r") as hdf5:
+# List of all graphs in hdf5, each graph representing
+# an SRV and its surrounding environment
+n_processed += len(list(hdf5.keys()))
+
+if not n_processed:
+msg = "No queries have been processed."
+raise ValueError(msg)
+if n_processed != len(self.queries):
+_log.warning(
+f"Not all queries have been processed. You can proceed with the analysis of {n_processed}/{len(self.queries)} queries.\n"
+"Set `log_error_traceback` to True for advanced troubleshooting.",
+)
+else:
+_log.info(f"{n_processed} queries have been processed.")

return output_paths

def _set_feature_modules(self, feature_modules: list[ModuleType, str] | ModuleType | str) -> list[str]:
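A hedged usage sketch of the new `log_error_traceback` flag (the PDB path and chain IDs are placeholders, not files from this commit): with the default False, a failing query logs only the short warning above; with True, the full traceback is logged, and the new post-processing count then reports how many queries made it into the HDF5 files.

# Usage sketch -- placeholder inputs, not shipped with this commit
from deeprank2.query import ProteinProteinInterfaceQuery, QueryCollection

queries = QueryCollection()
queries.add(
    ProteinProteinInterfaceQuery(
        pdb_path="1ATN.pdb",  # placeholder structure file
        resolution="residue",
        chain_ids=["A", "B"],
    )
)
output_paths = queries.process(
    prefix="processed-data",
    log_error_traceback=True,  # log full tracebacks for failed queries
)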
59 changes: 0 additions & 59 deletions pyproject.toml
@@ -89,62 +89,3 @@ include = ["deeprank2*"]
[tool.pytest.ini_options]
# pytest options: -ra: show summary info for all test outcomes
addopts = "-ra"

-[tool.ruff]
-output-format = "concise"
-line-length = 159
-
-[tool.ruff.lint]
-select = ["ALL"]
-pydocstyle.convention = "google" # docstring settings
-ignore = [
-# Unrealistic for this code base
-"PTH", # flake8-use-pathlib
-"N", # naming conventions
-"PLR0912", # Too many branches,
-"PLR0913", # Too many arguments in function definition
-"D102", # Missing docstring in public method
-# Unwanted
-"FBT", # Using boolean arguments
-"ANN101", # Missing type annotation for `self` in method
-"ANN102", # Missing type annotation for `cls` in classmethod
-"ANN204", # Missing return type annotation for special (dunder) method
-"B028", # No explicit `stacklevel` keyword argument found in warning
-"S105", # Possible hardcoded password
-"S311", # insecure random generators
-"PT011", # pytest-raises-too-broad
-"SIM108", # Use ternary operator
-# Unwanted docstrings
-"D100", # Missing module docstring
-"D104", # Missing public package docstring
-"D105", # Missing docstring in magic method
-"D107", # Missing docstring in `__init__`
-]
-
-# Autofix settings
-fixable = ["ALL"]
-unfixable = ["F401"] # unused imports (should not disappear while editing)
-extend-safe-fixes = [
-"D415", # First line should end with a period, question mark, or exclamation point
-"D300", # Use triple double quotes `"""`
-"D200", # One-line docstring should fit on one line
-"TCH", # Format type checking only imports
-"ISC001", # Implicitly concatenated strings on a single line
-"EM", # Exception message variables
-"RUF013", # Implicit Optional
-"B006", # Mutable default argument
-]
-
-isort.known-first-party = ["deeprank2"]
-
-[tool.ruff.lint.per-file-ignores]
-"tests/*" = [
-"S101", # Use of `assert` detected
-"PLR2004", # Magic value used in comparison
-"D101", # Missing class docstring
-"D102", # Missing docstring in public method
-"D103", # Missing docstring in public function
-"SLF001", # private member access
-]
-"docs/*" = ["ALL"]
-"tests/perf/*" = ["T201"] # Use of print statements