Skip to content

Commit a90c325

Browse files
authored
Merge branch 'dev' into 170_release_workflow_dbodor
2 parents 4e2964c + 592672a commit a90c325

File tree

12 files changed

+1006
-943
lines changed

12 files changed

+1006
-943
lines changed

.github/workflows/notebooks.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,11 @@ jobs:
5454
- name: Download the data for the tutorials
5555
shell: bash -l {0}
5656
run: |
57-
wget https://zenodo.org/records/8349335/files/data_raw.zip
57+
wget https://zenodo.org/records/13709906/files/data_raw.zip
5858
unzip data_raw.zip -d data_raw
5959
mv data_raw tutorials
60+
echo listing files in data_raw:
61+
ls tutorials/data_raw
6062
6163
- name: Run tutorial notebooks
6264
run: pytest --nbmake tutorials

.ruff.toml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
target-version = "py310"
2+
output-format = "concise"
3+
line-length = 159
4+
5+
[lint]
6+
select = ["ALL"]
7+
pydocstyle.convention = "google" # docstring settings
8+
ignore = [
9+
# Unrealistic for this code base
10+
"PTH", # flake8-use-pathlib
11+
"N", # naming conventions
12+
"PLR0912", # Too many branches,
13+
"PLR0913", # Too many arguments in function definition
14+
"D102", # Missing docstring in public method
15+
# Unwanted
16+
"FBT", # Using boolean arguments
17+
"ANN101", # Missing type annotation for `self` in method
18+
"ANN102", # Missing type annotation for `cls` in classmethod
19+
"ANN204", # Missing return type annotation for special (dunder) method
20+
"B028", # No explicit `stacklevel` keyword argument found in warning
21+
"S105", # Possible hardcoded password
22+
"S311", # insecure random generators
23+
"PT011", # pytest-raises-too-broad
24+
"SIM108", # Use ternary operator
25+
# Unwanted docstrings
26+
"D100", # Missing module docstring
27+
"D104", # Missing public package docstring
28+
"D105", # Missing docstring in magic method
29+
"D107", # Missing docstring in `__init__`
30+
]
31+
32+
# Autofix settings
33+
fixable = ["ALL"]
34+
unfixable = ["F401"] # unused imports (should not disappear while editing)
35+
extend-safe-fixes = [
36+
"D415", # First line should end with a period, question mark, or exclamation point
37+
"D300", # Use triple double quotes `"""`
38+
"D200", # One-line docstring should fit on one line
39+
"TCH", # Format type checking only imports
40+
"ISC001", # Implicitly concatenated strings on a single line
41+
"EM", # Exception message variables
42+
"RUF013", # Implicit Optional
43+
"B006", # Mutable default argument
44+
]
45+
46+
isort.known-first-party = ["deeprank2"]
47+
48+
[lint.per-file-ignores]
49+
"tests/*" = [
50+
"S101", # Use of `assert` detected
51+
"PLR2004", # Magic value used in comparison
52+
"D101", # Missing class docstring
53+
"D102", # Missing docstring in public method
54+
"D103", # Missing docstring in public function
55+
"SLF001", # private member access
56+
]
57+
"docs/*" = ["ALL"]
58+
"tests/perf/*" = ["T201"] # Use of print statements
59+
"*.ipynb" = ["T201", "E402", "D103"]

Dockerfile

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,51 @@
11
# Pull base image
2-
FROM --platform=linux/x86_64 condaforge/miniforge3:23.3.1-1
2+
FROM --platform=linux/amd64 ubuntu:22.04
3+
4+
ARG MINIFORGE_NAME=Miniforge3
5+
ARG MINIFORGE_VERSION=24.3.0-0
6+
ARG TARGETPLATFORM
7+
8+
ENV CONDA_DIR=/opt/conda
9+
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
10+
ENV PATH=${CONDA_DIR}/bin:${PATH}
11+
12+
RUN \
13+
## Install apt dependencies
14+
apt-get update && \
15+
apt-get install --no-install-recommends --yes \
16+
wget bzip2 unzip ca-certificates \
17+
git && \
18+
## Download and install Miniforge
19+
wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-x86_64.sh -O /tmp/miniforge.sh && \
20+
/bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
21+
rm /tmp/miniforge.sh && \
22+
echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
23+
echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
324

4-
# Add files
5-
ADD ./tutorials /home/deeprank2/tutorials
625
ADD ./env/deeprank2.yml /home/deeprank2
726

827
RUN \
9-
# Install dependencies and package
10-
apt update -y && \
11-
apt install unzip -y && \
12-
## GCC
13-
apt install -y gcc && \
1428
## Create the environment and install the dependencies
1529
mamba env create -f /home/deeprank2/deeprank2.yml && \
30+
conda install -n deeprank2 conda-forge::gcc && \
1631
## Activate the environment and install pip packages
17-
/opt/conda/bin/conda run -n deeprank2 pip install deeprank2 && \
32+
conda run -n deeprank2 pip install deeprank2 && \
1833
## Activate the environment automatically when entering the container
1934
echo "source activate deeprank2" >~/.bashrc && \
2035
# Get the data for running the tutorials
2136
if [ -d "/home/deeprank2/tutorials/data_raw" ]; then rm -Rf /home/deeprank2/tutorials/data_raw; fi && \
2237
if [ -d "/home/deeprank2/tutorials/data_processed" ]; then rm -Rf /home/deeprank2/tutorials/data_processed; fi && \
2338
wget https://zenodo.org/records/8349335/files/data_raw.zip && \
2439
unzip data_raw.zip -d data_raw && \
25-
mv data_raw /home/deeprank2/tutorials
40+
mv data_raw /home/deeprank2/tutorials && \
41+
apt-get clean && \
42+
rm -rf /var/lib/apt/lists/* && \
43+
conda clean --tarballs --index-cache --packages --yes && \
44+
find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
45+
find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
46+
conda clean --force-pkgs-dirs --all --yes
47+
48+
ADD ./tutorials /home/deeprank2/tutorials
2649

2750
ENV PATH /opt/conda/envs/deeprank2/bin:$PATH
2851

deeprank2/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def _check_and_inherit_train( # noqa: C901
112112
for key in data["features_transform"].values():
113113
if key["transform"] is None:
114114
continue
115-
key["transform"] = eval(key["transform"]) # noqa: S307, PGH001
115+
key["transform"] = eval(key["transform"]) # noqa: S307
116116
except pickle.UnpicklingError as e:
117117
msg = "The path provided to `train_source` is not a valid DeepRank2 pre-trained model."
118118
raise ValueError(msg) from e
@@ -277,7 +277,7 @@ def _filter_targets(self, grp: h5py.Group) -> bool:
277277
for operator_string in [">", "<", "==", "<=", ">=", "!="]:
278278
operation = operation.replace(operator_string, f"{target_value}" + operator_string)
279279

280-
if not eval(operation): # noqa: S307, PGH001
280+
if not eval(operation): # noqa: S307
281281
return False
282282

283283
elif target_condition is not None:

deeprank2/query.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import deeprank2.features
2323
from deeprank2.domain.aminoacidlist import convert_aa_nomenclature
2424
from deeprank2.features import components, conservation, contact
25-
from deeprank2.molstruct.residue import Residue, SingleResidueVariant
25+
from deeprank2.molstruct.residue import SingleResidueVariant
2626
from deeprank2.utils.buildgraph import get_contact_atoms, get_structure, get_surrounding_residues
2727
from deeprank2.utils.graph import Graph
2828
from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod
@@ -265,12 +265,11 @@ def _build_helper(self) -> Graph:
265265
structure = self._load_structure()
266266

267267
# find the variant residue and its surroundings
268-
variant_residue: Residue = None
269268
for residue in structure.get_chain(self.variant_chain_id).residues:
270269
if residue.number == self.variant_residue_number and residue.insertion_code == self.insertion_code:
271270
variant_residue = residue
272271
break
273-
if variant_residue is None:
272+
else: # if break is not reached
274273
msg = f"Residue not found in {self.pdb_path}: {self.variant_chain_id} {self.residue_id}"
275274
raise ValueError(msg)
276275
self.variant = SingleResidueVariant(variant_residue, self.variant_amino_acid)
@@ -354,19 +353,12 @@ def _build_helper(self) -> Graph:
354353
raise ValueError(msg)
355354

356355
# build the graph
357-
if self.resolution == "atom":
358-
graph = Graph.build_graph(
359-
contact_atoms,
360-
self.get_query_id(),
361-
self.max_edge_length,
362-
)
363-
elif self.resolution == "residue":
364-
residues_selected = list({atom.residue for atom in contact_atoms})
365-
graph = Graph.build_graph(
366-
residues_selected,
367-
self.get_query_id(),
368-
self.max_edge_length,
369-
)
356+
nodes = contact_atoms if self.resolution == "atom" else list({atom.residue for atom in contact_atoms})
357+
graph = Graph.build_graph(
358+
nodes=nodes,
359+
graph_id=self.get_query_id(),
360+
max_edge_length=self.max_edge_length,
361+
)
370362

371363
graph.center = np.mean([atom.position for atom in contact_atoms], axis=0)
372364
structure = contact_atoms[0].residue.chain.model
@@ -453,7 +445,7 @@ def __iter__(self) -> Iterator[Query]:
453445
def __len__(self) -> int:
454446
return len(self._queries)
455447

456-
def _process_one_query(self, query: Query) -> None:
448+
def _process_one_query(self, query: Query, log_error_traceback: bool = False) -> None:
457449
"""Only one process may access an hdf5 file at a time."""
458450
try:
459451
output_path = f"{self._prefix}-{os.getpid()}.hdf5"
@@ -479,10 +471,12 @@ def _process_one_query(self, query: Query) -> None:
479471

480472
except (ValueError, AttributeError, KeyError, TimeoutError) as e:
481473
_log.warning(
482-
f"\nGraph/Query with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),"
483-
" and it has not been written to the hdf5 file. More details below:",
474+
f"Graph/Query with ID {query.get_query_id()} ran into an Exception and was not written to the hdf5 file.\n"
475+
f"Exception found: {e.__class__.__name__}: {e}.\n"
476+
"You may proceed with your analysis, but this query will be ignored.\n",
484477
)
485-
_log.exception(e)
478+
if log_error_traceback:
479+
_log.exception(f"----Full error traceback:----\n{e}")
486480

487481
def process(
488482
self,
@@ -493,6 +487,7 @@ def process(
493487
grid_settings: GridSettings | None = None,
494488
grid_map_method: MapMethod | None = None,
495489
grid_augmentation_count: int = 0,
490+
log_error_traceback: bool = False,
496491
) -> list[str]:
497492
"""Render queries into graphs (and optionally grids).
498493
@@ -510,6 +505,8 @@ def process(
510505
grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None.
511506
grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None.
512507
grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0.
508+
log_error_traceback: If True, logs the full error traceback in case a query fails. Otherwise only the error message is logged.
509+
Defaults to False.
513510
514511
Returns:
515512
The list of paths of the generated HDF5 files.
@@ -536,7 +533,7 @@ def process(
536533
self._grid_augmentation_count = grid_augmentation_count
537534

538535
_log.info(f"Creating pool function to process {len(self)} queries...")
539-
pool_function = partial(self._process_one_query)
536+
pool_function = partial(self._process_one_query, log_error_traceback=log_error_traceback)
540537
with Pool(self._cpu_count) as pool:
541538
_log.info("Starting pooling...\n")
542539
pool.map(pool_function, self.queries)
@@ -551,6 +548,24 @@ def process(
551548
os.remove(output_path)
552549
return glob(f"{prefix}.hdf5")
553550

551+
n_processed = 0
552+
for hdf5file in output_paths:
553+
with h5py.File(hdf5file, "r") as hdf5:
554+
# List of all graphs in hdf5, each graph representing
555+
# an SRV and its surrounding environment
556+
n_processed += len(list(hdf5.keys()))
557+
558+
if not n_processed:
559+
msg = "No queries have been processed."
560+
raise ValueError(msg)
561+
if n_processed != len(self.queries):
562+
_log.warning(
563+
f"Not all queries have been processed. You can proceed with the analysis of {n_processed}/{len(self.queries)} queries.\n"
564+
"Set `log_error_traceback` to True for advanced troubleshooting.",
565+
)
566+
else:
567+
_log.info(f"{n_processed} queries have been processed.")
568+
554569
return output_paths
555570

556571
def _set_feature_modules(self, feature_modules: list[ModuleType, str] | ModuleType | str) -> list[str]:

pyproject.toml

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -89,62 +89,3 @@ include = ["deeprank2*"]
8989
[tool.pytest.ini_options]
9090
# pytest options: -ra: show summary info for all test outcomes
9191
addopts = "-ra"
92-
93-
[tool.ruff]
94-
output-format = "concise"
95-
line-length = 159
96-
97-
[tool.ruff.lint]
98-
select = ["ALL"]
99-
pydocstyle.convention = "google" # docstring settings
100-
ignore = [
101-
# Unrealistic for this code base
102-
"PTH", # flake8-use-pathlib
103-
"N", # naming conventions
104-
"PLR0912", # Too many branches,
105-
"PLR0913", # Too many arguments in function definition
106-
"D102", # Missing docstring in public method
107-
# Unwanted
108-
"FBT", # Using boolean arguments
109-
"ANN101", # Missing type annotation for `self` in method
110-
"ANN102", # Missing type annotation for `cls` in classmethod
111-
"ANN204", # Missing return type annotation for special (dunder) method
112-
"B028", # No explicit `stacklevel` keyword argument found in warning
113-
"S105", # Possible hardcoded password
114-
"S311", # insecure random generators
115-
"PT011", # pytest-raises-too-broad
116-
"SIM108", # Use ternary operator
117-
# Unwanted docstrings
118-
"D100", # Missing module docstring
119-
"D104", # Missing public package docstring
120-
"D105", # Missing docstring in magic method
121-
"D107", # Missing docstring in `__init__`
122-
]
123-
124-
# Autofix settings
125-
fixable = ["ALL"]
126-
unfixable = ["F401"] # unused imports (should not disappear while editing)
127-
extend-safe-fixes = [
128-
"D415", # First line should end with a period, question mark, or exclamation point
129-
"D300", # Use triple double quotes `"""`
130-
"D200", # One-line docstring should fit on one line
131-
"TCH", # Format type checking only imports
132-
"ISC001", # Implicitly concatenated strings on a single line
133-
"EM", # Exception message variables
134-
"RUF013", # Implicit Optional
135-
"B006", # Mutable default argument
136-
]
137-
138-
isort.known-first-party = ["deeprank2"]
139-
140-
[tool.ruff.lint.per-file-ignores]
141-
"tests/*" = [
142-
"S101", # Use of `assert` detected
143-
"PLR2004", # Magic value used in comparison
144-
"D101", # Missing class docstring
145-
"D102", # Missing docstring in public method
146-
"D103", # Missing docstring in public function
147-
"SLF001", # private member access
148-
]
149-
"docs/*" = ["ALL"]
150-
"tests/perf/*" = ["T201"] # Use of print statements

0 commit comments

Comments
 (0)