Skip to content

Commit 966e386

Browse files
authored
Merge branch 'master' into 71-joss-paper
2 parents edb9429 + d8a705a commit 966e386

File tree

12 files changed

+119
-21
lines changed

12 files changed

+119
-21
lines changed

Diff for: .github/workflows/ci.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,24 @@ jobs:
2929
fail-fast: false
3030
matrix:
3131
os: ["ubuntu-latest", "macOS-latest", "windows-latest"]
32-
python-version: ["3.8", "3.9", "3.10", "3.11"]
32+
python-version: ["3.9", "3.10", "3.11", "3.12"]
3333
# Only test lowest and highest version on the expensive/slow
3434
# macOS and windows runners (UPDATE when supported versions change):
3535
exclude:
3636
- os: macOS-latest
3737
python-version: 3.10
3838
- os: macOS-latest
39-
python-version: 3.9
39+
python-version: 3.11
4040
- os: windows-latest
4141
python-version: 3.10
4242
- os: windows-latest
43-
python-version: 3.9
43+
python-version: 3.11
4444

4545
steps:
4646
- uses: actions/checkout@v3
4747

4848
# More info on options: https://github.com/conda-incubator/setup-miniconda
49-
- uses: mamba-org/provision-with-micromamba@main
49+
- uses: mamba-org/setup-micromamba@main
5050
with:
5151
environment-file: devtools/conda-envs/test_env.yaml
5252
environment-name: test

Diff for: CHANGES

+11-2
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,22 @@ The rules for this file:
1414

1515
------------------------------------------------------------------------------
1616

17-
*/*/2023 hl2500
17+
*/*/2023 hl2500, xiki-tempula
1818

1919
* 2.2.0
20-
20+
21+
Changes
22+
- Require pandas >= 2.1 (PR #340)
23+
- For pandas>=2.1, metadata will be loaded from the parquet file (issue #331, PR #340).
24+
- Add support for Python 3.12, remove Python 3.8 support (issue #341, PR #304).
25+
2126
Enhancements
2227
- Add a TI estimator using gaussian quadrature to calculate the free energy.
2328
(issue #302, PR #304)
29+
- Warning issued when the series is `None` for `statistical_inefficiency`
30+
(issue #337, PR #338)
31+
- ValueError issued when `df` and `series` for `statistical_inefficiency`
32+
don't have the same length (issue #337, PR #338)
2433

2534

2635
22/06/2023 xiki-tempula

Diff for: devtools/conda-envs/test_env.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
dependencies:
55
- python
66
- numpy
7-
- pandas
7+
- pandas>=2.1
88
- pymbar>=4
99
- scipy
1010
- scikit-learn

Diff for: environment.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ channels:
44
dependencies:
55
- python
66
- numpy
7-
- pandas
7+
- pandas>=2.1
88
- pymbar>=4
99
- scipy
1010
- scikit-learn
1111
- pyarrow
1212
- matplotlib
13+
- loguru

Diff for: readthedocs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ build:
1313
python: "mambaforge-4.10"
1414

1515
conda:
16-
environment: environment.yml
16+
environment: devtools/conda-envs/test_env.yaml
1717

1818
python:
1919
install:

Diff for: setup.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@
2929
"Operating System :: Microsoft :: Windows ",
3030
"Programming Language :: Python",
3131
"Programming Language :: Python :: 3",
32-
"Programming Language :: Python :: 3.8",
3332
"Programming Language :: Python :: 3.9",
3433
"Programming Language :: Python :: 3.10",
3534
"Programming Language :: Python :: 3.11",
35+
"Programming Language :: Python :: 3.12",
3636
"Topic :: Scientific/Engineering",
3737
"Topic :: Scientific/Engineering :: Bio-Informatics",
3838
"Topic :: Scientific/Engineering :: Chemistry",
@@ -43,11 +43,11 @@
4343
license="BSD",
4444
long_description=open("README.rst").read(),
4545
long_description_content_type="text/x-rst",
46-
python_requires=">=3.8",
46+
python_requires=">=3.9",
4747
tests_require=["pytest", "alchemtest"],
4848
install_requires=[
4949
"numpy",
50-
"pandas>=1.4",
50+
"pandas>=2.1",
5151
"pymbar>=4",
5252
"scipy",
5353
"scikit-learn",

Diff for: src/alchemlyb/parsing/parquet.py

+36-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,42 @@
11
import pandas as pd
2+
from loguru import logger
23

34
from . import _init_attrs
45

56

6-
@_init_attrs
7+
def _read_parquet_with_metadata(path: str, T: float) -> pd.DataFrame:
8+
"""
9+
Check if the metadata is included in the Dataframe and has the correct
10+
temperature.
11+
12+
Parameters
13+
----------
14+
path : str
15+
Path to parquet file to extract dataframe from.
16+
T : float
17+
Temperature in Kelvin of the simulations.
18+
19+
Returns
20+
-------
21+
DataFrame
22+
"""
23+
df = pd.read_parquet(path)
24+
if "temperature" not in df.attrs:
25+
logger.warning(
26+
f"No temperature metadata found in {path}. "
27+
f"Serialise the Dataframe with pandas>=2.1 to preserve the metadata."
28+
)
29+
df.attrs["temperature"] = T
30+
df.attrs["energy_unit"] = "kT"
31+
else:
32+
if df.attrs["temperature"] != T:
33+
raise ValueError(
34+
f"Temperature in the input ({T}) doesn't match the temperature "
35+
f"in the dataframe ({df.attrs['temperature']})."
36+
)
37+
return df
38+
39+
740
def extract_u_nk(path, T):
841
r"""Return reduced potentials `u_nk` (unit: kT) from a pandas parquet file.
942
@@ -36,7 +69,7 @@ def extract_u_nk(path, T):
3669
.. versionadded:: 2.1.0
3770
3871
"""
39-
u_nk = pd.read_parquet(path)
72+
u_nk = _read_parquet_with_metadata(path, T)
4073
columns = list(u_nk.columns)
4174
if isinstance(columns[0], str) and columns[0][0] == "(":
4275
new_columns = []
@@ -81,4 +114,4 @@ def extract_dHdl(path, T):
81114
.. versionadded:: 2.1.0
82115
83116
"""
84-
return pd.read_parquet(path)
117+
return _read_parquet_with_metadata(path, T)

Diff for: src/alchemlyb/preprocessing/subsampling.py

+9
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,15 @@ def _prepare_input(df, series, drop_duplicates, sort):
363363
series : Series
364364
Formatted Series.
365365
"""
366+
if series is None:
367+
warnings.warn(
368+
"The series input is `None`, would not subsample according to statistical inefficiency."
369+
)
370+
371+
elif len(df) != len(series):
372+
raise ValueError(
373+
f"The length of df ({len(df)}) should be same as the length of series ({len(series)})."
374+
)
366375
if _check_multiple_times(df):
367376
if drop_duplicates:
368377
df, series = _drop_duplicates(df, series)

Diff for: src/alchemlyb/tests/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def gmx_ABFE():
8282

8383

8484
@pytest.fixture
85-
def gmx_ABFE_complex_n_uk(gmx_ABFE):
85+
def gmx_ABFE_complex_u_nk(gmx_ABFE):
8686
return [gmx.extract_u_nk(file, T=300) for file in gmx_ABFE["complex"]]
8787

8888

Diff for: src/alchemlyb/tests/parsing/test_parquet.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,45 @@ def test_extract_dHdl(dHdl_list, request, tmp_path):
1212
new_dHdl = extract_dHdl(str(tmp_path / "dhdl.parquet"), T=300)
1313
assert (new_dHdl.columns == dHdl.columns).all()
1414
assert (new_dHdl.index == dHdl.index).all()
15+
assert new_dHdl.attrs["temperature"] == 300
16+
assert new_dHdl.attrs["energy_unit"] == "kT"
1517

1618

17-
@pytest.mark.parametrize("u_nk_list", ["gmx_benzene_VDW_u_nk", "gmx_ABFE_complex_n_uk"])
19+
@pytest.mark.parametrize("u_nk_list", ["gmx_benzene_VDW_u_nk", "gmx_ABFE_complex_u_nk"])
1820
def test_extract_dHdl(u_nk_list, request, tmp_path):
1921
u_nk = request.getfixturevalue(u_nk_list)[0]
2022
u_nk.to_parquet(path=str(tmp_path / "u_nk.parquet"), index=True)
2123
new_u_nk = extract_u_nk(str(tmp_path / "u_nk.parquet"), T=300)
2224
assert (new_u_nk.columns == u_nk.columns).all()
2325
assert (new_u_nk.index == u_nk.index).all()
26+
assert new_u_nk.attrs["temperature"] == 300
27+
assert new_u_nk.attrs["energy_unit"] == "kT"
28+
29+
30+
@pytest.fixture()
31+
def u_nk(gmx_ABFE_complex_u_nk):
32+
return gmx_ABFE_complex_u_nk[0]
33+
34+
35+
def test_no_T(u_nk, tmp_path, caplog):
36+
u_nk.attrs = {}
37+
u_nk.to_parquet(path=str(tmp_path / "temp.parquet"), index=True)
38+
extract_u_nk(str(tmp_path / "temp.parquet"), 300)
39+
assert (
40+
"Serialise the Dataframe with pandas>=2.1 to preserve the metadata."
41+
in caplog.text
42+
)
43+
44+
45+
def test_wrong_T(u_nk, tmp_path, caplog):
46+
u_nk.to_parquet(path=str(tmp_path / "temp.parquet"), index=True)
47+
with pytest.raises(ValueError, match="doesn't match the temperature"):
48+
extract_u_nk(str(tmp_path / "temp.parquet"), 400)
49+
50+
51+
def test_metadata_unchanged(u_nk, tmp_path):
52+
u_nk.attrs = {"temperature": 400, "energy_unit": "kcal/mol"}
53+
u_nk.to_parquet(path=str(tmp_path / "temp.parquet"), index=True)
54+
new_u_nk = extract_u_nk(str(tmp_path / "temp.parquet"), 400)
55+
assert new_u_nk.attrs["temperature"] == 400
56+
assert new_u_nk.attrs["energy_unit"] == "kcal/mol"

Diff for: src/alchemlyb/tests/test_preprocessing.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def u_nk(gmx_benzene_Coulomb_u_nk):
4141

4242

4343
@pytest.fixture()
44-
def multi_index_u_nk(gmx_ABFE_complex_n_uk):
45-
return gmx_ABFE_complex_n_uk[0]
44+
def multi_index_u_nk(gmx_ABFE_complex_u_nk):
45+
return gmx_ABFE_complex_u_nk[0]
4646

4747

4848
@pytest.fixture()
@@ -470,7 +470,7 @@ def test_decorrelate_dhdl_multiple_l(multi_index_dHdl):
470470
)
471471

472472

473-
def test_raise_non_uk(multi_index_dHdl):
473+
def test_raise_nou_nk(multi_index_dHdl):
474474
with pytest.raises(ValueError):
475475
decorrelate_u_nk(
476476
multi_index_dHdl,
@@ -544,3 +544,16 @@ def test_statistical_inefficiency(self, caplog, u_nk):
544544
assert "Running statistical inefficiency analysis." in caplog.text
545545
assert "Statistical inefficiency:" in caplog.text
546546
assert "Number of uncorrelated samples:" in caplog.text
547+
548+
549+
def test_unequil_input(dHdl):
550+
with pytest.raises(ValueError, match="should be same as the length of series"):
551+
statistical_inefficiency(dHdl, series=dHdl[:10])
552+
553+
554+
def test_series_none(dHdl):
555+
with pytest.warns(
556+
UserWarning,
557+
match="The series input is `None`, would not subsample according to statistical inefficiency.",
558+
):
559+
statistical_inefficiency(dHdl, series=None)

Diff for: src/alchemlyb/tests/test_workflow_ABFE.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def test_single_estimator_ti(self, workflow, monkeypatch):
258258
summary = workflow.generate_result()
259259
assert np.isclose(summary["TI"]["Stages"]["TOTAL"], 21.51472826028906, 0.1)
260260

261-
def test_unprocessed_n_uk(self, workflow, monkeypatch):
261+
def test_unprocessed_u_nk(self, workflow, monkeypatch):
262262
monkeypatch.setattr(workflow, "u_nk_sample_list", None)
263263
monkeypatch.setattr(workflow, "estimator", dict())
264264
workflow.estimate()

0 commit comments

Comments
 (0)