-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
/
Copy pathvalidate_min_versions_in_sync.py
executable file
·300 lines (260 loc) · 10.4 KB
/
validate_min_versions_in_sync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#!/usr/bin/env python3
"""
Check pandas required and optional dependencies are synced across:
ci/deps/actions-.*-minimum_versions.yaml
pandas/compat/_optional.py
pyproject.toml
TODO: doc/source/getting_started/install.rst
This is meant to be run as a pre-commit hook - to run it manually, you can do:
pre-commit run validate-min-versions-in-sync --all-files
"""
from __future__ import annotations
import pathlib
import sys
import yaml
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
from typing import Any
from scripts.generate_pip_deps_from_conda import CONDA_TO_PIP
# Locations of the files this script reads or rewrites.  All of them are built
# from relative paths, so they resolve against the current working directory.
# NOTE(review): presumably the script is always launched from the repository
# root (e.g. by pre-commit) - confirm.
# DOC_PATH is not referenced by any function visible in this file.
DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
# First file in ci/deps matching the minimum-versions pattern.  next() has no
# default here, so a missing file raises StopIteration at import time.
CI_PATH = next(
pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
)
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
# Despite the name, this points at pyproject.toml.
SETUP_PATH = pathlib.Path("pyproject.toml").resolve()
# Directory whose entries are all rewritten by pin_min_versions_to_ci_deps().
YAML_PATH = pathlib.Path("ci/deps")
# Extra environment file that receives the same pinning as the CI files.
ENV_PATH = pathlib.Path("environment.yml")
# Package names removed from the code / CI / pyproject version comparison.
EXCLUDE_DEPS = {"tzdata", "blosc", "pandas-gbq", "pyqt", "pyqt5"}
# Exact dependency strings that are skipped when parsing and pinning YAML files.
EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"])
# pandas package is not available
# in pre-commit environment
# The two directories are put on sys.path so the modules below can be imported
# by their bare file names; the order of these statements matters.
sys.path.append("pandas/compat")
sys.path.append("pandas/util")
import _exceptions
import version
# Register the bare modules under their pandas.* names before importing
# _optional.
# NOTE(review): presumably _optional imports these two names itself - verify.
sys.modules["pandas.util.version"] = version
sys.modules["pandas.util._exceptions"] = _exceptions
import _optional
def pin_min_versions_to_ci_deps() -> int:
    """
    Pin minimum versions to CI dependencies.

    Every entry of ``YAML_PATH`` plus ``ENV_PATH`` is read, the minimum
    versions taken from ``SETUP_PATH`` are applied to its text, and the file
    is written back only when the text actually changed.

    Pip dependencies are not pinned.

    Returns
    -------
    int
        1 if at least one file was rewritten, otherwise 0.
    """
    with open(SETUP_PATH, "rb") as toml_f:
        toml_dependencies = tomllib.load(toml_f)
    # The pyproject minimums are the same for every YAML file, so the map is
    # built once up front instead of once per file.
    toml_map = get_toml_map_from(toml_dependencies)

    ret = 0
    for curr_file in [*YAML_PATH.iterdir(), ENV_PATH]:
        with open(curr_file, encoding="utf-8") as yaml_f:
            yaml_start_data = yaml_f.read()
        # The parsed YAML is only used to enumerate dependencies; the rewrite
        # itself is done on the raw text so the rest of the file is untouched.
        yaml_dependencies = yaml.safe_load(yaml_start_data)["dependencies"]
        yaml_map = get_yaml_map_from(yaml_dependencies)
        yaml_result_data = pin_min_versions_to_yaml_file(
            yaml_map, toml_map, yaml_start_data
        )
        if yaml_result_data != yaml_start_data:
            with open(curr_file, "w", encoding="utf-8") as f:
                f.write(yaml_result_data)
            ret |= 1
    return ret
def get_toml_map_from(toml_dic: dict[str, Any]) -> dict[str, str]:
    """
    Map each package of the ``all`` optional-dependency group to its minimum.

    Every requirement string found under
    ``project -> optional-dependencies -> all`` is stripped and split on
    ``>=``: the left part becomes the key, the right part the value.  A
    string that does not split into exactly two parts raises ``ValueError``.
    """
    requirements = set(toml_dic["project"]["optional-dependencies"]["all"])
    return {
        name: minimum
        for name, minimum in (
            requirement.strip().split(">=") for requirement in requirements
        )
    }
def get_operator_from(dependency: str) -> str | None:
    """
    Return the comparison operator contained in a dependency specifier.

    The candidates are tried in the fixed order ``<=``, ``>=``, ``=``, ``>``,
    ``<`` and the first one that occurs anywhere in ``dependency`` is
    returned.  Checking the two-character forms first keeps ``>=`` from being
    reported as ``=`` or ``>``.  ``None`` is returned when no candidate is
    present.
    """
    for candidate in ("<=", ">=", "=", ">", "<"):
        if candidate in dependency:
            return candidate
    return None
def get_yaml_map_from(
    yaml_dic: list[str | dict[str, list[str]]],
) -> dict[str, list[str] | None]:
    """
    Build a ``package -> version constraints`` map from a YAML dependency list.

    Dict entries, entries listed in ``EXCLUSION_LIST`` and entries whose
    string is already a key of the result are skipped.  For the remaining
    entries:

    * ``"pkg<op>X,<rest>"`` maps ``pkg`` to ``[<rest>, "<op>X"]``;
    * ``"pkg<op>X"`` (optionally carrying a ``[build=*_pypy]`` marker, which
      is removed) maps ``pkg`` to ``["<op>X"]``;
    * a bare ``"pkg"`` maps the stripped name to ``None``.
    """
    parsed: dict[str, list[str] | None] = {}
    for dependency in yaml_dic:
        # The dict check must come first: a dict is unhashable and could not
        # be tested for membership in the frozenset / dict below.
        if isinstance(dependency, dict):
            continue
        if dependency in EXCLUSION_LIST or dependency in parsed:
            continue

        spec = str(dependency)

        if "," in dependency:
            # Two constraints: the operator is looked up in the first segment
            # only, and the second segment is stored exactly as written.
            first_part, second_part = spec.split(",")
            op = get_operator_from(first_part)
            assert op is not None
            name, bound = first_part.split(op)
            parsed[name] = [second_part, op + bound]
            continue

        # The operator is determined before the pypy marker is removed.
        op = get_operator_from(spec)
        if "[build=*_pypy]" in dependency:
            spec = spec.replace("[build=*_pypy]", "")
        elif op is None:
            parsed[spec.strip()] = None
            continue

        name, bound = spec.split(op)
        parsed[name] = [op + bound]
    return parsed
def clean_version_list(
    yaml_versions: list[str], toml_version: version.Version
) -> list[str]:
    """
    Flag, in place, the YAML constraints that should be dropped.

    Each entry is stripped, its leading operator removed and the remainder
    parsed as a version.  An entry is overwritten with ``"-" + <version>``
    when its version is below ``toml_version``, or when it is not below it
    and its operator contains ``>``.  All other entries are left exactly as
    they were.

    The list passed in is modified and the same list object is returned.
    """
    for index, raw_constraint in enumerate(yaml_versions):
        constraint = raw_constraint.strip()
        op = get_operator_from(constraint)
        assert op is not None
        # Operators are one or two characters long and start the constraint.
        parsed = version.parse(constraint[len(op) :])
        should_drop = parsed < toml_version or (
            parsed >= toml_version and ">" in op
        )
        if should_drop:
            yaml_versions[index] = "-" + str(parsed)
    return yaml_versions
def pin_min_versions_to_yaml_file(
    yaml_map: dict[str, list[str] | None], toml_map: dict[str, str], yaml_file_data: str
) -> str:
    """
    Return ``yaml_file_data`` with minimum versions from ``toml_map`` applied.

    For every package of ``yaml_map`` that is not in ``EXCLUSION_LIST`` and
    has an entry in ``toml_map`` (looked up by its ``CONDA_TO_PIP`` name
    first, then by its own name), the dependency text is rewritten:

    * an unpinned package gets ``>=<minimum>`` appended (first occurrence of
      the package name only);
    * a pinned package keeps the constraints that survive
      ``clean_version_list`` and gains ``>=<minimum>``, unless the rebuilt
      text contains an ``=`` pin, in which case only the trailing separator
      is removed.
    """
    text = yaml_file_data
    for yaml_package, yaml_versions in yaml_map.items():
        if yaml_package in EXCLUSION_LIST:
            continue

        pip_name = CONDA_TO_PIP.get(yaml_package, yaml_package)
        if pip_name in toml_map:
            min_dep = toml_map[pip_name]
        elif yaml_package in toml_map:
            min_dep = toml_map[yaml_package]
        else:
            # No minimum known for this package: leave it alone.
            continue

        if yaml_versions is None:
            text = text.replace(yaml_package, yaml_package + ">=" + min_dep, 1)
            continue

        # Build the search string before clean_version_list() runs, because
        # that call overwrites entries of yaml_versions in place.
        old_dep = yaml_package + ", ".join(yaml_versions)
        flagged = clean_version_list(yaml_versions, version.parse(min_dep))
        kept = [constraint for constraint in flagged if "-" not in constraint]

        new_dep = yaml_package + "".join(constraint + ", " for constraint in kept)
        if get_operator_from(new_dep) != "=":
            new_dep += ">=" + min_dep
        else:
            # Exact pin kept: just drop the trailing ", ".
            new_dep = new_dep[:-2]
        text = text.replace(old_dep, new_dep)
    return text
def get_versions_from_code() -> dict[str, str]:
    """
    Min versions for checking within pandas code.

    Reads ``_optional.VERSIONS``, removes every package listed in
    ``EXCLUDE_DEPS`` (each name is first translated through the inverse of
    ``_optional.INSTALL_MAPPING`` when it has an entry there), and returns the
    remaining versions keyed by the case-folded ``INSTALL_MAPPING`` name of
    each package.
    """
    install_map = _optional.INSTALL_MAPPING
    inverse_install_map = {v: k for k, v in install_map.items()}
    # Work on a copy: popping from _optional.VERSIONS directly would silently
    # alter the imported module's dict for every later reader.
    versions = dict(_optional.VERSIONS)
    for item in EXCLUDE_DEPS:
        versions.pop(inverse_install_map.get(item, item), None)
    return {install_map.get(k, k).casefold(): v for k, v in versions.items()}
def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Min versions in CI job for testing all optional dependencies.

    ``content`` holds the lines of the CI YAML file.  The section comments
    ``# test dependencies``, ``# required dependencies`` and
    ``# optional dependencies`` switch the parser state.  Returns a
    ``(required, optional)`` pair of ``package -> version`` dicts with
    case-folded package names.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    required: dict[str, str] = {}
    optional: dict[str, str] = {}
    in_test = in_required = in_optional = False

    for raw_line in content:
        stripped = raw_line.strip()

        if "# test dependencies" in raw_line:
            in_test = True
            continue
        if in_test and "- pytest>=" in raw_line:
            # Only grab pytest
            name, pinned = stripped.split(">=")
            # Drop the leading "- " list marker.
            optional[name[2:].casefold()] = pinned
            continue
        if "# required dependencies" in raw_line:
            in_required = True
            continue
        if "# optional dependencies" in raw_line:
            in_optional = True
            continue
        if "- pip:" in raw_line:
            continue
        if not (in_required and stripped):
            continue

        separator = "==" if "==" in raw_line else "="
        name, pinned = stripped.split(separator, maxsplit=1)
        name = name.split()[-1]
        if name in EXCLUDE_DEPS:
            continue
        bucket = optional if in_optional else required
        bucket[name.casefold()] = pinned

    return required, optional
def get_versions_from_toml() -> dict[str, str]:
    """
    Min versions in pyproject.toml for pip install pandas[extra].

    Takes the ``all`` optional-dependency group of ``SETUP_PATH``, removes the
    ``pytest-*`` requirements of the ``test`` group, splits every remaining
    requirement on ``>=`` and returns the versions keyed by the case-folded
    ``_optional.INSTALL_MAPPING`` name of each package.  Keys listed in
    ``EXCLUDE_DEPS`` are dropped from the result.
    """
    install_map = _optional.INSTALL_MAPPING

    with open(SETUP_PATH, "rb") as pyproject_f:
        extras = tomllib.load(pyproject_f)["project"]["optional-dependencies"]

    requirements = set(extras["all"])
    # remove pytest plugin dependencies
    plugin_requirements = {
        entry for entry in extras["test"] if entry.startswith("pytest-")
    }

    minimums: dict[str, str] = {}
    for requirement in requirements - plugin_requirements:
        name, minimum = requirement.strip().split(">=")
        minimums[install_map.get(name, name).casefold()] = minimum

    for excluded in EXCLUDE_DEPS:
        minimums.pop(excluded, None)
    return minimums
def main() -> int:
    """
    Pin the CI files, then check that all minimum versions agree.

    First ``pin_min_versions_to_ci_deps`` is run.  Then the optional
    dependency minimums from the CI file (``CI_PATH``), from the code
    (``CODE_PATH``) and from ``SETUP_PATH`` are compared; every package whose
    ``(package, version)`` pair is not shared by all three is reported on
    stdout.

    Returns
    -------
    int
        1 if any file was rewritten or any mismatch was reported, otherwise 0.
    """
    ret = pin_min_versions_to_ci_deps()
    with open(CI_PATH, encoding="utf-8") as f:
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    setup_optional = get_versions_from_toml()

    # Union minus intersection: the (package, version) pairs that at least one
    # of the three sources does not agree on.
    diff = (ci_optional.items() | code_optional.items() | setup_optional.items()) - (
        ci_optional.items() & code_optional.items() & setup_optional.items()
    )

    if diff:
        out = sys.stdout
        out.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH}, {CODE_PATH} AND {SETUP_PATH}. "
            f"Please ensure these are aligned: \n\n"
        )
        # Sorted so the report is identical from run to run.
        for package in sorted({package for package, _ in diff}):
            out.write(
                f"{package}\n"
                f"{CI_PATH}: {ci_optional.get(package, 'Not specified')}\n"
                f"{CODE_PATH}: {code_optional.get(package, 'Not specified')}\n"
                f"{SETUP_PATH}: {setup_optional.get(package, 'Not specified')}\n\n"
            )
        ret |= 1
    return ret
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())