Skip to content

Commit abf65c7

Browse files
Merge pull request #4089 from aboutcode-org/update-conda-assembly
Update package assembly in conda installations
2 parents f321b4d + 32f8797 commit abf65c7

File tree

15 files changed

+1357
-5
lines changed

15 files changed

+1357
-5
lines changed

azure-pipelines.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ jobs:
233233

234234
- template: etc/ci/azure-posix.yml
235235
parameters:
236-
job_name: macos12_cpython_latest_from_pip
237-
image_name: macos-12
236+
job_name: macos14_cpython_latest_from_pip
237+
image_name: macos-14
238238
python_versions: ['3.9', '3.10', '3.11', '3.12']
239239
test_suites:
240240
all: venv/bin/pip install --upgrade-strategy eager --force-reinstall --upgrade -e .[testing] && venv/bin/pytest -n 2 -vvs tests/scancode/test_cli.py

src/packagedcode/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
cocoapods.PodfileLockHandler,
7474
cocoapods.PodfileHandler,
7575

76+
conda.CondaMetaJsonHandler,
7677
conda.CondaMetaYamlHandler,
7778
conda.CondaYamlHandler,
7879

src/packagedcode/conda.py

Lines changed: 333 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99

1010
import io
11+
import json
1112

1213
import saneyaml
1314
from packageurl import PackageURL
@@ -23,7 +24,333 @@
2324
See https://repo.continuum.io/pkgs/free for examples.
2425
"""
2526

26-
# TODO: there are likely other package data files for Conda
27+
28+
class CondaBaseHandler(models.DatafileHandler):
    """
    Assemble package data and files present in conda manifests present in the
    usual structure of a conda installation. Here the manifests which are
    assembled together are:
    - Conda metadata JSON (CondaMetaJsonHandler)
    - Conda meta.yaml recipe (CondaMetaYamlHandler)

    Example paths for these manifests:
    /opt/conda/conda-meta/requests-2.32.3-py312h06a4308_1.json
    /opt/conda/pkgs/requests-2.32.3-py312h06a4308_1/info/recipe/meta.yaml
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_package):
        """
        Yield the package, its dependencies and the processed resources for
        the conda manifest `resource` and its `package_data`.

        When `resource` is a meta.yaml with a corresponding metadata JSON,
        the assembly is driven from the metadata JSON instead. When there is
        no such JSON, the meta.yaml is assembled on its own.
        """
        if codebase.has_single_resource:
            yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder)
            return

        # We do not have any package data detected here
        if not resource.package_data:
            return

        # If this is a Conda meta.yaml, try to find the corresponding metadata JSON
        # and if present, run assembly on the metadata resource
        if CondaMetaYamlHandler.is_datafile(resource.location):
            conda_meta_json = cls.find_conda_meta_json_resource(resource, codebase)
            if conda_meta_json:
                package_data_meta_json, = conda_meta_json.package_data
                # Items stored on a resource are plain mappings, while the
                # rest of this method reads attributes such as ``purl`` and
                # ``extra_data``: convert before recursing.
                if isinstance(package_data_meta_json, dict):
                    package_data_meta_json = models.PackageData.from_dict(package_data_meta_json)

                yield from cls.assemble(
                    package_data=package_data_meta_json,
                    resource=conda_meta_json,
                    codebase=codebase,
                    package_adder=package_adder,
                )

            # corresponding metadata JSON does not exist, so handle this meta.yaml
            else:
                yield from cls.assemble_from_meta_yaml_only(
                    package_data=package_data,
                    resource=resource,
                    codebase=codebase,
                    package_adder=package_adder,
                )

            return

        if not package_data.purl:
            yield resource
            return

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        # This may update ``package`` from the meta.yaml, so it has to run
        # before the package is finalized and yielded below.
        yield from cls.get_and_assmeble_from_meta_yaml(
            package=package,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )

        package.populate_license_fields()
        yield package

        cls.assign_package_to_resources(
            package=package,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )

        # we yield this as we do not want this further processed
        yield resource

        cls.assign_packages_to_resources_from_metadata_json(
            package=package,
            package_data=package_data,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )

    @classmethod
    def assign_packages_to_resources_from_metadata_json(
        cls,
        package,
        package_data,
        resource,
        codebase,
        package_adder=models.add_to_package,
    ):
        """
        Get the file paths present in the `package_data` of a metadata JSON `resource`
        and assign them to the `package` created from the manifest.

        Do nothing if the `extracted_package_dir` or `files` extra data are
        missing, or if the recorded package directory is not consistent with
        the location of `resource` in the codebase.
        """
        extracted_package_dir = package_data.extra_data.get('extracted_package_dir')
        files = package_data.extra_data.get('files')

        if not extracted_package_dir or not files:
            return

        conda_metadata_dir = resource.parent(codebase)
        if not conda_metadata_dir:
            return

        conda_root_dir = conda_metadata_dir.parent(codebase)
        if not conda_root_dir:
            return

        root_path_segment, _, package_dir = extracted_package_dir.rpartition("/pkgs/")
        if not conda_root_dir.path.endswith(root_path_segment):
            return

        conda_root_path = conda_root_dir.path

        # Lookup order: the extracted package directory, then the .conda
        # package archive, then each file listed in the metadata JSON.
        candidate_paths = [
            f"{conda_root_path}/pkgs/{package_dir}",
            f"{conda_root_path}/pkgs/{package_dir}.conda",
        ]
        candidate_paths.extend(f"{conda_root_path}/{file_path}" for file_path in files)

        for candidate_path in candidate_paths:
            candidate_resource = codebase.get_resource(path=candidate_path)
            if candidate_resource:
                cls.assign_package_to_resources(
                    package=package,
                    resource=candidate_resource,
                    codebase=codebase,
                    package_adder=package_adder,
                )

    @classmethod
    def get_and_assmeble_from_meta_yaml(cls, package, resource, codebase, package_adder=models.add_to_package):
        """
        For a conda metadata JSON `resource`, try to find the corresponding meta.yaml and
        update the `package` from it. Also yield dependencies present in the meta.yaml,
        and the meta.yaml resource to complete assembling from this manifest.

        Yield nothing if there is no corresponding meta.yaml.
        """
        conda_meta_yaml = cls.find_conda_meta_yaml_resource(resource, codebase)
        if not conda_meta_yaml:
            return

        conda_meta_yaml_package_data, = conda_meta_yaml.package_data
        package.update(
            package_data=conda_meta_yaml_package_data,
            datafile_path=conda_meta_yaml.path,
        )
        cls.assign_package_to_resources(
            package=package,
            resource=conda_meta_yaml,
            codebase=codebase,
            package_adder=package_adder,
        )
        meta_yaml_package_data = models.PackageData.from_dict(conda_meta_yaml_package_data)
        if meta_yaml_package_data.dependencies:
            yield from models.Dependency.from_dependent_packages(
                dependent_packages=meta_yaml_package_data.dependencies,
                datafile_path=conda_meta_yaml.path,
                datasource_id=meta_yaml_package_data.datasource_id,
                package_uid=package.package_uid,
            )

        yield conda_meta_yaml

    @classmethod
    def assemble_from_meta_yaml_only(cls, package_data, resource, codebase, package_adder=models.add_to_package):
        """
        Assemble and yield package, dependencies and the meta YAML `resource` from
        its `package_data`, and also assign resources to the package.

        Yield nothing if the `package_data` has no purl.
        """
        if not package_data.purl:
            return

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        package.populate_license_fields()
        yield package

        dependent_packages = package_data.dependencies
        if dependent_packages:
            yield from models.Dependency.from_dependent_packages(
                dependent_packages=dependent_packages,
                datafile_path=resource.path,
                datasource_id=package_data.datasource_id,
                package_uid=package.package_uid,
            )

        CondaMetaYamlHandler.assign_package_to_resources(
            package=package,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )
        yield resource

    @classmethod
    def check_valid_packages_dir_name(cls, package_dir_resource, resource, codebase):
        """
        Return the name of the `package_dir_resource`, if it is valid, i.e.
        the package (name, version) data present in `resource` matches the
        directory name, and the package directory is present in its usual
        location in a conda installation. Return None otherwise.

        Both checks are substring checks: "<name>-<version>" must be
        contained in the directory name and "pkgs" must be contained in the
        name of the parent directory.
        """
        package_dir_parent = package_dir_resource.parent(codebase)

        meta_yaml_package_data, = resource.package_data
        name = meta_yaml_package_data.get("name")
        version = meta_yaml_package_data.get("version")
        if f"{name}-{version}" in package_dir_resource.name and (
            package_dir_parent and "pkgs" in package_dir_parent.name
        ):
            return package_dir_resource.name

    @classmethod
    def find_conda_meta_json_resource(cls, resource, codebase):
        """
        Given a resource for a conda meta.yaml resource, find if it has any
        corresponding metadata JSON located inside the conda-meta/ directory,
        and return the resource if they exist, else return None.
        """
        package_dir_resource = CondaMetaYamlHandler.get_conda_root(resource, codebase)
        if not package_dir_resource or not resource.package_data:
            return

        package_dir_name = cls.check_valid_packages_dir_name(
            package_dir_resource=package_dir_resource,
            resource=resource,
            codebase=codebase,
        )
        if not package_dir_name:
            return

        # The parent of the package directory exists at this point, as this
        # is checked when validating the package directory name above.
        root_resource = package_dir_resource.parent(codebase).parent(codebase)
        if not root_resource:
            return

        root_resource_path = root_resource.path
        conda_meta_path = f"{root_resource_path}/conda-meta/{package_dir_name}.json"
        conda_meta_resource = codebase.get_resource(path=conda_meta_path)

        if conda_meta_resource and conda_meta_resource.package_data:
            return conda_meta_resource

    @classmethod
    def find_conda_meta_yaml_resource(cls, resource, codebase):
        """
        Given a resource for a metadata JSON located inside the conda-meta/
        directory, find if it has any corresponding conda meta.yaml, and return
        the resource if they exist, else return None.
        """
        package_dir_name, _json, _ = resource.name.rpartition(".json")
        parent_resource = resource.parent(codebase)
        # The metadata JSON must sit directly inside a "conda-meta" directory.
        # Use ``or`` so that a missing parent returns early instead of failing
        # on the attribute access, and so that the name check is applied.
        if not parent_resource or parent_resource.name != "conda-meta":
            return

        root_resource = parent_resource.parent(codebase)
        if not root_resource:
            return

        root_resource_path = root_resource.path
        package_dir_path = f"{root_resource_path}/pkgs/{package_dir_name}/"
        package_dir_resource = codebase.get_resource(path=package_dir_path)
        if not package_dir_resource:
            return

        meta_yaml_path = f"{package_dir_path}info/recipe/meta.yaml"
        meta_yaml_resource = codebase.get_resource(path=meta_yaml_path)
        if meta_yaml_resource and meta_yaml_resource.package_data:
            return meta_yaml_resource
313+
314+
315+
class CondaMetaJsonHandler(CondaBaseHandler):
    # Handler for the JSON files matching the ``*conda-meta/*.json`` pattern.
    datasource_id = 'conda_meta_json'
    path_patterns = ('*conda-meta/*.json',)
    default_package_type = 'conda'
    default_primary_language = 'Python'
    description = 'Conda metadata JSON in rootfs'
    documentation_url = 'https://docs.conda.io/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the conda metadata JSON file at
        `location`. Fields missing from the JSON are set to None.
        """
        with io.open(location, encoding='utf-8') as metadata_file:
            metadata = json.load(metadata_file)

        # Fields stored as-is under extra_data: first the request/channel
        # details, then the fields describing where the package files are.
        extra_data_keys = (
            'requested_spec',
            'channel',
            'extracted_package_dir',
            'files',
            'package_tarball_full_path',
        )
        extra_data = {key: metadata.get(key) for key in extra_data_keys}

        package_mapping = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': metadata.get('name'),
            'version': metadata.get('version'),
            'extracted_license_statement': metadata.get('license'),
            'download_url': metadata.get('url'),
            'extra_data': extra_data,
        }
        # These JSON fields are copied under the same key in the package data.
        for key in ('size', 'md5', 'sha256'):
            package_mapping[key] = metadata.get(key)

        yield models.PackageData.from_data(package_mapping, package_only)
353+
27354

28355
class CondaYamlHandler(BaseDependencyFileHandler):
29356
datasource_id = 'conda_yaml'
@@ -55,7 +382,7 @@ def parse(cls, location, package_only=False):
55382
yield models.PackageData.from_data(package_data, package_only)
56383

57384

58-
class CondaMetaYamlHandler(models.DatafileHandler):
385+
class CondaMetaYamlHandler(CondaBaseHandler):
59386
datasource_id = 'conda_meta_yaml'
60387
default_package_type = 'conda'
61388
path_patterns = ('*/meta.yaml',)
@@ -67,13 +394,17 @@ def get_conda_root(cls, resource, codebase):
67394
"""
68395
Return a root Resource given a meta.yaml ``resource``.
69396
"""
397+
if not resource:
398+
return
399+
70400
# the root is either the parent or further up for yaml stored under
71401
# an "info" dir. We support extractcode extraction.
72402
# in a source repo it would be in <repo>/conda.recipe/meta.yaml
73403
paths = (
74404
'info/recipe.tar-extract/recipe/meta.yaml',
75405
'info/recipe/recipe/meta.yaml',
76406
'conda.recipe/meta.yaml',
407+
'info/recipe/meta.yaml',
77408
)
78409
res = resource
79410
for pth in paths:

0 commit comments

Comments
 (0)