Skip to content

Commit 9ad4834

Browse files
committed
Add support for .ABOUT files in InspectManifest #284
Signed-off-by: Thomas Druez <[email protected]>
1 parent c6511e2 commit 9ad4834

9 files changed

+166
-46
lines changed

CHANGELOG.rst

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Changelog
44
v31.1.0 (unreleased)
55
--------------------
66

7+
- Add a new "inspect manifest" pipeline to resolve packages from manifest, lockfile,
8+
and SBOM. The resolved packages are created as discovered packages.
9+
Support PyPI "requirements.txt" files and AboutCode ".ABOUT" files
10+
https://github.com/nexB/scancode.io/issues/284
11+
712
- Add a new "check vulnerabilities" pipeline to lookup vulnerabilities in the
813
VulnerableCode database for all project discovered packages.
914
Vulnerability data is stored in the extra_data field of each package.

docs/scanpipe-pipes.rst

+15-5
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ Docker
2323
.. automodule:: scanpipe.pipes.docker
2424
:members:
2525

26-
Windows
27-
-------
28-
.. automodule:: scanpipe.pipes.windows
29-
:members:
3026

3127
Fetch
3228
-----
@@ -45,13 +41,27 @@ Output
4541
:members:
4642
:exclude-members: JSONResultsGenerator
4743

44+
Resolve
45+
-------
46+
.. automodule:: scanpipe.pipes.resolve
47+
:members:
48+
4849
RootFS
4950
------
50-
5151
.. automodule:: scanpipe.pipes.rootfs
5252
:members:
5353

5454
ScanCode
5555
--------
5656
.. automodule:: scanpipe.pipes.scancode
5757
:members:
58+
59+
VulnerableCode
60+
--------------
61+
.. automodule:: scanpipe.pipes.vulnerablecode
62+
:members:
63+
64+
Windows
65+
-------
66+
.. automodule:: scanpipe.pipes.windows
67+
:members:

scanpipe/models.py

+11
Original file line numberDiff line numberDiff line change
@@ -2318,6 +2318,17 @@ def create_from_data(cls, project, package_data):
23182318
discovered_package.save(save_error=False, capture_exception=False)
23192319
return discovered_package
23202320

@classmethod
def clean_data(cls, data, include_none=False):
    """
    Return a new dict holding only the `data` entries whose key is one of the
    field names returned by `cls.model_fields()`.

    Values are kept as-is, including empty or `None` ones.
    The `include_none` argument is accepted for compatibility but is currently
    not used to filter the values.
    """
    # Evaluated once rather than for every item of `data`.
    model_fields = cls.model_fields()
    return {
        field_name: value
        for field_name, value in data.items()
        if field_name in model_fields
    }
2331+
23212332
@property
23222333
def spdx_id(self):
23232334
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}"

scanpipe/pipelines/inspect_manifest.py

+11-32
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,18 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/nexB/scancode.io for support and download.
2222

23-
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
24-
from python_inspector.resolve_cli import resolver_api
25-
2623
from scanpipe.pipelines import Pipeline
24+
from scanpipe.pipes import resolve
2725
from scanpipe.pipes import update_or_create_package
2826

2927

30-
def resolve_pypi_packages(input_location):
31-
"""
32-
Resolve the PyPI packages from the `input_location` requirements file.
33-
"""
34-
inspector_output = resolver_api(
35-
requirement_files=[input_location],
36-
prefer_source=True,
37-
)
38-
return inspector_output.packages
39-
40-
41-
# Mapping between the `default_package_type` its related resolver function
42-
resolver_registry = {
43-
"pypi": resolve_pypi_packages,
44-
}
45-
46-
47-
def get_default_package_type(input_location):
48-
for handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS:
49-
if handler.is_datafile(input_location):
50-
return handler.default_package_type
51-
52-
5328
class InspectManifest(Pipeline):
5429
"""
5530
A pipeline to inspect one or more manifest files and resolve its packages.
5631
57-
Only PyPI requirements file are supported.
32+
Supports:
33+
- PyPI "requirements.txt" files
34+
- AboutCode ".ABOUT" files
5835
"""
5936

6037
@classmethod
@@ -77,18 +54,20 @@ def create_packages_from_manifest(self):
7754
Resolves manifest files into packages.
7855
"""
7956
for input_location in self.input_locations:
80-
default_package_type = get_default_package_type(input_location)
57+
default_package_type = resolve.get_default_package_type(input_location)
8158
if not default_package_type:
8259
raise Exception(f"No package type found for {input_location}")
8360

84-
resolver = resolver_registry.get(default_package_type)
61+
resolver = resolve.resolver_registry.get(default_package_type)
8562
if not resolver:
86-
raise Exception(f"No resolver for {default_package_type}")
63+
raise Exception(
64+
f'No resolver for package type "{default_package_type}" for '
65+
f"{input_location}"
66+
)
8767

8868
resolved_packages = resolver(input_location=input_location)
89-
9069
if not resolved_packages:
91-
raise Exception("No packages could be resolved.")
70+
raise Exception(f"No packages could be resolved for {input_location}")
9271

9372
for package_data in resolved_packages:
9473
update_or_create_package(self.project, package_data)

scanpipe/pipes/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def update_or_create_package(project, package_data, codebase_resource=None):
9595
package = None
9696

9797
package_data = package_data.copy()
98-
if release_date := package_data["release_date"]:
98+
if release_date := package_data.get("release_date"):
9999
if type(release_date) is str:
100100
package_data["release_date"] = datetime.fromisoformat(release_date).date()
101101

scanpipe/pipes/resolve.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
from attributecode.model import About
24+
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
25+
from packageurl import PackageURL
26+
from python_inspector.resolve_cli import resolver_api
27+
28+
from scanpipe.models import DiscoveredPackage
29+
30+
"""
31+
Utilities to resolve packages from manifest, lockfile, and SBOM.
32+
"""
33+
34+
def resolve_pypi_packages(input_location):
    """
    Run the python-inspector `resolver_api` on the requirements file located at
    `input_location` and return the `packages` of its output.
    """
    resolution = resolver_api(requirement_files=[input_location], prefer_source=True)
    return resolution.packages
44+
45+
def resolve_about_packages(input_location):
    """
    Load the .ABOUT file located at `input_location` and return its content as
    a single-item list of package data, limited to the DiscoveredPackage fields.
    """
    about_data = About(location=input_location).as_dict()

    package_url = about_data.get("package_url")
    if package_url:
        purl_fields = PackageURL.from_string(package_url).to_dict(encode=True)
        # Only the non-empty Package URL components override the .ABOUT values.
        about_data.update(
            {name: component for name, component in purl_fields.items() if component}
        )

    return [DiscoveredPackage.clean_data(about_data)]
61+
62+
def get_default_package_type(input_location):
    """
    Return the `default_package_type` of the first datafile handler that
    recognizes the provided `input_location`, or None when no handler matches.
    This type is used to get the related resolver that knows how to process the
    input.
    """
    matching_types = (
        datafile_handler.default_package_type
        for datafile_handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS
        if datafile_handler.is_datafile(input_location)
    )
    return next(matching_types, None)
71+
72+
73+
# Mapping between the `default_package_type` and its related resolver function
74+
resolver_registry = {
75+
"about": resolve_about_packages,
76+
"pypi": resolve_pypi_packages,
77+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
about_resource: Django-4.0.8-py3-none-any.whl
2+
name: django
3+
version: 4.0.8
4+
download_url: https://files.pythonhosted.org/packages/e1/d0/d90528978da16288d470bb423abad307ed7ae724090132ff6bf67d6a5579/Django-4.0.8-py3-none-any.whl
5+
package_url: pkg:pypi/django@4.0.8
6+
license_expression: bsd-new
7+
attribute: yes
8+
checksum_md5: 386349753c386e574dceca5067e2788a
9+
checksum_sha1: 4cc6f7abda928a0b12cd1f1cd8ad3677519ca04e
10+
licenses:
11+
- key: bsd-new
12+
name: BSD-3-Clause
13+
file: bsd-new.LICENSE

scanpipe/tests/test_pipelines.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -625,10 +625,7 @@ def test_scanpipe_check_vulnerabilities_pipeline_integration_test(
625625
expected = {"discovered_vulnerabilities": vulnerability_data}
626626
self.assertEqual(expected, package1.extra_data)
627627

628-
@mock.patch("scanpipe.pipelines.inspect_manifest.resolver_api")
629-
def test_scanpipe_inspect_manifest_pipeline_integration_test(self, resolver_api):
630-
resolver_api.return_value = mock.Mock(packages=[])
631-
628+
def test_scanpipe_inspect_manifest_pipeline_integration_test(self):
632629
pipeline_name = "inspect_manifest"
633630
project1 = Project.objects.create(name="Analysis")
634631

@@ -640,14 +637,21 @@ def test_scanpipe_inspect_manifest_pipeline_integration_test(self, resolver_api)
640637
self.assertEqual(1, exitcode, msg=out)
641638
self.assertIn("No package type found for", out)
642639

643-
project1.reset(keep_input=False)
640+
@mock.patch("scanpipe.pipes.resolve.resolver_api")
641+
def test_scanpipe_inspect_manifest_pipeline_pypi_integration_test(
642+
self, resolver_api
643+
):
644+
pipeline_name = "inspect_manifest"
645+
project1 = Project.objects.create(name="Analysis")
646+
644647
run = project1.add_pipeline(pipeline_name)
645648
pipeline = run.make_pipeline_instance()
646649

650+
resolver_api.return_value = mock.Mock(packages=[])
647651
project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1])
648652
exitcode, out = pipeline.execute()
649653
self.assertEqual(1, exitcode, msg=out)
650-
self.assertIn("No packages could be resolved.", out)
654+
self.assertIn("No packages could be resolved", out)
651655

652656
resolver_api.return_value = mock.Mock(packages=[package_data1])
653657
exitcode, out = pipeline.execute()
@@ -659,3 +663,23 @@ def test_scanpipe_inspect_manifest_pipeline_integration_test(self, resolver_api)
659663
for field_name, value in package_data1.items():
660664
if value and field_name not in exclude_fields:
661665
self.assertEqual(value, getattr(discoveredpackage, field_name))
666+
def test_scanpipe_inspect_manifest_pipeline_aboutfile_integration_test(self):
    """
    Run the "inspect_manifest" pipeline on a .ABOUT input file and check that
    a single package is created with the values declared in that file.
    """
    project = Project.objects.create(name="Analysis")
    about_file = self.data_location / "Django-4.0.8-py3-none-any.whl.ABOUT"
    project.copy_input_from(about_file)

    pipeline = project.add_pipeline("inspect_manifest").make_pipeline_instance()
    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project.discoveredpackages.count())
    package = project.discoveredpackages.get()
    expected_values = {
        "type": "pypi",
        "name": "django",
        "version": "4.0.8",
        "license_expression": "bsd-new",
    }
    for field_name, expected in expected_values.items():
        self.assertEqual(expected, getattr(package, field_name))

setup.cfg

+3-2
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ install_requires =
7575
commoncode==31.0.0
7676
# FetchCode
7777
fetchcode-container==1.2.3.210512; sys_platform == "linux"
78-
# Python-inspector
79-
python-inspector==0.9.1
78+
# Inspectors
79+
python-inspector==0.9.2
80+
aboutcode-toolkit==7.2.0
8081
# Utilities
8182
XlsxWriter==3.0.3
8283
requests==2.28.1

0 commit comments

Comments
 (0)