Skip to content

Commit fc0aff3

Browse files
authored
Add a new "Inspect Manifest" pipeline #284 (#552)
Signed-off-by: Thomas Druez <[email protected]>
1 parent 4b690cc commit fc0aff3

12 files changed

+277
-6
lines changed

CHANGELOG.rst

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Changelog
44
v31.1.0 (unreleased)
55
--------------------
66

7+
- Add a new "inspect manifest" pipeline to resolve packages from manifest, lockfile,
8+
and SBOM. The resolved packages are created as discovered packages.
9+
Support PyPI "requirements.txt" files and AboutCode ".ABOUT" files
10+
https://github.com/nexB/scancode.io/issues/284
11+
712
- Add a new "check vulnerabilities" pipeline to look up vulnerabilities in the
813
VulnerableCode database for all project discovered packages.
914
Vulnerability data is stored in the extra_data field of each package.

docs/built-in-pipelines.rst

+10
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ Docker Windows Image Analysis
3939
:members:
4040
:member-order: bysource
4141

42+
.. _pipeline_inspect_manifest:
43+
44+
Inspect Manifest
45+
----------------
46+
.. autoclass:: scanpipe.pipelines.inspect_manifest.InspectManifest()
47+
:members:
48+
:member-order: bysource
49+
50+
.. _pipeline_load_inventory:
51+
4252
Load Inventory From Scan
4353
------------------------
4454
.. autoclass:: scanpipe.pipelines.load_inventory.LoadInventory()

docs/scanpipe-pipes.rst

+15-5
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ Docker
2323
.. automodule:: scanpipe.pipes.docker
2424
:members:
2525

26-
Windows
27-
-------
28-
.. automodule:: scanpipe.pipes.windows
29-
:members:
3026

3127
Fetch
3228
-----
@@ -45,13 +41,27 @@ Output
4541
:members:
4642
:exclude-members: JSONResultsGenerator
4743

44+
Resolve
45+
-------
46+
.. automodule:: scanpipe.pipes.resolve
47+
:members:
48+
4849
RootFS
4950
------
50-
5151
.. automodule:: scanpipe.pipes.rootfs
5252
:members:
5353

5454
ScanCode
5555
--------
5656
.. automodule:: scanpipe.pipes.scancode
5757
:members:
58+
59+
VulnerableCode
60+
--------------
61+
.. automodule:: scanpipe.pipes.vulnerablecode
62+
:members:
63+
64+
Windows
65+
-------
66+
.. automodule:: scanpipe.pipes.windows
67+
:members:

scanpipe/models.py

+11
Original file line numberDiff line numberDiff line change
@@ -2318,6 +2318,17 @@ def create_from_data(cls, project, package_data):
23182318
discovered_package.save(save_error=False, capture_exception=False)
23192319
return discovered_package
23202320

2321+
@classmethod
def clean_data(cls, data, include_none=False):
    """
    Return a copy of the `data` dict keeping only the entries whose key is one of
    the field names returned by `cls.model_fields()`.

    The `include_none` argument is accepted for backward compatibility but is
    not used: entries are kept regardless of their value, `None` and empty
    values included.
    """
    # Compute the available field names once instead of once per `data` entry.
    # Assumes `model_fields()` yields hashable field names (strings).
    available_fields = set(cls.model_fields())
    return {
        field_name: value
        for field_name, value in data.items()
        if field_name in available_fields
    }
2331+
23212332
@property
23222333
def spdx_id(self):
23232334
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}"
+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
from scanpipe.pipelines import Pipeline
24+
from scanpipe.pipes import resolve
25+
from scanpipe.pipes import update_or_create_package
26+
27+
28+
class InspectManifest(Pipeline):
    """
    Inspect one or more manifest files and resolve their packages.

    Supports:
    - PyPI "requirements.txt" files
    - AboutCode ".ABOUT" files
    """

    @classmethod
    def steps(cls):
        return (
            cls.get_manifest_inputs,
            cls.create_packages_from_manifest,
        )

    def get_manifest_inputs(self):
        """
        Store the absolute location of each project input, as a string, in
        `self.input_locations`.
        """
        locations = []
        for input_path in self.project.inputs():
            locations.append(str(input_path.absolute()))
        self.input_locations = locations

    def create_packages_from_manifest(self):
        """
        Resolve each input location into packages and save those on the project.

        Raise an Exception as soon as an input has no package type, no registered
        resolver, or resolves to no packages.
        """
        for location in self.input_locations:
            package_type = resolve.get_default_package_type(location)
            if not package_type:
                raise Exception(f"No package type found for {location}")

            resolver = resolve.resolver_registry.get(package_type)
            if not resolver:
                raise Exception(
                    f'No resolver for package type "{package_type}" '
                    f"for {location}"
                )

            packages = resolver(input_location=location)
            if not packages:
                raise Exception(f"No packages could be resolved for {location}")

            for package_data in packages:
                update_or_create_package(self.project, package_data)

scanpipe/pipes/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ def update_or_create_package(project, package_data, codebase_resource=None):
9494
except DiscoveredPackage.DoesNotExist:
9595
package = None
9696

97+
package_data = package_data.copy()
98+
if release_date := package_data.get("release_date"):
99+
if type(release_date) is str:
100+
package_data["release_date"] = datetime.fromisoformat(release_date).date()
101+
97102
if package:
98103
package.update_from_data(package_data)
99104
else:

scanpipe/pipes/resolve.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
"""
Utilities to resolve packages from manifest, lockfile, and SBOM.
"""

# The module docstring must be the first statement of the module to be exposed
# as `__doc__`; placed after the imports it is only a discarded expression.

from attributecode.model import About
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
from packageurl import PackageURL
from python_inspector.resolve_cli import resolver_api

from scanpipe.models import DiscoveredPackage
33+
34+
35+
def resolve_pypi_packages(input_location):
    """
    Return the packages resolved by `resolver_api` from the `input_location`
    requirements file, with `prefer_source` enabled.
    """
    resolution = resolver_api(requirement_files=[input_location], prefer_source=True)
    return resolution.packages
44+
45+
46+
def resolve_about_packages(input_location):
    """
    Return a one-item list holding the package data loaded from the
    `input_location` .ABOUT file.

    When the file declares a `package_url`, its non-empty components are merged
    into the data before it is limited to the `DiscoveredPackage` fields.
    """
    about_data = About(location=input_location).as_dict()

    package_url = about_data.get("package_url")
    if package_url:
        purl_fields = PackageURL.from_string(package_url).to_dict(encode=True)
        for name, value in purl_fields.items():
            # Empty purl components must not overwrite the .ABOUT values.
            if not value:
                continue
            about_data[name] = value

    return [DiscoveredPackage.clean_data(about_data)]
61+
62+
63+
def get_default_package_type(input_location):
    """
    Return the `default_package_type` of the first datafile handler recognizing
    the provided `input_location`, or None when no handler matches.
    This type is used to get the related resolver that knows how to process the
    input.
    """
    matching_types = (
        handler.default_package_type
        for handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS
        if handler.is_datafile(input_location)
    )
    return next(matching_types, None)
71+
72+
73+
# Resolver functions keyed by the `default_package_type` they know how to handle.
resolver_registry = {
    "about": resolve_about_packages,
    "pypi": resolve_pypi_packages,
}

scanpipe/tests/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"subpath": None,
5050
"primary_language": None,
5151
"description": "add and remove users and groups",
52-
"release_date": None,
52+
"release_date": "1999-10-10",
5353
"parties": [
5454
{
5555
"type": None,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
about_resource: Django-4.0.8-py3-none-any.whl
2+
name: django
3+
version: 4.0.8
4+
download_url: https://files.pythonhosted.org/packages/e1/d0/d90528978da16288d470bb423abad307ed7ae724090132ff6bf67d6a5579/Django-4.0.8-py3-none-any.whl
5+
package_url: pkg:pypi/[email protected]
6+
license_expression: bsd-new
7+
attribute: yes
8+
checksum_md5: 386349753c386e574dceca5067e2788a
9+
checksum_sha1: 4cc6f7abda928a0b12cd1f1cd8ad3677519ca04e
10+
licenses:
11+
- key: bsd-new
12+
name: BSD-3-Clause
13+
file: bsd-new.LICENSE

scanpipe/tests/test_pipelines.py

+59
Original file line numberDiff line numberDiff line change
@@ -624,3 +624,62 @@ def test_scanpipe_check_vulnerabilities_pipeline_integration_test(
624624
package1.refresh_from_db()
625625
expected = {"discovered_vulnerabilities": vulnerability_data}
626626
self.assertEqual(expected, package1.extra_data)
627+
628+
def test_scanpipe_inspect_manifest_pipeline_integration_test(self):
    """
    The pipeline fails when no package type is found for the provided input.
    """
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    # Create the input through a context manager so the file handle is closed
    # before the file is moved: `tempfile.mkstemp()[1]` drops the open file
    # descriptor without ever closing it.
    with tempfile.NamedTemporaryFile(delete=False) as input_file:
        input_location = input_file.name
    project1.move_input_from(input_location)

    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No package type found for", out)
639+
640+
@mock.patch("scanpipe.pipes.resolve.resolver_api")
def test_scanpipe_inspect_manifest_pipeline_pypi_integration_test(
    self, resolver_api
):
    """
    The pipeline fails when the (mocked) resolver returns no packages and
    creates the discovered packages otherwise.
    """
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    resolver_api.return_value = mock.Mock(packages=[])
    # Create the input through a context manager so the file handle is closed
    # before the file is moved: `tempfile.mkstemp()[1]` drops the open file
    # descriptor without ever closing it.
    with tempfile.NamedTemporaryFile(
        suffix="requirements.txt", delete=False
    ) as input_file:
        input_location = input_file.name
    project1.move_input_from(input_location)

    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No packages could be resolved", out)

    resolver_api.return_value = mock.Mock(packages=[package_data1])
    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    discoveredpackage = project1.discoveredpackages.get()
    # These fields are left out of the value comparison below.
    exclude_fields = ["qualifiers", "release_date", "size"]
    for field_name, value in package_data1.items():
        if value and field_name not in exclude_fields:
            self.assertEqual(value, getattr(discoveredpackage, field_name))
666+
667+
def test_scanpipe_inspect_manifest_pipeline_aboutfile_integration_test(self):
    """
    Running the pipeline on a .ABOUT input creates a single discovered package
    carrying the type, name, version, and license expression from that file.
    """
    project1 = Project.objects.create(name="Analysis")

    about_file = self.data_location / "Django-4.0.8-py3-none-any.whl.ABOUT"
    project1.copy_input_from(about_file)

    run = project1.add_pipeline("inspect_manifest")
    pipeline = run.make_pipeline_instance()

    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    package = project1.discoveredpackages.get()
    expected_values = {
        "type": "pypi",
        "name": "django",
        "version": "4.0.8",
        "license_expression": "bsd-new",
    }
    for field_name, expected in expected_values.items():
        self.assertEqual(expected, getattr(package, field_name))

scanpipe/tests/test_pipes.py

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/nexB/scancode.io for support and download.
2222

23+
import datetime
2324
import json
2425
import os
2526
import tempfile
@@ -1055,6 +1056,7 @@ def test_scanpipe_pipes_update_or_create_package(self):
10551056
package = update_or_create_package(p1, package_data1)
10561057
self.assertEqual("pkg:deb/debian/[email protected]?arch=all", package.purl)
10571058
self.assertEqual("", package.primary_language)
1059+
self.assertEqual(datetime.date(1999, 10, 10), package.release_date)
10581060

10591061
updated_data = dict(package_data1)
10601062
updated_data["primary_language"] = "Python"
@@ -1067,9 +1069,11 @@ def test_scanpipe_pipes_update_or_create_package(self):
10671069
package_data2 = dict(package_data1)
10681070
package_data2["name"] = "new name"
10691071
package_data2["package_uid"] = ""
1072+
package_data2["release_date"] = "2020-11-01T01:40:20"
10701073
package2 = update_or_create_package(p1, package_data2, resource1)
10711074
self.assertNotEqual(package.pk, package2.pk)
10721075
self.assertIn(resource1, package2.codebase_resources.all())
1076+
self.assertEqual(datetime.date(2020, 11, 1), package2.release_date)
10731077

10741078

10751079
class ScanPipePipesTransactionTest(TransactionTestCase):

setup.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ install_requires =
7575
commoncode==31.0.0
7676
# FetchCode
7777
fetchcode-container==1.2.3.210512; sys_platform == "linux"
78+
# Inspectors
79+
python-inspector==0.9.2
80+
aboutcode-toolkit==7.2.0
7881
# Utilities
7982
XlsxWriter==3.0.3
8083
requests==2.28.1
@@ -107,6 +110,7 @@ scancodeio_pipelines =
107110
check_vulnerabilities = scanpipe.pipelines.check_vulnerabilities:CheckVulnerabilities
108111
docker = scanpipe.pipelines.docker:Docker
109112
docker_windows = scanpipe.pipelines.docker_windows:DockerWindows
113+
inspect_manifest = scanpipe.pipelines.inspect_manifest:InspectManifest
110114
load_inventory = scanpipe.pipelines.load_inventory:LoadInventory
111115
root_filesystems = scanpipe.pipelines.root_filesystems:RootFS
112116
scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase

0 commit comments

Comments
 (0)