Skip to content

Commit fc0aff3

Browse files
authored
Add a new "Inspect Manifest" pipeline #284 (#552)
Signed-off-by: Thomas Druez <[email protected]>
1 parent 4b690cc commit fc0aff3

File tree

12 files changed

+277
-6
lines changed

12 files changed

+277
-6
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Changelog
44
v31.1.0 (unreleased)
55
--------------------
66

7+
- Add a new "inspect manifest" pipeline to resolve packages from manifest, lockfile,
8+
and SBOM. The resolved packages are created as discovered packages.
9+
Support PyPI "requirements.txt" files and AboutCode ".ABOUT" files.
10+
https://github.com/nexB/scancode.io/issues/284
11+
712
- Add a new "check vulnerabilities" pipeline to lookup vulnerabilities in the
813
VulnerableCode database for all project discovered packages.
914
Vulnerability data is stored in the extra_data field of each package.

docs/built-in-pipelines.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ Docker Windows Image Analysis
3939
:members:
4040
:member-order: bysource
4141

42+
.. _pipeline_inspect_manifest:
43+
44+
Inspect Manifest
45+
----------------
46+
.. autoclass:: scanpipe.pipelines.inspect_manifest.InspectManifest()
47+
:members:
48+
:member-order: bysource
49+
50+
.. _pipeline_load_inventory:
51+
4252
Load Inventory From Scan
4353
------------------------
4454
.. autoclass:: scanpipe.pipelines.load_inventory.LoadInventory()

docs/scanpipe-pipes.rst

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ Docker
2323
.. automodule:: scanpipe.pipes.docker
2424
:members:
2525

26-
Windows
27-
-------
28-
.. automodule:: scanpipe.pipes.windows
29-
:members:
3026

3127
Fetch
3228
-----
@@ -45,13 +41,27 @@ Output
4541
:members:
4642
:exclude-members: JSONResultsGenerator
4743

44+
Resolve
45+
-------
46+
.. automodule:: scanpipe.pipes.resolve
47+
:members:
48+
4849
RootFS
4950
------
50-
5151
.. automodule:: scanpipe.pipes.rootfs
5252
:members:
5353

5454
ScanCode
5555
--------
5656
.. automodule:: scanpipe.pipes.scancode
5757
:members:
58+
59+
VulnerableCode
60+
--------------
61+
.. automodule:: scanpipe.pipes.vulnerablecode
62+
:members:
63+
64+
Windows
65+
-------
66+
.. automodule:: scanpipe.pipes.windows
67+
:members:

scanpipe/models.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,6 +2318,17 @@ def create_from_data(cls, project, package_data):
23182318
discovered_package.save(save_error=False, capture_exception=False)
23192319
return discovered_package
23202320

2321+
@classmethod
def clean_data(cls, data, include_none=False):
    """
    Return the `data` dict keeping only entries for fields available in the model.

    Entries are kept whatever their value, including None and empty values.
    The `include_none` argument is currently not used by this method.
    """
    # Fetch the model field names once instead of once per `data` entry.
    model_fields = cls.model_fields()
    return {
        field_name: value
        for field_name, value in data.items()
        if field_name in model_fields
    }
2331+
23212332
@property
23222333
def spdx_id(self):
23232334
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
from scanpipe.pipelines import Pipeline
24+
from scanpipe.pipes import resolve
25+
from scanpipe.pipes import update_or_create_package
26+
27+
28+
class InspectManifest(Pipeline):
29+
"""
30+
A pipeline to inspect one or more manifest files and resolve its packages.
31+
32+
Supports:
33+
- PyPI "requirements.txt" files
34+
- AboutCode ".ABOUT" files
35+
"""
36+
37+
@classmethod
38+
def steps(cls):
39+
return (
40+
cls.get_manifest_inputs,
41+
cls.create_packages_from_manifest,
42+
)
43+
44+
def get_manifest_inputs(self):
45+
"""
46+
Locates all the manifest files from the project's input/ directory.
47+
"""
48+
self.input_locations = [
49+
str(input.absolute()) for input in self.project.inputs()
50+
]
51+
52+
def create_packages_from_manifest(self):
53+
"""
54+
Resolves manifest files into packages.
55+
"""
56+
for input_location in self.input_locations:
57+
default_package_type = resolve.get_default_package_type(input_location)
58+
if not default_package_type:
59+
raise Exception(f"No package type found for {input_location}")
60+
61+
resolver = resolve.resolver_registry.get(default_package_type)
62+
if not resolver:
63+
raise Exception(
64+
f'No resolver for package type "{default_package_type}" for '
65+
f"{input_location}"
66+
)
67+
68+
resolved_packages = resolver(input_location=input_location)
69+
if not resolved_packages:
70+
raise Exception(f"No packages could be resolved for {input_location}")
71+
72+
for package_data in resolved_packages:
73+
update_or_create_package(self.project, package_data)

scanpipe/pipes/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ def update_or_create_package(project, package_data, codebase_resource=None):
9494
except DiscoveredPackage.DoesNotExist:
9595
package = None
9696

97+
package_data = package_data.copy()
98+
if release_date := package_data.get("release_date"):
99+
if type(release_date) is str:
100+
package_data["release_date"] = datetime.fromisoformat(release_date).date()
101+
97102
if package:
98103
package.update_from_data(package_data)
99104
else:

scanpipe/pipes/resolve.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
"""
Utilities to resolve packages from manifest, lockfile, and SBOM.
"""

from attributecode.model import About
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
from packageurl import PackageURL
from python_inspector.resolve_cli import resolver_api

from scanpipe.models import DiscoveredPackage
33+
34+
35+
def resolve_pypi_packages(input_location):
    """
    Return the packages resolved from the `input_location` requirements file.

    The file is passed to `resolver_api` as the single `requirement_files`
    entry, with `prefer_source` enabled.
    """
    resolution = resolver_api(requirement_files=[input_location], prefer_source=True)
    return resolution.packages
44+
45+
46+
def resolve_about_packages(input_location):
    """
    Return a one-item list holding the package data of the `input_location`
    .ABOUT file.

    When the file declares a `package_url`, its non-empty components are merged
    into the data before it is filtered by `DiscoveredPackage.clean_data`.
    """
    about_data = About(location=input_location).as_dict()

    package_url = about_data.get("package_url")
    if package_url:
        purl_fields = PackageURL.from_string(package_url).to_dict(encode=True)
        # Only non-empty Package URL components override the .ABOUT values.
        about_data.update(
            {name: value for name, value in purl_fields.items() if value}
        )

    return [DiscoveredPackage.clean_data(about_data)]
61+
62+
63+
def get_default_package_type(input_location):
    """
    Return the `default_package_type` of the first handler that recognizes
    `input_location` as one of its datafiles, or None when no handler matches.
    This type is used to get the related resolver that knows how to process
    the input.
    """
    matching_types = (
        handler.default_package_type
        for handler in APPLICATION_PACKAGE_DATAFILE_HANDLERS
        if handler.is_datafile(input_location)
    )
    return next(matching_types, None)
71+
72+
73+
# Registry of resolver functions keyed by the `default_package_type` they handle.
resolver_registry = {
    "about": resolve_about_packages,
    "pypi": resolve_pypi_packages,
}

scanpipe/tests/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"subpath": None,
5050
"primary_language": None,
5151
"description": "add and remove users and groups",
52-
"release_date": None,
52+
"release_date": "1999-10-10",
5353
"parties": [
5454
{
5555
"type": None,
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
about_resource: Django-4.0.8-py3-none-any.whl
2+
name: django
3+
version: 4.0.8
4+
download_url: https://files.pythonhosted.org/packages/e1/d0/d90528978da16288d470bb423abad307ed7ae724090132ff6bf67d6a5579/Django-4.0.8-py3-none-any.whl
5+
package_url: pkg:pypi/django@4.0.8
6+
license_expression: bsd-new
7+
attribute: yes
8+
checksum_md5: 386349753c386e574dceca5067e2788a
9+
checksum_sha1: 4cc6f7abda928a0b12cd1f1cd8ad3677519ca04e
10+
licenses:
11+
- key: bsd-new
12+
name: BSD-3-Clause
13+
file: bsd-new.LICENSE

scanpipe/tests/test_pipelines.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,3 +624,62 @@ def test_scanpipe_check_vulnerabilities_pipeline_integration_test(
624624
package1.refresh_from_db()
625625
expected = {"discovered_vulnerabilities": vulnerability_data}
626626
self.assertEqual(expected, package1.extra_data)
627+
628+
def test_scanpipe_inspect_manifest_pipeline_integration_test(self):
    """
    Executing the pipeline on a temporary input without a recognizable name
    exits with code 1 and reports that no package type was found.
    """
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    # `tempfile.mkstemp()` returns an open OS-level handle; create the file
    # through a context manager instead so that the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False) as temp_input:
        temp_location = temp_input.name
    project1.move_input_from(temp_location)

    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No package type found for", out)
639+
640+
@mock.patch("scanpipe.pipes.resolve.resolver_api")
def test_scanpipe_inspect_manifest_pipeline_pypi_integration_test(
    self, resolver_api
):
    """
    Executing the pipeline on a "requirements.txt" input fails when the patched
    `resolver_api` yields no packages, and creates one discovered package when
    it yields `package_data1`.
    """
    pipeline_name = "inspect_manifest"
    project1 = Project.objects.create(name="Analysis")

    run = project1.add_pipeline(pipeline_name)
    pipeline = run.make_pipeline_instance()

    resolver_api.return_value = mock.Mock(packages=[])
    # `tempfile.mkstemp()` returns an open OS-level handle; create the file
    # through a context manager instead so that the handle is closed.
    with tempfile.NamedTemporaryFile(
        suffix="requirements.txt", delete=False
    ) as temp_input:
        temp_location = temp_input.name
    project1.move_input_from(temp_location)

    exitcode, out = pipeline.execute()
    self.assertEqual(1, exitcode, msg=out)
    self.assertIn("No packages could be resolved", out)

    resolver_api.return_value = mock.Mock(packages=[package_data1])
    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    discoveredpackage = project1.discoveredpackages.get()
    # These fields are left out of the value-by-value comparison below.
    exclude_fields = ["qualifiers", "release_date", "size"]
    for field_name, value in package_data1.items():
        if value and field_name not in exclude_fields:
            self.assertEqual(value, getattr(discoveredpackage, field_name))
666+
667+
def test_scanpipe_inspect_manifest_pipeline_aboutfile_integration_test(self):
    """
    Executing the pipeline on the Django .ABOUT fixture succeeds and creates a
    single discovered package with the expected type, name, version and license.
    """
    project1 = Project.objects.create(name="Analysis")

    about_file = self.data_location / "Django-4.0.8-py3-none-any.whl.ABOUT"
    project1.copy_input_from(about_file)

    pipeline = project1.add_pipeline("inspect_manifest").make_pipeline_instance()

    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    self.assertEqual(1, project1.discoveredpackages.count())
    package = project1.discoveredpackages.get()
    expected_values = {
        "type": "pypi",
        "name": "django",
        "version": "4.0.8",
        "license_expression": "bsd-new",
    }
    for field_name, expected in expected_values.items():
        self.assertEqual(expected, getattr(package, field_name))

0 commit comments

Comments
 (0)