Skip to content

Commit 91bb744

Browse files
committed
[issue-558] add optional feature to generate a relationship graph
Signed-off-by: Meret Behrens <[email protected]>
1 parent 0e1df0a commit 91bb744

8 files changed

+295
-15
lines changed

README.md

+15-1
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,11 @@ This library implements SPDX parsers, convertors, validators and handlers in Pyt
3838

3939
# Features
4040

41-
* API to create and manipulate SPDX v2.2 and v2.3 documents.
41+
* API to create and manipulate SPDX v2.2 and v2.3 documents
4242
* Parse, convert, create and validate SPDX files
4343
* supported formats: Tag/Value, RDF, JSON, YAML, XML
44+
* visualize the structure of an SPDX document by creating an `AGraph`. Note: This is an optional feature and requires
45+
additional installation of optional dependencies
4446

4547
# Planned features
4648

@@ -78,6 +80,18 @@ instead of `bin`.
7880

7981
* For help use `pyspdxtools --help`
8082

83+
3. **GRAPH GENERATION** (optional feature)
84+
85+
* This feature generates a graph representing all elements in the SPDX document and their connections based on the provided
86+
relationships. The graph can be rendered to a picture. Below is an example for the file `tests/spdx/data/formats/SPDXJSONExample-v2.3.spdx.json`:
87+
![SPDXJSONExample-v2.3.spdx.png](assets/SPDXJSONExample-v2.3.spdx.png)
88+
* Make sure you install the optional dependencies `networkx` and `pygraphviz`. To do so run `pip install ".[graph_generation]"`.
89+
* Use `pyspdxtools -i <input_file> --graph -o <output_file>` where `<output_file>` is an output file name with a format supported by `pygraphviz` (check
90+
the documentation [here](https://pygraphviz.github.io/documentation/stable/reference/agraph.html#pygraphviz.AGraph.draw)).
91+
* If you are using a source distribution, try running
92+
`pyspdxtools -i tests/spdx/data/formats/SPDXJSONExample-v2.3.spdx.json --graph -o SPDXJSONExample-v2.3.spdx.png` to generate
93+
a png with an overview of the structure of the example file.
94+
8195
## Library usage
8296
1. **DATA MODEL**
8397
* The `src.spdx.model` package constitutes the internal SPDX v2.3 data model (v2.2 is simply a subset of this).

assets/SPDXJSONExample-v2.3.spdx.png

144 KB
Loading

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ dynamic = ["version"]
3030
[project.optional-dependencies]
3131
test = ["pytest"]
3232
code_style = ["isort", "black", "flake8"]
33+
graph_generation = ["pygraphviz", "networkx"]
3334

3435
[project.scripts]
3536
pyspdxtools = "spdx.clitools.pyspdxtools:main"

src/spdx/clitools/pyspdxtools.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import click
2020

21+
from spdx.graph_generation import export_graph_from_document
2122
from spdx.model.document import Document
2223
from spdx.parser.error import SPDXParsingError
2324
from spdx.parser.parse_anything import parse_file
@@ -32,7 +33,8 @@
3233
@click.option(
3334
"--outfile",
3435
"-o",
35-
help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion).",
36+
help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion). "
37+
"If you add the option --graph to the command the generated graph will be written to this file.",
3638
)
3739
@click.option(
3840
"--version",
@@ -41,7 +43,15 @@
4143
default=None,
4244
)
4345
@click.option("--novalidation", is_flag=True, help="Don't validate the provided document.")
44-
def main(infile: str, outfile: str, version: str, novalidation: bool):
46+
@click.option(
47+
"--graph",
48+
is_flag=True,
49+
default=False,
50+
help="Generate a relationship graph from the input file. "
51+
"The generated graph is saved to the file specified with --outfile. "
52+
"Note: You need to install the optional dependencies 'networkx' and 'pygraphviz' for this feature.",
53+
)
54+
def main(infile: str, outfile: str, version: str, novalidation: bool, graph: bool):
4555
"""
4656
CLI-tool for validating SPDX documents and converting between RDF, TAG-VALUE, JSON, YAML and XML formats.
4757
Formats are determined by the file endings.
@@ -50,9 +60,6 @@ def main(infile: str, outfile: str, version: str, novalidation: bool):
5060
try:
5161
document: Document = parse_file(infile)
5262

53-
if outfile == "-":
54-
tagvalue_writer.write_document(document, sys.stdout)
55-
5663
if not novalidation:
5764
if not version:
5865
version = document.creation_info.spdx_version
@@ -72,7 +79,20 @@ def main(infile: str, outfile: str, version: str, novalidation: bool):
7279
else:
7380
logging.info("The document is valid.")
7481

75-
if outfile and outfile != "-":
82+
if outfile == "-":
83+
tagvalue_writer.write_document(document, sys.stdout)
84+
85+
elif graph:
86+
try:
87+
export_graph_from_document(document, outfile)
88+
except ImportError:
89+
logging.error(
90+
"To be able to draw a relationship graph of the parsed document "
91+
"you need to install 'networkx' and 'pygraphviz'. Run 'pip install \".[graph_generation]\"'."
92+
)
93+
sys.exit(1)
94+
95+
elif outfile:
7696
write_file(document, outfile, validate=False)
7797

7898
except NotImplementedError as err:

src/spdx/document_utils.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: 2022 spdx contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import List, Union
4+
from typing import Dict, List, Union
55

66
from spdx.model.document import Document
77
from spdx.model.file import File
@@ -17,9 +17,15 @@ def get_contained_spdx_element_ids(document: Document) -> List[str]:
1717

1818

1919
def get_element_from_spdx_id(document: Document, spdx_id: str) -> Union[Package, File, Snippet, None]:
20-
elements = [file_ for file_ in document.files]
21-
elements.extend([package_ for package_ in document.packages])
22-
elements.extend([snippet_ for snippet_ in document.snippets])
23-
for element in elements:
24-
if element.spdx_id == spdx_id:
25-
return element
20+
contained_spdx_elements: Dict[str, Union[Package, File, Snippet]] = get_contained_spdx_elements(document)
21+
if spdx_id not in contained_spdx_elements:
22+
return None
23+
return contained_spdx_elements[spdx_id]
24+
25+
26+
def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package, File, Snippet]]:
    """Map the SPDX id of every package, file and snippet of the document to the element itself.

    Elements are inserted in the order packages, files, snippets. If the same SPDX id occurs more than once,
    the element inserted last is the one that is kept.
    """
    elements_by_spdx_id: Dict[str, Union[Package, File, Snippet]] = {}
    for element_collection in (document.packages, document.files, document.snippets):
        for element in element_collection:
            elements_by_spdx_id[element.spdx_id] = element

    return elements_by_spdx_id

src/spdx/graph_generation.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# SPDX-FileCopyrightText: 2023 spdx contributors
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from typing import Dict, List, Union
5+
6+
from spdx.model.file import File
7+
from spdx.model.package import Package
8+
from spdx.model.snippet import Snippet
9+
10+
try:
11+
from networkx import DiGraph
12+
except ImportError:
13+
DiGraph = None
14+
from spdx.document_utils import get_contained_spdx_elements
15+
from spdx.model.document import Document
16+
from spdx.model.relationship import Relationship
17+
18+
19+
def export_graph_from_document(document: Document, file_name: str) -> None:
    """Draw the relationship graph of the given document into the file `file_name`.

    The graph is built with `generate_relationship_graph_from_spdx`, coloured with `_color_nodes`,
    converted to a pygraphviz graph and laid out with the "dot" program.

    Raises:
        ImportError: if the optional dependency `networkx` cannot be imported
            (presumably also if `pygraphviz` is missing, since the conversion relies on it).
    """
    # imported lazily so that this module can still be imported without the optional dependencies
    from networkx.drawing import nx_agraph

    relationship_graph = generate_relationship_graph_from_spdx(document)
    _color_nodes(relationship_graph)

    # hand the networkx graph over to pygraphviz, which does the actual rendering
    pygraphviz_graph = nx_agraph.to_agraph(relationship_graph)
    pygraphviz_graph.draw(file_name, prog="dot")
26+
27+
28+
def generate_relationship_graph_from_spdx(document: Document) -> DiGraph:
    """Build a directed graph of the document's elements and the relationships between them.

    Nodes:
      * the document itself and every contained package, file and snippet, keyed by SPDX id and carrying
        the attribute "element";
      * one node per (source element, relationship type) pair, keyed "<spdx_element_id>_<RELATIONSHIP_TYPE>"
        and carrying the attribute "comment";
      * any SPDX id that is only referenced by a relationship, carrying "element" set to None.

    Edges run from the source element to the relationship node and from there to the related element.
    """
    from networkx import DiGraph

    graph = DiGraph()
    graph.add_node(document.creation_info.spdx_id, element=document.creation_info)

    contained_elements: Dict[str, Union[Package, File, Snippet]] = get_contained_spdx_elements(document)
    graph.add_nodes_from((spdx_id, {"element": element}) for spdx_id, element in contained_elements.items())

    # group the relationships by the element they start from, keeping the document order within each group
    relationships_by_source: Dict[str, List[Relationship]] = {}
    for relationship in document.relationships:
        source_id = relationship.spdx_element_id
        if source_id not in relationships_by_source:
            relationships_by_source[source_id] = []
        relationships_by_source[source_id].append(relationship)

    for source_id, outgoing_relationships in relationships_by_source.items():
        if source_id not in graph.nodes:
            # the source is not part of this document (e.g. an external element): we know nothing about it,
            # but still set the attribute "element" to mark the node as an element node
            graph.add_node(source_id, element=None)

        for relationship in outgoing_relationships:
            relationship_node = "_".join((relationship.spdx_element_id, relationship.relationship_type.name))
            graph.add_node(relationship_node, comment=relationship.comment)
            graph.add_edge(relationship.spdx_element_id, relationship_node)

            # the related element may be SpdxNone or SpdxNoAssertion, so convert it to its string form
            target_id = str(relationship.related_spdx_element_id)
            if target_id not in graph.nodes:
                # same as above: unknown targets become element nodes without further information
                graph.add_node(target_id, element=None)
            graph.add_edge(relationship_node, target_id)

    return graph
64+
65+
66+
def _color_nodes(graph: DiGraph) -> None:
    """Add a "color" attribute to every node and a "label" attribute to relationship nodes.

    Element nodes are recognised by their "element" attribute, which the graph generation sets on every
    element node (possibly to None). Nodes without this attribute represent a RelationshipType. Checking
    the attribute instead of looking for "_" in the node key keeps element ids that contain an underscore
    from being coloured and relabelled as relationships.
    """
    for node, attributes in graph.nodes(data=True):
        if "element" not in attributes:
            # The key of a relationship node is "<spdx_element_id>_<RelationshipType>". To only see the
            # RelationshipType when rendering the graph to a picture we add it as a label. The source
            # element is the predecessor of the relationship node, so its id is stripped as a prefix;
            # splitting at the first "_" is only the fallback if no matching predecessor is found.
            label = node.split("_", 1)[-1]
            for source_id in graph.predecessors(node):
                prefix = source_id + "_"
                if node.startswith(prefix):
                    label = node[len(prefix):]
                    break
            graph.add_node(node, color="lightgreen", label=label)
        elif node == "SPDXRef-DOCUMENT":
            graph.add_node(node, color="indianred2")
        else:
            graph.add_node(node, color="lightskyblue")

tests/spdx/test_document_utils.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pytest
77

8-
from spdx.document_utils import get_contained_spdx_element_ids, get_element_from_spdx_id
8+
from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id
99
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture
1010

1111

@@ -26,3 +26,11 @@ def test_get_element_from_spdx_id(variables):
2626
assert get_element_from_spdx_id(document, file.spdx_id) == file
2727
assert get_element_from_spdx_id(document, snippet.spdx_id) == snippet
2828
assert get_element_from_spdx_id(document, "unknown_id") is None
29+
30+
31+
def test_get_contained_spdx_elements(variables):
32+
document, package, file, snippet = variables
33+
contained_elements = get_contained_spdx_elements(document)
34+
assert contained_elements[package.spdx_id] == package
35+
assert contained_elements[file.spdx_id] == file
36+
assert contained_elements[snippet.spdx_id] == snippet

tests/spdx/test_graph_generation.py

+155
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# SPDX-FileCopyrightText: 2023 spdx contributors
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from pathlib import Path
5+
from typing import List
6+
from unittest import TestCase
7+
8+
import pytest
9+
10+
from spdx.graph_generation import generate_relationship_graph_from_spdx
11+
from spdx.model.document import Document
12+
from spdx.model.relationship import Relationship, RelationshipType
13+
from spdx.parser.parse_anything import parse_file
14+
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture
15+
16+
try:
17+
import networkx # noqa: F401
18+
import pygraphviz # noqa: F401
19+
except ImportError:
20+
pytest.skip("Skip this module as the tests need optional dependencies to run.", allow_module_level=True)
21+
22+
23+
@pytest.mark.parametrize(
24+
"file_name, nodes_count, edges_count, relationship_node_keys",
25+
[
26+
(
27+
"SPDXJSONExample-v2.3.spdx.json",
28+
22,
29+
22,
30+
["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"],
31+
),
32+
(
33+
"SPDXJSONExample-v2.2.spdx.json",
34+
20,
35+
19,
36+
["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"],
37+
),
38+
(
39+
"SPDXRdfExample-v2.3.spdx.rdf.xml",
40+
22,
41+
22,
42+
["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"],
43+
),
44+
(
45+
"SPDXRdfExample-v2.2.spdx.rdf.xml",
46+
20,
47+
17,
48+
["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"],
49+
),
50+
(
51+
"SPDXTagExample-v2.3.spdx",
52+
22,
53+
22,
54+
["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"],
55+
),
56+
],
57+
)
58+
def test_generate_graph_from_spdx(
59+
file_name: str,
60+
nodes_count: int,
61+
edges_count: int,
62+
relationship_node_keys: List[str],
63+
) -> None:
64+
document = parse_file(str(Path(__file__).resolve().parent.parent / "spdx" / "data" / "formats" / file_name))
65+
graph = generate_relationship_graph_from_spdx(document)
66+
67+
assert document.creation_info.spdx_id in graph.nodes()
68+
assert graph.number_of_nodes() == nodes_count
69+
assert graph.number_of_edges() == edges_count
70+
assert "SPDXRef-DOCUMENT_DESCRIBES" in graph.nodes()
71+
for relationship_node_key in relationship_node_keys:
72+
assert relationship_node_key in graph.nodes()
73+
74+
75+
def test_complete_connected_graph() -> None:
76+
document = _create_minimal_document()
77+
78+
graph = generate_relationship_graph_from_spdx(document)
79+
80+
TestCase().assertCountEqual(
81+
graph.nodes(),
82+
[
83+
"SPDXRef-DOCUMENT",
84+
"SPDXRef-Package-A",
85+
"SPDXRef-Package-B",
86+
"SPDXRef-File",
87+
"SPDXRef-DOCUMENT_DESCRIBES",
88+
"SPDXRef-Package-A_CONTAINS",
89+
"SPDXRef-Package-B_CONTAINS",
90+
],
91+
)
92+
TestCase().assertCountEqual(
93+
graph.edges(),
94+
[
95+
("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"),
96+
("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"),
97+
("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"),
98+
("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"),
99+
("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"),
100+
("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"),
101+
("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"),
102+
],
103+
)
104+
105+
106+
def test_complete_unconnected_graph() -> None:
107+
document = _create_minimal_document()
108+
document.packages += [package_fixture(spdx_id="SPDXRef-Package-C", name="Package without connection to document")]
109+
110+
graph = generate_relationship_graph_from_spdx(document)
111+
112+
TestCase().assertCountEqual(
113+
graph.nodes(),
114+
[
115+
"SPDXRef-DOCUMENT",
116+
"SPDXRef-Package-A",
117+
"SPDXRef-Package-B",
118+
"SPDXRef-File",
119+
"SPDXRef-DOCUMENT_DESCRIBES",
120+
"SPDXRef-Package-A_CONTAINS",
121+
"SPDXRef-Package-B_CONTAINS",
122+
"SPDXRef-Package-C",
123+
],
124+
)
125+
TestCase().assertCountEqual(
126+
graph.edges(),
127+
[
128+
("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"),
129+
("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"),
130+
("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"),
131+
("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"),
132+
("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"),
133+
("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"),
134+
("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"),
135+
],
136+
)
137+
138+
139+
def _create_minimal_document() -> Document:
140+
packages = [
141+
package_fixture(spdx_id="SPDXRef-Package-A", name="Package-A"),
142+
package_fixture(spdx_id="SPDXRef-Package-B", name="Package-B"),
143+
]
144+
files = [
145+
file_fixture(spdx_id="SPDXRef-File", name="File"),
146+
]
147+
relationships = [
148+
Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-A"),
149+
Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-B"),
150+
Relationship("SPDXRef-Package-A", RelationshipType.CONTAINS, "SPDXRef-File"),
151+
Relationship("SPDXRef-Package-B", RelationshipType.CONTAINS, "SPDXRef-File"),
152+
]
153+
document = document_fixture(packages=packages, files=files, relationships=relationships, snippets=[])
154+
155+
return document

0 commit comments

Comments
 (0)