Skip to content

Commit 9d824c8

Browse files
committed
add wrapper methods to ensure correct types in rdf parser
Signed-off-by: Meret Behrens <[email protected]>
1 parent 0e22204 commit 9d824c8

18 files changed

+165
-59
lines changed

src/spdx/parser/rdf/annotation_parser.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: 2023 spdx contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from rdflib import RDFS, Graph, URIRef
4+
from rdflib import RDFS, BNode, Graph, URIRef
55

66
from spdx.datetime_conversions import datetime_from_str
77
from spdx.model.annotation import Annotation, AnnotationType
@@ -12,7 +12,7 @@
1212
from spdx.rdfschema.namespace import SPDX_NAMESPACE
1313

1414

15-
def parse_annotation(annotation_node: URIRef, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
15+
def parse_annotation(annotation_node: BNode, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
1616
logger = Logger()
1717
spdx_id = parse_spdx_id(parent_node, doc_namespace, graph)
1818
annotator = parse_literal(

src/spdx/parser/rdf/checksum_parser.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: 2023 spdx contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from rdflib import Graph, URIRef
4+
from rdflib import BNode, Graph
55

66
from spdx.model.checksum import Checksum, ChecksumAlgorithm
77
from spdx.parser.error import SPDXParsingError
@@ -11,7 +11,7 @@
1111
from spdx.rdfschema.namespace import SPDX_NAMESPACE
1212

1313

14-
def parse_checksum(parent_node: URIRef, graph: Graph) -> Checksum:
14+
def parse_checksum(parent_node: BNode, graph: Graph) -> Checksum:
1515
logger = Logger()
1616
algorithm = parse_literal(
1717
logger, graph, parent_node, SPDX_NAMESPACE.algorithm, parsing_method=convert_rdf_to_algorithm

src/spdx/parser/rdf/creation_info_parser.py

+14-5
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,12 @@
1919
from spdx.parser.logger import Logger
2020
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
2121
from spdx.parser.rdf.checksum_parser import parse_checksum
22-
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_spdx_id, remove_prefix
22+
from spdx.parser.rdf.graph_parsing_functions import (
23+
get_correctly_typed_triples,
24+
parse_literal,
25+
parse_spdx_id,
26+
remove_prefix,
27+
)
2328
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE
2429

2530

@@ -50,10 +55,14 @@ def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
5055
)
5156
creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)
5257
creators = []
53-
for _, _, creator_literal in graph.triples((creation_info_node, SPDX_NAMESPACE.creator, None)):
54-
creators.append(ActorParser.parse_actor(creator_literal))
58+
for _, _, creator_literal in get_correctly_typed_triples(
59+
logger, graph, creation_info_node, SPDX_NAMESPACE.creator
60+
):
61+
creators.append(ActorParser.parse_actor(creator_literal.toPython()))
5562
external_document_refs = []
56-
for _, _, external_document_node in graph.triples((doc_node, SPDX_NAMESPACE.externalDocumentRef, None)):
63+
for _, _, external_document_node in get_correctly_typed_triples(
64+
logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
65+
):
5766
external_document_refs.append(parse_external_document_refs(external_document_node, graph, namespace))
5867

5968
raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")
@@ -93,7 +102,7 @@ def parse_namespace_and_spdx_id(graph: Graph) -> (str, str):
93102
)
94103
sys.exit(1)
95104

96-
namespace, spdx_id = urldefrag(subject)
105+
namespace, spdx_id = urldefrag(str(subject))
97106

98107
if not namespace:
99108
logging.error(

src/spdx/parser/rdf/extracted_licensing_info_parser.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,11 @@
66
from spdx.model.extracted_licensing_info import ExtractedLicensingInfo
77
from spdx.parser.logger import Logger
88
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
9-
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_literal_or_no_assertion_or_none
9+
from spdx.parser.rdf.graph_parsing_functions import (
10+
get_correctly_typed_triples,
11+
parse_literal,
12+
parse_literal_or_no_assertion_or_none,
13+
)
1014
from spdx.rdfschema.namespace import SPDX_NAMESPACE
1115

1216

@@ -28,7 +32,9 @@ def parse_extracted_licensing_info(
2832
logger, graph, extracted_licensing_info_node, SPDX_NAMESPACE.name
2933
)
3034
cross_references = []
31-
for _, _, cross_reference_node in graph.triples((extracted_licensing_info_node, RDFS.seeAlso, None)):
35+
for _, _, cross_reference_node in get_correctly_typed_triples(
36+
logger, graph, extracted_licensing_info_node, RDFS.seeAlso
37+
):
3238
cross_references.append(cross_reference_node.toPython())
3339
raise_parsing_error_if_logger_has_messages(logger, "ExtractedLicensingInfo")
3440
extracted_licensing_info = construct_or_raise_parsing_error(

src/spdx/parser/rdf/file_parser.py

+12-5
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from spdx.parser.rdf.checksum_parser import parse_checksum
1010
from spdx.parser.rdf.graph_parsing_functions import (
1111
apply_parsing_method_or_log_error,
12+
get_correctly_typed_triples,
1213
get_correctly_typed_value,
1314
parse_enum_value,
1415
parse_literal,
@@ -24,7 +25,7 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
2425
spdx_id = parse_spdx_id(file_node, doc_namespace, graph)
2526
name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName)
2627
checksums = []
27-
for _, _, checksum_node in graph.triples((file_node, SPDX_NAMESPACE.checksum, None)):
28+
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, file_node, SPDX_NAMESPACE.checksum):
2829
checksums.append(parse_checksum(checksum_node, graph))
2930

3031
file_types = []
@@ -39,25 +40,31 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
3940
graph,
4041
file_node,
4142
SPDX_NAMESPACE.licenseConcluded,
42-
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
43+
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
4344
)
4445
license_info_in_file = []
4546
for _, _, license_info_from_files_node in graph.triples((file_node, SPDX_NAMESPACE.licenseInfoInFile, None)):
4647
license_info_in_file.append(
4748
get_correctly_typed_value(
48-
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
49+
logger,
50+
license_info_from_files_node,
51+
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
4952
)
5053
)
5154
license_comment = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.licenseComments)
5255
copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, file_node, SPDX_NAMESPACE.copyrightText)
5356
file_contributors = []
54-
for _, _, file_contributor in graph.triples((file_node, SPDX_NAMESPACE.fileContributor, None)):
57+
for _, _, file_contributor in get_correctly_typed_triples(
58+
logger, graph, file_node, SPDX_NAMESPACE.fileContributor, None
59+
):
5560
file_contributors.append(file_contributor.toPython())
5661

5762
notice_text = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.noticeText)
5863
comment = parse_literal(logger, graph, file_node, RDFS.comment)
5964
attribution_texts = []
60-
for _, _, attribution_text_literal in graph.triples((file_node, SPDX_NAMESPACE.attributionText, None)):
65+
for _, _, attribution_text_literal in get_correctly_typed_triples(
66+
logger, graph, file_node, SPDX_NAMESPACE.attributionText, None
67+
):
6168
attribution_texts.append(attribution_text_literal.toPython())
6269
raise_parsing_error_if_logger_has_messages(logger, "File")
6370
file = construct_or_raise_parsing_error(

src/spdx/parser/rdf/graph_parsing_functions.py

+46-3
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,12 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
from enum import Enum
5-
from typing import Any, Callable, Optional, Type
5+
from typing import Any, Callable, Iterator, Optional, Tuple, Type, Union
66

7-
from rdflib import Graph, URIRef
7+
from rdflib import RDF, Graph, URIRef
88
from rdflib.exceptions import UniquenessError
99
from rdflib.namespace import NamespaceManager
10-
from rdflib.term import Node
10+
from rdflib.term import BNode, Literal, Node
1111

1212
from spdx.casing_tools import camel_case_to_snake_case
1313
from spdx.model.spdx_no_assertion import SPDX_NO_ASSERTION_STRING, SpdxNoAssertion
@@ -102,3 +102,46 @@ def remove_prefix(string: str, prefix: str) -> str:
102102
if string.startswith(prefix):
103103
return string[len(prefix) :]
104104
return string
105+
106+
107+
def get_correctly_typed_triples(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = None,
    _object: Optional[Node] = None,
) -> Iterator[Tuple[Union[BNode, URIRef], Node, Union[BNode, Literal, URIRef]]]:
    """Yield every triple from ``graph.triples()`` matching the pattern, flagging unexpected node types.

    This is a helper to narrow the rdf types coming out of ``graph.triples()`` so that they are
    compatible with the code that follows. Every matching triple is yielded unchanged, whether or
    not a message was logged for it.

    Args:
        logger: Receives a message (via ``append``) for each subject or object of an unexpected type.
        graph: The graph to query.
        subject: Subject to match, or ``None`` as a wildcard.
        predicate: Predicate to match, or ``None`` as a wildcard.
        _object: Object to match, or ``None`` as a wildcard.

    Yields:
        ``(subject, predicate, object)`` tuples exactly as produced by ``graph.triples()``.
    """
    for s, p, o in graph.triples((subject, predicate, _object)):
        if not isinstance(s, (BNode, URIRef)):
            logger.append(
                f"Warning: Subject {s} should be of type BNode or URIRef, but is {type(s).__name__}. "
                f"This might lead to a failure."
            )
        if not isinstance(o, (BNode, Literal, URIRef)):
            logger.append(
                f"Warning: Object {o} should be of type BNode, Literal or URIRef, but is {type(o).__name__}. "
                f"This might lead to a failure."
            )
        # The triple is passed on even after a warning; the caller decides how to proceed.
        yield s, p, o
128+
129+
130+
def get_value_from_graph(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = RDF.value,
    _object: Optional[Node] = None,
    default: Optional[Any] = None,
    _any: Optional[bool] = True,
) -> Optional[Union[URIRef, Literal, BNode]]:
    """Return ``graph.value()`` for the given pattern, flagging results of an unexpected node type.

    This is a helper to narrow the rdf types coming out of ``graph.value()`` so that they are
    compatible with the code that follows.

    Args:
        logger: Receives a message (via ``append``) if the value found has an unexpected type.
        graph: The graph to query.
        subject: Forwarded to ``graph.value()`` as ``subject``.
        predicate: Forwarded to ``graph.value()`` as ``predicate``.
        _object: Forwarded to ``graph.value()`` as ``object``.
        default: Forwarded to ``graph.value()`` as ``default``; may be any Python object.
        _any: Forwarded to ``graph.value()`` as ``any``.

    Returns:
        Whatever ``graph.value()`` returned. When that is ``default`` itself, the result has the
        caller's type and may not be an rdf node.
    """
    value = graph.value(subject=subject, predicate=predicate, object=_object, default=default, any=_any)
    # A missing value or the caller's own default (e.g. ``default=True``) is not data from the
    # graph, so it must not be type-checked: doing so would log a spurious warning.
    if value is None or value is default:
        return value
    if not isinstance(value, (URIRef, Literal, BNode)):
        logger.append(
            f"Warning: Node {value} should be of type BNode, Literal or URIRef, but is {type(value).__name__}. "
            f"This might lead to a failure."
        )
    return value
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,25 @@
11
# SPDX-FileCopyrightText: 2023 spdx contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import Union
4+
from typing import Optional, Union
55

66
from license_expression import LicenseExpression, get_spdx_licensing
77
from rdflib import RDF, Graph
88
from rdflib.term import BNode, Identifier, Node, URIRef
99

10-
from spdx.parser.rdf.graph_parsing_functions import remove_prefix
10+
from spdx.parser.logger import Logger
11+
from spdx.parser.rdf.graph_parsing_functions import get_value_from_graph, remove_prefix
1112
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE
1213

1314

1415
def parse_license_expression(
15-
license_expression_node: Union[URIRef, BNode, Node], graph: Graph, doc_namespace: str
16+
license_expression_node: Union[URIRef, BNode, Node],
17+
graph: Graph,
18+
doc_namespace: str,
19+
logger: Optional[Logger] = None,
1620
) -> LicenseExpression:
21+
if not logger:
22+
logger = Logger()
1723
spdx_licensing = get_spdx_licensing()
1824
expression = ""
1925
if license_expression_node.startswith(LICENSE_NAMESPACE):
@@ -27,28 +33,30 @@ def parse_license_expression(
2733
if node_type == SPDX_NAMESPACE.ConjunctiveLicenseSet:
2834
members = []
2935
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
30-
members.append(parse_license_expression(member_node, graph, doc_namespace))
36+
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
3137
expression = " AND ".join([str(member) for member in members])
3238
if node_type == SPDX_NAMESPACE.DisjunctiveLicenseSet:
3339
members = []
3440
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
35-
members.append(parse_license_expression(member_node, graph, doc_namespace))
41+
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
3642
expression = " OR ".join([str(member) for member in members])
3743
if node_type == SPDX_NAMESPACE.WithExceptionOperator:
3844
license_expression = parse_license_expression(
39-
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace
45+
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace, logger
4046
)
4147
exception = parse_license_exception(
42-
graph.value(license_expression_node, SPDX_NAMESPACE.licenseException), graph
48+
get_value_from_graph(logger, graph, license_expression_node, SPDX_NAMESPACE.licenseException),
49+
graph,
50+
logger,
4351
)
4452
expression = f"{license_expression} WITH {exception}"
4553

4654
return spdx_licensing.parse(expression)
4755

4856

49-
def parse_license_exception(exception_node: Identifier, graph: Graph) -> str:
57+
def parse_license_exception(exception_node: Identifier, graph: Graph, logger: Optional[Logger] = None) -> str:
    """Return the license exception id that ``exception_node`` stands for.

    Args:
        exception_node: Node identifying the exception.
        graph: Graph in which the ``licenseExceptionId`` of the node is looked up when needed.
        logger: Passed on to ``get_value_from_graph`` for the lookup. Optional, matching
            ``parse_license_expression``; a fresh ``Logger`` is used when omitted so that
            two-argument callers keep working.

    Returns:
        The node's text with ``LICENSE_NAMESPACE`` stripped when it starts with that prefix,
        otherwise the Python value of the node's ``licenseExceptionId`` property.
    """
    if logger is None:
        logger = Logger()
    if exception_node.startswith(LICENSE_NAMESPACE):
        return remove_prefix(exception_node, LICENSE_NAMESPACE)
    # NOTE(review): if the graph has no licenseExceptionId for this node, the lookup presumably
    # returns None and ``.toPython()`` raises AttributeError - confirm whether that is intended.
    return get_value_from_graph(logger, graph, exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython()

src/spdx/parser/rdf/package_parser.py

+16-7
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from typing import Optional
55

66
from rdflib import DOAP, RDFS, Graph, URIRef
7+
from rdflib.term import BNode
78

89
from spdx.datetime_conversions import datetime_from_str
910
from spdx.model.package import (
@@ -18,7 +19,9 @@
1819
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
1920
from spdx.parser.rdf.checksum_parser import parse_checksum
2021
from spdx.parser.rdf.graph_parsing_functions import (
22+
get_correctly_typed_triples,
2123
get_correctly_typed_value,
24+
get_value_from_graph,
2225
parse_enum_value,
2326
parse_literal,
2427
parse_literal_or_no_assertion_or_none,
@@ -36,7 +39,7 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
3639
logger, graph, package_node, SPDX_NAMESPACE.downloadLocation
3740
)
3841
checksums = []
39-
for _, _, checksum_node in graph.triples((package_node, SPDX_NAMESPACE.checksum, None)):
42+
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum):
4043
checksums.append(parse_checksum(checksum_node, graph))
4144

4245
version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo)
@@ -57,28 +60,34 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
5760
)
5861

5962
external_package_refs = []
60-
for _, _, external_package_ref_node in graph.triples((package_node, SPDX_NAMESPACE.externalRef, None)):
63+
for _, _, external_package_ref_node in get_correctly_typed_triples(
64+
logger, graph, package_node, SPDX_NAMESPACE.externalRef
65+
):
6166
external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace))
62-
files_analyzed = bool(graph.value(package_node, SPDX_NAMESPACE.filesAnalyzed, default=True))
67+
files_analyzed = bool(
68+
get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)
69+
)
6370
license_concluded = parse_literal_or_no_assertion_or_none(
6471
logger,
6572
graph,
6673
package_node,
6774
SPDX_NAMESPACE.licenseConcluded,
68-
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
75+
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
6976
)
7077
license_declared = parse_literal_or_no_assertion_or_none(
7178
logger,
7279
graph,
7380
package_node,
7481
SPDX_NAMESPACE.licenseDeclared,
75-
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
82+
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
7683
)
7784
license_info_from_files = []
7885
for _, _, license_info_from_files_node in graph.triples((package_node, SPDX_NAMESPACE.licenseInfoFromFiles, None)):
7986
license_info_from_files.append(
8087
get_correctly_typed_value(
81-
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
88+
logger,
89+
license_info_from_files_node,
90+
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
8291
)
8392
)
8493
license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments)
@@ -161,7 +170,7 @@ def parse_package_verification_code(
161170
return package_verification_code
162171

163172

164-
def parse_external_package_ref(external_package_ref_node: URIRef, graph: Graph, doc_namespace) -> ExternalPackageRef:
173+
def parse_external_package_ref(external_package_ref_node: BNode, graph: Graph, doc_namespace) -> ExternalPackageRef:
165174
logger = Logger()
166175
ref_locator = parse_literal(logger, graph, external_package_ref_node, SPDX_NAMESPACE.referenceLocator)
167176
ref_category = parse_literal(

src/spdx/parser/rdf/rdf_parser.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from spdx.parser.rdf.creation_info_parser import parse_creation_info
1515
from spdx.parser.rdf.extracted_licensing_info_parser import parse_extracted_licensing_info
1616
from spdx.parser.rdf.file_parser import parse_file
17+
from spdx.parser.rdf.graph_parsing_functions import get_correctly_typed_triples
1718
from spdx.parser.rdf.package_parser import parse_package
1819
from spdx.parser.rdf.relationship_parser import parse_implicit_relationship, parse_relationship
1920
from spdx.parser.rdf.snippet_parser import parse_snippet
@@ -46,7 +47,7 @@ def translate_graph_to_document(graph: Graph) -> Document:
4647
("snippets", (None, RDF.type, SPDX_NAMESPACE.Snippet), parse_snippet),
4748
]:
4849
elements = []
49-
for element_node, _, _ in graph.triples(triple):
50+
for element_node, _, _ in get_correctly_typed_triples(logger, graph, *triple):
5051
try:
5152
elements.append(parsing_method(element_node, graph, creation_info.document_namespace))
5253
except SPDXParsingError as err:
@@ -69,7 +70,7 @@ def translate_graph_to_document(graph: Graph) -> Document:
6970
((None, SPDX_NAMESPACE.hasFile, None), RelationshipType.CONTAINS),
7071
((None, SPDX_NAMESPACE.describesPackage, None), RelationshipType.DESCRIBES),
7172
]:
72-
for parent_node, _, element_node in graph.triples(triple):
73+
for parent_node, _, element_node in get_correctly_typed_triples(logger, graph, *triple):
7374
try:
7475
relationship = parse_implicit_relationship(
7576
parent_node, relationship_type, element_node, graph, creation_info.document_namespace
@@ -81,7 +82,9 @@ def translate_graph_to_document(graph: Graph) -> Document:
8182
logger.extend(err.get_messages())
8283

8384
extracted_licensing_infos = []
84-
for _, _, extracted_licensing_info_node in graph.triples((None, SPDX_NAMESPACE.hasExtractedLicensingInfo, None)):
85+
for _, _, extracted_licensing_info_node in get_correctly_typed_triples(
86+
logger, graph, None, SPDX_NAMESPACE.hasExtractedLicensingInfo
87+
):
8588
try:
8689
extracted_licensing_infos.append(
8790
parse_extracted_licensing_info(extracted_licensing_info_node, graph, creation_info.document_namespace)

0 commit comments

Comments
 (0)