# --- hsmodels/__init__.py ---------------------------------------------------
from rdflib.plugin import register
from rdflib.serializer import Serializer

# Register the pretty JSON-LD serializer as an rdflib Serializer plugin so that
# it can be selected with the format name 'json-ld-pretty'.
register(
    'json-ld-pretty',
    Serializer,
    'hsmodels.serializers',
    'PrettyJsonLDSerializer',
)


# --- hsmodels/schemas/__init__.py (changed region only) ----------------------
from pydantic import AnyUrl, BaseModel
from rdflib import Graph, Literal, URIRef

from hsmodels.namespaces import (
    CITOTERMS,
    DASH,
    DC,
    DCTERMS,
    FOAF,
    HSRESOURCE,
    HSTERMS,
    HSUSER,
    ORE,
    RDF,
    RDFS,
    RDFS1,
    SCHEMA,
    SH,
    XML,
    XSD,
)


def rdf_graph(schema):
    """Build an rdflib ``Graph`` for *schema* with the project prefixes bound.

    A fresh graph is created and every namespace prefix listed below is bound
    on it, in the order shown. If *schema* is an instance of one of the classes
    stored as values in ``user_schemas``, it is first rebuilt as the matching
    key class from ``schema.dict(to_rdf=True)``; otherwise it is used as is.

    :param schema: the schema instance to convert
    :return: whatever ``_rdf_graph`` returns for the schema and the new graph
    """
    graph = Graph()

    # (prefix, namespace) pairs; order and spelling are kept exactly as bound
    # originally, including the upper-case 'ORE' prefix.
    prefix_table = (
        ('hsresource', HSRESOURCE),
        ('dcterms', DCTERMS),
        ('rdfs1', RDFS1),
        ('schema', SCHEMA),
        ('hsterms', HSTERMS),
        ('xml', XML),
        ('rdfs', RDFS),
        ('dc', DC),
        ('citoterms', CITOTERMS),
        ('xsd', XSD),
        ('sh', SH),
        ('rdf', RDF),
        ('foaf', FOAF),
        ('dash', DASH),
        ('ORE', ORE),
        ('hsuser', HSUSER),
    )
    for prefix, namespace in prefix_table:
        graph.bind(prefix, namespace)

    # Only the first matching user schema class is used for the conversion.
    for rdf_schema, user_schema in user_schemas.items():
        if isinstance(schema, user_schema):
            schema = rdf_schema(**schema.dict(to_rdf=True))
            break

    return _rdf_graph(schema, graph)


def rdf_string(schema, rdf_format='pretty-xml'):
    """Serialize *schema* to RDF in the given rdflib format.

    ``auto_compact=True`` is always passed along to the serializer.

    :param schema: the schema instance to serialize
    :param rdf_format: rdflib serializer format name
    :return: the result of ``Graph.serialize`` for the graph built by
        ``rdf_graph``
    """
    graph = rdf_graph(schema)
    return graph.serialize(format=rdf_format, auto_compact=True)
# --- hsmodels/serializers.py --------------------------------------------------
# NOTE: the same change set moves the dependency pin in requirements.txt and
# setup.py from `rdflib<6.0.0` to `rdflib>=6.0.0`.
import warnings
from typing import IO, Optional

from rdflib.graph import Graph
from rdflib.namespace import XSD
from rdflib.plugins.shared.jsonld.util import json
from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf

__all__ = ["PrettyJsonLDSerializer", "from_rdf"]


PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}

# Identifiers starting with this prefix are treated as blank nodes to inline.
_BLANK_NODE_PREFIX = "_:N"


class PrettyJsonLDSerializer(JsonLDSerializer):
    """JSON-LD serializer that nests blank nodes inside the nodes using them."""

    def __init__(self, store: Graph):
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ):
        """Write the store to *stream* as indented JSON-LD.

        The graph is converted with ``from_rdf``, blank nodes are folded into
        the nodes that reference them (see ``distribute_nodes``) and the result
        is dumped as JSON.

        :param stream: binary stream that receives the encoded document
        :param base: base IRI handed to ``from_rdf``
        :param encoding: output encoding, "utf-8" when omitted; anything other
            than "utf-8"/"utf-16" triggers a warning but is still used
        :param kwargs: ``context``, ``use_native_types``, ``use_rdf_type`` and
            ``auto_compact`` are forwarded to ``from_rdf``; ``indent``
            (default 2), ``separators`` (default ``(",", ": ")``),
            ``sort_keys`` (default True) and ``ensure_ascii`` (default False)
            are forwarded to ``json.dumps``
        :return: None
        """
        encoding = encoding or "utf-8"
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
            )

        context_data = kwargs.get("context")
        # This used to be wrapped in a one-element tuple by a stray trailing
        # comma. A non-empty tuple is always truthy, so native types were
        # effectively always enabled and the caller's value was ignored.
        # Defaulting to True keeps that output while honouring an explicit
        # use_native_types=False.
        use_native_types = kwargs.get("use_native_types", True)
        use_rdf_type = kwargs.get("use_rdf_type", False)
        auto_compact = kwargs.get("auto_compact", False)

        indent = kwargs.get("indent", 2)
        separators = kwargs.get("separators", (",", ": "))
        sort_keys = kwargs.get("sort_keys", True)
        ensure_ascii = kwargs.get("ensure_ascii", False)

        obj = from_rdf(
            self.store,
            context_data,
            base,
            use_native_types,
            use_rdf_type,
            auto_compact=auto_compact,
        )

        # Here is where the compaction takes place.
        distribute_nodes(obj)

        data = json.dumps(
            obj,
            indent=indent,
            separators=separators,
            sort_keys=sort_keys,
            ensure_ascii=ensure_ascii,
        )

        # Characters the encoding cannot represent are replaced, not raised on.
        stream.write(data.encode(encoding, "replace"))


def _is_blank_node_id(value) -> bool:
    """Return True when *value* is a string starting with the blank prefix."""
    return isinstance(value, str) and value.startswith(_BLANK_NODE_PREFIX)


def distribute_nodes(jld):
    """Inline blank nodes of a JSON-LD document into the nodes that use them.

    Every dict in ``jld['@graph']`` whose ``'@id'`` starts with ``"_:N"`` is
    treated as a blank node: its ``'@id'`` key is removed and each
    ``{'@id': '_:N...'}`` reference reachable from a root (a graph entry with
    any other ``'@id'``) is replaced by that node's dict. The document is
    modified in place. Blank-node entries themselves stay in ``'@graph'``
    (without their ``'@id'``), as before.

    Compared with a plain one-level substitution this also:

    * keeps list positions (references are replaced where they stand, so the
      order of values, e.g. inside ``@list``, is not disturbed);
    * follows references found inside inlined blank nodes, stopping at
      references that would form a cycle;
    * leaves references to unknown blank nodes untouched and does nothing when
      there is no ``'@graph'`` list.

    :param jld: JSON-LD document as produced by ``from_rdf``
    :return: None
    """
    graph = jld.get('@graph') if isinstance(jld, dict) else None
    if not isinstance(graph, list):
        # No top-level node list, so there is nothing to distribute.
        return

    # Split the graph entries into blank nodes (to be inlined) and roots.
    nodes_by_id = {}
    roots = []
    for entry in graph:
        if not isinstance(entry, dict) or '@id' not in entry:
            continue
        if _is_blank_node_id(entry['@id']):
            nodes_by_id[entry.pop('@id')] = entry
        else:
            roots.append(entry)

    expanding = set()  # blank nodes currently being walked (cycle guard)
    expanded = set()   # blank nodes whose own references are already resolved

    def inline(value):
        """Return the replacement for *value*, resolving nested references."""
        if isinstance(value, dict):
            node_id = value.get('@id')
            if (
                _is_blank_node_id(node_id)
                and node_id in nodes_by_id
                and node_id not in expanding
            ):
                node = nodes_by_id[node_id]
                if node_id not in expanded:
                    expanding.add(node_id)
                    walk(node)
                    expanding.discard(node_id)
                    expanded.add(node_id)
                return node
            walk(value)
        elif isinstance(value, list):
            walk(value)
        return value

    def walk(container):
        """Replace references held directly or indirectly by *container*."""
        if isinstance(container, dict):
            # Snapshot the keys: values are reassigned while iterating.
            for key in list(container):
                container[key] = inline(container[key])
        else:
            for index, item in enumerate(container):
                container[index] = inline(item)

    # Run the node replacements for each root.
    for root in roots:
        walk(root)