
Json ld serializer #39

Open · wants to merge 5 commits into main
7 changes: 7 additions & 0 deletions hsmodels/__init__.py
@@ -0,0 +1,7 @@
from rdflib.serializer import Serializer
from rdflib.plugin import register


register(
    'json-ld-pretty', Serializer,
    'hsmodels.serializers', 'PrettyJsonLDSerializer')
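
For context, once hsmodels is imported this registration makes the new format name available to any rdflib graph. A minimal sketch, assuming rdflib >= 6 (the example triples and prefixes are illustrative, not part of this PR); auto_compact=True mirrors what the updated rdf_string passes and prompts rdflib to build an @context from the graph's prefix bindings:

from rdflib import BNode, Graph, Literal, URIRef
from rdflib.namespace import DC, FOAF

import hsmodels  # importing the package runs the register() call above

g = Graph()
g.bind('dc', DC)
g.bind('foaf', FOAF)
resource = URIRef("http://example.org/resource")
creator = BNode()
g.add((resource, DC.creator, creator))
g.add((creator, FOAF.name, Literal("Jane Doe")))

# rdflib resolves 'json-ld-pretty' to PrettyJsonLDSerializer via the plugin registry
print(g.serialize(format='json-ld-pretty', auto_compact=True))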
26 changes: 22 additions & 4 deletions hsmodels/schemas/__init__.py
@@ -4,7 +4,8 @@
from pydantic import AnyUrl, BaseModel
from rdflib import Graph, Literal, URIRef

from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1
from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1, HSRESOURCE, DCTERMS, SCHEMA, XML, RDFS, CITOTERMS, XSD, \
    SH, FOAF, DASH, HSUSER
from hsmodels.schemas.aggregations import (
FileSetMetadata,
GeographicFeatureMetadata,
@@ -82,14 +83,31 @@ def parse_file(schema, file, file_format='xml', subject=None):


def rdf_graph(schema):
    g = Graph()
    g.bind('hsresource', HSRESOURCE)
    g.bind('dcterms', DCTERMS)
    g.bind('rdfs1', RDFS1)
    g.bind('schema', SCHEMA)
    g.bind('hsterms', HSTERMS)
    g.bind('xml', XML)
    g.bind('rdfs', RDFS)
    g.bind('dc', DC)
    g.bind('citoterms', CITOTERMS)
    g.bind('xsd', XSD)
    g.bind('sh', SH)
    g.bind('rdf', RDF)
    g.bind('foaf', FOAF)
    g.bind('dash', DASH)
    g.bind('ORE', ORE)
    g.bind('hsuser', HSUSER)
    for rdf_schema, user_schema in user_schemas.items():
        if isinstance(schema, user_schema):
            return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), Graph())
    return _rdf_graph(schema, Graph())
            return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), g)
    return _rdf_graph(schema, g)


def rdf_string(schema, rdf_format='pretty-xml'):
    return rdf_graph(schema).serialize(format=rdf_format).decode()
    return rdf_graph(schema).serialize(format=rdf_format, auto_compact=True)


def _rdf_fields(schema):
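
With these prefix bindings and auto_compact=True in place, compact JSON-LD can be requested through the existing rdf_string helper by selecting the new format name. A sketch only; metadata stands for any already-constructed hsmodels schema instance (hypothetical, not defined in this diff):

from hsmodels.schemas import rdf_string

def to_jsonld(metadata) -> str:
    # 'metadata' is assumed to be an existing hsmodels schema instance,
    # e.g. one returned by parse_file(...).
    # rdf_string builds the graph with the bindings above and passes
    # auto_compact=True to whichever serializer rdf_format selects.
    return rdf_string(metadata, rdf_format='json-ld-pretty')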
110 changes: 110 additions & 0 deletions hsmodels/serializers.py
@@ -0,0 +1,110 @@
import warnings
from typing import IO, Optional

from rdflib.graph import Graph
from rdflib.namespace import XSD
from rdflib.plugins.shared.jsonld.util import json
from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf

__all__ = ["PrettyJsonLDSerializer", "from_rdf"]


PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}


class PrettyJsonLDSerializer(JsonLDSerializer):
    def __init__(self, store: Graph):
        super(PrettyJsonLDSerializer, self).__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ):
        # TODO: docstring w. args and return value
        encoding = encoding or "utf-8"
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
            )

        context_data = kwargs.get("context")
        use_native_types = kwargs.get("use_native_types", False)
        use_rdf_type = kwargs.get("use_rdf_type", False)
        auto_compact = kwargs.get("auto_compact", False)

        indent = kwargs.get("indent", 2)
        separators = kwargs.get("separators", (",", ": "))
        sort_keys = kwargs.get("sort_keys", True)
        ensure_ascii = kwargs.get("ensure_ascii", False)

        obj = from_rdf(
            self.store,
            context_data,
            base,
            use_native_types,
            use_rdf_type,
            auto_compact=auto_compact,
        )

        # Here is where the compaction takes place: blank nodes are
        # distributed under the root objects that reference them.
        distribute_nodes(obj)

        data = json.dumps(
            obj,
            indent=indent,
            separators=separators,
            sort_keys=sort_keys,
            ensure_ascii=ensure_ascii,
        )

        stream.write(data.encode(encoding, "replace"))


def distribute_nodes(jld):
    # group nodes to be distributed into roots
    # nodes are identified by a dictionary with {'@id': "_:N..."}
    nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")}
    roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")]

    # code for walking dictionaries and lists to replace node identifiers with the nodes
    def is_node_id(d) -> bool:
        if isinstance(d, dict):
            if "@id" in d and d["@id"].startswith("_:N"):
                return True
        return False

    def get_node(d: dict):
        return nodes_by_id[d["@id"]]

    def parse_list(l: list):
        nodes = []
        for item in l:
            if is_node_id(item):
                nodes.append((item, get_node(item)))
            if isinstance(item, list):
                parse_list(item)
            if isinstance(item, dict):
                parse_dict(item)
        for node in nodes:
            l.remove(node[0])
            l.append(node[1])

    def parse_dict(d: dict):
        nodes = []
        for key, value in d.items():
            if is_node_id(value):
                nodes.append((key, get_node(value)))
            if isinstance(value, list):
                parse_list(value)
            if isinstance(value, dict):
                parse_dict(value)
        for node in nodes:
            d[node[0]] = node[1]

    # run the node replacements for each root
    for d in roots:
        parse_dict(d)
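
To make the intent of distribute_nodes concrete, here is a small, self-contained illustration (the input document below is made up): blank-node stubs of the form {'@id': '_:N...'} inside the root objects of @graph are replaced in place with the full node bodies.

from hsmodels.serializers import distribute_nodes

doc = {
    "@graph": [
        {
            "@id": "http://example.org/resource",
            "dc:creator": {"@id": "_:N1"},  # stub pointing at the blank node below
        },
        {
            "@id": "_:N1",
            "foaf:name": "Jane Doe",
        },
    ]
}

distribute_nodes(doc)

# The root now embeds the blank node body in place of the stub:
# doc["@graph"][0] == {"@id": "http://example.org/resource",
#                      "dc:creator": {"foaf:name": "Jane Doe"}}
# (the blank node's own entry stays in @graph, minus its "@id")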


2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
rdflib<6.0.0
rdflib>=6.0.0
pydantic>=1.8.1,<2.0
email-validator
jsonschema2md
2 changes: 1 addition & 1 deletion setup.py
@@ -8,7 +8,7 @@
    packages=find_packages(include=['hsmodels', 'hsmodels.*', 'hsmodels.schemas.*', 'hsmodels.schemas.rdf.*'],
                           exclude=("tests",)),
    install_requires=[
        'rdflib<6.0.0',
        'rdflib>=6.0.0',
        'pydantic>=1.8.1,<2.0',
        'email-validator'
    ],