Skip to content

Commit b80f88d

Browse files
committed
Prepare adding shacl import
1 parent 7405c9c commit b80f88d

File tree

7 files changed

+310
-2
lines changed

7 files changed

+310
-2
lines changed

docs/introduction.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ Importing from alternative modeling frameworks
2929
See :ref:`importers`
3030

3131
* OWL (but this only works for schema-style OWL)
32+
* SHACL (in progress)
3233
* JSON-Schema
3334
* SQL DDL
3435

35-
In future other frameworks will be supported
36+
In future other frameworks will be supported.
3637

3738
Annotating schemas
3839
---------------------------------

docs/packages/importers.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,16 @@ Use robot to convert ahead of time:
4444
robot convert -i schemaorg.ttl -o schemaorg.ofn
4545
schemauto import-owl schemaorg.ofn
4646
47+
Importing from SHACL
48+
--------------------
49+
50+
You can import from a SHACL shapes file.
51+
52+
.. code-block::
53+
54+
schemauto import-shacl tests/resources/test_shacl_simple.ttl
55+
56+
4757
Importing from SQL
4858
------------------
4959

schema_automator/cli.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,34 @@ def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
495495
schema = sie.convert(rdfsfile, **args)
496496
write_schema(schema, output)
497497

498+
@main.command()
@click.argument('shaclfile')
@output_option
@schema_name_option
# The short flag -I used to be declared on both --input-type and --identifier;
# it is now kept on --identifier only so the two options no longer collide.
@click.option('--input-type',
              default='turtle',
              help="Input format, eg. turtle")
@click.option('--identifier', '-I', help="Slot to use as identifier")
@click.option('--model-uri', help="Model URI prefix")
@click.option('--metamodel-mappings',
              help="Path to metamodel mappings YAML dictionary")
# NOTE(review): output_option above presumably already declares --output/-o;
# confirm whether this second declaration is still needed.
@click.option('--output', '-o', help="Path to saved yaml schema")
def import_shacl(shaclfile, output, metamodel_mappings, **args):
    """
    Import a SHACL shapes file to LinkML

    Example:

        schemauto import-shacl mymodel.shacl.ttl -o mymodel.yaml
    """
    # Optional user-supplied overrides/extensions for the metamodel mappings.
    mappings_obj = None
    if metamodel_mappings:
        with open(metamodel_mappings) as f:
            mappings_obj = yaml.safe_load(f)
    # ShaclImportEngine.convert() names the serialization parameter ``format``;
    # forward --input-type under that name so the option actually takes effect
    # instead of being swallowed by convert()'s **kwargs.
    input_type = args.pop('input_type', None)
    if input_type is not None:
        args['format'] = input_type
    # NOTE(review): the keyword produced by schema_name_option is not visible
    # here; confirm it matches convert()'s ``name`` parameter.
    sie = ShaclImportEngine(initial_metamodel_mappings=mappings_obj)
    schema = sie.convert(shaclfile, **args)
    write_schema(schema, output)
525+
498526
@main.command()
499527
@click.argument('rdffile')
500528
@output_option

schema_automator/importers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
from schema_automator.importers.owl_import_engine import OwlImportEngine
33
from schema_automator.importers.dosdp_import_engine import DOSDPImportEngine
44
from schema_automator.importers.frictionless_import_engine import FrictionlessImportEngine
5-
5+
from schema_automator.importers.shacl_import_engine import ShaclImportEngine
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
import logging
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, List

from linkml.utils.schema_builder import SchemaBuilder
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import (
    SchemaDefinition,
    SlotDefinition,
    ClassDefinition,
)
from linkml_runtime.utils.formatutils import underscore
from linkml_runtime.utils.introspection import package_schemaview
from rdflib import Graph, Literal, Namespace, OWL, RDF, RDFS, SDO, SKOS, URIRef

from schema_automator.importers.import_engine import ImportEngine
10+
11+
12+
# The http:// spelling of the schema.org namespace; used alongside SDO in the
# mapping table below.
HTTP_SDO = Namespace("http://schema.org/")

# Default table from a metamodel key (a slot name or a definition class name)
# to the RDF terms treated as equivalent to it. ShaclImportEngine copies these
# into its forward and reverse lookup tables when it is constructed.
DEFAULT_METAMODEL_MAPPINGS = {
    # hierarchy
    "is_a": [RDFS.subClassOf, SKOS.broader],
    # schema.org style domain / range declarations
    "domain_of": [HTTP_SDO.domainIncludes, SDO.domainIncludes],
    "rangeIncludes": [HTTP_SDO.rangeIncludes, SDO.rangeIncludes],
    "exact_mappings": [OWL.sameAs, HTTP_SDO.sameAs],
    # rdf:type values that mark a subject as a class
    ClassDefinition.__name__: [RDFS.Class, OWL.Class, SKOS.Concept],
    # rdf:type values that mark a subject as a slot
    SlotDefinition.__name__: [
        RDF.Property,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
    ],
}
27+
28+
29+
@dataclass
class ShaclImportEngine(ImportEngine):
    """
    An ImportEngine that takes SHACL and converts it to a LinkML schema.

    NOTE(review): classes and slots are currently discovered only through the
    metamodel mapping tables (``rdf:type`` metaclasses, ``rdfs:subClassOf``,
    ``domainIncludes`` / ``rangeIncludes``); this class contains no explicit
    handling of SHACL node or property shapes yet.
    """

    # name -> original IRI, filled in by iri_to_name() during convert()
    mappings: dict = None
    # optional extra mappings supplied by the caller (metamodel key -> IRI(s))
    initial_metamodel_mappings: Dict[str, List[URIRef]] = None
    # forward table: metamodel key -> IRIs, built in __post_init__
    metamodel_mappings: Dict[str, List[URIRef]] = None
    # reverse table: IRI -> metamodel keys, built in __post_init__
    reverse_metamodel_mappings: Dict[URIRef, List[str]] = None
    include_unmapped_annotations = False
    metamodel = None
    metamodel_schemaview: SchemaView = None
    # names of the induced slots of the metamodel's class_definition
    classdef_slots: List[str] = None

    def __post_init__(self):
        """Build the forward and reverse metamodel lookup tables."""
        sv = package_schemaview("linkml_runtime.linkml_model.meta")
        self.metamodel_schemaview = sv
        self.metamodel = sv
        self.metamodel_mappings = defaultdict(list)
        self.reverse_metamodel_mappings = defaultdict(list)
        for k, vs in DEFAULT_METAMODEL_MAPPINGS.items():
            self._register_mappings(k, vs)
        if self.initial_metamodel_mappings:
            for k, vs in self.initial_metamodel_mappings.items():
                if not isinstance(vs, list):
                    vs = [vs]
                self._register_mappings(k, vs)
                for v in vs:
                    logging.info(f"Adding mapping {k} -> {v}")
        # Mappings declared by the metamodel itself are added to (not assigned
        # over) the tables, so the defaults registered above for keys such as
        # "is_a" or "domain_of" stay available in the forward table.
        for e in sv.all_elements().values():
            for ms in sv.get_mappings(e.name, expand=True).values():
                self._register_mappings(e.name, ms)
        self.classdef_slots = [
            s.name for s in sv.class_induced_slots(ClassDefinition.class_name)
        ]
        # Earlier code stored this list under the name ``defclass_slots``;
        # keep that attribute as an alias of the declared field.
        self.defclass_slots = self.classdef_slots

    def _register_mappings(self, key: str, iris) -> None:
        """Record each IRI for ``key`` in both the forward and reverse tables."""
        for iri in iris:
            # Values loaded from YAML are plain strings; normalise to URIRef so
            # both tables hold the same kind of object.
            uri = URIRef(iri)
            self.metamodel_mappings[key].append(uri)
            self.reverse_metamodel_mappings[uri].append(key)

    def convert(
        self,
        file: str,
        name: str = None,
        format="turtle",
        default_prefix: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs,
    ) -> SchemaDefinition:
        """
        Parses an RDF file and builds a LinkML schema from it.

        :param file: source handed to ``Graph.parse``
        :param name: schema name; falls back to ``default_prefix``, then "example"
        :param format: serialization name handed to ``Graph.parse``
        :param default_prefix: default prefix of the schema; falls back to ``name``
        :param model_uri: URI registered for ``default_prefix`` when the schema
            does not already have that prefix
        :param identifier: if given, an identifier slot of this name is added to
            every class that has neither ``is_a`` nor mixins
        :param kwargs: accepted for call compatibility and ignored
        :return: the populated schema
        """
        self.mappings = {}
        g = Graph()
        g.parse(file, format=format)
        if name is not None and default_prefix is None:
            default_prefix = name
        if name is None:
            name = default_prefix
        if name is None:
            name = "example"
        sb = SchemaBuilder(name=name)
        sb.add_defaults()
        schema = sb.schema
        for k, v in g.namespaces():
            # Compare as plain strings: the namespace is a URIRef, and a direct
            # comparison against a str literal would never report a match.
            if k == "schema" and str(v) != "http://schema.org/":
                continue
            sb.add_prefix(k, v, replace_if_present=True)
        if default_prefix is not None:
            schema.default_prefix = default_prefix
            if default_prefix not in schema.prefixes:
                sb.add_prefix(default_prefix, model_uri, replace_if_present=True)
            schema.id = schema.prefixes[default_prefix].prefix_reference

        # --- slots ---
        cls_slots = defaultdict(list)
        props = []
        for rdfs_property_metaclass in self._rdfs_metamodel_iri(
            SlotDefinition.__name__
        ):
            for p in g.subjects(RDF.type, rdfs_property_metaclass):
                props.append(p)
        # implicit properties: any subject carrying a domain/range style triple.
        # The predicates come from the forward table (metamodel key -> IRIs);
        # the reverse table is keyed by IRI and holds no entry for these keys.
        implicit_predicates = (
            self.metamodel_mappings.get("domain_of", [])
            + self.metamodel_mappings.get("rangeIncludes", [])
        )
        for metap in implicit_predicates:
            for p, _, _o in g.triples((None, metap, None)):
                props.append(p)
        for p in set(props):
            sn = self.iri_to_name(p)
            init_dict = self._dict_for_subject(g, p)
            if "domain_of" in init_dict:
                # remember which classes use this slot; applied to classes below
                for x in init_dict["domain_of"]:
                    cls_slots[x].append(sn)
                del init_dict["domain_of"]
            if "rangeIncludes" in init_dict:
                init_dict["any_of"] = [{"range": x} for x in init_dict["rangeIncludes"]]
                del init_dict["rangeIncludes"]
            slot = SlotDefinition(sn, **init_dict)
            slot.slot_uri = str(p.n3(g.namespace_manager))
            sb.add_slot(slot)

        # --- classes ---
        rdfs_classes = []
        for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__):
            for s in g.subjects(RDF.type, rdfs_class_metaclass):
                rdfs_classes.append(s)
        # implicit classes: both ends of every subClassOf triple
        for metap in [RDFS.subClassOf]:
            for s, _, o in g.triples((None, metap, None)):
                rdfs_classes.append(s)
                rdfs_classes.append(o)
        for s in set(rdfs_classes):
            cn = self.iri_to_name(s)
            init_dict = self._dict_for_subject(g, s)
            c = ClassDefinition(cn, **init_dict)
            c.slots = cls_slots.get(cn, [])
            c.class_uri = str(s.n3(g.namespace_manager))
            sb.add_class(c)

        # --- optional identifier slot on root classes ---
        if identifier is not None:
            id_slot = SlotDefinition(identifier, identifier=True, range="uriorcurie")
            schema.slots[identifier] = id_slot
            for c in schema.classes.values():
                if not c.is_a and not c.mixins:
                    if identifier not in c.slots:
                        c.slots.append(identifier)
        return schema

    def _dict_for_subject(self, g: Graph, s: URIRef) -> Dict[str, Any]:
        """
        Looks up triples for a subject and converts to dict using linkml keys.

        :param g: graph to read from
        :param s: subject whose predicate/object pairs are collected
        :return: keyword arguments keyed by (underscored) metaslot name
        """
        init_dict = {}
        for pp, obj in g.predicate_objects(s):
            if pp == RDF.type:
                continue
            metaslot_name = self._element_from_iri(pp)
            logging.debug(f"Mapping {pp} -> {metaslot_name}")
            # The unmapped check has to come first: None is never a member of
            # the slot list, so testing membership first made this warning
            # unreachable.
            if metaslot_name is None:
                logging.warning(f"Not mapping {pp}")
                continue
            if metaslot_name not in self.classdef_slots:
                continue
            if metaslot_name == "name":
                metaslot_name = "title"
            metaslot = self.metamodel.get_slot(metaslot_name)
            v = self._object_to_value(obj, metaslot=metaslot)
            metaslot_name_safe = underscore(metaslot_name)
            if not metaslot or metaslot.multivalued:
                init_dict.setdefault(metaslot_name_safe, []).append(v)
            else:
                init_dict[metaslot_name_safe] = v
        return init_dict

    def _rdfs_metamodel_iri(self, name: str) -> List[URIRef]:
        """Return the IRIs registered for a metamodel key (empty list if none)."""
        return self.metamodel_mappings.get(name, [])

    def _element_from_iri(self, iri: URIRef) -> str:
        """Return the first metamodel key registered for ``iri``, or None."""
        r = self.reverse_metamodel_mappings.get(iri, [])
        if len(r) > 0:
            if len(r) > 1:
                logging.debug(f"Multiple mappings for {iri}: {r}")
            return r[0]

    def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any:
        """
        Convert an RDF object node into a value usable in a LinkML definition.

        IRIs are kept as full strings when the metaslot's range is ``uri`` or
        ``uriorcurie`` and shortened to a local name otherwise; literals are
        converted to their Python value; anything else is returned unchanged.
        """
        if isinstance(obj, URIRef):
            # metaslot may be None (the default, or an unknown slot name)
            if metaslot is not None and metaslot.range in ("uriorcurie", "uri"):
                return str(obj)
            return self.iri_to_name(obj)
        if isinstance(obj, Literal):
            return obj.value
        return obj

    def iri_to_name(self, v: URIRef) -> str:
        """Shorten an IRI to a local name, recording name -> IRI in ``self.mappings``."""
        n = self._as_name(v)
        # compare as plain strings so the test reflects the text, not the types
        if n != str(v):
            self.mappings[n] = v
        return n

    def _as_name(self, v: URIRef):
        """Return the text after the last "#", "/" or ":" (first separator present wins)."""
        v = str(v)
        for sep in ["#", "/", ":"]:
            if sep in v:
                return v.split(sep)[-1]
        return v

tests/resources/test_shacl_simple.ttl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# TODO: to be written - simple SHACL shapes for the import-shacl tests
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import os
2+
import pytest
3+
4+
from linkml_runtime import SchemaView
5+
6+
from schema_automator.importers.shacl_import_engine import ShaclImportEngine
7+
from linkml.generators.yamlgen import YAMLGenerator
8+
9+
from schema_automator.utils.schemautils import write_schema
10+
from tests import INPUT_DIR, OUTPUT_DIR
11+
12+
# TODO - Write tests (this is a copy of test_rdfs_importer)
13+
14+
# Input graph and output schema paths used by the (placeholder) test below.
REPRO = os.path.join(INPUT_DIR, "reproschema.ttl")
OUTSCHEMA = os.path.join(OUTPUT_DIR, "reproschema-from-ttl.yaml")
16+
17+
18+
19+
def test_from_shacl():
    """Test Shacl conversion."""
    # Constructing the engine is the only part exercised for now.
    oie = ShaclImportEngine()

    # The remainder is a template copied from the RDFS importer test. Report it
    # as skipped rather than returning early, so the test run does not show a
    # pass for assertions that never execute.
    pytest.skip("SHACL conversion assertions not written yet")
    schema = oie.convert(REPRO, default_prefix='reproschema', identifier='id')
    write_schema(schema, OUTSCHEMA)
    # roundtrip
    s = YAMLGenerator(OUTSCHEMA).serialize()
    print(s[0:100])
    sv = SchemaView(OUTSCHEMA)
    activity = sv.get_class("Activity")
    assert activity
    assert activity.name == "Activity"
    assert activity.is_a == "CreativeWork"
    slots = sv.class_induced_slots(activity.name)
    assert len(slots) == 1
    slot = slots[0]
    assert slot.name == "id"
39+
40+
41+

0 commit comments

Comments
 (0)