|
| 1 | +import logging |
| 2 | + |
| 3 | +from linkml.utils.schema_builder import SchemaBuilder |
| 4 | +from linkml_runtime import SchemaView |
| 5 | +from linkml_runtime.linkml_model import ( |
| 6 | + SchemaDefinition, |
| 7 | + SlotDefinition, |
| 8 | + ClassDefinition, |
| 9 | +) |
| 10 | + |
| 11 | + |
# The schema.org vocabulary under its plain-http namespace; it is listed
# next to the SDO namespace in the table below so both spellings are matched.
HTTP_SDO = Namespace("http://schema.org/")

# Default lookup table: LinkML metamodel element/slot name -> the RDF terms
# that are treated as equivalent to it when reading a graph.
DEFAULT_METAMODEL_MAPPINGS = {
    # predicates read as LinkML metaslots
    "is_a": [RDFS.subClassOf, SKOS.broader],
    "domain_of": [HTTP_SDO.domainIncludes, SDO.domainIncludes],
    "rangeIncludes": [HTTP_SDO.rangeIncludes, SDO.rangeIncludes],
    "exact_mappings": [OWL.sameAs, HTTP_SDO.sameAs],
    # rdf:type objects that mark a subject as a class
    ClassDefinition.__name__: [RDFS.Class, OWL.Class, SKOS.Concept],
    # rdf:type objects that mark a subject as a slot (property)
    SlotDefinition.__name__: [
        RDF.Property,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
    ],
}
| 27 | + |
| 28 | + |
@dataclass
class ShaclImportEngine(ImportEngine):
    """
    An ImportEngine that parses an RDF graph and converts it to a LinkML schema.

    Subjects that are typed with, or implied by, the vocabulary terms held in
    ``metamodel_mappings`` are turned into LinkML classes and slots.

    NOTE(review): the class name mentions SHACL, but the logic below only
    looks up the RDFS/OWL/SKOS/schema.org terms registered in
    ``DEFAULT_METAMODEL_MAPPINGS`` plus the metamodel's own mappings --
    confirm the intended input vocabulary.
    """

    # local name -> IRI for each IRI shortened by iri_to_name(); reset by convert()
    mappings: dict = None
    # optional extra mappings (metamodel name -> IRI or list of IRIs) merged in __post_init__
    initial_metamodel_mappings: Dict[str, List[URIRef]] = None
    # metamodel name -> RDF terms; built in __post_init__
    metamodel_mappings: Dict[str, List[URIRef]] = None
    # RDF term -> metamodel names; built in __post_init__
    reverse_metamodel_mappings: Dict[URIRef, List[str]] = None
    include_unmapped_annotations = False
    metamodel = None
    metamodel_schemaview: SchemaView = None
    # names of the slots induced on ClassDefinition; filled in __post_init__
    classdef_slots: List[str] = None

    def __post_init__(self):
        """
        Build the forward and reverse metamodel <-> IRI lookup tables.

        Sources are merged in this order: ``DEFAULT_METAMODEL_MAPPINGS``,
        then ``initial_metamodel_mappings`` (if given), then the mappings
        declared on every element of the LinkML metamodel.
        """
        sv = package_schemaview("linkml_runtime.linkml_model.meta")
        self.metamodel_schemaview = sv
        self.metamodel = sv
        self.metamodel_mappings = defaultdict(list)
        self.reverse_metamodel_mappings = defaultdict(list)
        for k, vs in DEFAULT_METAMODEL_MAPPINGS.items():
            self.metamodel_mappings[k].extend(vs)
            for v in vs:
                self.reverse_metamodel_mappings[v].append(k)
        if self.initial_metamodel_mappings:
            for k, vs in self.initial_metamodel_mappings.items():
                if not isinstance(vs, list):
                    vs = [vs]
                for v in vs:
                    # Store the same URIRef in both directions so forward
                    # lookups can be matched against graph terms.
                    uri = URIRef(v)
                    self.metamodel_mappings[k].append(uri)
                    self.reverse_metamodel_mappings[uri].append(k)
                    logging.info(f"Adding mapping {k} -> {v}")
        for e in sv.all_elements().values():
            # Extend rather than assign: several default keys (e.g. "is_a",
            # "domain_of") are also metamodel element names, and assigning
            # would throw away the entries registered above.
            known = self.metamodel_mappings[e.name]
            for ms in sv.get_mappings(e.name, expand=True).values():
                for m in ms:
                    uri = URIRef(m)
                    if uri not in known:
                        known.append(uri)
                    self.reverse_metamodel_mappings[uri].append(e.name)
        self.classdef_slots = [
            s.name for s in sv.class_induced_slots(ClassDefinition.class_name)
        ]
        # Older attribute name, kept as an alias of the declared field.
        self.defclass_slots = self.classdef_slots

    def convert(
        self,
        file: str,
        name: str = None,
        format="turtle",
        default_prefix: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs,
    ) -> SchemaDefinition:
        """
        Parse an RDF file and build a LinkML schema from its classes and properties.

        :param file: source passed to ``Graph.parse``
        :param name: schema name; falls back to ``default_prefix``, then ``"example"``
        :param format: RDF serialization passed to ``Graph.parse``
        :param default_prefix: default prefix of the schema; falls back to ``name``
        :param model_uri: IRI registered for ``default_prefix`` when the graph
            does not already declare that prefix
        :param identifier: if given, a slot with this name is created as an
            identifier and added to every class without ``is_a`` or mixins
        :param kwargs: accepted but not used here
        :return: the generated schema
        """
        self.mappings = {}
        g = Graph()
        g.parse(file, format=format)
        if name is not None and default_prefix is None:
            default_prefix = name
        if name is None:
            name = default_prefix
        if name is None:
            name = "example"
        sb = SchemaBuilder(name=name)
        sb.add_defaults()
        schema = sb.schema
        for k, v in g.namespaces():
            # Compare as plain strings: an rdflib URIRef never compares equal
            # to a str, which would make this skip every "schema" prefix.
            if k == "schema" and str(v) != "http://schema.org/":
                continue
            sb.add_prefix(k, v, replace_if_present=True)
        if default_prefix is not None:
            schema.default_prefix = default_prefix
            if default_prefix not in schema.prefixes:
                sb.add_prefix(default_prefix, model_uri, replace_if_present=True)
            schema.id = schema.prefixes[default_prefix].prefix_reference

        # --- slots -----------------------------------------------------
        cls_slots = defaultdict(list)
        props = []
        for rdfs_property_metaclass in self._rdfs_metamodel_iri(
            SlotDefinition.__name__
        ):
            for p in g.subjects(RDF.type, rdfs_property_metaclass):
                props.append(p)
        # implicit properties: anything used as the subject of a
        # domain/range predicate. The predicates come from the forward
        # (name -> IRI) table; the reverse table is keyed by IRI.
        for metap in self._rdfs_metamodel_iri(
            "domain_of"
        ) + self._rdfs_metamodel_iri("rangeIncludes"):
            for p, _, _o in g.triples((None, metap, None)):
                props.append(p)
        # dict.fromkeys drops duplicates while keeping first-seen order
        for p in dict.fromkeys(props):
            sn = self.iri_to_name(p)
            init_dict = self._dict_for_subject(g, p)
            if "domain_of" in init_dict:
                for x in init_dict["domain_of"]:
                    cls_slots[x].append(sn)
                del init_dict["domain_of"]
            if "rangeIncludes" in init_dict:
                init_dict["any_of"] = [{"range": x} for x in init_dict["rangeIncludes"]]
                del init_dict["rangeIncludes"]
            slot = SlotDefinition(sn, **init_dict)
            slot.slot_uri = str(p.n3(g.namespace_manager))
            sb.add_slot(slot)

        # --- classes ---------------------------------------------------
        rdfs_classes = []
        for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__):
            for s in g.subjects(RDF.type, rdfs_class_metaclass):
                rdfs_classes.append(s)
        # implicit classes: both ends of every rdfs:subClassOf triple
        for metap in [RDFS.subClassOf]:
            for s, _, o in g.triples((None, metap, None)):
                rdfs_classes.append(s)
                rdfs_classes.append(o)
        for s in dict.fromkeys(rdfs_classes):
            cn = self.iri_to_name(s)
            init_dict = self._dict_for_subject(g, s)
            c = ClassDefinition(cn, **init_dict)
            c.slots = cls_slots.get(cn, [])
            c.class_uri = str(s.n3(g.namespace_manager))
            sb.add_class(c)

        # --- optional identifier slot on root classes --------------------
        if identifier is not None:
            id_slot = SlotDefinition(identifier, identifier=True, range="uriorcurie")
            schema.slots[identifier] = id_slot
            for c in schema.classes.values():
                if not c.is_a and not c.mixins:
                    if identifier not in c.slots:
                        c.slots.append(identifier)
        return schema

    def _dict_for_subject(self, g: Graph, s: URIRef) -> Dict[str, Any]:
        """
        Looks up triples for a subject and converts to dict using linkml keys.

        ``rdf:type`` triples are skipped, and so are predicates that have no
        metamodel mapping or whose mapped name is not in ``classdef_slots``.

        :param g: graph to read from
        :param s: subject whose predicate/object pairs are collected
        :return: metaslot name (underscored) -> value, or list of values for
            multivalued or unknown metaslots
        """
        init_dict = {}
        for pp, obj in g.predicate_objects(s):
            if pp == RDF.type:
                continue
            metaslot_name = self._element_from_iri(pp)
            logging.debug(f"Mapping {pp} -> {metaslot_name}")
            # The None test must come first; otherwise the membership test
            # below swallows unmapped predicates without a warning.
            if metaslot_name is None:
                logging.warning(f"Not mapping {pp}")
                continue
            # NOTE(review): this filter uses ClassDefinition's slots even when
            # ``s`` is a property, so slot-only keys such as "domain_of" may be
            # dropped here -- confirm whether that is intended.
            if metaslot_name not in self.classdef_slots:
                continue
            if metaslot_name == "name":
                metaslot_name = "title"
            metaslot = self.metamodel.get_slot(metaslot_name)
            v = self._object_to_value(obj, metaslot=metaslot)
            metaslot_name_safe = underscore(metaslot_name)
            if not metaslot or metaslot.multivalued:
                if metaslot_name_safe not in init_dict:
                    init_dict[metaslot_name_safe] = []
                init_dict[metaslot_name_safe].append(v)
            else:
                init_dict[metaslot_name_safe] = v
        return init_dict

    def _rdfs_metamodel_iri(self, name: str) -> List[URIRef]:
        """Return the RDF terms mapped to metamodel name ``name`` (empty list if none)."""
        return self.metamodel_mappings.get(name, [])

    def _element_from_iri(self, iri: URIRef) -> str:
        """
        Return the first metamodel name registered for ``iri``.

        :param iri: RDF term to look up in the reverse mapping table
        :return: the first registered name, or ``None`` when there is none
        """
        r = self.reverse_metamodel_mappings.get(iri, [])
        if not r:
            return None
        if len(r) > 1:
            logging.debug(f"Multiple mappings for {iri}: {r}")
        return r[0]

    def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any:
        """
        Convert an RDF object term to a plain value for a LinkML definition.

        :param obj: the RDF term (or any other value, which is returned as is)
        :param metaslot: metamodel slot the value is destined for, if known
        :return: the full IRI string for URI-ranged metaslots, a local name
            for other IRIs, the Python value for literals
        """
        if isinstance(obj, URIRef):
            # metaslot is optional; without it the IRI is shortened to a name
            if metaslot is not None and metaslot.range in ("uriorcurie", "uri"):
                return str(obj)
            return self.iri_to_name(obj)
        if isinstance(obj, Literal):
            return obj.value
        return obj

    def iri_to_name(self, v: URIRef) -> str:
        """
        Shorten an IRI to a local name and remember the name -> IRI pair.

        :param v: IRI to shorten
        :return: the local name produced by ``_as_name``
        """
        n = self._as_name(v)
        # Compare against the string form; a URIRef never equals a str.
        if n != str(v):
            self.mappings[n] = v
        return n

    def _as_name(self, v: URIRef) -> str:
        """
        Return the part of ``v`` after the last ``#``, ``/`` or ``:``.

        Separators are tried in that order and the first one present wins;
        a value containing none of them is returned unchanged.
        """
        v = str(v)
        for sep in ["#", "/", ":"]:
            if sep in v:
                return v.split(sep)[-1]
        return v
0 commit comments