-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathloader_root.py
185 lines (154 loc) · 7.59 KB
/
loader_root.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
from abc import ABC, abstractmethod
from typing import Iterator, TextIO, Union, Optional, Callable, Dict, Type, Any, List
from logging import getLogger
from pydantic import BaseModel
from hbreader import FileInfo, hbread
from jsonasobj2 import as_dict, JsonObj
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import URI_TO_LOCAL
# Module-level size constant. Nothing in the visible part of this file reads
# it; presumably it bounds a cache used by code that imports this module
# (TODO confirm against callers).
CACHE_SIZE = 1024
class Loader(ABC):
    """Abstract base class for loaders that build target-class instances.

    Concrete subclasses must implement :meth:`load_any` and
    :meth:`iter_instances`. The remaining methods are shared plumbing:
    reading a source, cleaning JSON-LD structures and constructing instances
    of a ``YAMLRoot`` or pydantic ``BaseModel`` subclass.
    """

    def __init__(self, source: Optional[Union[str, dict, TextIO]] = None):
        """Remember the (optional) source this loader was created for.

        :param source: file name, URL, text, dictionary or open file handle
        """
        self.source = source
        super().__init__()

    @staticmethod
    def json_clean(inp: Any) -> Any:
        """
        Remove empty values and JSON-LD relics from an input structure.

        The structure is cleaned *in place* and also returned. A value counts
        as empty when it is ``None``, ``[]`` or ``{}``; falsy scalars such as
        ``0``, ``''`` and ``False`` are kept. Dictionary keys that start with
        ``'@'`` are dropped.

        :param inp: JSON-LD representation
        :return: JSON representation (the same object as ``inp``)
        """
        def _is_empty(o) -> bool:
            return o is None or o == [] or o == {}

        if isinstance(inp, list):
            # Slice assignment filters in place so callers holding a reference
            # to the list see the cleaned content. (Deleting with the element
            # itself as the index raises TypeError.)
            inp[:] = [e for e in inp if not _is_empty(e)]
            for e in inp:
                Loader.json_clean(e)
        elif isinstance(inp, dict):
            # Iterate over a snapshot because entries are deleted on the fly.
            for k, v in list(inp.items()):
                if (isinstance(k, str) and k.startswith('@')) or _is_empty(v):
                    del inp[k]
                else:
                    Loader.json_clean(v)
        return inp

    def load_source(self,
                    source: Union[str, dict, TextIO],
                    loader: Callable[[Union[str, Dict], FileInfo], Optional[Union[Dict, List]]],
                    target_class: Union[Type[YAMLRoot], Type[BaseModel]],
                    accept_header: Optional[str] = "text/plain, application/yaml;q=0.9",
                    metadata: Optional[FileInfo] = None) -> Optional[Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]]:
        """ Base loader - convert a file, url, string, open file handle or dictionary into an instance
        of target_class

        :param source: URL, file name, block of text, Existing Object or open file handle
        :param loader: Take a stringified image or a dictionary and return a loadable dictionary
        :param target_class: Destination class
        :param accept_header: Accept header to use if doing a request
        :param metadata: Metadata about the source. Filled in as we go along
        :return: Instance of the target class if loader worked
        """
        # metadata is optional; create it up front so that reading base_path
        # below cannot fail and the loader callback always gets a FileInfo.
        if metadata is None:
            metadata = FileInfo()
        data = self._read_source(source, metadata=metadata, base_dir=metadata.base_path, accept_header=accept_header)
        data_as_dict = loader(data, metadata)
        return self._construct_target_class(data_as_dict, target_class=target_class)

    def load(self, *args, **kwargs) -> Union[BaseModel, YAMLRoot]:
        """
        Load source as a single instance of target_class.

        All arguments are forwarded unchanged to :meth:`load_any`.

        :param source: source file/text/url to load
        :param target_class: destination class
        :param base_dir: scoping directory for source if it is a file or url
        :param metadata: metadata about the source
        :param _: extensions
        :return: instance of target_class
        :raises ValueError: if :meth:`load_any` returns anything other than a
            single BaseModel or YAMLRoot instance (for example a list or None)
        """
        results = self.load_any(*args, **kwargs)
        if isinstance(results, (BaseModel, YAMLRoot)):
            return results
        raise ValueError(f'Result is not an instance of BaseModel or YAMLRoot: {type(results)}')

    def load_as_dict(self, *args, **kwargs) -> Union[dict, List[dict]]:
        """Load source as a plain dictionary or list of dictionaries.

        Not implemented in the base class.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    @abstractmethod
    def load_any(self, source: Union[str, dict, TextIO], target_class: Type[Union[BaseModel, YAMLRoot]], *, base_dir: Optional[str] = None,
                 metadata: Optional[FileInfo] = None, **_) -> Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]:
        """
        Load source as an instance of target_class, or list of instances of target_class

        :param source: source file/text/url to load
        :param target_class: destination class
        :param base_dir: scoping directory for source if it is a file or url
        :param metadata: metadata about the source
        :param _: extensions
        :return: instance of target_class
        """
        raise NotImplementedError()

    def loads_any(self, source: str, target_class: Type[Union[BaseModel, YAMLRoot]], *, metadata: Optional[FileInfo] = None, **_) -> Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]:
        """
        Load source as a string as an instance of target_class, or list of instances of target_class

        Only ``source``, ``target_class`` and ``metadata`` are forwarded to
        :meth:`load_any`; extension keyword arguments are accepted but not
        passed on.

        :param source: source
        :param target_class: destination class
        :param metadata: metadata about the source
        :param _: extensions
        :return: instance of target_class
        """
        return self.load_any(source, target_class, metadata=metadata)

    def loads(self, source: str, target_class: Type[Union[BaseModel, YAMLRoot]], *, metadata: Optional[FileInfo] = None, **_) -> Union[BaseModel, YAMLRoot]:
        """
        Load source as a string

        Only ``source``, ``target_class`` and ``metadata`` are forwarded to
        :meth:`load`; extension keyword arguments are accepted but not
        passed on.

        :param source: source
        :param target_class: destination class
        :param metadata: metadata about the source
        :param _: extensions
        :return: instance of target_class
        """
        return self.load(source, target_class, metadata=metadata)

    @abstractmethod
    def iter_instances(self) -> Iterator[Any]:
        """Lazily load data instances from the source

        :return: Iterator over data instances
        :rtype: Iterator[Any]
        """
        pass

    def _construct_target_class(self,
                                data_as_dict: Union[dict, List[dict]],
                                target_class: Union[Type[YAMLRoot], Type[BaseModel]]) -> Optional[Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]]:
        """Build target_class instance(s) from already-parsed data.

        :param data_as_dict: a dictionary (one instance), a list of
            dictionary-like items (list of instances) or a JsonObj
        :param target_class: YAMLRoot or BaseModel subclass to instantiate
        :return: an instance, a list of instances, or None when
            ``data_as_dict`` is falsy (None, empty dict, empty list)
        :raises ValueError: for a list with an unsupported target_class, or
            for data of an unexpected type
        """
        if not data_as_dict:
            return None

        if isinstance(data_as_dict, list):
            if issubclass(target_class, YAMLRoot):
                return [target_class(**as_dict(x)) for x in data_as_dict]
            elif issubclass(target_class, BaseModel):
                return [target_class.parse_obj(as_dict(x)) for x in data_as_dict]
            else:
                raise ValueError(f'Cannot load list of {target_class}')
        elif isinstance(data_as_dict, dict):
            if issubclass(target_class, BaseModel):
                return target_class.parse_obj(data_as_dict)
            else:
                return target_class(**data_as_dict)
        elif isinstance(data_as_dict, JsonObj):
            # NOTE(review): builds one instance per item yielded by iterating
            # the JsonObj; what that iteration yields is defined by jsonasobj2
            # and is not visible here - confirm.
            return [target_class(**as_dict(x)) for x in data_as_dict]
        else:
            raise ValueError(f'Unexpected type {data_as_dict}')

    def _read_source(self,
                     source: Union[str, dict, TextIO],
                     *,
                     base_dir: Optional[str] = None,
                     metadata: Optional[FileInfo] = None,
                     accept_header: Optional[str] = "text/plain, application/yaml;q=0.9") -> Union[dict, str]:
        """Return the raw content of ``source``.

        A dictionary is returned as is. Anything else is first mapped to a
        local path when its string form is a key of ``URI_TO_LOCAL`` and is
        then handed to ``hbread``.

        :param source: file name, URL, text, dictionary or open file handle
        :param base_dir: scoping directory; copied into ``metadata.base_path``
            when that is not already set
        :param metadata: metadata about the source; created when omitted
        :param accept_header: Accept header passed through to ``hbread``
        :return: ``source`` itself for a dictionary, otherwise the result of
            ``hbread``
        """
        if metadata is None:
            metadata = FileInfo()
        if base_dir and not metadata.base_path:
            metadata.base_path = base_dir

        if isinstance(source, dict):
            return source

        # Try to get local version of schema, if one is known to exist
        try:
            if str(source) in URI_TO_LOCAL.keys():
                source = str(URI_TO_LOCAL[str(source)])
        except (TypeError, KeyError) as e:
            # Fine, use original `source` value
            logger = getLogger('linkml_runtime.loaders.Loader')
            logger.debug(f"Error converting stringlike source to local linkml file: {source}, got: {e}")
        return hbread(source, metadata, base_dir, accept_header)