Skip to content

Commit 19ebfa1

Browse files
committed
Working on improving speed of "results" query, using KG Query API directly.
1 parent 5a7593a commit 19ebfa1

File tree

2 files changed

+114
-3
lines changed

2 files changed

+114
-3
lines changed

validation_service_v2/validation_service/data_models.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import logging
77
from urllib.parse import urlparse, parse_qs
88

9+
from dateutil import parser as date_parser
10+
911
from pydantic import BaseModel, HttpUrl, AnyUrl, validator, ValidationError
1012
from fastapi.encoders import jsonable_encoder
1113
from fastapi import HTTPException, status
@@ -24,6 +26,10 @@
2426
logger = logging.getLogger("validation_service_v2")
2527

2628

29+
def uuid_from_uri(uri):
30+
return uri.split("/")[-1]
31+
32+
2733
def ensure_has_timezone(timestamp):
2834
if timestamp is None:
2935
return timestamp
@@ -726,6 +732,33 @@ def from_kg_object(cls, file_obj):
726732
id=id
727733
)
728734

735+
@classmethod
736+
def from_kg_query(cls, result):
737+
url = result["http://schema.org/downloadURL"]["@id"]
738+
url_parts = urlparse(url)
739+
id = None
740+
local_path = result.get("original_file_name")
741+
if url_parts.netloc == "collab-storage-redirect.brainsimulation.eu":
742+
file_store = "collab-v1"
743+
local_path = url_parts.path
744+
elif url_parts.netloc == "seafile-proxy.brainsimulation.eu":
745+
file_store = "drive"
746+
local_path = url_parts.path
747+
id = parse_qs(url_parts.query).get("username", [None])[0]
748+
elif url_parts.netloc == "object.cscs.ch":
749+
file_store = "swift"
750+
else:
751+
file_store = None
752+
return cls(
753+
download_url=url,
754+
hash=result.get("digest"),
755+
size=result.get("size"),
756+
content_type=result.get("content_type"),
757+
local_path=local_path,
758+
file_store=file_store,
759+
id=id
760+
)
761+
729762
def to_kg_object(self):
730763
if self.download_url is None:
731764
if self.file_store == "drive":
@@ -785,6 +818,27 @@ def from_kg_object(cls, result, client):
785818
normalized_score=result.normalized_score,
786819
)
787820

821+
@classmethod
822+
def from_kg_query(cls, result):
823+
additional_data = []
824+
for item in sorted(result["results_storage"], key=lambda item: item["http://schema.org/downloadURL"]["@id"] ):
825+
additional_data.append(
826+
File.from_kg_query(item)
827+
)
828+
return cls(
829+
id=uuid_from_uri(result["uri"]),
830+
uri=result["uri"],
831+
old_uuid=result["old_uuid"],
832+
model_instance_id=uuid_from_uri(result["model_instance"][0]["model_instance_id"]),
833+
test_instance_id=uuid_from_uri(result["test_instance"][0]["test_instance_id"]),
834+
results_storage=additional_data, # todo: handle collab storage redirects
835+
score=result["score"],
836+
passed=result["passed"],
837+
timestamp=ensure_has_timezone(date_parser.parse(result["timestamp"])),
838+
project_id=result["project_id"],
839+
normalized_score=result["normalized_score"],
840+
)
841+
788842
def to_kg_objects(self, kg_client):
789843
timestamp = ensure_has_timezone(self.timestamp) or datetime.now(timezone.utc)
790844

validation_service_v2/validation_service/resources/results.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
from enum import Enum
33
from typing import List
44
from datetime import datetime
5+
from urllib.parse import quote_plus, urlencode
56
import os
67
import logging
8+
import itertools
9+
from requests.exceptions import HTTPError
710

8-
import requests
9-
10-
from fairgraph.client import KGClient
11+
from fairgraph.client import KGClient, SCOPE_MAP
1112
from fairgraph.base import KGQuery, KGProxy, as_list
1213
from fairgraph.brainsimulation import ValidationResult as ValidationResultKG, ValidationActivity
1314

@@ -46,6 +47,12 @@ def query_results(
4647
# from header
4748
token: HTTPAuthorizationCredentials = Depends(auth),
4849
):
50+
return _query_results(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type, size,
51+
from_index, token)
52+
53+
54+
def _query_results(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type, size,
55+
from_index, token):
4956
filter_query, context = build_result_filters(
5057
model_instance_id,
5158
test_instance_id,
@@ -76,6 +83,56 @@ def query_results(
7683
return response
7784

7885

86+
def expand_combinations(D):
87+
keys, values = zip(*D.items())
88+
return [dict(zip(keys, v)) for v in itertools.product(*[as_list(v) for v in values])]
89+
90+
91+
def _query_results2(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type, size,
92+
from_index, token):
93+
# todo : more sophisticated handling of size and from_index
94+
path = "/modelvalidation/simulation/validationresult/v0.1.0"
95+
query_id = "test" # "vf"
96+
scope = SCOPE_MAP["latest"]
97+
query_parameters = {
98+
"start": 0, #from_index,
99+
"size": 100000, #size,
100+
"vocab": "https://schema.hbp.eu/myQuery/",
101+
"scope": scope
102+
}
103+
for filter_name in ("passed", "project_id", "model_instance_id", "test_instance_id",
104+
"model_id", "test_id", "model_alias", "test_alias", "score_type"):
105+
value = locals()[filter_name]
106+
if value is not None:
107+
query_parameters[filter_name] = value
108+
query_parameters_list = expand_combinations(query_parameters)
109+
response = []
110+
for query_parameters in query_parameters_list:
111+
query_string = urlencode(query_parameters, doseq=True)
112+
url = f"{path}/{query_id}/instances?" + query_string
113+
print(url)
114+
try:
115+
kg_response = kg_client._kg_query_client.get(url)
116+
except HTTPError as err:
117+
if err.response.status_code == 403:
118+
kg_response = None
119+
else:
120+
raise
121+
if kg_response and "results" in kg_response:
122+
for result in kg_response["results"]:
123+
try:
124+
obj = ValidationResult.from_kg_query(result)
125+
except ConsistencyError as err: # todo: count these and report them in the response
126+
logger.warning(str(err))
127+
else:
128+
response.append(obj)
129+
if len(response) >= size + from_index:
130+
break
131+
if len(response) >= size + from_index:
132+
break
133+
return response[from_index:from_index + size]
134+
135+
79136
@router.get("/results/{result_id}", response_model=ValidationResult)
80137
def get_result(result_id: UUID, token: HTTPAuthorizationCredentials = Depends(auth)):
81138
result = ValidationResultKG.from_uuid(str(result_id), kg_client, api="nexus")

0 commit comments

Comments
 (0)