Working on improving speed of "results" query, using KG Query API directly.
HumanBrainProject · Sep 17, 2020 · 19ebfa1 · 19ebfa1
1 parent 5a7593a
commit 19ebfa1
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 3 deletions.
diff --git a/validation_service_v2/validation_service/data_models.py b/validation_service_v2/validation_service/data_models.py
@@ -6,6 +6,8 @@
 import logging
 from urllib.parse import urlparse, parse_qs
 
+from dateutil import parser as date_parser
+
 from pydantic import BaseModel, HttpUrl, AnyUrl, validator, ValidationError
 from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, status
@@ -24,6 +26,10 @@
 logger = logging.getLogger("validation_service_v2")
 
 
+def uuid_from_uri(uri):
+    return uri.split("/")[-1]
+
+
 def ensure_has_timezone(timestamp):
     if timestamp is None:
         return timestamp
@@ -726,6 +732,33 @@ def from_kg_object(cls, file_obj):
             id=id
         )
 
+    @classmethod
+    def from_kg_query(cls, result):
+        url = result["http://schema.org/downloadURL"]["@id"]
+        url_parts = urlparse(url)
+        id = None
+        local_path = result.get("original_file_name")
+        if url_parts.netloc == "collab-storage-redirect.brainsimulation.eu":
+            file_store = "collab-v1"
+            local_path = url_parts.path
+        elif url_parts.netloc == "seafile-proxy.brainsimulation.eu":
+            file_store = "drive"
+            local_path = url_parts.path
+            id = parse_qs(url_parts.query).get("username", [None])[0]
+        elif url_parts.netloc == "object.cscs.ch":
+            file_store = "swift"
+        else:
+            file_store = None
+        return cls(
+            download_url=url,
+            hash=result.get("digest"),
+            size=result.get("size"),
+            content_type=result.get("content_type"),
+            local_path=local_path,
+            file_store=file_store,
+            id=id
+        )
+
     def to_kg_object(self):
         if self.download_url is None:
             if self.file_store == "drive":
@@ -785,6 +818,27 @@ def from_kg_object(cls, result, client):
             normalized_score=result.normalized_score,
         )
 
+    @classmethod
+    def from_kg_query(cls, result):
+        additional_data = []
+        for item in sorted(result["results_storage"], key=lambda item: item["http://schema.org/downloadURL"]["@id"] ):
+            additional_data.append(
+                File.from_kg_query(item)
+            )
+        return cls(
+            id=uuid_from_uri(result["uri"]),
+            uri=result["uri"],
+            old_uuid=result["old_uuid"],
+            model_instance_id=uuid_from_uri(result["model_instance"][0]["model_instance_id"]),
+            test_instance_id=uuid_from_uri(result["test_instance"][0]["test_instance_id"]),
+            results_storage=additional_data,  # todo: handle collab storage redirects
+            score=result["score"],
+            passed=result["passed"],
+            timestamp=ensure_has_timezone(date_parser.parse(result["timestamp"])),
+            project_id=result["project_id"],
+            normalized_score=result["normalized_score"],
+        )
+
     def to_kg_objects(self, kg_client):
         timestamp = ensure_has_timezone(self.timestamp) or datetime.now(timezone.utc)
 

diff --git a/validation_service_v2/validation_service/resources/results.py b/validation_service_v2/validation_service/resources/results.py
@@ -2,12 +2,13 @@
 from enum import Enum
 from typing import List
 from datetime import datetime
+from urllib.parse import quote_plus, urlencode
 import os
 import logging
+import itertools
+from requests.exceptions import HTTPError
 
-import requests
-
-from fairgraph.client import KGClient
+from fairgraph.client import KGClient, SCOPE_MAP
 from fairgraph.base import KGQuery, KGProxy, as_list
 from fairgraph.brainsimulation import ValidationResult as ValidationResultKG, ValidationActivity
 
@@ -46,6 +47,12 @@ def query_results(
     # from header
     token: HTTPAuthorizationCredentials = Depends(auth),
 ):
+    return _query_results(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type,  size,
+from_index, token)
+
+
+def _query_results(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type,  size,
+from_index, token):
     filter_query, context = build_result_filters(
         model_instance_id,
         test_instance_id,
@@ -76,6 +83,56 @@ def query_results(
     return response
 
 
+def expand_combinations(D):
+    keys, values = zip(*D.items())
+    return [dict(zip(keys, v)) for v in itertools.product(*[as_list(v) for v in values])]
+
+
+def _query_results2(passed, project_id, model_instance_id, test_instance_id, model_id, test_id, model_alias, test_alias, score_type,  size,
+from_index, token):
+    # todo : more sophisticated handling of size and from_index
+    path = "/modelvalidation/simulation/validationresult/v0.1.0"
+    query_id = "test"  # "vf"
+    scope = SCOPE_MAP["latest"]
+    query_parameters = {
+        "start": 0,   #from_index,
+        "size": 100000,  #size,
+        "vocab": "https://schema.hbp.eu/myQuery/",
+        "scope": scope
+    }
+    for filter_name in ("passed", "project_id", "model_instance_id", "test_instance_id",
+                        "model_id", "test_id", "model_alias", "test_alias", "score_type"):
+        value = locals()[filter_name]
+        if value is not None:
+            query_parameters[filter_name] = value
+    query_parameters_list = expand_combinations(query_parameters)
+    response = []
+    for query_parameters in query_parameters_list:
+        query_string = urlencode(query_parameters, doseq=True)
+        url = f"{path}/{query_id}/instances?" + query_string
+        print(url)
+        try:
+            kg_response = kg_client._kg_query_client.get(url)
+        except HTTPError as err:
+            if err.response.status_code == 403:
+                kg_response = None
+            else:
+                raise
+        if kg_response and "results" in kg_response:
+            for result in kg_response["results"]:
+                try:
+                    obj = ValidationResult.from_kg_query(result)
+                except ConsistencyError as err:  # todo: count these and report them in the response
+                    logger.warning(str(err))
+                else:
+                    response.append(obj)
+                if len(response) >= size + from_index:
+                    break
+            if len(response) >= size + from_index:
+                break
+    return response[from_index:from_index + size]
+
+
 @router.get("/results/{result_id}", response_model=ValidationResult)
 def get_result(result_id: UUID, token: HTTPAuthorizationCredentials = Depends(auth)):
     result = ValidationResultKG.from_uuid(str(result_id), kg_client, api="nexus")