Skip to content

Commit

Permalink
Optimization (#455)
Browse files Browse the repository at this point in the history
* timeoutSuggest + remove property Project.load

* make get_partial_csv a separate endpoint

* amendment to previous commit: until we fix the frontend to not expect a partial csv, we need to return some default

* display all the options for role

* add a cache within wikidata provider

* fix bug in onSelectionChange - selectedAnnotationBlock can be undefined

* get partial csv

* add more activating css and striping css

* add an await so we get the spinner for suggest

* onSelectionChange fix bug change area

* switch to a different endpoint

* stop using a database for entities

* remove database from test client

* limit partialCsv to 150 rows for now (will be changed once statement generation limits are changed)

* fix stupid bug that fetches mapping and partial csv three times...

* get partial csv when submitting annotations, and add a spinner when submitting annotations

Co-authored-by: ChanaChelem <[email protected]>
  • Loading branch information
devowit and ChanaChelem authored May 3, 2021
1 parent d4048f3 commit d46b63e
Show file tree
Hide file tree
Showing 26 changed files with 569 additions and 298 deletions.
5 changes: 3 additions & 2 deletions backend/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AppConfig:


#############SQL STUFF

"""
AUTO_MIGRATE = "sqlite" in AppConfig.SQLALCHEMY_DATABASE_URI # only set to true if database is sqlite
Expand All @@ -73,6 +73,7 @@ def auto_constraint_name(constraint, table):
"pk": "pk_%(table_name)s"
}
metadata = MetaData(naming_convention=convention)
db = SQLAlchemy(app, metadata=metadata)
Expand All @@ -83,4 +84,4 @@ def auto_constraint_name(constraint, table):
if AUTO_MIGRATE:
with app.app_context():
upgrade(directory=os.path.join(BASEDIR, 'migrations'))
upgrade(directory=os.path.join(BASEDIR, 'migrations')) """
15 changes: 15 additions & 0 deletions backend/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,21 @@ def get_data():
response.update(calc_response)
return response, code

@app.route('/api/partialcsv', methods=['GET'])
@json_response
def partial_csv():
    """Compute and return the partial-CSV preview for the current project sheet.

    Returns a JSON payload with a single "partialCsv" key. If generation
    fails for any reason, a header-only placeholder table is returned
    instead, because the frontend always expects a partialCsv payload.
    """
    project = get_project()
    calc_params = get_calc_params(project)
    response = {}
    try:
        response["partialCsv"] = get_partial_csv(calc_params)
    except Exception as e:
        # Best-effort endpoint: log the failure and fall back to an
        # empty 1x3 table with just the column headers.
        print(e)
        response["partialCsv"] = dict(dims=[1, 3],
                                      firstRowIndex=0,
                                      cells=[["subject", "property", "value"]])
    return response, 200


@app.route('/api/project', methods=['POST'])
@json_response
Expand Down
19 changes: 11 additions & 8 deletions backend/calc_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
class CalcParams:
def __init__(self, project, data_path, sheet_name, yaml_path=None, annotation_path=None):
self.project_path = project.directory
self.project = Project.load(self.project_path)
self.data_path = Path(project.directory) / data_path
self.sheet_name = sheet_name
self.sheet = Sheet(self.data_path, self.sheet_name)
self.yaml_path = None
if yaml_path:
self.yaml_path = Path(project.directory) / yaml_path
Expand All @@ -17,13 +19,13 @@ def __init__(self, project, data_path, sheet_name, yaml_path=None, annotation_pa
self.annotation_path= Path(project.directory) / annotation_path


@property
def project(self):
return Project.load(self.project_path)
# @property
# def project(self):
# return Project.load(self.project_path)

@property
def sheet(self):
return Sheet(self.data_path, self.sheet_name)
# @property
# def sheet(self):
# return Sheet(self.data_path, self.sheet_name)

@property
def cache(self):
Expand Down Expand Up @@ -53,5 +55,6 @@ def sheet_names(self):

@property
def sparql_endpoint(self):
p = Project.load(self.project_path)
return p.sparql_endpoint
# p = Project.load(self.project_path)
return self.project.sparql_endpoint
# return p.sparql_endpoint
32 changes: 20 additions & 12 deletions backend/database_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def __init__(self):
super().__init__()
self.cache_id=None
self.project=None
self.property_cache={}

def change_project(self, project):
self.project = project
Expand All @@ -26,29 +27,36 @@ def save_entry(self, wd_id, data_type, from_file=False, **kwargs):
return WikidataEntity.add_or_update(wd_id, data_type, do_session_commit=False, cache_id=cache_id, **kwargs)

def get_entity(self, wikidata_property, *args, **kwargs):
#check for project-specific first
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.cache_id).first()
#check for generic wikidata entry
prop=self.property_cache.get(wikidata_property, None)
if not prop:
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.sparql_endpoint).first()
if not prop:
raise ValueError("Not found")
#check for project-specific first
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.cache_id).first()
#check for generic wikidata entry
if not prop:
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.sparql_endpoint).first()
if not prop:
raise ValueError("Not found")
self.property_cache[wikidata_property]=prop
return prop.__dict__


def try_get_property_type(self, wikidata_property, *args, **kwargs):
#check for project-specific first
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.cache_id).first()
#check for generic wikidata entry
if not prop or prop.data_type is None or prop.data_type == "Property Not Found":
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.sparql_endpoint).first()
prop=self.property_cache.get(wikidata_property, None)
if not prop:
raise ValueError("Not found")
#check for project-specific first
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.cache_id).first()
#check for generic wikidata entry
if not prop or prop.data_type is None or prop.data_type == "Property Not Found":
prop = WikidataEntity.query.filter_by(wd_id=wikidata_property, cache_id=self.sparql_endpoint).first()
if not prop:
raise ValueError("Not found")
if prop.data_type == "Property Not Found":
return prop.data_type
if prop.data_type is None:
raise ValueError("No datatype defined for that ID")
self.property_cache[wikidata_property]=prop
return prop.data_type

def __exit__(self, exc_type, exc_value, exc_traceback):
WikidataEntity.do_commit()

32 changes: 32 additions & 0 deletions backend/migrations/versions/0a923cbc1283_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""add index to cache_id
Revision ID: 0a923cbc1283
Revises: 28a3625f6dd6
Create Date: 2021-05-02 13:27:58.664291
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '0a923cbc1283'
down_revision = '28a3625f6dd6'
branch_labels = None
depends_on = None


def upgrade():
    # Add a non-unique index on wikidata_entity.cache_id; the entity
    # providers filter on cache_id for every lookup, so this speeds up
    # those queries. batch_alter_table is used for SQLite compatibility.
    with op.batch_alter_table('wikidata_entity', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('ix_wikidata_entity_cache_id'), ['cache_id'], unique=False)


def downgrade():
    # Reverse of upgrade(): drop the cache_id index added by this revision.
    with op.batch_alter_table('wikidata_entity', schema=None) as batch_op:
        batch_op.drop_index(batch_op.f('ix_wikidata_entity_cache_id'))
30 changes: 11 additions & 19 deletions backend/t2wml_web.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
from collections import defaultdict
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path
from numpy.core.numeric import full
from t2wml.api import add_entities_from_file as api_add_entities_from_file
from t2wml.api import (WikifierService, t2wml_settings, KnowledgeGraph, YamlMapper, AnnotationMapper,
kgtk_to_dict, dict_to_kgtk)
from t2wml.mapping.kgtk import get_all_variables
from t2wml.input_processing.annotation_parsing import AnnotationNodeGenerator, Annotation
from t2wml.input_processing.annotation_suggesting import block_finder
from t2wml.mapping.statement_mapper import PartialAnnotationMapper
from t2wml.utils.t2wml_exceptions import T2WMLException
from t2wml.spreadsheets.conversions import cell_str_to_tuple
from t2wml.api import Project
from app_config import db, CACHE_FOLDER
from database_provider import DatabaseProvider
from app_config import CACHE_FOLDER
from web_dict_provider import WebDictionaryProvider
from utils import get_empty_layers
from wikidata_utils import get_labels_and_descriptions, get_qnode_url, QNode

Expand Down Expand Up @@ -47,7 +44,7 @@ def set_web_settings():
if not os.path.isdir(CACHE_FOLDER):
os.makedirs(CACHE_FOLDER, exist_ok=True)
t2wml_settings.cache_data_files_folder = CACHE_FOLDER
t2wml_settings.wikidata_provider = DatabaseProvider()
t2wml_settings.wikidata_provider = WebDictionaryProvider()

def update_t2wml_settings(project):
t2wml_settings.update_from_dict(**project.__dict__)
Expand Down Expand Up @@ -77,8 +74,8 @@ def get_kg(calc_params):
ang.preload(calc_params.sheet, wikifier)
else:
cell_mapper = YamlMapper(calc_params.yaml_path)
kg = KnowledgeGraph.generate(cell_mapper, calc_params.sheet, wikifier)
db.session.commit() # save any queried properties
with t2wml_settings.wikidata_provider as p:
kg = KnowledgeGraph.generate(cell_mapper, calc_params.sheet, wikifier)
return kg


Expand Down Expand Up @@ -119,7 +116,7 @@ def get_qnodes_layer(calc_params):
qNode= QNode(id, value, context),
indices=[[row, col]])

labels_and_descriptions = get_labels_and_descriptions(list(ids_to_get), calc_params.sparql_endpoint)
labels_and_descriptions = get_labels_and_descriptions(t2wml_settings.wikidata_provider, list(ids_to_get), calc_params.sparql_endpoint)
for id in qnode_entries:
if id in labels_and_descriptions:
qnode_entries[id]['qNode'].update(**labels_and_descriptions[id])
Expand Down Expand Up @@ -245,7 +242,7 @@ def get_yaml_layers(calc_params):

cleanedLayer=get_cleaned(kg)

labels = get_labels_and_descriptions(qnodes, calc_params.project.sparql_endpoint)
labels = get_labels_and_descriptions(t2wml_settings.wikidata_provider, qnodes, calc_params.project.sparql_endpoint)
qnodes.update(labels)
for id in qnodes:
if qnodes[id]:
Expand Down Expand Up @@ -301,14 +298,9 @@ def get_layers(response, calc_params):
except Exception as e:
response["yamlError"] = str(e)

try:
response["partialCsv"]=get_partial_csv(calc_params)
except Exception as e:
print(e)
response["partialCsv"]=dict(dims=[1,3],
firstRowIndex=0,
cells=[["subject", "property", "value"]])

response["partialCsv"]=dict(dims=[1,3],
firstRowIndex=0,
cells=[["subject", "property", "value"]])

def get_annotations(calc_params):
annotations_path=calc_params.annotation_path
Expand Down Expand Up @@ -371,7 +363,7 @@ def get_partial_csv(calc_params):
wikifier=calc_params.wikifier
annotation= calc_params.annotation_path
cell_mapper = PartialAnnotationMapper(calc_params.annotation_path)
kg = KnowledgeGraph.generate(cell_mapper, calc_params.sheet, wikifier)
kg = KnowledgeGraph.generate(cell_mapper, calc_params.sheet, wikifier, start=0, end=150)
if not kg.statements:
if cell_mapper.annotation.subject_annotations:
df=pd.DataFrame([], columns=["subject", "property", "value"])
Expand Down
14 changes: 11 additions & 3 deletions backend/tests/files_for_tests/aid/project_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -1171,31 +1171,39 @@
"layerType": "statement",
"qnodes": {
"P17": {
"data_type": "WikibaseItem",
"description": "sovereign state of this item (not to be used for human beings)",
"id": "P17",
"label": "country",
"url": "https://www.wikidata.org/wiki/Property:P17"
},
"P585": {
"data_type": "Time",
"description": "time and date something took place, existed or a statement was true",
"id": "P585",
"label": "point in time",
"url": "https://www.wikidata.org/wiki/Property:P585"
},
"Paid-security-002": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-002",
"label": "UN",
"url": ""
},
"Paid-security-003": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-003",
"label": "INGO",
"url": ""
},
"Paid-security-004": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-004",
"label": "LNGO/NRCS",
"url": ""
Expand Down
14 changes: 11 additions & 3 deletions backend/tests/files_for_tests/aid/results.json
Original file line number Diff line number Diff line change
Expand Up @@ -1763,31 +1763,39 @@
"layerType": "statement",
"qnodes": {
"P17": {
"data_type": "WikibaseItem",
"description": "sovereign state of this item (not to be used for human beings)",
"id": "P17",
"label": "country",
"url": "https://www.wikidata.org/wiki/Property:P17"
},
"P585": {
"data_type": "Time",
"description": "time and date something took place, existed or a statement was true",
"id": "P585",
"label": "point in time",
"url": "https://www.wikidata.org/wiki/Property:P585"
},
"Paid-security-002": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-002",
"label": "UN",
"url": ""
},
"Paid-security-003": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-003",
"label": "INGO",
"url": ""
},
"Paid-security-004": {
"description": "",
"P31": "Q18616576",
"data_type": "Quantity",
"from_file": true,
"id": "Paid-security-004",
"label": "LNGO/NRCS",
"url": ""
Expand Down
4 changes: 1 addition & 3 deletions backend/tests/files_for_tests/empty_cells/project.t2wml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ entity_files:
- item_definitions.tsv
- properties_all.tsv
handle_calendar: leave
sparql_endpoint: https://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql
sparql_endpoint: https://query.wikidata.org/bigdata/namespace/wdq/sparql
title: empty cells
url: ''
warn_for_empty_cells: true
Expand All @@ -24,5 +24,3 @@ yaml_sheet_associations:
selected: t2wml.yaml
val_arr:
- t2wml.yaml
- t2wml.yaml
- t2wml.yaml
11 changes: 0 additions & 11 deletions backend/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,6 @@

@pytest.fixture(scope="session")
def client(request):
def fin():
os.close(db_fd)
os.unlink(name)
app.config['TESTING']=True
db_fd, name = tempfile.mkstemp()
app.config['SQLALCHEMY_DATABASE_URI']='sqlite:///' +name
app.config['USE_CACHE']=False
request.addfinalizer(fin)
with app.app_context():
upgrade(directory=os.path.join(BACKEND_DIR, 'migrations'))

with app.test_client() as client:
yield client

Expand Down
Loading

0 comments on commit d46b63e

Please sign in to comment.