Skip to content

Commit

Permalink
Neuro cognates -- ispras/lingvodoc-react#1182
Browse files Browse the repository at this point in the history
Refactoring

Compare limit, stop button, request stamp

Killing previous process
  • Loading branch information
vmonakhov committed Feb 10, 2025
1 parent 61f61a4 commit bf94838
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 16 deletions.
29 changes: 25 additions & 4 deletions lingvodoc/schema/gql_cognate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5629,6 +5629,8 @@ class Arguments:
match_translations = graphene.Boolean()
base_language_id = LingvodocID()
input_pairs = ObjectVal()
truth_threshold = graphene.Float()
stamp = graphene.Float()

debug_flag = graphene.Boolean()

Expand All @@ -5638,6 +5640,7 @@ class Arguments:
message = graphene.String()
perspective_name_list = graphene.List(graphene.String)
transcription_count = graphene.Int()
stamp = graphene.Float()

@staticmethod
def neuro_cognate_statistics(
Expand All @@ -5649,6 +5652,8 @@ def neuro_cognate_statistics(
match_translations,
input_pairs,
locale_id,
truth_threshold,
stamp,
#storage,
debug_flag = False):

Expand Down Expand Up @@ -5700,17 +5705,29 @@ def neuro_cognate_statistics(
message = ""
triumph = True
prediction = None
compare_len = sum(map(len, compare_pairs_list))
stamp_file = f"/tmp/lingvodoc_stamps/{stamp}"

if not input_pairs_list or not sum(map(len, compare_pairs_list)):
if not input_pairs_list or not compare_len:
triumph = False
message = "No input words or words to compare is received!"
message = "No input words or words to compare is received"
elif compare_len > 10 ** 4:
triumph = False
message = "Too large dictionaries to compare"
else:
NeuroCognatesEngine = NeuroCognates(four_tensors=match_translations)
prediction = NeuroCognatesEngine.index(input_pairs_list, compare_pairs_list, input_index)
NeuroCognatesEngine = NeuroCognates(
compare_pairs_list,
input_index,
match_translations,
truth_threshold,
stamp_file)

prediction = NeuroCognatesEngine.index(input_pairs_list)

result_dict = (
dict(
triumph=triumph,
stamp=stamp,
suggestion_list=prediction,
message=message,
perspective_name_list=perspective_name_list,
Expand All @@ -5722,10 +5739,12 @@ def neuro_cognate_statistics(
def mutate(
self,
info,
stamp,
source_perspective_id,
perspective_info_list,
match_translations,
base_language_id,
truth_threshold=0.97,
input_pairs=None,
debug_flag=False):

Expand Down Expand Up @@ -5825,6 +5844,8 @@ def mutate(
match_translations,
input_pairs,
locale_id,
truth_threshold,
stamp,
#storage,
debug_flag)

Expand Down
32 changes: 31 additions & 1 deletion lingvodoc/schema/gql_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
ObjectTOC as dbObjectTOC,
BaseGroup as dbBaseGroup,
Dictionary as dbDictionary,
TranslationGist as dbTranslationGist
TranslationGist as dbTranslationGist,
Client as dbClient
)
from pyramid.security import authenticated_userid
import logging
Expand All @@ -45,8 +46,10 @@
import json
import requests
from pyramid.request import Request
from pathlib import Path
from pyramid.response import Response
from lingvodoc.utils.search import recursive_sort
from pdb import set_trace as A

from lingvodoc.cache.caching import CACHE

Expand Down Expand Up @@ -375,3 +378,30 @@ def mutate(root, info, **args):
raise ResponseError('network error 2')
task.set(16, 100, "Synchronisation complete (New data still can be downloading from server, look a other tasks)", "")
return Synchronize(triumph=True)


class StopMutation(graphene.Mutation):
    """
    Requests cancellation of a long-running mutation (e.g. NeuroCognates
    comparison) identified by its client-supplied timestamp.

    Cancellation works via a filesystem flag: this mutation touches
    /tmp/lingvodoc_stamps/<stamp>, and the worker process polls for that
    file and stops when it appears (see neuro_cognates/app.py, which
    checks the same path every hundred comparisons).
    """

    class Arguments:
        # Same stamp value the client passed to the mutation it wants to stop.
        stamp = graphene.Float(required=True)

    triumph = graphene.Boolean()

    @staticmethod
    def mutate(root, info, stamp):

        client_id = info.context.client_id
        client = DBSession.query(dbClient).filter_by(id=client_id).first()

        if not client:
            return ResponseError('Only authorized users can stop running mutations.')

        # NOTE(review): this path must stay in sync with the stamp_file
        # path built in gql_cognate.neuro_cognate_statistics.
        stamps_path = Path("/tmp/lingvodoc_stamps")

        # Create the stamp directory if needed, then touch the stamp file;
        # the worker polls for this file and terminates when it exists.
        stamps_path.mkdir(exist_ok=True)
        (stamps_path / str(stamp)).touch()

        return StopMutation(triumph=True)
4 changes: 3 additions & 1 deletion lingvodoc/schema/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@
from lingvodoc.schema.gql_sync import (
DownloadDictionaries,
DownloadDictionary,
Synchronize)
Synchronize,
StopMutation)

from lingvodoc.schema.gql_tasks import (
DeleteTask,
Expand Down Expand Up @@ -9224,6 +9225,7 @@ class MyMutations(graphene.ObjectType):
create_markup_group = CreateMarkupGroup.Field()
delete_markup_group = DeleteMarkupGroup.Field()
save_markup_groups = SaveMarkupGroups.Field()
stop_mutation = StopMutation.Field()

schema = graphene.Schema(
query=Query,
Expand Down
50 changes: 40 additions & 10 deletions lingvodoc/utils/neuro_cognates/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@ def get_config(self):


class NeuroCognates:
def __init__(self, four_tensors):
def __init__(self, compare_lists, input_index, four_tensors, truth_threshold, stamp_file):

self.compare_lists = compare_lists
self.input_index = input_index
self.four_tensors = four_tensors
self.truth_threshold = truth_threshold
self.stamp_file = stamp_file

abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
Expand Down Expand Up @@ -88,7 +92,16 @@ def split_items(items):
list(map(lambda x: x[2], items)))

@staticmethod
def predict_cognates(word_pairs, compare_lists, input_index, tokenizer, model, max_len, four_tensors=False):
def predict_cognates(
word_pairs,
compare_lists,
input_index,
tokenizer,
model,
max_len,
stamp_file,
four_tensors=False,
truth_threshold=0.97):

# Разделяем входные пары на слова и переводы
input_words, input_translations, input_lex_ids = NeuroCognates.split_items(word_pairs)
Expand Down Expand Up @@ -136,15 +149,22 @@ def get_prediction(input_word, input_trans, input_id, X_word, X_trans):

compare_words, compare_translations, compare_lex_ids = NeuroCognates.split_items(compare_list)

count = 0
for compare_word, compare_trans, compare_id, X_comp_word, X_comp_trans in itertools.zip_longest(
compare_words, compare_translations, compare_lex_ids, X_compare_words[i], X_compare_translations[i]):

# Checking stamp-to-stop every hundred comparings
count += 1
if count % 100 == 0 and os.path.isfile(stamp_file):
print("Killed process !!!")
return result

# Передаем 2 или 4 тензора в модель
pred = (model.predict([X_word, X_trans, X_comp_word, X_comp_trans])[0][0]
if four_tensors else
model.predict([X_word, X_comp_word])[0][0])

if pred > 0.97: # Фильтр по вероятности > 97%
if pred > truth_threshold: # Фильтр по вероятности
similarities.append((i, [compare_word, compare_trans], compare_id, f"{pred:.4f}"))

if similarities:
Expand All @@ -170,26 +190,36 @@ def get_prediction(input_word, input_trans, input_id, X_word, X_trans):
p.close()
p.join()

# Removing stamp-to-stop if exists
try:
os.remove(stamp_file)
except OSError:
pass

return plain_results

def index(self, word_pairs, compare_lists, input_index):
def index(self, word_pairs):
if self.four_tensors:
# Вызов функции для сравнения (модель с 4 тензорами)
return NeuroCognates.predict_cognates(
word_pairs,
compare_lists,
input_index,
self.compare_lists,
self.input_index,
self.tokenizer_dict,
self.model_dict,
self.max_len_dict,
self.four_tensors)
self.stamp_file,
self.four_tensors,
self.truth_threshold)
else:
# Вызов функции для сравнения (модель с 2 тензорами)
return NeuroCognates.predict_cognates(
word_pairs,
compare_lists,
input_index,
self.compare_lists,
self.input_index,
self.tokenizer,
self.model,
self.max_len,
self.four_tensors)
self.stamp_file,
self.four_tensors,
self.truth_threshold)

0 comments on commit bf94838

Please sign in to comment.