Skip to content

Commit

Permalink
Neuro cognates -- ispras/lingvodoc-react#1182
Browse files Browse the repository at this point in the history
Refactoring

Compare limit, stop button, request stamp

Killing previous process
  • Loading branch information
vmonakhov committed Feb 10, 2025
1 parent 61f61a4 commit bf94838
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 16 deletions.
29 changes: 25 additions & 4 deletions lingvodoc/schema/gql_cognate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5629,6 +5629,8 @@ class Arguments:
match_translations = graphene.Boolean()
base_language_id = LingvodocID()
input_pairs = ObjectVal()
truth_threshold = graphene.Float()
stamp = graphene.Float()

debug_flag = graphene.Boolean()

Expand All @@ -5638,6 +5640,7 @@ class Arguments:
message = graphene.String()
perspective_name_list = graphene.List(graphene.String)
transcription_count = graphene.Int()
stamp = graphene.Float()

@staticmethod
def neuro_cognate_statistics(
Expand All @@ -5649,6 +5652,8 @@ def neuro_cognate_statistics(
match_translations,
input_pairs,
locale_id,
truth_threshold,
stamp,
#storage,
debug_flag = False):

Expand Down Expand Up @@ -5700,17 +5705,29 @@ def neuro_cognate_statistics(
message = ""
triumph = True
prediction = None
compare_len = sum(map(len, compare_pairs_list))
stamp_file = f"/tmp/lingvodoc_stamps/{stamp}"

if not input_pairs_list or not sum(map(len, compare_pairs_list)):
if not input_pairs_list or not compare_len:
triumph = False
message = "No input words or words to compare is received!"
message = "No input words or words to compare is received"
elif compare_len > 10 ** 4:
triumph = False
message = "Too large dictionaries to compare"
else:
NeuroCognatesEngine = NeuroCognates(four_tensors=match_translations)
prediction = NeuroCognatesEngine.index(input_pairs_list, compare_pairs_list, input_index)
NeuroCognatesEngine = NeuroCognates(
compare_pairs_list,
input_index,
match_translations,
truth_threshold,
stamp_file)

prediction = NeuroCognatesEngine.index(input_pairs_list)

result_dict = (
dict(
triumph=triumph,
stamp=stamp,
suggestion_list=prediction,
message=message,
perspective_name_list=perspective_name_list,
Expand All @@ -5722,10 +5739,12 @@ def neuro_cognate_statistics(
def mutate(
self,
info,
stamp,
source_perspective_id,
perspective_info_list,
match_translations,
base_language_id,
truth_threshold=0.97,
input_pairs=None,
debug_flag=False):

Expand Down Expand Up @@ -5825,6 +5844,8 @@ def mutate(
match_translations,
input_pairs,
locale_id,
truth_threshold,
stamp,
#storage,
debug_flag)

Expand Down
32 changes: 31 additions & 1 deletion lingvodoc/schema/gql_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
ObjectTOC as dbObjectTOC,
BaseGroup as dbBaseGroup,
Dictionary as dbDictionary,
TranslationGist as dbTranslationGist
TranslationGist as dbTranslationGist,
Client as dbClient
)
from pyramid.security import authenticated_userid
import logging
Expand All @@ -45,8 +46,10 @@
import json
import requests
from pyramid.request import Request
from pathlib import Path
from pyramid.response import Response
from lingvodoc.utils.search import recursive_sort
from pdb import set_trace as A

from lingvodoc.cache.caching import CACHE

Expand Down Expand Up @@ -375,3 +378,30 @@ def mutate(root, info, **args):
raise ResponseError('network error 2')
task.set(16, 100, "Synchronisation complete (New data still can be downloading from server, look a other tasks)", "")
return Synchronize(triumph=True)


class StopMutation(graphene.Mutation):
    """
    Requests cancellation of a long-running mutation (e.g. NeuroCognates
    comparison) identified by its client-supplied timestamp.

    Cancellation works via a filesystem flag: this mutation touches
    /tmp/lingvodoc_stamps/<stamp>, and the worker process polls for that
    file and stops when it appears (see neuro_cognates/app.py, which
    checks the same path every hundred comparisons).
    """

    class Arguments:
        # Same stamp value the client passed to the mutation it wants to stop.
        stamp = graphene.Float(required=True)

    triumph = graphene.Boolean()

    @staticmethod
    def mutate(root, info, stamp):

        client_id = info.context.client_id
        client = DBSession.query(dbClient).filter_by(id=client_id).first()

        if not client:
            return ResponseError('Only authorized users can stop running mutations.')

        # NOTE(review): this path must stay in sync with the stamp_file
        # path built in gql_cognate.neuro_cognate_statistics.
        stamps_path = Path("/tmp/lingvodoc_stamps")

        # Create the stamp directory if needed, then touch the stamp file;
        # the worker polls for this file and terminates when it exists.
        stamps_path.mkdir(exist_ok=True)
        (stamps_path / str(stamp)).touch()

        return StopMutation(triumph=True)
4 changes: 3 additions & 1 deletion lingvodoc/schema/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@
from lingvodoc.schema.gql_sync import (
DownloadDictionaries,
DownloadDictionary,
Synchronize)
Synchronize,
StopMutation)

from lingvodoc.schema.gql_tasks import (
DeleteTask,
Expand Down Expand Up @@ -9224,6 +9225,7 @@ class MyMutations(graphene.ObjectType):
create_markup_group = CreateMarkupGroup.Field()
delete_markup_group = DeleteMarkupGroup.Field()
save_markup_groups = SaveMarkupGroups.Field()
stop_mutation = StopMutation.Field()

schema = graphene.Schema(
query=Query,
Expand Down
50 changes: 40 additions & 10 deletions lingvodoc/utils/neuro_cognates/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@ def get_config(self):


class NeuroCognates:
def __init__(self, four_tensors):
def __init__(self, compare_lists, input_index, four_tensors, truth_threshold, stamp_file):

self.compare_lists = compare_lists
self.input_index = input_index
self.four_tensors = four_tensors
self.truth_threshold = truth_threshold
self.stamp_file = stamp_file

abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
Expand Down Expand Up @@ -88,7 +92,16 @@ def split_items(items):
list(map(lambda x: x[2], items)))

@staticmethod
def predict_cognates(word_pairs, compare_lists, input_index, tokenizer, model, max_len, four_tensors=False):
def predict_cognates(
word_pairs,
compare_lists,
input_index,
tokenizer,
model,
max_len,
stamp_file,
four_tensors=False,
truth_threshold=0.97):

# Разделяем входные пары на слова и переводы
input_words, input_translations, input_lex_ids = NeuroCognates.split_items(word_pairs)
Expand Down Expand Up @@ -136,15 +149,22 @@ def get_prediction(input_word, input_trans, input_id, X_word, X_trans):

compare_words, compare_translations, compare_lex_ids = NeuroCognates.split_items(compare_list)

count = 0
for compare_word, compare_trans, compare_id, X_comp_word, X_comp_trans in itertools.zip_longest(
compare_words, compare_translations, compare_lex_ids, X_compare_words[i], X_compare_translations[i]):

# Checking stamp-to-stop every hundred comparings
count += 1
if count % 100 == 0 and os.path.isfile(stamp_file):
print("Killed process !!!")
return result

# Передаем 2 или 4 тензора в модель
pred = (model.predict([X_word, X_trans, X_comp_word, X_comp_trans])[0][0]
if four_tensors else
model.predict([X_word, X_comp_word])[0][0])

if pred > 0.97: # Фильтр по вероятности > 97%
if pred > truth_threshold: # Фильтр по вероятности
similarities.append((i, [compare_word, compare_trans], compare_id, f"{pred:.4f}"))

if similarities:
Expand All @@ -170,26 +190,36 @@ def get_prediction(input_word, input_trans, input_id, X_word, X_trans):
p.close()
p.join()

# Removing stamp-to-stop if exists
try:
os.remove(stamp_file)
except OSError:
pass

return plain_results

def index(self, word_pairs, compare_lists, input_index):
def index(self, word_pairs):
if self.four_tensors:
# Вызов функции для сравнения (модель с 4 тензорами)
return NeuroCognates.predict_cognates(
word_pairs,
compare_lists,
input_index,
self.compare_lists,
self.input_index,
self.tokenizer_dict,
self.model_dict,
self.max_len_dict,
self.four_tensors)
self.stamp_file,
self.four_tensors,
self.truth_threshold)
else:
# Вызов функции для сравнения (модель с 2 тензорами)
return NeuroCognates.predict_cognates(
word_pairs,
compare_lists,
input_index,
self.compare_lists,
self.input_index,
self.tokenizer,
self.model,
self.max_len,
self.four_tensors)
self.stamp_file,
self.four_tensors,
self.truth_threshold)

0 comments on commit bf94838

Please sign in to comment.