Update examples to use webhook training

KonstantinKorotaev · KonstantinKorotaev · commit 87a33b4f870c · 2022-07-07T11:42:51.000+03:00
diff --git a/label_studio_ml/examples/bert/bert_classifier.py b/label_studio_ml/examples/bert/bert_classifier.py
@@ -12,8 +12,7 @@
 
 from label_studio_ml.model import LabelStudioMLBase
 
-from utils import prepare_texts, calc_slope
-
+from utils import prepare_texts, calc_slope, get_annotated_dataset
 
 if torch.cuda.is_available():
     device = torch.device("cuda")
@@ -128,6 +127,10 @@ def predict(self, tasks, **kwargs):
         return predictions
 
     def fit(self, completions, workdir=None, cache_dir=None, **kwargs):
+        # check if training is from web hook and load tasks from api
+        if kwargs.get('data'):
+            project_id = kwargs['data']['project']['id']
+            completions = get_annotated_dataset(project_id)
         input_texts = []
         output_labels, output_labels_idx = [], []
         label2idx = {l: i for i, l in enumerate(self.labels)}
diff --git a/label_studio_ml/examples/flair/ner_ml_backend.py b/label_studio_ml/examples/flair/ner_ml_backend.py
@@ -9,6 +9,9 @@
 import os
 
 #writing class with inheretance
+from label_studio_ml.utils import get_annotated_dataset
+
+
 class SequenceTaggerModel(LabelStudioMLBase):
     def __init__(self, **kwargs):
         #initialize base class
@@ -87,6 +90,10 @@ def convert_to_ls_annotation(self, flair_sentences):
         return results
     
     def fit(self, completions, workdir=None, **kwargs):
+        # check if training is from web hook and load tasks from api
+        if kwargs.get('data'):
+            project_id = kwargs['data']['project']['id']
+            completions = get_annotated_dataset(project_id)
         #completions contain ALL the annotated samples.
         #train a model from scratch here.
         flair_sents = []
diff --git a/label_studio_ml/examples/mmdetection/mmdetection.py b/label_studio_ml/examples/mmdetection/mmdetection.py
@@ -128,6 +128,9 @@ def predict(self, tasks, **kwargs):
             'score': avg_score
         }]
 
+    def fit(self, completions, workdir=None, **kwargs):
+        return {}
+
 
 def json_load(file, int_keys=False):
     with io.open(file, encoding='utf8') as f:
diff --git a/label_studio_ml/examples/ner/ner.py b/label_studio_ml/examples/ner/ner.py
@@ -464,7 +464,7 @@ def fit(
         warmup_steps=0, save_steps=50, dump_dataset=True, cache_dir='~/.heartex/cache', train_logs=None,
         **kwargs
     ):
-        # check if training is from web hook
+        # check if training is from web hook and load tasks from api
         if kwargs.get('data'):
             project_id = kwargs['data']['project']['id']
             completions = get_annotated_dataset(project_id)
diff --git a/label_studio_ml/examples/pytorch_transfer_learning/pytorch_transfer_learning.py b/label_studio_ml/examples/pytorch_transfer_learning/pytorch_transfer_learning.py
@@ -14,7 +14,7 @@
 from torchvision import models, transforms
 
 from label_studio_ml.model import LabelStudioMLBase
-from label_studio_ml.utils import get_single_tag_keys, get_choice, is_skipped, get_local_path
+from label_studio_ml.utils import get_single_tag_keys, get_choice, is_skipped, get_local_path, get_annotated_dataset
 
 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 
@@ -177,6 +177,10 @@ def predict(self, tasks, **kwargs):
         return predictions
 
     def fit(self, completions, workdir=None, batch_size=32, num_epochs=10, **kwargs):
+        # check if training is from web hook and load tasks from api
+        if kwargs.get('data'):
+            project_id = kwargs['data']['project']['id']
+            completions = get_annotated_dataset(project_id)
         image_urls, image_classes = [], []
         print('Collecting annotations...')
         for completion in completions:
diff --git a/label_studio_ml/examples/simple_text_classifier/simple_text_classifier.py b/label_studio_ml/examples/simple_text_classifier/simple_text_classifier.py
@@ -102,7 +102,7 @@ def _get_annotated_dataset(self, project_id):
         return json.loads(response.content)
 
     def fit(self, annotations, workdir=None, **kwargs):
-        # check if training is from web hook
+        # check if training is from web hook and load tasks from api
         if kwargs.get('data'):
             project_id = kwargs['data']['project']['id']
             tasks = self._get_annotated_dataset(project_id)
diff --git a/label_studio_ml/examples/substring_matching/substring_matching.py b/label_studio_ml/examples/substring_matching/substring_matching.py
@@ -91,3 +91,7 @@ def _extract_meta(task):
             meta['start'] = task['value']['start']
             meta['end'] = task['value']['end']
         return meta
+
+    def fit(self, completions, workdir=None, **kwargs):
+        # save some training outputs to the job result
+        return {'random': random.randint(1, 10)}
diff --git a/label_studio_ml/examples/tensorflow/mobilenet_finetune.py b/label_studio_ml/examples/tensorflow/mobilenet_finetune.py
@@ -6,7 +6,8 @@
 
 from PIL import Image
 from label_studio_ml.model import LabelStudioMLBase
-from label_studio_ml.utils import get_image_local_path, get_single_tag_keys, get_choice, is_skipped
+from label_studio_ml.utils import get_image_local_path, get_single_tag_keys, get_choice, is_skipped, \
+    get_annotated_dataset
 
 logger = logging.getLogger(__name__)
 feature_extractor_model = 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4'
@@ -62,6 +63,10 @@ def predict(self, tasks, **kwargs):
         }]
 
     def fit(self, completions, workdir=None, **kwargs):
+        # check if training is from web hook and load tasks from api
+        if kwargs.get('data'):
+            project_id = kwargs['data']['project']['id']
+            completions = get_annotated_dataset(project_id)
 
         annotations = []
         for completion in completions:
diff --git a/label_studio_ml/examples/tesseract/tesseract.py b/label_studio_ml/examples/tesseract/tesseract.py
@@ -1,3 +1,5 @@
+import random
+
 from PIL import Image
 import pytesseract as pt
 from label_studio_ml.model import LabelStudioMLBase
@@ -74,3 +76,7 @@ def _extract_meta(task):
             meta["original_width"] = task['original_width']
             meta["original_height"] = task['original_height']
         return meta
+
+    def fit(self, completions, workdir=None, **kwargs):
+        # save some training outputs to the job result
+        return {'random': random.randint(1, 10)}