Improve timeseries segmenter

makseq · makseq · commit 7ab5734c6887 · 2025-06-06T17:55:36.000+01:00
diff --git a/README.md b/README.md
@@ -62,7 +62,7 @@ Check the **Required parameters** column to see if you need to set any additiona
 | [sklearn_text_classifier](/label_studio_ml/examples/sklearn_text_classifier)               | Text classification with [scikit-learn](https://scikit-learn.org/stable/)                                                                            | ✅              | ❌                | ✅        | None                        | Arbitrary | 
 | [spacy](/label_studio_ml/examples/spacy)                                                   | NER by [SpaCy](https://spacy.io/)                                                                                                                    | ✅              | ❌                | ❌        | None                       | Set      [(see documentation)](https://spacy.io/usage/linguistic-features) |
 | [tesseract](/label_studio_ml/examples/tesseract)                                           | Interactive OCR. [Details](https://github.com/tesseract-ocr/tesseract)                                                                               | ❌              | ✅                | ❌        | None                       | Set (characters)                                                           | 
-| [timeseries_segmenter](/label_studio_ml/examples/timeseries_segmenter)             | Time series segmentation using scikit-learn | ✅              | ✅                | ✅        | None   | Set |
+| [timeseries_segmenter](/label_studio_ml/examples/timeseries_segmenter)             | Time series segmentation using scikit-learn RandomForest | ✅              | ✅                | ✅        | None   | Set |
 | [watsonX](/label_studio_ml/examples/watsonx)| LLM inference with [WatsonX](https://www.ibm.com/products/watsonx-ai) and integration with [WatsonX.data](watsonx.data)| ✅ | ✅| ❌ | None| Arbitrary|
 | [yolo](/label_studio_ml/examples/yolo)                                                     | All YOLO tasks are supported: [YOLO](https://docs.ultralytics.com/tasks/) | ✅ | ❌ | ❌ | None | Arbitrary |
 
diff --git a/label_studio_ml/examples/timeseries_segmenter/README.md b/label_studio_ml/examples/timeseries_segmenter/README.md
@@ -1,7 +1,7 @@
 # Time Series Segmenter for Label Studio
 
 This example demonstrates a minimal ML backend that performs time series segmentation.
-It trains a logistic regression model on labeled CSV data and predicts segments
+It trains a random forest classifier on labeled CSV data and predicts segments
 for new tasks. The backend expects the labeling configuration to use
 `<TimeSeries>` and `<TimeSeriesLabels>` tags.
 
@@ -48,14 +48,14 @@ columns.
 
 Training starts automatically when annotations are created or updated. The model
 collects all labeled segments, extracts sensor values inside each segment and
-fits a logistic regression classifier. Model artifacts are stored in the
+fits a random forest classifier. Model artifacts are stored in the
 `MODEL_DIR` (defaults to the current directory).
 
 Steps performed by `fit()`:
 
 1. Fetch all labeled tasks from Label Studio.
 2. Convert labeled ranges to per-row training samples.
-3. Fit a logistic regression model.
+3. Fit a random forest classifier.
 4. Save the trained model to disk.
 
 ## Prediction
@@ -82,7 +82,7 @@ flowchart TD
   B -- no --> C[Skip]
   B -- yes --> D[Load labeled tasks]
   D --> E[Collect per-row samples]
-  E --> F[Fit logistic regression]
+  E --> F[Fit random forest]
   F --> G[Save model]
 ```
 
diff --git a/label_studio_ml/examples/timeseries_segmenter/model.py b/label_studio_ml/examples/timeseries_segmenter/model.py
@@ -1,14 +1,14 @@
-"""Logistic regression based time series segmenter.
+"""Random forest based time series segmenter.
 
-This example shows a very small yet functional ML backend that trains a
+This example demonstrates a small yet functional ML backend that trains a
 classifier on labeled time series CSV files and predicts segments for new
-tasks. The logic is intentionally simple so that it can serve as a starting
-point for your own experiments.
-"""
+from sklearn.ensemble import RandomForestClassifier
+_model: Optional[RandomForestClassifier] = None
+    """Simple random forest based segmenter for time series."""
 
-import os
-import io
-import pickle
+    def _get_model(self, blank: bool = False) -> RandomForestClassifier:
+            _model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
+    def _predict_task(self, task: Dict, model: RandomForestClassifier, params: Dict) -> Dict:
 import logging
 from typing import List, Dict, Optional, Tuple
 
@@ -150,7 +150,7 @@ def _group_rows(self, df: pd.DataFrame, time_col: str) -> List[Dict]:
                 current['end'] = row[time_col]
                 current['scores'].append(row['score'])
             else:
-                if current:
+    def _save_model(self, model: RandomForestClassifier) -> None:
                     segments.append(current)
                 current = {
                     'label': label,
diff --git a/label_studio_ml/examples/timeseries_segmenter/tests/test_segmenter.py b/label_studio_ml/examples/timeseries_segmenter/tests/test_segmenter.py
@@ -71,7 +71,13 @@ def make_task():
                         'from_name': 'label',
                         'to_name': 'ts',
                         'type': 'timeserieslabels',
-                        'value': {
+        segs = results[0]["result"]
+        assert len(segs) == 2
+        assert segs[0]["value"]["start"] == 0
+        assert segs[0]["value"]["timeserieslabels"] == ["Run"]
+        assert segs[1]["value"]["timeserieslabels"] == ["Walk"]
+        assert 80 <= segs[1]["value"]["start"] <= 90
+        assert segs[1]["value"]["end"] == 99
                             'start': 85,
                             'end': 99,
                             'instant': False,