sintel-dev
diff --git a/‎.github/workflows/readme.yml
+30 b/‎.github/workflows/readme.yml
+30
diff --git a/‎.github/workflows/tests.yml
-20 b/‎.github/workflows/tests.yml
-20
diff --git a/‎HISTORY.md
+8 b/‎HISTORY.md
+8
diff --git a/‎README.md
+80-53 b/‎README.md
+80-53
diff --git a/‎setup.cfg
+1-1 b/‎setup.cfg
+1-1
diff --git a/‎setup.py
+2-2 b/‎setup.py
+2-2
diff --git a/‎sigllm/__init__.py
+1-1 b/‎sigllm/__init__.py
+1-1
diff --git a/‎sigllm/core.py
+4-2 b/‎sigllm/core.py
+4-2
diff --git a/‎sigllm/pipelines/prompter/gpt_prompter.json
+65 b/‎sigllm/pipelines/prompter/gpt_prompter.json
+65
@@ -0,0 +1,30 @@
+# This workflow installs the package and runs `invoke readme` to test the README on every push to master.
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: README
+
+on:
+  push:
+    branches: [ master ]
+
+jobs:
+  readme:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ['3.8']  # quoted so YAML keeps the version a string, not a float
+        os: [ubuntu-latest]
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install package and dependencies
+      run: |
+          python -m pip install --upgrade pip
+          python -m pip install invoke rundoc .
+    - name: invoke readme
+      env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}  # read from repository secrets
+      run: invoke readme
@@ -48,26 +48,6 @@ jobs:
   #     run: make docs
 
 
-  readme:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
-        os: [ubuntu-20.04, macos-13]
-    steps:
-    - uses: actions/checkout@v1
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install package and dependencies
-      run: |
-          python -m pip install --upgrade pip
-          python -m pip install invoke rundoc .
-    - name: invoke readme
-      run: invoke readme
-
-
   unit:
     runs-on: ${{ matrix.os }}
     strategy:
 
@@ -1,5 +1,13 @@
 # History
 
+## 0.0.2 - 2024-10-24
+
+New Prompter pipeline.
+
+* Test README with GPT – [Issue #20](https://github.com/sintel-dev/sigllm/issues/20) by @sarahmish
+* Mistral-prompter – [Issue #19](https://github.com/sintel-dev/sigllm/issues/19) by @Linh-nk
+
+
 ## 0.0.1 - 2024-09-25
 
 First sigllm release to PyPI: https://pypi.org/project/sigllm/
 
@@ -3,91 +3,118 @@
 <i>An open source project from Data to AI Lab at MIT.</i>
 </p>
 
-<!-- Uncomment these lines after releasing the package to PyPI for version and downloads badges -->
-<!--[![PyPI Shield](https://img.shields.io/pypi/v/sigllm.svg)](https://pypi.python.org/pypi/sigllm)-->
-<!--[![Downloads](https://pepy.tech/badge/sigllm)](https://pepy.tech/project/sigllm)-->
-[![Github Actions Shield](https://img.shields.io/github/workflow/status/sintel-dev/sigllm/Run%20Tests)](https://github.com/sintel-dev/sigllm/actions)
+[![Development Status](https://img.shields.io/badge/Development%20Status-2%20--%20Pre--Alpha-yellow)](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha)
+[![Python](https://img.shields.io/badge/Python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://badge.fury.io/py/sigllm) 
+[![PyPi Shield](https://img.shields.io/pypi/v/sigllm.svg)](https://pypi.python.org/pypi/sigllm)
+[![Run Tests](https://github.com/sintel-dev/sigllm/actions/workflows/tests.yml/badge.svg)](https://github.com/sintel-dev/sigllm/actions/workflows/tests.yml)
+[![Downloads](https://pepy.tech/badge/sigllm)](https://pepy.tech/project/sigllm)
 
 
+# SigLLM
 
-# sigllm
+Using Large Language Models (LLMs) for time series anomaly detection.
 
-Signals plus LLMs
-
-- Documentation: https://sintel-dev.github.io/sigllm
+<!-- - Documentation: https://sintel-dev.github.io/sigllm -->
 - Homepage: https://github.com/sintel-dev/sigllm
 
 # Overview
 
-TODO: Provide a short overview of the project here.
-
-# Install
+SigLLM is an extension of the Orion library, built to detect anomalies in time series data using LLMs.
+We provide two types of pipelines for anomaly detection:
+* **Prompter**: directly prompting LLMs to find anomalies in time series.
+* **Detector**: using LLMs to forecast time series and finding anomalies by comparing the real and forecasted signals.
 
-## Requirements
+For more details on our pipelines, please read our [paper](https://arxiv.org/pdf/2405.14755).
 
-**sigllm** has been developed and tested on [Python 3.8, 3.9, 3.10 and 3.11](https://www.python.org/downloads/)
+# Quickstart
 
-Also, although it is not strictly required, the usage of a [virtualenv](https://virtualenv.pypa.io/en/latest/)
-is highly recommended in order to avoid interfering with other software installed in the system
-in which **sigllm** is run.
+## Install with pip
 
-These are the minimum commands needed to create a virtualenv using python3.8 for **sigllm**:
+The easiest and recommended way to install **SigLLM** is using [pip](https://pip.pypa.io/en/stable/):
 
 ```bash
-pip install virtualenv
-virtualenv -p $(which python3.6) sigllm-venv
+pip install sigllm
 ```
+This will pull and install the latest stable release from [PyPI](https://pypi.org/).
 
-Afterwards, you have to execute this command to activate the virtualenv:
 
-```bash
-source sigllm-venv/bin/activate
-```
+In the following example we show how to use one of the **SigLLM Pipelines**.
 
-Remember to execute it every time you start a new console to work on **sigllm**!
+# Detect anomalies using a SigLLM pipeline
 
-<!-- Uncomment this section after releasing the package to PyPI for installation instructions
-## Install from PyPI
+We will load the demo data located in `tutorials/data.csv` for this example:
 
-After creating the virtualenv and activating it, we recommend using
-[pip](https://pip.pypa.io/en/stable/) in order to install **sigllm**:
+```python3
+import pandas as pd
 
-```bash
-pip install sigllm
+data = pd.read_csv('data.csv')
+data.head()
 ```
 
-This will pull and install the latest stable release from [PyPI](https://pypi.org/).
--->
+which should show a signal with `timestamp` and `value`.
+```
+     timestamp      value
+0   1222840800   6.357008
+1   1222862400  12.763547
+2   1222884000  18.204697
+3   1222905600  21.972602
+4   1222927200  23.986643
+5   1222948800  24.906765
+```
 
-## Install from source
+In this example we use the `gpt_detector` pipeline and set some hyperparameters. In this case, we set the thresholding strategy to dynamic. The hyperparameters are optional and can be removed.
 
-With your virtualenv activated, you can clone the repository and install it from
-source by running `make install` on the `stable` branch:
+In addition, the `SigLLM` object takes in a `decimal` argument to determine how many decimal digits of the float value to include. Here, we don't want to keep any decimal values, so we set it to zero.
 
-```bash
-git clone [email protected]:sintel-dev/sigllm.git
-cd sigllm
-git checkout stable
-make install
+```python3
+from sigllm import SigLLM
+
+hyperparameters = {
+    "orion.primitives.timeseries_anomalies.find_anomalies#1": {
+        "fixed_threshold": False
+    }
+}
+
+sigllm = SigLLM(
+    pipeline='gpt_detector',
+    decimal=0,
+    hyperparameters=hyperparameters
+)
 ```
 
-## Install for Development
+Now that we have initialized the pipeline, we are ready to use it to detect anomalies:
 
-If you want to contribute to the project, a few more steps are required to make the project ready
-for development.
+```python3
+anomalies = sigllm.detect(data)
+```
+> :warning: Depending on the length of your timeseries, this might take time to run.
 
-Please head to the [Contributing Guide](https://sintel-dev.github.io/sigllm/contributing.html#get-started)
-for more details about this process.
+The output of the previous command will be a ``pandas.DataFrame`` containing a table of detected anomalies:
 
-# Quickstart
+```
+        start         end  severity
+0  1225864800  1227139200  0.625879
+```
+
+# Resources
+
+Additional resources that might be of interest:
+* Learn about [Orion](https://github.com/sintel-dev/Orion).
+* Read our [paper](https://arxiv.org/pdf/2405.14755).
 
-In this short tutorial we will guide you through a series of steps that will help you
-getting started with **sigllm**.
 
-TODO: Create a step by step guide here.
+# Citation
 
-# What's next?
+If you use **SigLLM** for your research, please consider citing the following paper:
 
-For more details about **sigllm** and all its possibilities
-and features, please check the [documentation site](
-https://sintel-dev.github.io/sigllm/).
+Sarah Alnegheimish, Linh Nguyen, Laure Berti-Equille, Kalyan Veeramachaneni. [Can Large Language Models be Anomaly Detectors for Time Series?](https://arxiv.org/pdf/2405.14755).
+
+```
+@inproceedings{alnegheimish2024sigllm,
+  title={Can Large Language Models be Anomaly Detectors for Time Series?},
+  author={Alnegheimish, Sarah and Nguyen, Linh and Berti-Equille, Laure and Veeramachaneni, Kalyan},
+  booktitle={2024 IEEE International Conference on Data Science and Advanced Analytics (IEEE DSAA)},
+  organization={IEEE},
+  year={2024}
+}
+```
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.1
+current_version = 0.0.2.dev1
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
 
@@ -112,11 +112,11 @@
     keywords='sigllm sigllm sigllm',
     name='sigllm',
     packages=find_packages(include=['sigllm', 'sigllm.*']),
-    python_requires='>=3.8',
+    python_requires='>=3.8,<3.12',
     setup_requires=setup_requires,
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/sigllm',
-    version='0.0.1',
+    version='0.0.2.dev1',
     zip_safe=False,
 )
@@ -4,7 +4,7 @@
 
 __author__ = 'MIT Data To AI Lab'
 __email__ = '[email protected]'
-__version__ = '0.0.1'
+__version__ = '0.0.2.dev1'
 
 import os
 
 
@@ -45,6 +45,9 @@ class SigLLM(Orion):
     DEFAULT_PIPELINE = 'mistral_detector'
 
     def _augment_hyperparameters(self, primitive, key, value):
+        if not value:
+            return
+
         if self._hyperparameters is None:
             self._hyperparameters = {
                 primitive: {}
@@ -53,8 +56,7 @@ def _augment_hyperparameters(self, primitive, key, value):
             if primitive not in self._hyperparameters:
                 self._hyperparameters[primitive] = {}
 
-        if value:
-            self._hyperparameters[primitive][key] = value
+        self._hyperparameters[primitive][key] = value
 
     def __init__(self, pipeline: Union[str, dict, MLPipeline] = None, interval: int = None,
                  decimal: int = None, window_size: int = None, hyperparameters: dict = None):
 
@@ -0,0 +1,65 @@
+{
+    "primitives": [
+        "mlstars.custom.timeseries_preprocessing.time_segments_aggregate",
+        "sklearn.impute.SimpleImputer",
+        "sigllm.primitives.transformation.Float2Scalar",
+        "sigllm.primitives.prompting.timeseries_preprocessing.rolling_window_sequences",
+        "sigllm.primitives.transformation.format_as_string",
+        "sigllm.primitives.prompting.gpt.GPT",
+        "sigllm.primitives.transformation.format_as_integer",
+        "sigllm.primitives.prompting.anomalies.val2idx",
+        "sigllm.primitives.prompting.anomalies.find_anomalies_in_windows",
+        "sigllm.primitives.prompting.anomalies.merge_anomalous_sequences",
+        "sigllm.primitives.prompting.anomalies.format_anomalies"
+    ],
+    "init_params": {
+        "mlstars.custom.timeseries_preprocessing.time_segments_aggregate#1": {
+            "time_column": "timestamp",
+            "interval": 21600,
+            "method": "mean"
+        },
+        "sigllm.primitives.transformation.Float2Scalar#1": {
+            "decimal": 2,
+            "rescale": true
+        },
+        "sigllm.primitives.prompting.timeseries_preprocessing.rolling_window_sequences#1": {
+            "window_size": 200,
+            "step_size": 40
+        },
+        "sigllm.primitives.transformation.format_as_string#1": {
+            "space": true
+        },
+        "sigllm.primitives.prompting.gpt.GPT#1": {
+            "name": "gpt-3.5-turbo",
+            "samples": 10
+        },
+        "sigllm.primitives.prompting.anomalies.find_anomalies_in_windows#1": {
+            "alpha": 0.4
+        },
+        "sigllm.primitives.prompting.anomalies.merge_anomalous_sequences#1": {
+            "beta": 0.5
+        }
+    },
+    "input_names": {
+        "sigllm.primitives.prompting.gpt.GPT#1": {
+            "X": "X_str"
+        },
+        "sigllm.primitives.transformation.format_as_integer#1":{
+            "X": "y_hat"
+        }
+    },
+    "output_names": {
+        "mlstars.custom.timeseries_preprocessing.time_segments_aggregate#1": {
+            "index": "timestamp"
+        },
+        "sigllm.primitives.transformation.format_as_string#1": {
+            "X": "X_str"
+        },
+        "sigllm.primitives.prompting.gpt.GPT#1": {
+            "y": "y_hat"
+        },
+        "sigllm.primitives.transformation.format_as_integer#1":{
+            "X": "y"
+        }
+    }
+}