sintel-dev
diff --git a/‎setup.py
Lines changed: 2 additions & 1 deletion b/‎setup.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎sigllm/anomalies.py
Lines changed: 132 additions & 0 deletions b/‎sigllm/anomalies.py
Lines changed: 132 additions & 0 deletions
diff --git a/‎sigllm/data.py
Lines changed: 45 additions & 27 deletions b/‎sigllm/data.py
Lines changed: 45 additions & 27 deletions
diff --git a/‎sigllm/gpt.py
Lines changed: 61 additions & 0 deletions b/‎sigllm/gpt.py
Lines changed: 61 additions & 0 deletions
diff --git a/‎sigllm/sigllm.py
Lines changed: 38 additions & 1 deletion b/‎sigllm/sigllm.py
Lines changed: 38 additions & 1 deletion
@@ -12,7 +12,8 @@
     history = history_file.read()
 
 install_requires = [
-    'numpy',
+    'numpy', 'openai', 'pandas','orion', 'matplotlib', 'scikit-learn',
+    'tiktoken',
 ]
 
 setup_requires = [
 
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+"""
+Result post-processing module.
+
+This module contains functions that help convert model responses back to indices and timestamps.
+"""
+import numpy as np
+
+
+def str2sig(text, sep=',', decimal=0):
+    """Parse a textual signal back into an array of numbers.
+
+    Every character of `text` that is not a digit, the separator or a decimal
+    point is discarded before parsing (minus signs are therefore dropped too).
+    The parsed values are then multiplied by `10**(-decimal)`.
+
+    Args:
+        text (str):
+            A string containing signal values.
+        sep (str):
+            String that was used to separate each element in text. Default to `","`.
+        decimal (int):
+            Number of decimal points to shift each element in text to. Default to `0`.
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array containing parsed elements in `text`.
+    """
+    # Keep only the characters that can belong to a number or to the separator
+    kept_chars = [char for char in text if char.isdigit() or char in (sep, '.')]
+    cleaned = ''.join(kept_chars)
+
+    scale = 10**(-decimal)
+    parsed = np.fromstring(cleaned, dtype=float, sep=sep)
+    return parsed * scale
+
+
+def str2idx(text, len_seq, sep=','):
+    """Parse a textual answer into an array of valid indices.
+
+    Every character of `text` that is neither a digit nor the separator is
+    discarded, the remainder is parsed as integers, and only the values that
+    are strictly smaller than `len_seq` are kept.
+
+    Args:
+        text (str):
+            A string containing indices values.
+        len_seq (int):
+            The length of processed sequence.
+        sep (str):
+            String that was used to separate each element in text. Default to `","`.
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array containing parsed elements in `text`.
+    """
+    # Strip everything that is not part of an index or a separator
+    cleaned = ''.join(char for char in text if char.isdigit() or char == sep)
+    indices = np.fromstring(cleaned, dtype=int, sep=sep)
+
+    # Indices pointing past the end of the sequence are dropped
+    within_sequence = indices < len_seq
+    return indices[within_sequence]
+
+
+def get_anomaly_list_within_seq(res_list, alpha=0.5):
+    """Get the final list of anomalous indices of a sequence.
+
+    Choose the anomalous indices of a sequence by voting over multiple LLM
+    responses. Each response casts at most one vote per index, so an index that
+    is repeated inside a single response is only counted once for it.
+
+    Args:
+        res_list (List[numpy.ndarray]):
+            A list of 1-dimensional arrays containing anomalous indices output by LLM.
+        alpha (float):
+            Percentage of votes needed for an index to be deemed anomalous. Default: 0.5
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional, sorted array containing final anomalous indices. It is
+            empty when `res_list` is empty.
+    """
+    # Nothing to vote on: np.concatenate would raise on an empty list
+    if len(res_list) == 0:
+        return np.array([], dtype=int)
+
+    min_vote = np.ceil(alpha * len(res_list))
+
+    # Deduplicate every response so that one response cannot vote twice for an index
+    votes = np.concatenate([np.unique(res) for res in res_list])
+
+    unique_elements, counts = np.unique(votes, return_counts=True)
+
+    return unique_elements[counts >= min_vote]
+
+
+def merge_anomaly_seq(anomalies, start_indices, window_size, step_size, beta=0.5):
+    """Get the final list of anomalous indices of a sequence when merging all rolling windows.
+
+    The indices of every window are first shifted by the first index of that window,
+    then an index is kept when it was reported by at least
+    `ceil(beta * window_size / step_size)` windows. Each window casts at most one
+    vote per index.
+
+    Args:
+        anomalies (List[numpy.ndarray]):
+            A list of 1-dimensional arrays containing anomalous indices of each window.
+        start_indices (numpy.ndarray):
+            A 1-dimensional array containing the first index of each window.
+        window_size (int):
+            Length of each window.
+        step_size (int):
+            Indicating the number of steps the window moves forward each round.
+        beta (float):
+            Percentage of containing windows needed for index to be deemed anomalous. Default: 0.5
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional, sorted array containing final anomalous indices. It is
+            empty when no window is given.
+    """
+    # Shift window-relative indices and deduplicate them within each window
+    shifted = [
+        np.unique(np.asarray(arr) + first_idx)
+        for arr, first_idx in zip(anomalies, start_indices)
+    ]
+
+    # Nothing to merge: np.concatenate would raise on an empty list
+    if not shifted:
+        return np.array([], dtype=int)
+
+    min_vote = np.ceil(beta * window_size / step_size)
+
+    # np.unique returns the sorted unique values, so no extra sort is needed
+    unique_elements, counts = np.unique(np.concatenate(shifted), return_counts=True)
+
+    return unique_elements[counts >= min_vote]
+
+
+def idx2time(sequence, idx_list):
+    """Look up the timestamps located at the given positions of a signal.
+
+    Args:
+        sequence (pandas.DataFrame):
+            Signal with timestamps and values. Its `timestamp` column is read.
+        idx_list (numpy.ndarray):
+            A 1-dimensional array of positional indices.
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array containing timestamps.
+    """
+    selected_rows = sequence.iloc[idx_list]
+    return selected_rows['timestamp'].to_numpy()
@@ -3,28 +3,63 @@
 """
 Data preprocessing module.
 
-This module contains functions to help parse time series into
-text, preparing it for a language model.
+This module contains functions that prepare timeseries for a language model.
 """
 
 import numpy as np
 
 
-def sig2str(values, sep=',', space=False, decimal=0):
+def rolling_window_sequences(X, index, window_size, step_size):
+    """Create rolling window sequences out of time series data.
+
+    The function creates an array of sequences by rolling over the input sequence.
+    Only complete windows are produced; when `X` is shorter than `window_size`
+    both returned arrays are empty.
+
+    Args:
+        X (ndarray):
+            The sequence to iterate over.
+        index (ndarray):
+            Array containing the index values of X.
+        window_size (int):
+            Length of window.
+        step_size (int):
+            Indicating the number of steps to move the window forward each round.
+            Must be at least 1.
+
+    Returns:
+        ndarray, ndarray:
+            * rolling window sequences.
+            * first index value of each input sequence.
+
+    Raises:
+        ValueError:
+            If `step_size` is smaller than 1, since the window would never advance.
+    """
+    if step_size < 1:
+        raise ValueError('step_size must be a positive integer, got {}'.format(step_size))
+
+    # Last admissible start (exclusive) so that every window is complete
+    max_start = len(X) - window_size + 1
+    starts = range(0, max_start, step_size)
+
+    out_X = [X[start:start + window_size] for start in starts]
+    X_index = [index[start] for start in starts]
+
+    return np.asarray(out_X), np.asarray(X_index)
+
+
+def sig2str(values, sep=',', space=False, decimal=0, rescale=True):
     """Convert a signal to a string.
 
-    Convert a 1-dimensional time series into text by casting it
-    to integer values then into a string.
+    Convert a 1-dimensional time series into text by casting and rescaling it
+    to nonnegative integer values then into a string (optional).
 
     Args:
         values (numpy.ndarray):
             A sequence of signal values.
         sep (str):
-            String to separate each element in values, Default to `","`.
+            String to separate each element in values. Default to `","`.
         space (bool):
             Whether to add space between each digit in the result. Default to `False`.
         decimal (int):
             Number of decimal points to keep from the float representation. Default to `0`.
+        rescale(bool):
+            Whether to rescale the time series. Default to `True`
 
     Returns:
         str:
@@ -35,29 +70,12 @@ def sig2str(values, sep=',', space=False, decimal=0):
 
     sequence = sign * (values * 10**decimal).astype(int)
 
+    # Rescale all elements to be nonnegative
+    if rescale:
+        sequence = sequence - min(sequence)
+
     res = sep.join([str(num) for num in sequence])
     if space:
         res = ' '.join(res)
 
     return res
-
-
-def str2sig(text, sep=',', decimal=0):
-    """Convert a text string to a signal.
-
-    Convert a string containing digits into an array of numbers.
-
-    Args:
-        text (str):
-            A string containing signal values.
-        sep (str):
-            String that was used to separate each element in text, Default to `","`.
-        decimal (int):
-            Number of decimal points to shift each element in text to. Default to `0`.
-
-    Returns:
-        numpy.ndarray:
-            A 1-dimensional array containing parsed elements in `text`.
-    """
-    values = np.fromstring(text, dtype=float, sep=sep)
-    return values * 10**(-decimal)
 
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+"""
+GPT model module.
+
+This module contains functions that are specifically used for GPT models.
+"""
+import os
+
+from openai import OpenAI
+
+
+def load_system_prompt(file_path):
+    """Read a system prompt template from disk.
+
+    Args:
+        file_path (str):
+            Path to the text file containing the system prompt.
+
+    Returns:
+        str:
+            The whole content of the file.
+    """
+    # Decode explicitly as UTF-8 so the prompt does not depend on the platform locale
+    with open(file_path, encoding='utf-8') as f:
+        return f.read()
+
+
+# Directory that contains this module; used to locate the prompt templates.
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# File names of the system prompt templates.
+ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
+ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
+
+# NOTE: despite the `_DIR` suffix these are full paths to the template files,
+# resolved in the `template` folder one level above this module.
+ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
+ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
+
+
+# Default model name; other names left by the author: "gpt-4-0125-preview", "gpt-3.5-turbo"
+GPT_model = "gpt-4"
+# NOTE(review): the client is created when the module is imported; presumably it
+# picks up its credentials from the environment - confirm before importing in tests.
+client = OpenAI()
+
+
+def get_gpt_model_response(message, gpt_model=GPT_model):
+    """Send a chat conversation to the model and return the text of its first answer.
+
+    Args:
+        message (list):
+            Chat messages to pass as the `messages` argument of the completion request.
+        gpt_model (str):
+            Name of the model to query. Default to `GPT_model`.
+
+    Returns:
+        The `message.content` of the first choice of the completion.
+    """
+    response = client.chat.completions.create(model=gpt_model, messages=message)
+    first_choice = response.choices[0]
+    return first_choice.message.content
+
+
+def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
+    """Build the chat messages of a zero-shot query.
+
+    Args:
+        seq_query (str):
+            Text representation of the sequence to analyse.
+        system_prompt_file (str):
+            Path to the system prompt file. Default to `ZERO_SHOT_DIR`.
+
+    Returns:
+        list:
+            The system message followed by the user message holding the sequence.
+    """
+    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}
+    query_message = {"role": "user", "content": f"Sequence: {seq_query}"}
+    return [system_message, query_message]
+
+
+def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
+    """Build the chat messages of a one-shot query.
+
+    Args:
+        seq_query (str):
+            Text representation of the sequence to analyse.
+        seq_ex (str):
+            Text representation of the example sequence.
+        ano_idx_ex:
+            Expected answer for the example sequence, used as the assistant message.
+        system_prompt_file (str):
+            Path to the system prompt file. Default to `ONE_SHOT_DIR`.
+
+    Returns:
+        list:
+            The system message, the example exchange (user then assistant) and
+            finally the user message holding the sequence to analyse.
+    """
+    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}
+
+    # Worked example shown to the model before the actual query
+    example_exchange = [
+        {"role": "user", "content": f"Sequence: {seq_ex}"},
+        {"role": "assistant", "content": ano_idx_ex},
+    ]
+
+    query_message = {"role": "user", "content": f"Sequence: {seq_query}"}
+    return [system_message, *example_exchange, query_message]
@@ -1,3 +1,40 @@
 # -*- coding: utf-8 -*-
 
-"""Main module."""
+"""
+Main module.
+
+This module contains functions that get LLM's anomaly detection results.
+"""
+from anomalies import get_anomaly_list_within_seq, str2idx
+from data import sig2str
+
+
+def get_anomalies(seq, msg_func, model_func, num_iters=1, alpha=0.5):
+    """Query the LLM for anomalies and aggregate its answers.
+
+    The sequence is converted to text once, the same prompt is sent `num_iters`
+    times, every answer is parsed into indices and the answers are combined by
+    voting into a single 1D array.
+
+    Args:
+        seq (ndarray):
+            The sequence to detect anomalies.
+        msg_func (func):
+            Function to create message prompt.
+        model_func (func):
+            Function to get LLM answer.
+        num_iters (int):
+            Number of times to run the same query.
+        alpha (float):
+            Percentage of total number of votes that an index needs to have to be
+            considered anomalous. Default: 0.5
+
+    Returns:
+        ndarray:
+            1D array containing anomalous indices of the sequence.
+    """
+    prompt = msg_func(sig2str(seq, space=True))
+
+    # One parsed answer per query of the same prompt
+    responses = [str2idx(model_func(prompt), len(seq)) for _ in range(num_iters)]
+
+    return get_anomaly_list_within_seq(responses, alpha=alpha)
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,8 @@`
`12`	`12`	`history = history_file.read()`
`13`	`13`
`14`	`14`	`install_requires = [`
`15`		`- 'numpy',`
	`15`	`+ 'numpy', 'openai', 'pandas','orion', 'matplotlib', 'scikit-learn',`
	`16`	`+ 'tiktoken',`
`16`	`17`	`]`
`17`	`18`
`18`	`19`	`setup_requires = [`