sintel-dev
diff --git a/‎gpt_model/system_prompt_one_shot.txt‎
Lines changed: 1 addition & 0 deletions b/‎gpt_model/system_prompt_one_shot.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎gpt_model/system_prompt_zero_shot.txt‎
Lines changed: 1 addition & 0 deletions b/‎gpt_model/system_prompt_zero_shot.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎setup.py‎
Lines changed: 1 addition & 1 deletion b/‎setup.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎sigllm/data.py‎
Lines changed: 42 additions & 26 deletions b/‎sigllm/data.py‎
Lines changed: 42 additions & 26 deletions
diff --git a/‎sigllm/gpt.py‎
Lines changed: 52 additions & 0 deletions b/‎sigllm/gpt.py‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎sigllm/result.py‎
Lines changed: 102 additions & 0 deletions b/‎sigllm/result.py‎
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1 @@
+You are a helpful assistant that performs time series anomaly detection. The user will provide an example of a sequence and a list of indices that are anomalous. Then the user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the following sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.
@@ -0,0 +1 @@
+You are a helpful assistant that performs time series anomaly detection. The user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.
@@ -12,7 +12,7 @@
     history = history_file.read()
 
 install_requires = [
-    'numpy',
+    'numpy', 'openai', 'pandas','orion', #'collections'
 ]
 
 setup_requires = [
 
@@ -3,18 +3,52 @@
 """
 Data preprocessing module.
 
-This module contains functions to help parse time series into
-text, preparing it for a language model.
+This module contains functions that help convert time series into strings, preparing them for a language model.
 """
 
 import numpy as np
 
+def rolling_window_sequences(X, index, window_size, step_size):
+    """Create rolling window sequences out of time series data.
+
+    Slide a window of ``window_size`` elements over ``X``, moving it forward
+    ``step_size`` elements each round, and collect every full window together
+    with the index value of its first element. A trailing partial window is
+    never emitted; if ``X`` is shorter than ``window_size`` both results are
+    empty.
+
+    Args:
+        X (ndarray):
+            The sequence to iterate over.
+        index (ndarray):
+            Array containing the index values of X.
+        window_size (int):
+            Length of window.
+        step_size (int):
+            Indicating the number of steps to move the window forward each round.
+            Must be positive.
+
+    Returns:
+        ndarray, ndarray:
+            * rolling window sequences.
+            * first index value of each input sequence.
+
+    Raises:
+        ValueError:
+            If ``step_size`` is not positive (the window would never advance).
+    """
+    if step_size <= 0:
+        raise ValueError('step_size must be a positive integer, got {}'.format(step_size))
+
+    # Last valid start is len(X) - window_size; range() is empty when X is too short.
+    starts = range(0, len(X) - window_size + 1, step_size)
+
+    out_X = [X[start:start + window_size] for start in starts]
+    X_index = [index[start] for start in starts]
+
+    return np.asarray(out_X), np.asarray(X_index)
+
 
 def sig2str(values, sep=',', space=False, decimal=0):
     """Convert a signal to a string.
 
-    Convert a 1-dimensional time series into text by casting it
-    to integer values then into a string.
+    Convert a 1-dimensional time series into text by casting and rescaling it
+    to nonnegative integer values then into a string.
 
     Args:
         values (numpy.ndarray):
@@ -34,30 +68,12 @@ def sig2str(values, sep=',', space=False, decimal=0):
     values = np.abs(values)
 
     sequence = sign * (values * 10**decimal).astype(int)
+    
+    #Rescale all elements to be nonnegative
+    sequence = sequence - min(sequence)
 
     res = sep.join([str(num) for num in sequence])
     if space:
         res = ' '.join(res)
 
-    return res
-
-
-def str2sig(text, sep=',', decimal=0):
-    """Convert a text string to a signal.
-
-    Convert a string containing digits into an array of numbers.
-
-    Args:
-        text (str):
-            A string containing signal values.
-        sep (str):
-            String that was used to separate each element in text, Default to `","`.
-        decimal (int):
-            Number of decimal points to shift each element in text to. Default to `0`.
-
-    Returns:
-        numpy.ndarray:
-            A 1-dimensional array containing parsed elements in `text`.
-    """
-    values = np.fromstring(text, dtype=float, sep=sep)
-    return values * 10**(-decimal)
+    return res
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+"""
+GPT model module.
+
+This module contains functions that are specifically used for GPT models
+"""
+
+import openai
+
+# NOTE: the path is relative to the current working directory, not to this module.
+with open("../gpt_model/openai_api_key.txt", "r") as f:
+    # Drop surrounding whitespace: a trailing newline left by an editor would
+    # otherwise become part of the key sent to the API.
+    api_key = f.read().strip()
+    
+def load_system_prompt(file_path):
+    """Read a system prompt from a text file.
+
+    Args:
+        file_path (str):
+            Path to the text file holding the prompt.
+
+    Returns:
+        str:
+            The full, unmodified contents of the file.
+    """
+    # Explicit encoding so the prompt decodes the same way on every platform.
+    with open(file_path, encoding="utf-8") as f:
+        return f.read()
+
+# Default chat model name; used as the default ``gpt_model`` argument below.
+GPT_model = "gpt-3.5-turbo" #"gpt-4"
+
+# Module-level OpenAI client, created at import time from the key read above.
+client = openai.Client(api_key=api_key)
+
+def get_gpt_model_response(message, gpt_model=GPT_model):
+    """Send a chat conversation to the OpenAI API and return the reply text.
+
+    Args:
+        message (list of dict):
+            Chat messages, passed to the API as its ``messages`` argument.
+        gpt_model (str):
+            Name of the chat model to query. Default to ``GPT_model``.
+
+    Returns:
+        str:
+            Message content of the first choice in the completion.
+    """
+    response = client.chat.completions.create(model=gpt_model, messages=message)
+    first_choice = response.choices[0]
+    return first_choice.message.content
+
+def create_message_zero_shot(seq_query, system_prompt_file='../gpt_model/system_prompt_zero_shot.txt'):
+    """Build the chat messages for a zero-shot query.
+
+    Args:
+        seq_query (str):
+            Text form of the sequence to be analysed.
+        system_prompt_file (str):
+            Path to the file holding the system prompt.
+
+    Returns:
+        list of dict:
+            The system message followed by the user message with the query.
+    """
+    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}
+    query_message = {"role": "user", "content": f"Sequence: {seq_query}"}
+    return [system_message, query_message]
+
+
+def create_message_one_shot(seq_query, seq_ex, ano_ind_ex, system_prompt_file='../gpt_model/system_prompt_one_shot.txt'):
+    """Build the chat messages for a one-shot query.
+
+    Args:
+        seq_query (str):
+            Text form of the sequence to be analysed.
+        seq_ex (str):
+            Text form of the example sequence.
+        ano_ind_ex (str):
+            Anomalous indices of the example, used as the assistant's answer.
+        system_prompt_file (str):
+            Path to the file holding the system prompt.
+
+    Returns:
+        list of dict:
+            System message, example question, example answer, then the query.
+    """
+    return [
+        {"role": "system", "content": load_system_prompt(system_prompt_file)},
+        # worked example: question and its expected answer
+        {"role": "user", "content": f"Sequence: {seq_ex}"},
+        {"role": "assistant", "content": ano_ind_ex},
+        # the sequence actually being queried
+        {"role": "user", "content": f"Sequence: {seq_query}"},
+    ]
+
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+"""
+Result post-processing module.
+
+This module contains functions that help convert model responses back to timestamps.
+"""
+import numpy as np
+from collections import Counter
+
+def str2ind(text, len_seq, sep=','):
+    """Convert a text string to indices.
+
+    Split ``text`` on ``sep``, keep only the ASCII digits of each field and
+    convert every non-empty field to an integer index. Fields left empty after
+    filtering (doubled or trailing separators, pure text) are skipped, and
+    indices that are not smaller than ``len_seq`` are discarded.
+
+    Args:
+        text (str):
+            A string containing indices values.
+        len_seq (int):
+            The length of processed sequence.
+        sep (str):
+            String that was used to separate each element in text, Default to `","`.
+
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional integer array containing parsed elements in `text`.
+    """
+    indices = []
+    for field in text.split(sep):
+        # Compare against '0'..'9' rather than str.isdigit(), which also accepts
+        # Unicode digits such as superscripts that int() cannot parse.
+        digits = ''.join(ch for ch in field if '0' <= ch <= '9')
+        if not digits:
+            continue
+
+        value = int(digits)
+        # Filter before building the array so absurdly long digit runs cannot
+        # overflow the fixed-width integer dtype.
+        if value < len_seq:
+            indices.append(value)
+
+    return np.asarray(indices, dtype=int)
+
+
+def get_anomaly_list_within_seq(res_list, alpha = 0.5):
+    """Get the final list of anomalous indices of a sequence
+
+    Choose which index is considered anomalous in the sequence based on number of votes
+    from multiple LLM responses. Each response contributes at most one vote per index,
+    so an index repeated inside a single response is counted once.
+
+    Args:
+        res_list (list of numpy.ndarray):
+            A list of 1-dimensional arrays containing anomalous indices output by LLM
+        alpha (float):
+            Percentage of total number of votes that an index needs to have to be
+            considered anomalous. Default: 0.5
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array containing final anomalous indices, sorted in
+            ascending order. Empty if `res_list` is empty.
+    """
+    # np.concatenate raises on an empty list; no responses means no anomalies.
+    if len(res_list) == 0:
+        return np.array([], dtype=int)
+
+    min_vote = np.ceil(alpha * len(res_list))
+
+    # De-duplicate within each response so one response cannot vote twice.
+    votes = np.concatenate([np.unique(res) for res in res_list])
+
+    unique_elements, counts = np.unique(votes, return_counts=True)
+
+    return unique_elements[counts >= min_vote]
+
+def get_anomaly_list_across_seq(ano_list, window_size, step_size, beta = 0.5):
+    """Get the final list of anomalous indices of a sequence when combining all rolling windows
+
+    An index is kept when it appears in at least
+    ``ceil(beta * window_size / step_size)`` of the per-window lists.
+
+    Args:
+        ano_list (list of numpy.ndarray):
+            A list of 1-dimensional arrays containing anomalous indices of each window
+        window_size (int):
+            Length of each window
+        step_size (int):
+            Indicating the number of steps the window moves forward each round.
+        beta (float):
+            Percentage of number of containing windows that an index needs to have to be
+            considered anomalous. Default: 0.5
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array containing final anomalous indices, sorted in
+            ascending order. Empty if `ano_list` is empty.
+    """
+    # np.concatenate raises on an empty list; no windows means no anomalies.
+    if len(ano_list) == 0:
+        return np.array([], dtype=int)
+
+    # window_size / step_size is used as the number of windows containing an index.
+    min_vote = np.ceil(beta * window_size / step_size)
+
+    flattened_res = np.concatenate(ano_list)
+
+    # np.unique returns its values sorted, so the filtered result is sorted too.
+    unique_elements, counts = np.unique(flattened_res, return_counts=True)
+
+    return unique_elements[counts >= min_vote]
+
+def ind2time(sequence, ind_list):
+    """Map positional indices to the timestamps of a signal.
+
+    Args:
+        sequence (pandas.DataFrame):
+            Signal with a `timestamp` column.
+        ind_list (numpy.ndarray):
+            A 1-dimensional array of positional indices into `sequence`.
+    Returns:
+        numpy.ndarray:
+            A 1-dimensional array with the `timestamp` value of each row of
+            `sequence` selected by `ind_list`.
+    """
+    selected_rows = sequence.iloc[ind_list]
+    timestamps = selected_rows.timestamp
+    return timestamps.to_numpy()
+
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+You are a helpful assistant that performs time series anomaly detection. The user will provide an example of a sequence and a list of indices that are anomalous. Then the user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the following sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+You are a helpful assistant that performs time series anomaly detection. The user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.`
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`history = history_file.read()`
`13`	`13`
`14`	`14`	`install_requires = [`
`15`		`- 'numpy',`
	`15`	`+ 'numpy', 'openai', 'pandas','orion', #'collections'`
`16`	`16`	`]`
`17`	`17`
`18`	`18`	`setup_requires = [`