Skip to content

Commit

Permalink
Stop tracking sigllm/sigllm.ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
Linh-nk committed Mar 14, 2024
1 parent a1aa8c7 commit 0c9098f
Show file tree
Hide file tree
Showing 21 changed files with 509 additions and 497 deletions.
Binary file added results/E1_1.pdf
Binary file not shown.
Binary file added results/E1_2.pdf
Binary file not shown.
Binary file added results/E1_turbo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/E1_turbo_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/M7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/M7_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/M7_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/M7_turbo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
history = history_file.read()

install_requires = [
'numpy', 'openai', 'pandas','orion', #'collections'
'numpy', 'openai', 'pandas','orion', 'matplotlib', 'scikit-learn',
'tiktoken',
]

setup_requires = [
Expand Down
132 changes: 132 additions & 0 deletions sigllm/anomalies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-

"""
Result post-processing module.
This module contains functions that help convert model responses back to indices and timestamps.
"""
import numpy as np


def str2sig(text, sep=',', decimal=0):
    """Convert a text string to a signal.

    Convert a string containing digits into an array of numbers. Every
    character that is not a digit, a decimal point or part of `sep` is
    discarded before parsing, and tokens that cannot be parsed as a number
    (for example the empty token produced by a doubled or trailing separator)
    are skipped.

    Args:
        text (str):
            A string containing signal values.
        sep (str):
            String that was used to separate each element in text. It may be
            longer than one character. Default to `","`.
        decimal (int):
            Number of decimal points to shift each element in text to. Default to `0`.

    Returns:
        numpy.ndarray:
            A 1-dimensional float array containing the parsed elements of `text`,
            each multiplied by `10**(-decimal)`.

    Raises:
        ValueError:
            If `sep` is an empty string.
    """
    # Keep the characters that make up `sep` rather than comparing each
    # character to `sep` as a whole; otherwise a multi-character separator
    # such as ", " would be stripped and all numbers would fuse together.
    keep = set(sep) | {'.'}
    cleaned = ''.join(ch for ch in text if ch.isdigit() or ch in keep)

    values = []
    for token in cleaned.split(sep):
        try:
            values.append(float(token))
        except ValueError:
            # Empty or malformed token: skip it instead of truncating
            # the rest of the sequence.
            continue

    # A float base keeps the power well-defined even if `decimal` is a numpy integer.
    return np.array(values, dtype=float) * 10.0 ** (-decimal)


def str2idx(text, len_seq, sep=','):
    """Convert a text string to indices.

    Convert a string containing digits into an array of indices. Every
    character that is not a digit or part of `sep` is discarded before
    parsing, tokens that cannot be parsed as an integer are skipped, and
    indices that are not smaller than `len_seq` are dropped.

    Args:
        text (str):
            A string containing indices values.
        len_seq (int):
            The length of processed sequence.
        sep (str):
            String that was used to separate each element in text. It may be
            longer than one character. Default to `","`.

    Returns:
        numpy.ndarray:
            A 1-dimensional integer array containing the parsed elements of
            `text` that are smaller than `len_seq`.

    Raises:
        ValueError:
            If `sep` is an empty string.
    """
    # Keep the characters that make up `sep` so that multi-character
    # separators survive the clean-up step.
    keep = set(sep)
    cleaned = ''.join(ch for ch in text if ch.isdigit() or ch in keep)

    indices = []
    for token in cleaned.split(sep):
        try:
            idx = int(token)
        except ValueError:
            # Empty or malformed token (e.g. from ",,"): ignore it.
            continue

        # Filter on Python integers before building the array, so an absurdly
        # long digit run cannot overflow the fixed-width numpy integer type.
        if idx < len_seq:
            indices.append(idx)

    return np.array(indices, dtype=int)


def get_anomaly_list_within_seq(res_list, alpha=0.5):
    """Get the final list of anomalous indices of a sequence.

    Choose the anomalous indices of a sequence based on multiple LLM
    responses. Each response casts at most one vote per index, and an index
    is kept when it gathers at least `ceil(alpha * len(res_list))` votes.

    Args:
        res_list (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices output by the LLM.
        alpha (float):
            Percentage of votes needed for an index to be deemed anomalous. Default: 0.5

    Returns:
        numpy.ndarray:
            A sorted 1-dimensional array containing the final anomalous indices.
            It is empty when `res_list` is empty.
    """
    # `np.concatenate` raises on an empty list; no responses means no anomalies.
    if len(res_list) == 0:
        return np.array([], dtype=int)

    min_vote = np.ceil(alpha * len(res_list))

    # De-duplicate inside each response first, so that an index repeated
    # within a single response still counts as one vote only.
    votes = np.concatenate([np.unique(res) for res in res_list])

    candidates, counts = np.unique(votes, return_counts=True)

    return candidates[counts >= min_vote]


def merge_anomaly_seq(anomalies, start_indices, window_size, step_size, beta=0.5):
    """Get the final list of anomalous indices of a sequence when merging all rolling windows.

    The window-relative indices of every window are shifted by the first index
    of that window. An index is kept when it is reported by at least
    `ceil(beta * window_size / step_size)` windows; each window counts at
    most once per index.

    Args:
        anomalies (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices of each window.
        start_indices (numpy.ndarray):
            A 1-dimensional array containing the first index of each window.
        window_size (int):
            Length of each window.
        step_size (int):
            Indicating the number of steps the window moves forward each round.
        beta (float):
            Percentage of containing windows needed for index to be deemed anomalous. Default: 0.5

    Returns:
        numpy.ndarray:
            A sorted 1-dimensional array containing the final anomalous indices.
            It is empty when no window is given.
    """
    # Shift window-relative indices to sequence positions; `np.unique` makes
    # sure a window cannot vote more than once for the same index.
    shifted = [
        np.unique(arr) + first_idx
        for arr, first_idx in zip(anomalies, start_indices)
    ]

    # `np.concatenate` raises on an empty list; no windows means no anomalies.
    if not shifted:
        return np.array([], dtype=int)

    min_vote = np.ceil(beta * window_size / step_size)

    candidates, counts = np.unique(np.concatenate(shifted), return_counts=True)

    # `np.unique` already returns its values sorted, so no extra sort is needed.
    return candidates[counts >= min_vote]


def idx2time(sequence, idx_list):
    """Convert a list of indices into a list of timestamps.

    Args:
        sequence (pandas.DataFrame):
            Signal with timestamps and values. The rows are selected by
            position and their `timestamp` column is returned.
        idx_list (numpy.ndarray):
            A 1-dimensional array of indices.

    Returns:
        numpy.ndarray:
            A 1-dimensional array containing the timestamps found at the
            given positions.
    """
    selected_rows = sequence.iloc[idx_list]
    timestamps = selected_rows.timestamp
    return timestamps.to_numpy()
24 changes: 13 additions & 11 deletions sigllm/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@
"""
Data preprocessing module.
This module contains functions that help convert timeseries into string, preparing it for a language model.
This module contains functions that prepare timeseries for a language model.
"""

import numpy as np


def rolling_window_sequences(X, index, window_size, step_size):
"""Create rolling window sequences out of time series data.
The function creates an array of input sequences and an array of target sequences by rolling
over the input sequence with a specified window.
Optionally, certain values can be dropped from the sequences.
The function creates an array of sequences by rolling over the input sequence.
Args:
X (ndarray):
Expand Down Expand Up @@ -44,21 +43,23 @@ def rolling_window_sequences(X, index, window_size, step_size):
return np.asarray(out_X), np.asarray(X_index)


def sig2str(values, sep=',', space=False, decimal=0):
def sig2str(values, sep=',', space=False, decimal=0, rescale=True):
"""Convert a signal to a string.
Convert a 1-dimensional time series into text by casting and rescaling it
to nonnegative integer values then into a string.
to nonnegative integer values then into a string (optional).
Args:
values (numpy.ndarray):
A sequence of signal values.
sep (str):
String to separate each element in values, Default to `","`.
String to separate each element in values. Default to `","`.
space (bool):
Whether to add space between each digit in the result. Default to `False`.
decimal (int):
Number of decimal points to keep from the float representation. Default to `0`.
rescale(bool):
Whether to rescale the time series. Default to `True`
Returns:
str:
Expand All @@ -68,12 +69,13 @@ def sig2str(values, sep=',', space=False, decimal=0):
values = np.abs(values)

sequence = sign * (values * 10**decimal).astype(int)

#Rescale all elements to be nonnegative
sequence = sequence - min(sequence)

# Rescale all elements to be nonnegative
if rescale:
sequence = sequence - min(sequence)

res = sep.join([str(num) for num in sequence])
if space:
res = ' '.join(res)

return res
return res
41 changes: 25 additions & 16 deletions sigllm/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,48 +5,57 @@
This module contains functions that are specifically used for GPT models
"""
import os

from openai import OpenAI

import openai

with open("../gpt_model/openai_api_key.txt", "r") as f:
api_key = f.read()

def load_system_prompt(file_path):
    """Read a system prompt from a text file.

    Args:
        file_path (str):
            Path to the file containing the system prompt.

    Returns:
        str:
            The whole content of the file.
    """
    with open(file_path) as prompt_file:
        return prompt_file.read()

GPT_model = "gpt-3.5-turbo" #"gpt-4"

client = openai.Client(api_key=api_key)
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'

ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)


GPT_model = "gpt-4" # "gpt-4-0125-preview" # # #"gpt-3.5-turbo" #
client = OpenAI()


def get_gpt_model_response(message, gpt_model=GPT_model):
completion = client.chat.completions.create(
model=gpt_model,
messages=message,
model=gpt_model,
messages=message,
)
return completion.choices[0].message.content

def create_message_zero_shot(seq_query, system_prompt_file='../gpt_model/system_prompt_zero_shot.txt'):

def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
messages = []
messages.append({"role": "system", "content":load_system_prompt(system_prompt_file)})

messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})

# final prompt
messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
return messages


def create_message_one_shot(seq_query, seq_ex, ano_ind_ex, system_prompt_file='../gpt_model/system_prompt_one_shot.txt'):
def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
messages = []
messages.append({"role": "system", "content":load_system_prompt(system_prompt_file)})

messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})

# one shot
messages.append({"role": "user", "content": f"Sequence: {seq_ex}"})
messages.append({"role": "assistant", "content": ano_ind_ex})
messages.append({"role": "assistant", "content": ano_idx_ex})

# final prompt
messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
return messages

102 changes: 0 additions & 102 deletions sigllm/result.py

This file was deleted.

Loading

0 comments on commit 0c9098f

Please sign in to comment.