Skip to content

Commit a1aa8c7

Browse files
committed
get anomalies with chatgpt
1 parent f122724 commit a1aa8c7

File tree

10 files changed

+602
-49
lines changed

10 files changed

+602
-49
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are a helpful assistant that performs time series anomaly detection. The user will provide an example of a sequence and a list of indices that are anomalous. Then the user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the following sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
You are a helpful assistant that performs time series anomaly detection. The user will provide a sequence and you will be asked to give a list of indices that are anomalous in the sequence. The sequences are represented by decimal strings separated by commas. Please give a list of indices that are anomalous in the sequence without producing any additional text. Do not say anything like 'the anomalous indices in the sequence are', just return the numbers.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
history = history_file.read()
1313

1414
# Runtime dependencies of the package, one entry per line.
install_requires = [
    'numpy',
    'openai',
    'pandas',
    'orion',
]
1717

1818
setup_requires = [

sigllm/data.py

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,52 @@
33
"""
44
Data preprocessing module.
55
6-
This module contains functions to help parse time series into
7-
text, preparing it for a language model.
6+
This module contains functions that help convert time series into strings, preparing them for a language model.
87
"""
98

109
import numpy as np
1110

11+
def rolling_window_sequences(X, index, window_size, step_size):
    """Create rolling window sequences out of time series data.

    Slide a window of ``window_size`` elements over ``X``, moving it forward
    ``step_size`` positions each round, and collect every complete window
    together with the index value of its first element. Trailing elements
    that do not fill a complete window are not returned.

    Args:
        X (ndarray):
            The sequence to iterate over.
        index (ndarray):
            Array containing the index values of X.
        window_size (int):
            Length of window. Must be positive.
        step_size (int):
            Indicating the number of steps to move the window forward each round.
            Must be positive.

    Returns:
        ndarray, ndarray:
            * rolling window sequences.
            * first index value of each input sequence.

    Raises:
        ValueError:
            If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f'window_size must be positive, got {window_size}')

    # A non-positive step would never move the window past the end of X.
    if step_size <= 0:
        raise ValueError(f'step_size must be positive, got {step_size}')

    # Last admissible start (exclusive) so that every window is complete.
    max_start = len(X) - window_size + 1
    starts = range(0, max_start, step_size)

    out_X = [X[start:start + window_size] for start in starts]
    X_index = [index[start] for start in starts]

    return np.asarray(out_X), np.asarray(X_index)
45+
1246

1347
def sig2str(values, sep=',', space=False, decimal=0):
1448
"""Convert a signal to a string.
1549
16-
Convert a 1-dimensional time series into text by casting it
17-
to integer values then into a string.
50+
Convert a 1-dimensional time series into text by casting and rescaling it
51+
to nonnegative integer values then into a string.
1852
1953
Args:
2054
values (numpy.ndarray):
@@ -34,30 +68,12 @@ def sig2str(values, sep=',', space=False, decimal=0):
3468
values = np.abs(values)
3569

3670
sequence = sign * (values * 10**decimal).astype(int)
71+
72+
#Rescale all elements to be nonnegative
73+
sequence = sequence - min(sequence)
3774

3875
res = sep.join([str(num) for num in sequence])
3976
if space:
4077
res = ' '.join(res)
4178

42-
return res
43-
44-
45-
def str2sig(text, sep=',', decimal=0):
46-
"""Convert a text string to a signal.
47-
48-
Convert a string containing digits into an array of numbers.
49-
50-
Args:
51-
text (str):
52-
A string containing signal values.
53-
sep (str):
54-
String that was used to separate each element in text, Default to `","`.
55-
decimal (int):
56-
Number of decimal points to shift each element in text to. Default to `0`.
57-
58-
Returns:
59-
numpy.ndarray:
60-
A 1-dimensional array containing parsed elements in `text`.
61-
"""
62-
values = np.fromstring(text, dtype=float, sep=sep)
63-
return values * 10**(-decimal)
79+
return res

sigllm/gpt.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
GPT model module.
5+
6+
This module contains functions that are specifically used for GPT models
7+
"""
8+
9+
import openai
10+
11+
# Read the OpenAI API key once, when the module is imported. The path is
# relative, so it is resolved against the current working directory.
with open("../gpt_model/openai_api_key.txt", "r") as f:
    # Strip surrounding whitespace: a trailing newline in the key file would
    # otherwise be kept as part of the key.
    api_key = f.read().strip()
13+
14+
def load_system_prompt(file_path):
    """Load a system prompt from a text file.

    Args:
        file_path (str):
            Path to the text file holding the prompt.

    Returns:
        str:
            The full content of the file, unmodified.
    """
    # Decode explicitly as UTF-8 so the prompt text does not depend on the
    # platform's default encoding.
    with open(file_path, encoding="utf-8") as f:
        return f.read()
18+
19+
# Default chat model, used when callers do not pass `gpt_model` explicitly.
GPT_model = "gpt-3.5-turbo" #"gpt-4"
20+
21+
# Module-level OpenAI client, built at import time from the `api_key` read from file.
client = openai.Client(api_key=api_key)
22+
23+
def get_gpt_model_response(message, gpt_model=GPT_model):
    """Send chat messages to a GPT model and return the text of its reply.

    Args:
        message (list):
            Chat messages forwarded to the API as the `messages` argument.
        gpt_model (str):
            Name of the model to query. Default to the module-level `GPT_model`.

    Returns:
        The `content` of the message of the first choice in the completion.
    """
    response = client.chat.completions.create(model=gpt_model, messages=message)
    first_choice = response.choices[0]
    return first_choice.message.content
29+
30+
def create_message_zero_shot(seq_query, system_prompt_file='../gpt_model/system_prompt_zero_shot.txt'):
    """Build the chat messages for a zero-shot request.

    Args:
        seq_query:
            The sequence to analyze; it is interpolated into the user message.
        system_prompt_file (str):
            Path to the file holding the system prompt.

    Returns:
        list of dict:
            Two messages: the system prompt followed by the user query.
    """
    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}
    query_message = {"role": "user", "content": f"Sequence: {seq_query}"}
    return [system_message, query_message]
38+
39+
40+
def create_message_one_shot(seq_query, seq_ex, ano_ind_ex, system_prompt_file='../gpt_model/system_prompt_one_shot.txt'):
    """Build the chat messages for a one-shot request.

    Args:
        seq_query:
            The sequence to analyze; it is interpolated into the final user message.
        seq_ex:
            The example sequence; it is interpolated into the first user message.
        ano_ind_ex:
            The example answer, used as the assistant reply to the example.
        system_prompt_file (str):
            Path to the file holding the system prompt.

    Returns:
        list of dict:
            Four messages: system prompt, example query, example answer and
            the actual query, in that order.
    """
    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}

    # The worked example is presented as an earlier user/assistant exchange.
    example_query = {"role": "user", "content": f"Sequence: {seq_ex}"}
    example_answer = {"role": "assistant", "content": ano_ind_ex}

    final_query = {"role": "user", "content": f"Sequence: {seq_query}"}
    return [system_message, example_query, example_answer, final_query]
52+

sigllm/result.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
Result post-processing module.
5+
6+
This module contains functions that help convert model responses back to timestamps.
7+
"""
8+
import numpy as np
9+
from collections import Counter
10+
11+
def str2ind(text, len_seq, sep=','):
    """Convert a text string to indices.

    Convert a string containing digits into an array of indices. The text is
    split on `sep`; within each piece every character that is not an ASCII
    digit is discarded and the remaining digits are read as one integer.
    Pieces without any digit (empty fields, stray words, a trailing
    separator) are ignored instead of interrupting the parsing.

    Args:
        text (str):
            A string containing indices values.
        len_seq (int):
            The length of processed sequence
        sep (str):
            String that was used to separate each element in text, Default to `","`.

    Returns:
        numpy.ndarray:
            A 1-dimensional integer array containing the parsed elements in
            `text` that are smaller than `len_seq`.
    """
    ascii_digits = '0123456789'

    parsed = []
    for piece in text.split(sep):
        # Keep only the digits of this piece (drops brackets, spaces, signs...).
        digits = ''.join(char for char in piece if char in ascii_digits)
        if digits:
            parsed.append(int(digits))

    values = np.array(parsed, dtype=int)

    # Remove indices that exceed the length of sequence
    return values[values < len_seq]
36+
37+
38+
def get_anomaly_list_within_seq(res_list, alpha=0.5):
    """Get the final list of anomalous indices of a sequence

    Choose which index is considered anomalous in the sequence based on number of votes
    from multiple LLM responses. Each response casts at most one vote per index, even if
    it lists that index several times.

    Args:
        res_list (list of numpy.ndarray):
            A list of 1-dimensional array containing anomalous indices output by LLM
        alpha (float):
            Percentage of total number of votes that an index needs to have to be
            considered anomalous. Default: 0.5

    Returns:
        numpy.ndarray:
            A 1-dimensional array containing final anomalous indices, in increasing
            order. Empty when `res_list` is empty.
    """
    # np.concatenate cannot handle an empty list; no responses means no anomalies.
    if len(res_list) == 0:
        return np.array([], dtype=int)

    min_vote = np.ceil(alpha * len(res_list))

    # Deduplicate inside each response so a repeated index counts as one vote.
    votes = np.concatenate([np.unique(response) for response in res_list])

    unique_elements, counts = np.unique(votes, return_counts=True)

    return unique_elements[counts >= min_vote]
61+
62+
def get_anomaly_list_across_seq(ano_list, window_size, step_size, beta=0.5):
    """Get the final list of anomalous indices of a sequence when combining all rolling windows

    An index is kept when it appears in at least `ceil(beta * window_size / step_size)`
    of the per-window lists.

    Args:
        ano_list (list of numpy.ndarray):
            A list of 1-dimensional array containing anomalous indices of each window
        window_size (int):
            Length of each window
        step_size (int):
            Indicating the number of steps the window moves forward each round.
        beta (float):
            Percentage of number of containing windows that an index needs to have to be
            considered anomalous. Default: 0.5

    Return:
        numpy.ndarray:
            A 1-dimensional array containing final anomalous indices, in increasing
            order. Empty when `ano_list` is empty.
    """
    # NOTE(review): indices are counted as given, so they are presumably already
    # expressed relative to the whole sequence rather than to each window; confirm
    # against the caller.

    # np.concatenate cannot handle an empty list; no windows means no anomalies.
    if len(ano_list) == 0:
        return np.array([], dtype=int)

    min_vote = np.ceil(beta * window_size / step_size)

    flattened_res = np.concatenate(ano_list)

    # np.unique returns the values sorted, so no further sorting is needed.
    unique_elements, counts = np.unique(flattened_res, return_counts=True)

    return unique_elements[counts >= min_vote]
87+
88+
def ind2time(sequence, ind_list):
    """Look up the timestamps found at the given positions of a signal.

    Args:
        sequence (pandas.Dataframe):
            Signal with timestamps and values
        ind_list (numpy.ndarray):
            A 1-dimensional array of positional indices

    Returns:
        numpy.ndarray:
            A 1-dimensional array with the `timestamp` values of the rows of
            `sequence` located at the positions in `ind_list`
    """
    selected_rows = sequence.iloc[ind_list]
    timestamps = selected_rows.timestamp
    return timestamps.to_numpy()
101+
102+

0 commit comments

Comments
 (0)