Skip to content

Commit 53bdd9c

Browse files
authored
Merge pull request #1 from Linh-nk/gpt
get anomalies with chatgpt
2 parents f122724 + 2375a54 commit 53bdd9c

File tree

10 files changed

+611
-46
lines changed

10 files changed

+611
-46
lines changed

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
history = history_file.read()
1313

1414
install_requires = [
15-
'numpy',
15+
'numpy', 'openai', 'pandas','orion', 'matplotlib', 'scikit-learn',
16+
'tiktoken',
1617
]
1718

1819
setup_requires = [

sigllm/anomalies.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
Result post-processing module.
5+
6+
This module contains functions that help convert model responses back to indices and timestamps.
7+
"""
8+
import numpy as np
9+
10+
11+
def str2sig(text, sep=',', decimal=0):
    """Parse a delimited string of numbers back into a signal.

    Every character that is not a digit, the separator or a decimal point is
    dropped before parsing, so signs, whitespace and any surrounding words in
    ``text`` are ignored.

    Args:
        text (str):
            A string containing signal values.
        sep (str):
            String that was used to separate each element in text. Characters are
            compared to it one at a time. Default to `","`.
        decimal (int):
            Number of decimal points to shift each element in text to; parsed values
            are multiplied by ``10**(-decimal)``. Default to `0`.

    Returns:
        numpy.ndarray:
            A 1-dimensional float array containing the parsed elements of `text`.
    """
    # Only digits, the separator and the decimal point survive the clean-up.
    allowed = (sep, '.')
    cleaned = ''.join(ch for ch in text if ch.isdigit() or ch in allowed)

    parsed = np.fromstring(cleaned, dtype=float, sep=sep)
    scale = 10 ** (-decimal)
    return parsed * scale
32+
33+
34+
def str2idx(text, len_seq, sep=','):
    """Parse a delimited string of digits into an array of valid indices.

    Characters other than digits and the separator are discarded first, then the
    remaining text is parsed as integers. Any parsed index that is not strictly
    smaller than ``len_seq`` is dropped.

    Args:
        text (str):
            A string containing indices values.
        len_seq (int):
            The length of the processed sequence.
        sep (str):
            String that was used to separate each element in text. Characters are
            compared to it one at a time. Default to `","`.

    Returns:
        numpy.ndarray:
            A 1-dimensional integer array containing the parsed indices of `text`
            that fall below `len_seq`.
    """
    # Keep nothing but digits and the separator.
    cleaned = ''.join(filter(lambda ch: ch.isdigit() or ch == sep, text))

    indices = np.fromstring(cleaned, dtype=int, sep=sep)

    # Discard indices pointing past the end of the sequence.
    in_range = indices < len_seq
    return indices[in_range]
59+
60+
61+
def get_anomaly_list_within_seq(res_list, alpha=0.5):
    """Get the final list of anomalous indices of a sequence.

    Choose the anomalous indices of the sequence by majority-style voting over
    multiple LLM responses. Each response contributes at most one vote per index,
    so an index repeated inside a single response cannot out-vote the others.

    Args:
        res_list (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices output by
            the LLM, one array per response.
        alpha (float):
            Percentage of votes needed for an index to be deemed anomalous; the
            threshold is ``ceil(alpha * len(res_list))``. Default: 0.5

    Returns:
        numpy.ndarray:
            A sorted 1-dimensional array containing the final anomalous indices.
            Empty when `res_list` is empty.
    """
    # np.concatenate refuses an empty list; no responses means no anomalies.
    if len(res_list) == 0:
        return np.array([], dtype=int)

    min_vote = np.ceil(alpha * len(res_list))

    # Deduplicate within each response so every response votes once per index.
    votes = np.concatenate([np.unique(res) for res in res_list])

    unique_elements, counts = np.unique(votes, return_counts=True)

    return unique_elements[counts >= min_vote]
85+
86+
87+
def merge_anomaly_seq(anomalies, start_indices, window_size, step_size, beta=0.5):
    """Get the final list of anomalous indices of a sequence when merging all rolling windows.

    Each window's indices are shifted by the window's first index, then an index
    is kept when it was reported by at least ``ceil(beta * window_size / step_size)``
    windows.

    Args:
        anomalies (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices of each window,
            expressed relative to the start of that window.
        start_indices (numpy.ndarray):
            A 1-dimensional array containing the first index of each window.
        window_size (int):
            Length of each window.
        step_size (int):
            Indicating the number of steps the window moves forward each round.
        beta (float):
            Percentage of containing windows needed for index to be deemed anomalous.
            Default: 0.5

    Returns:
        numpy.ndarray:
            A sorted 1-dimensional array containing the final anomalous indices.
            Empty when no windows are given.
    """
    # np.concatenate refuses an empty list; no windows means no anomalies.
    if len(anomalies) == 0:
        return np.array([], dtype=int)

    # Translate window-relative indices into positions in the full sequence.
    shifted = [arr + first_idx for (arr, first_idx) in zip(anomalies, start_indices)]

    min_vote = np.ceil(beta * window_size / step_size)

    flattened_res = np.concatenate(shifted)

    # np.unique already returns its values in ascending order.
    unique_elements, counts = np.unique(flattened_res, return_counts=True)

    return unique_elements[counts >= min_vote]
117+
118+
119+
def idx2time(sequence, idx_list):
    """Look up the timestamps found at the given positions of a signal.

    Args:
        sequence (pandas.DataFrame):
            Signal with a `timestamp` column alongside its values.
        idx_list (numpy.ndarray):
            A 1-dimensional array of positional indices into `sequence`.

    Returns:
        numpy.ndarray:
            A 1-dimensional array containing the timestamps of the selected rows.
    """
    # Rows are picked by position, not by index label.
    selected_rows = sequence.iloc[idx_list]
    timestamps = selected_rows.timestamp
    return timestamps.to_numpy()

sigllm/data.py

Lines changed: 45 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,63 @@
33
"""
44
Data preprocessing module.
55
6-
This module contains functions to help parse time series into
7-
text, preparing it for a language model.
6+
This module contains functions that prepare timeseries for a language model.
87
"""
98

109
import numpy as np
1110

1211

13-
def sig2str(values, sep=',', space=False, decimal=0):
12+
def rolling_window_sequences(X, index, window_size, step_size):
    """Create rolling window sequences out of time series data.

    The function creates an array of sequences by rolling over the input sequence.
    Only full windows are produced; if `X` is shorter than `window_size` the
    result is empty.

    Args:
        X (ndarray):
            The sequence to iterate over.
        index (ndarray):
            Array containing the index values of X.
        window_size (int):
            Length of window. Must be at least 1.
        step_size (int):
            Indicating the number of steps to move the window forward each round.
            Must be at least 1.

    Returns:
        ndarray, ndarray:
            * rolling window sequences.
            * first index value of each input sequence.

    Raises:
        ValueError:
            If `window_size` or `step_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size}")

    # A non-positive step would never advance the window and loop forever.
    if step_size < 1:
        raise ValueError(f"step_size must be a positive integer, got {step_size}")

    # Last admissible start is the one whose window ends exactly at len(X).
    starts = range(0, len(X) - window_size + 1, step_size)

    out_X = [X[start:start + window_size] for start in starts]
    X_index = [index[start] for start in starts]

    return np.asarray(out_X), np.asarray(X_index)
44+
45+
46+
def sig2str(values, sep=',', space=False, decimal=0, rescale=True):
1447
"""Convert a signal to a string.
1548
16-
Convert a 1-dimensional time series into text by casting it
17-
to integer values then into a string.
49+
Convert a 1-dimensional time series into text by casting and rescaling it
50+
to nonnegative integer values then into a string (optional).
1851
1952
Args:
2053
values (numpy.ndarray):
2154
A sequence of signal values.
2255
sep (str):
23-
String to separate each element in values, Default to `","`.
56+
String to separate each element in values. Default to `","`.
2457
space (bool):
2558
Whether to add space between each digit in the result. Default to `False`.
2659
decimal (int):
2760
Number of decimal points to keep from the float representation. Default to `0`.
61+
rescale(bool):
62+
Whether to rescale the time series. Default to `True`
2863
2964
Returns:
3065
str:
@@ -35,29 +70,12 @@ def sig2str(values, sep=',', space=False, decimal=0):
3570

3671
sequence = sign * (values * 10**decimal).astype(int)
3772

73+
# Rescale all elements to be nonnegative
74+
if rescale:
75+
sequence = sequence - min(sequence)
76+
3877
res = sep.join([str(num) for num in sequence])
3978
if space:
4079
res = ' '.join(res)
4180

4281
return res
43-
44-
45-
def str2sig(text, sep=',', decimal=0):
46-
"""Convert a text string to a signal.
47-
48-
Convert a string containing digits into an array of numbers.
49-
50-
Args:
51-
text (str):
52-
A string containing signal values.
53-
sep (str):
54-
String that was used to separate each element in text, Default to `","`.
55-
decimal (int):
56-
Number of decimal points to shift each element in text to. Default to `0`.
57-
58-
Returns:
59-
numpy.ndarray:
60-
A 1-dimensional array containing parsed elements in `text`.
61-
"""
62-
values = np.fromstring(text, dtype=float, sep=sep)
63-
return values * 10**(-decimal)

sigllm/gpt.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
GPT model module.
5+
6+
This module contains functions that are specifically used for GPT models
7+
"""
8+
import os
9+
10+
from openai import OpenAI
11+
12+
13+
def load_system_prompt(file_path):
    """Read a system prompt template from disk.

    The file is decoded as UTF-8 so the prompt text does not depend on the
    locale's default encoding.

    Args:
        file_path (str):
            Path to the text file holding the system prompt.

    Returns:
        str:
            The full content of the file.
    """
    with open(file_path, encoding='utf-8') as f:
        return f.read()
17+
18+
19+
# Directory containing this module; the template paths below are built relative to it.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# File names of the system prompt templates.
ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'

# Despite the `_DIR` suffix, these are full paths to the template files, located in
# the `template` folder one level above this module's directory.
ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)


# Default model name used by `get_gpt_model_response`.
GPT_model = "gpt-4"  # alternatives: "gpt-4-0125-preview", "gpt-3.5-turbo"
# Shared client, created with default arguments when this module is imported.
client = OpenAI()
30+
31+
32+
def get_gpt_model_response(message, gpt_model=GPT_model):
    """Send a chat conversation to the model and return the text of its reply.

    Args:
        message (list):
            The chat messages to send, passed as the `messages` argument of the
            chat completion request.
        gpt_model (str):
            Name of the model to query. Default to the module-level `GPT_model`.

    Returns:
        The `content` of the message in the first returned choice.
    """
    response = client.chat.completions.create(model=gpt_model, messages=message)
    first_choice = response.choices[0]
    return first_choice.message.content
38+
39+
40+
def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
    """Build the chat messages for a zero-shot anomaly detection query.

    Args:
        seq_query (str):
            Text representation of the sequence to analyse.
        system_prompt_file (str):
            Path to the file holding the system prompt. Default to `ZERO_SHOT_DIR`.

    Returns:
        list:
            Two message dictionaries: the system prompt followed by the user
            message carrying the sequence.
    """
    system_message = {"role": "system", "content": load_system_prompt(system_prompt_file)}

    # final prompt
    query_message = {"role": "user", "content": f"Sequence: {seq_query}"}

    return [system_message, query_message]
48+
49+
50+
def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
    """Build the chat messages for a one-shot anomaly detection query.

    The conversation contains the system prompt, one worked example (a user
    message with the example sequence and an assistant message with its answer)
    and finally the user message with the sequence to analyse.

    Args:
        seq_query (str):
            Text representation of the sequence to analyse.
        seq_ex (str):
            Text representation of the example sequence.
        ano_idx_ex (str):
            Answer shown for the example sequence, used as the assistant message.
        system_prompt_file (str):
            Path to the file holding the system prompt. Default to `ONE_SHOT_DIR`.

    Returns:
        list:
            Four message dictionaries in conversation order.
    """
    return [
        {"role": "system", "content": load_system_prompt(system_prompt_file)},
        # one shot
        {"role": "user", "content": f"Sequence: {seq_ex}"},
        {"role": "assistant", "content": ano_idx_ex},
        # final prompt
        {"role": "user", "content": f"Sequence: {seq_query}"},
    ]

sigllm/sigllm.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,40 @@
11
# -*- coding: utf-8 -*-
22

3-
"""Main module."""
3+
"""
4+
Main module.
5+
6+
This module contains functions that get LLM's anomaly detection results.
7+
"""
8+
from anomalies import get_anomaly_list_within_seq, str2idx
9+
from data import sig2str
10+
11+
12+
def get_anomalies(seq, msg_func, model_func, num_iters=1, alpha=0.5):
    """Get LLM anomaly detection results.

    The sequence is converted to text once, the same prompt is sent to the model
    `num_iters` times, and the parsed answers are combined by voting into a
    single 1D array of indices.

    Args:
        seq (ndarray):
            The sequence to detect anomalies.
        msg_func (func):
            Function to create message prompt from the text form of `seq`.
        model_func (func):
            Function to get LLM answer for a message prompt.
        num_iters (int):
            Number of times to run the same query.
        alpha (float):
            Percentage of total number of votes that an index needs to have to be
            considered anomalous. Default: 0.5

    Returns:
        ndarray:
            1D array containing anomalous indices of the sequence.
    """
    prompt = msg_func(sig2str(seq, space=True))

    # One parsed array of indices per model answer.
    responses = [str2idx(model_func(prompt), len(seq)) for _ in range(num_iters)]

    return get_anomaly_list_within_seq(responses, alpha=alpha)

0 commit comments

Comments
 (0)