Skip to content

Commit 0c9098f

Browse files
committed
Stop tracking sigllm/sigllm.ipynb
1 parent a1aa8c7 commit 0c9098f

21 files changed

+509
-497
lines changed

results/E1_1.pdf

20.5 KB
Binary file not shown.

results/E1_2.pdf

17.2 KB
Binary file not shown.

results/E1_turbo.png

43.6 KB
Loading

results/E1_turbo_1.png

41.9 KB
Loading

results/M7.png

33.5 KB
Loading

results/M7_1.png

31.3 KB
Loading

results/M7_2.png

31.6 KB
Loading

results/M7_turbo.png

30.4 KB
Loading

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
history = history_file.read()
1313

1414
install_requires = [
15-
'numpy', 'openai', 'pandas','orion', #'collections'
15+
'numpy', 'openai', 'pandas','orion', 'matplotlib', 'scikit-learn',
16+
'tiktoken',
1617
]
1718

1819
setup_requires = [

sigllm/anomalies.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
Result post-processing module.
5+
6+
This module contains functions that help convert model responses back to indices and timestamps.
7+
"""
8+
import numpy as np
9+
10+
11+
def str2sig(text, sep=',', decimal=0):
    """Convert a text string to a signal.

    Split ``text`` on ``sep`` and parse every element into a number. Inside
    each element all characters other than digits and the decimal point are
    discarded (so signs, spaces and letters are ignored). Elements that are
    empty or cannot be parsed as a number after cleaning are skipped instead
    of aborting the whole conversion.

    Args:
        text (str):
            A string containing signal values.
        sep (str):
            String that was used to separate each element in text. May be
            longer than one character. Default to `","`.
        decimal (int):
            Number of decimal points to shift each element in text to, i.e.
            every parsed value is multiplied by ``10**(-decimal)``.
            Default to `0`.

    Returns:
        numpy.ndarray:
            A 1-dimensional float array containing parsed elements in `text`.
            Empty when no element could be parsed.
    """
    values = []
    for token in text.split(sep):
        # Keep only the characters that can be part of a non-negative number.
        cleaned = ''.join(ch for ch in token if ch.isdigit() or ch == '.')
        if not cleaned:
            # Empty element, e.g. a trailing separator or a purely textual token.
            continue

        try:
            values.append(float(cleaned))
        except ValueError:
            # Malformed element such as "." or "1.2.3": best-effort parsing,
            # so drop it and keep the remaining elements.
            continue

    return np.array(values, dtype=float) * 10**(-decimal)
32+
33+
34+
def str2idx(text, len_seq, sep=','):
    """Convert a text string to indices.

    Split ``text`` on ``sep`` and parse every element into an integer index.
    Inside each element every non-digit character is discarded. Elements that
    contain no digits, or whose digits cannot be parsed as an integer, are
    skipped.

    Args:
        text (str):
            A string containing indices values.
        len_seq (int):
            The length of processed sequence. Parsed indices that are not
            strictly smaller than this value are dropped.
        sep (str):
            String that was used to separate each element in text. May be
            longer than one character. Default to `","`.

    Returns:
        numpy.ndarray:
            A 1-dimensional integer array containing parsed elements in
            `text`, in order of appearance (duplicates are kept).
    """
    indices = []
    for token in text.split(sep):
        digits = ''.join(ch for ch in token if ch.isdigit())
        if not digits:
            # Empty element, e.g. a doubled or trailing separator.
            continue

        try:
            idx = int(digits)
        except ValueError:
            # Digit-like characters that int() rejects: skip the element.
            continue

        # Filter before building the array so that absurdly large values
        # never reach the fixed-width integer conversion.
        if idx < len_seq:
            indices.append(idx)

    return np.array(indices, dtype=int)
59+
60+
61+
def get_anomaly_list_within_seq(res_list, alpha=0.5):
    """Get the final list of anomalous indices of a sequence.

    Choose anomalous indices in the sequence by majority-style voting over
    multiple LLM responses. Each response contributes at most one vote per
    index, even if it lists the same index several times.

    Args:
        res_list (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices output
            by the LLM, one array per response.
        alpha (float):
            Percentage of votes needed for an index to be deemed anomalous.
            The threshold is ``ceil(alpha * len(res_list))``. Default: 0.5

    Returns:
        numpy.ndarray:
            A sorted 1-dimensional array containing final anomalous indices.
            Empty when `res_list` is empty.
    """
    # np.concatenate cannot handle an empty list of arrays.
    if len(res_list) == 0:
        return np.array([], dtype=int)

    min_vote = np.ceil(alpha * len(res_list))

    # De-duplicate inside each response so a repeated index counts once.
    votes = np.concatenate([np.unique(res) for res in res_list])

    unique_elements, counts = np.unique(votes, return_counts=True)

    return unique_elements[counts >= min_vote]
85+
86+
87+
def merge_anomaly_seq(anomalies, start_indices, window_size, step_size, beta=0.5):
    """Get the final list of anomalous indices of a sequence when merging all rolling windows.

    Window-relative indices are shifted by the start index of their window and
    then put to a vote: an index is kept when it was reported by at least
    ``ceil(beta * window_size / step_size)`` windows. Each window contributes
    at most one vote per index.

    Args:
        anomalies (List[numpy.ndarray]):
            A list of 1-dimensional arrays containing anomalous indices of each
            window, relative to the start of that window.
        start_indices (numpy.ndarray):
            A 1-dimensional array containing the first index of each window.
        window_size (int):
            Length of each window
        step_size (int):
            Indicating the number of steps the window moves forward each round.
        beta (float):
            Percentage of containing windows needed for index to be deemed
            anomalous. Default: 0.5

    Return:
        numpy.ndarray:
            A sorted 1-dimensional array containing final anomalous indices.
            Empty when no window is given.
    """
    # Translate window-relative indices into positions of the full sequence;
    # np.unique drops repeated indices inside a single window.
    shifted = [
        np.unique(arr) + first_idx
        for arr, first_idx in zip(anomalies, start_indices)
    ]

    # np.concatenate cannot handle an empty list of arrays.
    if not shifted:
        return np.array([], dtype=int)

    min_vote = np.ceil(beta * window_size / step_size)

    unique_elements, counts = np.unique(np.concatenate(shifted), return_counts=True)

    # np.unique already returns its values in ascending order.
    return unique_elements[counts >= min_vote]
117+
118+
119+
def idx2time(sequence, idx_list):
    """Convert list of indices into list of timestamp.

    Args:
        sequence (pandas.Dataframe):
            Signal with timestamps and values. The timestamps are read from
            its ``timestamp`` column.
        idx_list (numpy.ndarray):
            A 1-dimensional array of positional indices into `sequence`.

    Returns:
        numpy.ndarray:
            A 1-dimensional array containing the timestamps found at the
            given positions.
    """
    # NumPy creates empty arrays as float64 by default, and mixing such arrays
    # with integer ones yields floats; cast back so the values are usable as
    # positional indices.
    if isinstance(idx_list, np.ndarray) and idx_list.dtype.kind == 'f':
        idx_list = idx_list.astype(int)

    return sequence.iloc[idx_list].timestamp.to_numpy()

sigllm/data.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,16 @@
33
"""
44
Data preprocessing module.
55
6-
This module contains functions that help convert timeseries into string, preparing it for a language model.
6+
This module contains functions that prepare timeseries for a language model.
77
"""
88

99
import numpy as np
1010

11+
1112
def rolling_window_sequences(X, index, window_size, step_size):
1213
"""Create rolling window sequences out of time series data.
1314
14-
The function creates an array of input sequences and an array of target sequences by rolling
15-
over the input sequence with a specified window.
16-
Optionally, certain values can be dropped from the sequences.
15+
The function creates an array of sequences by rolling over the input sequence.
1716
1817
Args:
1918
X (ndarray):
@@ -44,21 +43,23 @@ def rolling_window_sequences(X, index, window_size, step_size):
4443
return np.asarray(out_X), np.asarray(X_index)
4544

4645

47-
def sig2str(values, sep=',', space=False, decimal=0):
46+
def sig2str(values, sep=',', space=False, decimal=0, rescale=True):
4847
"""Convert a signal to a string.
4948
5049
Convert a 1-dimensional time series into text by casting and rescaling it
51-
to nonnegative integer values then into a string.
50+
to nonnegative integer values then into a string (optional).
5251
5352
Args:
5453
values (numpy.ndarray):
5554
A sequence of signal values.
5655
sep (str):
57-
String to separate each element in values, Default to `","`.
56+
String to separate each element in values. Default to `","`.
5857
space (bool):
5958
Whether to add space between each digit in the result. Default to `False`.
6059
decimal (int):
6160
Number of decimal points to keep from the float representation. Default to `0`.
61+
rescale (bool):
62+
Whether to rescale the time series to nonnegative values. Default to `True`.
6263
6364
Returns:
6465
str:
@@ -68,12 +69,13 @@ def sig2str(values, sep=',', space=False, decimal=0):
6869
values = np.abs(values)
6970

7071
sequence = sign * (values * 10**decimal).astype(int)
71-
72-
#Rescale all elements to be nonnegative
73-
sequence = sequence - min(sequence)
72+
73+
# Rescale all elements to be nonnegative
74+
if rescale:
75+
sequence = sequence - min(sequence)
7476

7577
res = sep.join([str(num) for num in sequence])
7678
if space:
7779
res = ' '.join(res)
7880

79-
return res
81+
return res

sigllm/gpt.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,48 +5,57 @@
55
66
This module contains functions that are specifically used for GPT models
77
"""
8+
import os
9+
10+
from openai import OpenAI
811

9-
import openai
1012

11-
with open("../gpt_model/openai_api_key.txt", "r") as f:
12-
api_key = f.read()
13-
1413
def load_system_prompt(file_path):
    """Read a system prompt from a text file.

    Args:
        file_path (str):
            Path to the text file holding the system prompt.

    Returns:
        str:
            The complete content of the file.
    """
    # Decode explicitly as UTF-8 so the prompt reads the same on every
    # platform instead of depending on the locale's default encoding.
    with open(file_path, encoding='utf-8') as f:
        return f.read()
1817

19-
GPT_model = "gpt-3.5-turbo" #"gpt-4"
2018

21-
client = openai.Client(api_key=api_key)
19+
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
20+
21+
ZERO_SHOT_FILE = 'gpt_system_prompt_zero_shot.txt'
22+
ONE_SHOT_FILE = 'gpt_system_prompt_one_shot.txt'
23+
24+
ZERO_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ZERO_SHOT_FILE)
25+
ONE_SHOT_DIR = os.path.join(CURRENT_DIR, "..", "template", ONE_SHOT_FILE)
26+
27+
28+
GPT_model = "gpt-4" # "gpt-4-0125-preview" # # #"gpt-3.5-turbo" #
29+
client = OpenAI()
30+
2231

2332
def get_gpt_model_response(message, gpt_model=GPT_model):
2433
completion = client.chat.completions.create(
25-
model=gpt_model,
26-
messages=message,
34+
model=gpt_model,
35+
messages=message,
2736
)
2837
return completion.choices[0].message.content
2938

30-
def create_message_zero_shot(seq_query, system_prompt_file='../gpt_model/system_prompt_zero_shot.txt'):
39+
40+
def create_message_zero_shot(seq_query, system_prompt_file=ZERO_SHOT_DIR):
3141
messages = []
32-
33-
messages.append({"role": "system", "content":load_system_prompt(system_prompt_file)})
42+
43+
messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
3444

3545
# final prompt
3646
messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
3747
return messages
3848

3949

40-
def create_message_one_shot(seq_query, seq_ex, ano_ind_ex, system_prompt_file='../gpt_model/system_prompt_one_shot.txt'):
50+
def create_message_one_shot(seq_query, seq_ex, ano_idx_ex, system_prompt_file=ONE_SHOT_DIR):
4151
messages = []
42-
43-
messages.append({"role": "system", "content":load_system_prompt(system_prompt_file)})
52+
53+
messages.append({"role": "system", "content": load_system_prompt(system_prompt_file)})
4454

4555
# one shot
4656
messages.append({"role": "user", "content": f"Sequence: {seq_ex}"})
47-
messages.append({"role": "assistant", "content": ano_ind_ex})
57+
messages.append({"role": "assistant", "content": ano_idx_ex})
4858

4959
# final prompt
5060
messages.append({"role": "user", "content": f"Sequence: {seq_query}"})
5161
return messages
52-

sigllm/result.py

Lines changed: 0 additions & 102 deletions
This file was deleted.

0 commit comments

Comments
 (0)