-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest__prompts_dot_graph_creator.py
169 lines (141 loc) · 5.43 KB
/
test__prompts_dot_graph_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
from prompts_dot_graph_creator import EXPERT_COMMANDS, GetPromptToDescribeWorkflow
import core
def test():
    """Run every sample prompt through ``core.execute_prompt`` and print the replies.

    This is a manual smoke test: it makes no assertions. The prompts are grouped
    into named test cases (decision trees, list handling, storage, describing an
    existing DOT graph, and deliberately irrelevant questions); each prompt and
    the response returned for it are printed so the output can be inspected.
    """
    command_messages = core.create_command_messages(EXPERT_COMMANDS)

    # --- Decision-tree workflows -------------------------------------------
    decisions_tests_first = {
        "name": "Simple workflow to model a tree of decisions",
        "prompts": [
            "Create a flow that makes a series of decisions about whether to approve a mortgage application"
        ]
    }
    decisions_tests = [
        decisions_tests_first,
        {
            "name": "Simple workflow to model a tree of decisions",
            "prompts": [
                "Create a flow that makes a series of decisions about whether to approve a mortgage application. The criteria are: good credit score, income at least 100000 USD, employed at least 3 years, can make a down payment of at least 10%, has no criminal record in last 5 years.",
                "Create a flow that makes a series of decisions about whether to recommend a job interview candidate.",
                "Create a flow that makes a series of decisions about an animal, to decide what kind of animal is it",
            ]
        }
    ]

    # --- List workflows ----------------------------------------------------
    list_advanced_test = {
        "name": "Workflow that iterates conditionally over items in a list",
        "prompts": [
            "Create a flow that iterates over Job Applications in a list. For each Job Application, call another flow that checks if the application should proceed to interview stage",
        ]
    }
    list_tests = [
        list_advanced_test,
        {
            "name": "Simple workflow adding an item to a list",
            "prompts": [
                "Create a flow that takes a list and adds an item of the same type",
                "Create a flow that takes two lists and concatenates them",
                "Create a flow that takes a list and an object. Call another flow to get a boolean result. If the boolean is true, then add the item to the list."
            ]
        },
        {
            "name": "Workflow that iterates over items in a list",
            "prompts": [
                "Create a flow that iterates over items in a list, performing an action on each item"
            ]
        }
    ]

    # --- Storage workflows -------------------------------------------------
    storage_first_test = {
        "name": "Workflow reads orders from storage and writes back the total",
        "prompts": [
            "Create a Workflow that reads a list of Orders for a Customer from storage, and then iterates over the orders, calculating the total amount. Write the total back to storage.",
        ]
    }
    storage_tests = [
        storage_first_test
    ]

    # --- Describe an existing DOT graph ------------------------------------
    # The DOT text is kept flush-left so no indentation leaks into the prompt.
    # Each literal opens with exactly three quotes: a fourth quote would become
    # a stray '"' character at the start of the graph text.
    describe_tests = [
        {
            "name": "Describe the given workflow",
            "prompts": [
                GetPromptToDescribeWorkflow("""
digraph G {
// start
start [shape=ellipse, label="Start"];
// decision_credit_score
start -> decision_credit_score;
decision_credit_score [shape=Mdiamond, label="Credit Score > 700?"];
// decision_income
decision_credit_score -> decision_income;
decision_income [shape=Mdiamond, label="Income > $50,000?"];
// decision_employment
decision_income -> decision_employment;
decision_employment [shape=Mdiamond, label="Employment > 2 years?"];
// decision_down_payment
decision_employment -> decision_down_payment;
decision_down_payment [shape=Mdiamond, label="Down Payment > 20%?"];
// approve
decision_down_payment -> approve;
approve [shape=box, label="Approve"];
// reject
decision_credit_score -> reject;
reject [shape=box, label="Reject"];
decision_income -> reject;
decision_employment -> reject;
decision_down_payment -> reject;
}
"""),
                GetPromptToDescribeWorkflow("""
digraph G {
// start
start [shape=ellipse, label="Start"];
// decision_has_feathers
start -> decision_has_feathers;
decision_has_feathers [shape=Mdiamond, label="Has feathers?"];
// decision_can_fly
decision_has_feathers -> decision_can_fly;
decision_can_fly [shape=Mdiamond, label="Can fly?"];
// decision_has_fins
decision_has_feathers -> decision_has_fins;
decision_has_fins [shape=Mdiamond, label="Has fins?"];
// Hawk
decision_can_fly -> Hawk;
Hawk [shape=box, label="Hawk"];
// Penguin
decision_can_fly -> Penguin;
Penguin [shape=box, label="Penguin"];
// Dolphin
decision_has_fins -> Dolphin;
Dolphin [shape=box, label="Dolphin"];
// Bear
decision_has_fins -> Bear;
Bear [shape=box, label="Bear"];
}
""")
            ]
        }
    ]

    # --- Prompts that should NOT be handled by the Commands ----------------
    irrelevant_tests = [
        {
            "name": "Irrelevant prompts",
            "prompts": [
                "what is 2 + 5 divided by 10 ?",
                "Who won the battle of Agincourt, and why was it fought?",
                "What is my favourite color?",
            ]
        }
    ]

    tests = decisions_tests + list_tests + storage_tests + describe_tests + irrelevant_tests

    # For debugging, uncomment one line to run a single test case:
    # tests = [decisions_tests_first]
    # tests = [list_advanced_test]
    # tests = [storage_first_test]

    prompt_id = 1  # running counter passed to execute_prompt, one per prompt
    for test_case in tests:
        # Each test case starts from an empty message history.
        # NOTE(review): nothing here appends to this list; presumably
        # core.execute_prompt maintains it - confirm against core.
        previous_messages = []
        print(f"[[[TEST {test_case['name']}]]]")
        for user_prompt in test_case['prompts']:
            print("---")
            print(f">> {user_prompt}")
            # should route to the right 'expert' chain!
            rsp = core.execute_prompt(user_prompt, previous_messages, command_messages, prompt_id)
            print(rsp)
            prompt_id += 1