fix: capture Usage, ChatResponseCached, and ToolResults

thedadams · thedadams · commit 73494ba4b39c · 2024-09-25T10:49:51.000-04:00
Additionally, add tests to ensure these are captured properly.

Signed-off-by: Donnie Adams <donnie@acorn.io>
diff --git a/frame.go b/frame.go
@@ -52,14 +52,16 @@ type RunFrame struct {
 type CallFrame struct {
 	CallContext `json:",inline"`
 
-	Type        EventType `json:"type"`
-	Start       time.Time `json:"start"`
-	End         time.Time `json:"end"`
-	Input       string    `json:"input"`
-	Output      []Output  `json:"output"`
-	Usage       Usage     `json:"usage"`
-	LLMRequest  any       `json:"llmRequest"`
-	LLMResponse any       `json:"llmResponse"`
+	Type               EventType `json:"type"`
+	Start              time.Time `json:"start"`
+	End                time.Time `json:"end"`
+	Input              string    `json:"input"`
+	Output             []Output  `json:"output"`
+	Usage              Usage     `json:"usage"`
+	ChatResponseCached bool      `json:"chatResponseCached"`
+	ToolResults        int       `json:"toolResults"`
+	LLMRequest         any       `json:"llmRequest"`
+	LLMResponse        any       `json:"llmResponse"`
 }
 
 type Usage struct {
diff --git a/gptscript_test.go b/gptscript_test.go
@@ -161,9 +161,7 @@ func TestAbortRun(t *testing.T) {
 func TestSimpleEvaluate(t *testing.T) {
 	tool := ToolDef{Instructions: "What is the capital of the united states?"}
 
-	run, err := g.Evaluate(context.Background(), Options{
-		GlobalOptions: GlobalOptions{},
-	}, tool)
+	run, err := g.Evaluate(context.Background(), Options{DisableCache: true}, tool)
 	if err != nil {
 		t.Errorf("Error executing tool: %v", err)
 	}
@@ -190,6 +188,17 @@ func TestSimpleEvaluate(t *testing.T) {
 	if run.Program() == nil {
 		t.Error("Run program not set")
 	}
+
+	var promptTokens, completionTokens, totalTokens int
+	for _, c := range run.calls {
+		promptTokens += c.Usage.PromptTokens
+		completionTokens += c.Usage.CompletionTokens
+		totalTokens += c.Usage.TotalTokens
+	}
+
+	if promptTokens == 0 || completionTokens == 0 || totalTokens == 0 {
+		t.Errorf("Usage not set: %d, %d, %d", promptTokens, completionTokens, totalTokens)
+	}
 }
 
 func TestEvaluateWithContext(t *testing.T) {
@@ -285,6 +294,16 @@ func TestEvaluateWithToolList(t *testing.T) {
 	if !strings.Contains(out, "hello there") {
 		t.Errorf("Unexpected output: %s", out)
 	}
+
+	// In this case, we expect the total number of tool results to be 1
+	var toolResults int
+	for _, c := range run.calls {
+		toolResults += c.ToolResults
+	}
+
+	if toolResults != 1 {
+		t.Errorf("Unexpected number of tool results: %d", toolResults)
+	}
 }
 
 func TestEvaluateWithToolListAndSubTool(t *testing.T) {
@@ -361,6 +380,54 @@ func TestStreamEvaluate(t *testing.T) {
 	}
 }
 
+func TestSimpleRun(t *testing.T) {
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("Error getting working directory: %v", err)
+	}
+
+	run, err := g.Run(context.Background(), wd+"/test/catcher.gpt", Options{})
+	if err != nil {
+		t.Fatalf("Error executing file: %v", err)
+	}
+
+	out, err := run.Text()
+	if err != nil {
+		t.Errorf("Error reading output: %v", err)
+	}
+
+	if !strings.Contains(out, "Salinger") {
+		t.Errorf("Unexpected output: %s", out)
+	}
+
+	if len(run.ErrorOutput()) != 0 {
+		t.Error("Should have no stderr output")
+	}
+
+	// Run it a second time, ensuring the same output and that a cached response is used
+	run, err = g.Run(context.Background(), wd+"/test/catcher.gpt", Options{})
+	if err != nil {
+		t.Fatalf("Error executing file: %v", err)
+	}
+
+	secondOut, err := run.Text()
+	if err != nil {
+		t.Errorf("Error reading output: %v", err)
+	}
+
+	if secondOut != out {
+		t.Errorf("Unexpected output on second run: %s != %s", out, secondOut)
+	}
+
+	// In this case, we expect a single call and that the response is cached
+	for _, c := range run.calls {
+		if !c.ChatResponseCached {
+			t.Error("Chat response should be cached")
+		}
+		break
+	}
+}
+
 func TestStreamRun(t *testing.T) {
 	wd, err := os.Getwd()
 	if err != nil {