Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactored debug log for ai-collab #23565

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion examples/apps/ai-collab/src/components/TaskCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
aiCollab,
type AiCollabErrorResponse,
type AiCollabSuccessResponse,
type ApplyEditSuccess,
type Difference,
type DifferenceChange,
type DifferenceMove,
Expand Down Expand Up @@ -179,8 +180,26 @@ export function TaskCard(props: {
},
planningStep: true,
finalReviewStep: true,
dumpDebugLog: true,
validator: aiCollabLlmTreeNodeValidator,
debugEventLogHandler: (event) => {
console.log(`Received event: ${event.eventName}`);
if (
event.eventName === "APPLIED_EDIT_SUCCESS" ||
event.eventName === "APPLIED_EDIT_FAILURE"
) {
console.log(
`${
event.eventName === "APPLIED_EDIT_SUCCESS"
? "Successfully applied"
: "Failed to apply"
} tree edit: ${JSON.stringify(
(event as unknown as ApplyEditSuccess).edit,
undefined,
2,
)}`,
);
}
},
});

if (response.status !== "success") {
Expand Down
21 changes: 20 additions & 1 deletion examples/apps/ai-collab/src/components/TaskGroup.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
aiCollab,
type AiCollabErrorResponse,
type AiCollabSuccessResponse,
type ApplyEditSuccess,
type Difference,
SharedTreeBranchManager,
} from "@fluidframework/ai-collab/alpha";
Expand Down Expand Up @@ -229,8 +230,26 @@ export function TaskGroup(props: {
},
planningStep: true,
finalReviewStep: false,
dumpDebugLog: true,
validator: aiCollabLlmTreeNodeValidator,
debugEventLogHandler: (event) => {
console.log(`Received event: ${event.eventName}`);
if (
event.eventName === "APPLIED_EDIT_SUCCESS" ||
event.eventName === "APPLIED_EDIT_FAILURE"
) {
console.log(
`${
event.eventName === "APPLIED_EDIT_SUCCESS"
? "Successfully applied"
: "Failed to apply"
} tree edit: ${JSON.stringify(
(event as unknown as ApplyEditSuccess).edit,
undefined,
2,
)}`,
);
}
},
});

// 3. Handle the response from the ai collaboration
Expand Down
71 changes: 67 additions & 4 deletions packages/framework/ai-collab/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ export class PlannerAppState extends sf.object("PlannerAppState", {
### Example 1: Collaborate with AI

```ts
import { aiCollab } from "@fluidframework/ai-collab/alpha";
import { aiCollab, DebugEvent } from "@fluidframework/ai-collab/alpha";
import { PlannerAppState } from "./types.ts"
// This is not a real file, this is meant to represent how you initialize your app data.
import { initializeAppState } from "./yourAppInitializationFile.ts"

// --------- File name: "app.ts" ---------

// Initialize your app state somehow
// Initialize your Fluid app state somehow
const appState: PlannerAppState = initializeAppState({
taskGroups: [
{
Expand Down Expand Up @@ -143,9 +143,12 @@ const response = await aiCollab({
"You are a manager that is helping out with a project management tool. You have been asked to edit a group of tasks.",
userAsk: userAsk,
},
limiters: {
maxModelCalls: 25
},
planningStep: true,
finalReviewStep: true,
dumpDebugLog: true,
debugEventLogHandler: (event: DebugEvent) => {console.log(event);}
});

if (response.status === 'success') {
Expand Down Expand Up @@ -174,12 +177,72 @@ Once the `aiCollab` function call is initiated, an LLM will immediately begin at
- `promptGeneration.ts`: Logic for producing the different types of prompts sent to an LLM in order to edit a SharedTree.
- `typeGeneration.ts`: Generates serialized(/able) representations of a SharedTree Schema which is used within prompts and the generated of the structured output JSON schema
- `utils.ts`: Utilities for interacting with a SharedTree
- `debugEvents.ts`: Types and helper functions for `DebugEvent`s emitted to the callback provided to `aiCollab`'s `debugEventLogHandler`
- `/implicit-strategy`: The original implicit strategy, currently not used under the exported aiCollab API surface.

## Debug Events
This package allows users to consume `DebugEvent`s that can be very helpful in understanding what's going on internally and debugging potential issues.
Users can consume these events by passing in a `debugEventLogHandler` when calling the `aiCollab()` function:
```ts
aiCollab({
openAI: {
client: new OpenAI({
apiKey: OPENAI_API_KEY,
}),
modelName: "gpt-4o",
},
treeNode: view.root.taskGroups[0],
prompt: {
systemRoleContext:
"You are a manager that is helping out with a project management tool. You have been asked to edit a group of tasks.",
userAsk: userAsk,
},
limiters: {
maxModelCalls: 25
},
planningStep: true,
finalReviewStep: true,
debugEventLogHandler: (event: DebugEvent) => {console.log(event);} // This should be your debug event log handler
});

```

All debug events implement the `DebugEvent` interface. Some also implement `EventFlowDebugEvent`, which lets them mark a progress point in a specific logic flow within a given execution of `aiCollab()`.

### Event flows
1. `CORE_EVENT_LOOP`: All events with this `eventFlowName` are used to mark the start and end of the life cycle of a single execution of the ai-collab function.
- Events:
1. `CoreEventLoopStarted`: Events with the `eventName` `CORE_EVENT_LOOP_STARTED`. This event marks the start of the ai-collab function execution life cycle. There will be exactly 1 of these events per ai-collab function execution.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking all this documentation should actually go in the TSDoc for the corresponding event. Something like "This event will be emitted exactly once per ai-collab function execution" makes a lot of sense there, more than in the README. It would also make some of the docs redundant (e.g. docs for "this event has eventName X" are probably not necessary when one is looking at the interface definition already). Keeping the list of events in each flow here is ok, and maybe we can link to the source file where all the events live for those who want more details. I'd particularly advocate for this to remove the easy-to-miss **IMPORTANT**: If you change this file make sure the root README.md file is updated to reflect the changes. in the source file.

1. `CoreEventLoopCompleted`: Events with the `eventName` `CORE_EVENT_LOOP_COMPLETED`. This event marks the end of the ai-collab function execution life cycle. There will be exactly 1 of these events per ai-collab function execution.
2. `GENERATE_PLANNING_PROMPT`: All events with this `eventFlowName` are used to mark the start, end and outcome of the LLM generating the planning prompt, which assists the LLM in planning how it will edit the SharedTree based on the user ask.
- Events
1. `PlanningPromptStarted`: Events with the `eventName` `GENERATE_PLANNING_PROMPT_STARTED`. This event marks the start of the logic flow for generating the planning prompt. There will be exactly 1 of these events per ai-collab function execution.
- Child `DebugEvent`s triggered:
1. `LlmApiCallDebugEvent`: In order to generate the planning prompt, a call to the LLM is necessary. This `DebugEvent` captures the request and its raw result from said API call.
1. `PlanningPromptCompleted`: Events with the `eventName` `GENERATE_PLANNING_PROMPT_COMPLETED`. This event marks the end and outcome of the LLM generating the planning prompt. There will be exactly 1 of these events per ai-collab function execution.
3. `GENERATE_TREE_EDIT`: All events with this `eventFlowName` are used to mark the start, end and outcome of the LLM generating a single TreeEdit that will be applied to the tree. It is expected that the LLM will generate multiple of these events when it must generate multiple tree edits to satisfy the user request
- Events:
1. `GenerateTreeEditStarted`: Events with the `eventName` `GENERATE_TREE_EDIT_STARTED`: This event marks the start of the logic flow for generating a single tree edit
- Child `DebugEvent`s triggered:
1. `LlmApiCallDebugEvent`: In order to generate a Tree Edit, a call to the LLM is necessary. This `DebugEvent` captures the request and its raw result from said API call.
1. `GenerateTreeEditCompleted`: Events with the `eventName` `GENERATE_TREE_EDIT_COMPLETED`. This event marks the end and outcome of the LLM generating a single tree edit. Note that if the LLM returns `null` as its edit at this step, it is signaling that it thinks no more edits are necessary.
4. `FINAL_REVIEW`: All events with this `eventFlowName` are used to mark the start, end and outcome of requesting the LLM to review its work and determine whether the user's ask was accomplished or more edits are needed.
- Events:
- `FinalReviewStarted`: Events with the `eventName` `FINAL_REVIEW_STARTED`. This event marks the start of the logic flow for requesting that the LLM complete a final review of the edits it has created and determine whether it believes the user's ask was accomplished or more edits are needed. If the LLM thinks more edits are needed, the `GENERATE_TREE_EDIT` flow will start again.
- Child `DebugEvent`s triggered:
1. `LlmApiCallDebugEvent`: In order to conduct the final review, a call to the LLM is necessary. This `DebugEvent` captures the request and its raw result from said API call.
- `FinalReviewCompleted`: Events with the `eventName` `FINAL_REVIEW_COMPLETED`. This event marks the end and outcome of the logic flow for requesting the LLM complete a final review of the edits it has created.


### Using trace IDs
Debug events in ai-collab have two different types of trace IDs:
- `traceId`: This field exists on all debug events and can be used to correlate all debug events that happened in a single execution of `aiCollab()`. Sorting the events by timestamp will show the proper chronological order of the events. Note that the events should already be emitted in chronological order.
- `eventFlowTraceId`: This field exists on all `EventFlowDebugEvent`s and can be used to correlate all events from a particular event flow. Additionally, all `LlmApiCallDebugEvent` events will contain the `eventFlowTraceId` field as well as a `triggeringEventFlowName`, so you can link LLM API calls to a particular event flow.


## Known Issues & limitations

1. Union types for a TreeNode are not present when generating App Schema. This will require extracting a field schema instead of TreeNodeSchema when passed a non root node.
1. The Editing System prompt & structured out schema currently provide array related edits even when there are no arrays. This forces you to have an array in your schema to produce a valid json schema
1. Optional roots are not allowed. This is because if you pass undefined as your treeNode to the API, we cannot disambiguate whether you passed the root or not.
1. Primitive root nodes are not allowed to be passed to the API. You must use an object or array as your root.
1. Optional nodes are not supported -- when we use optional nodes, the OpenAI API returns an error complaining that the structured output JSON schema is invalid. I have introduced a fix that should work upon manual validation of the json schema, but there looks to be an issue with their API. I have filed a ticket with OpenAI to address this
Expand Down
172 changes: 170 additions & 2 deletions packages/framework/ai-collab/api-report/ai-collab.alpha.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ export function aiCollab(options: AiCollabOptions): Promise<AiCollabSuccessRespo

// @alpha
export interface AiCollabErrorResponse {
readonly errorMessage: "tokenLimitExceeded" | "tooManyErrors" | "tooManyModelCalls" | "aborted";
readonly errorMessage: "tokenLimitExceeded" | "tooManyErrors" | "tooManyModelCalls" | "aborted" | "unexpectedError";
readonly status: "failure" | "partial-failure";
readonly tokensUsed: TokenUsage;
}

// @alpha
export interface AiCollabOptions {
readonly dumpDebugLog?: boolean;
readonly debugEventLogHandler?: DebugEventLogHandler;
readonly finalReviewStep?: boolean;
readonly limiters?: {
readonly abortController?: AbortController;
Expand All @@ -40,12 +40,75 @@ export interface AiCollabSuccessResponse {
readonly tokensUsed: TokenUsage;
}

// @alpha
export interface ApplyEditFailure extends EventFlowDebugEvent {
edit: LlmTreeEdit;
errorMessage: string;
// (undocumented)
eventFlowName: "GENERATE_AND_APPLY_TREE_EDIT";
// (undocumented)
eventFlowStatus: "IN_PROGRESS";
eventFlowTraceId: string;
// (undocumented)
eventName: "APPLIED_EDIT_FAILURE";
sequentialErrorCount: number;
}

// @alpha
export interface ApplyEditSuccess extends EventFlowDebugEvent {
edit: LlmTreeEdit;
// (undocumented)
eventFlowName: "GENERATE_AND_APPLY_TREE_EDIT";
// (undocumented)
eventFlowStatus: "IN_PROGRESS";
eventFlowTraceId: string;
// (undocumented)
eventName: "APPLIED_EDIT_SUCCESS";
}

// @alpha
export interface CoreEventLoopCompleted extends EventFlowDebugEvent {
// (undocumented)
errorMessage?: string;
// (undocumented)
eventFlowName: "CORE_EVENT_LOOP";
// (undocumented)
eventFlowStatus: "COMPLETED";
// (undocumented)
eventName: "CORE_EVENT_LOOP_COMPLETED";
// (undocumented)
failureReason?: string;
// (undocumented)
status: "success" | "failure";
}

// @alpha
export interface CoreEventLoopStarted extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "CORE_EVENT_LOOP";
// (undocumented)
eventFlowStatus: "STARTED";
// (undocumented)
eventName: "CORE_EVENT_LOOP_STARTED";
}

// @alpha
export function createMergableDiffSeries(diffs: Difference[]): Difference[];

// @alpha
export function createMergableIdDiffSeries(oldObject: unknown, diffs: Difference[], idAttributeName: string | number): Difference[];

// @alpha
export interface DebugEvent {
eventName: string;
id: string;
timestamp: string;
traceId: string;
}

// @alpha
export type DebugEventLogHandler = <T extends DebugEvent>(event: T) => unknown;

// @alpha
export type Difference = DifferenceCreate | DifferenceRemove | DifferenceChange | DifferenceMove;

Expand Down Expand Up @@ -99,6 +162,89 @@ export interface DifferenceRemove {
type: "REMOVE";
}

// @alpha
export interface EventFlowDebugEvent extends DebugEvent {
eventFlowName: string;
eventFlowStatus: "STARTED" | "COMPLETED" | "IN_PROGRESS";
eventFlowTraceId: string;
}

// @alpha
export type EventFlowDebugName = (typeof EventFlowDebugNames)[keyof typeof EventFlowDebugNames];

// @alpha
export const EventFlowDebugNames: {
readonly CORE_EVENT_LOOP: "CORE_EVENT_LOOP";
readonly GENERATE_PLANNING_PROMPT: "GENERATE_PLANNING_PROMPT";
readonly GENERATE_AND_APPLY_TREE_EDIT: "GENERATE_AND_APPLY_TREE_EDIT";
readonly FINAL_REVIEW: "FINAL_REVIEW";
};

// @alpha
export interface FinalReviewCompleted<TIsLlmResponseValid = boolean, TReviewResponse = TIsLlmResponseValid extends true ? "yes" | "no" : undefined> extends EventFlowDebugEvent {
didLlmAccomplishGoal: TReviewResponse;
// (undocumented)
eventFlowName: "FINAL_REVIEW";
// (undocumented)
eventFlowStatus: "COMPLETED";
// (undocumented)
eventName: "FINAL_REVIEW_COMPLETED";
isLlmResponseValid: TIsLlmResponseValid;
}

// @alpha
export interface FinalReviewStarted extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "FINAL_REVIEW";
// (undocumented)
eventFlowStatus: "STARTED";
// (undocumented)
eventName: "FINAL_REVIEW_STARTED";
llmPrompt: string;
}

// @alpha
export interface GenerateTreeEditCompleted<TIsLlmResponseValid = boolean, TEdit = TIsLlmResponseValid extends true ? LlmTreeEdit | null : undefined> extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "GENERATE_AND_APPLY_TREE_EDIT";
// (undocumented)
eventFlowStatus: "COMPLETED";
// (undocumented)
eventName: "GENERATE_TREE_EDIT_COMPLETED";
isLlmResponseValid: TIsLlmResponseValid;
llmGeneratedEdit: TEdit;
}

// @alpha
export interface GenerateTreeEditStarted extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "GENERATE_AND_APPLY_TREE_EDIT";
// (undocumented)
eventFlowStatus: "STARTED";
// (undocumented)
eventName: "GENERATE_TREE_EDIT_STARTED";
// (undocumented)
llmPrompt: string;
}

// @alpha
export interface LlmApiCallDebugEvent extends DebugEvent {
eventFlowTraceId: string;
// (undocumented)
eventName: "LLM_API_CALL";
modelName: string;
requestParams: unknown;
response: unknown;
tokenUsage?: {
promptTokens: number;
completionTokens: number;
};
triggeringEventFlowName: EventFlowDebugName;
}

// @alpha
export type LlmTreeEdit = Record<string, unknown>;

// @alpha
export type ObjectPath = (string | number)[];

Expand All @@ -118,6 +264,28 @@ export interface Options {
} | undefined;
}

// @alpha
export interface PlanningPromptCompleted<TIsLlmResponseValid = boolean, TPlan = TIsLlmResponseValid extends true ? string : undefined> extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "GENERATE_PLANNING_PROMPT";
// (undocumented)
eventFlowStatus: "COMPLETED";
// (undocumented)
eventName: "GENERATE_PLANNING_PROMPT_COMPLETED";
isLlmResponseValid: TIsLlmResponseValid;
llmGeneratedPlan: TPlan;
}

// @alpha
export interface PlanningPromptStarted extends EventFlowDebugEvent {
// (undocumented)
eventFlowName: "GENERATE_PLANNING_PROMPT";
// (undocumented)
eventFlowStatus: "STARTED";
// (undocumented)
eventName: "GENERATE_PLANNING_PROMPT_STARTED";
}

// @alpha
export class SharedTreeBranchManager {
constructor(params?: {
Expand Down
Loading
Loading