diff --git a/lib/api.js b/lib/api.js index e522263..f2223d6 100644 --- a/lib/api.js +++ b/lib/api.js @@ -5,7 +5,6 @@ export class RealtimeAPI extends RealtimeEventHandler { /** * Create a new RealtimeAPI instance * @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings] - * @returns {RealtimeAPI} */ constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug } = {}) { super(); @@ -14,6 +13,7 @@ export class RealtimeAPI extends RealtimeEventHandler { this.apiKey = apiKey || null; this.debug = !!debug; this.ws = null; + if (globalThis.document && this.apiKey) { if (!dangerouslyAllowAPIKeyInBrowser) { throw new Error( @@ -156,7 +156,7 @@ export class RealtimeAPI extends RealtimeEventHandler { /** * Disconnects from Realtime API server - * @param {WebSocket} [ws] + * @param {typeof globalThis.WebSocket} [ws] * @returns {true} */ disconnect(ws) { @@ -182,15 +182,12 @@ export class RealtimeAPI extends RealtimeEventHandler { /** * Sends an event to WebSocket and dispatches as "client.{eventName}" and "client.*" events - * @param {string} eventName - * @param {{[key: string]: any}} event - * @returns {true} + * @type {import('./types').SendEvent} */ send(eventName, data) { if (!this.isConnected()) { throw new Error(`RealtimeAPI is not connected`); } - data = data || {}; if (typeof data !== 'object') { throw new Error(`data must be an object`); } diff --git a/lib/client.js b/lib/client.js index be37288..896418c 100644 --- a/lib/client.js +++ b/lib/client.js @@ -59,7 +59,7 @@ import { RealtimeUtils } from './utils.js'; /** * @typedef {Object} InputAudioContentType * @property {"input_audio"} type - * @property {string} [audio] base64-encoded audio data + * @property {string|ArrayBuffer|Int16Array} [audio] base64-encoded audio data * @property {string|null} [transcript] */ @@ -118,6 +118,7 @@ import { RealtimeUtils } from './utils.js'; * @property {string|null} [previous_item_id] * @property {"function_call_output"} type * @property {string} call_id + * @property {string} status * @property {string} output */ @@ -143,6 +144,7 @@ import { RealtimeUtils } from './utils.js'; * @typedef {Object} FormattedItemType * @property {string} id * @property {string} object + * @property {string} status * @property {"user"|"assistant"|"system"} [role] * @property {FormattedPropertyType} formatted */ @@ -193,6 +195,7 @@ export class RealtimeClient extends RealtimeEventHandler { */ constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug } = {}) { super(); + /* @type { import('./types').SessionConfig }*/ this.defaultSessionConfig = { modalities: ['text', 'audio'], instructions: '', @@ -295,6 +298,7 @@ export class RealtimeClient extends RealtimeEventHandler { throw new Error(`Tool "${tool.name}" has not been added`); } const result = await toolConfig.handler(jsonArguments); + this.realtime.send('conversation.item.create', { item: { type: 'function_call_output', @@ -344,6 +348,7 @@ export class RealtimeClient extends RealtimeEventHandler { 'server.response.audio_transcript.delta', handlerWithDispatch, ); + this.realtime.on('server.response.audio.delta', handlerWithDispatch); this.realtime.on('server.response.text.delta', handlerWithDispatch); this.realtime.on( @@ -533,7 +538,7 @@ export class RealtimeClient extends RealtimeEventHandler { }; }), ); - const session = { ...this.sessionConfig }; + const session = { ...this.sessionConfig, tools: useTools }; session.tools = useTools; if (this.realtime.isConnected()) { this.realtime.send('session.update', { session }); @@ -559,6 +564,7 @@ export class RealtimeClient extends RealtimeEventHandler { item: { type: 'message', role: 'user', + //@ts-ignore TODO fix content, }, }); @@ -594,11 +600,11 @@ export class RealtimeClient extends RealtimeEventHandler { this.getTurnDetectionType() === null && this.inputAudioBuffer.byteLength > 0 ) { - this.realtime.send('input_audio_buffer.commit'); + this.realtime.send('input_audio_buffer.commit', null); this.conversation.queueInputAudio(this.inputAudioBuffer); this.inputAudioBuffer = new Int16Array(0); } - this.realtime.send('response.create'); + this.realtime.send('response.create', null); return true; } @@ -611,7 +617,7 @@ export class RealtimeClient extends RealtimeEventHandler { */ cancelResponse(id, sampleCount = 0) { if (!id) { - this.realtime.send('response.cancel'); + this.realtime.send('response.cancel', null); return { item: null }; } else if (id) { const item = this.conversation.getItem(id); @@ -625,7 +631,7 @@ export class RealtimeClient extends RealtimeEventHandler { `Can only cancelResponse messages with role "assistant"`, ); } - this.realtime.send('response.cancel'); + this.realtime.send('response.cancel', null); const audioIndex = item.content.findIndex((c) => c.type === 'audio'); if (audioIndex === -1) { throw new Error(`Could not find audio on item to cancel`); @@ -643,7 +649,6 @@ export class RealtimeClient extends RealtimeEventHandler { /** * Utility for waiting for the next `conversation.item.appended` event to be triggered by the server - * @returns {Promise<{item: ItemType}>} */ async waitForNextItem() { const event = await this.waitForNext('conversation.item.appended'); @@ -653,7 +658,6 @@ export class RealtimeClient extends RealtimeEventHandler { /** * Utility for waiting for the next `conversation.item.completed` event to be triggered by the server - * @returns {Promise<{item: ItemType}>} */ async waitForNextCompletedItem() { const event = await this.waitForNext('conversation.item.completed'); diff --git a/lib/conversation.js b/lib/conversation.js index f6b8f09..1f96945 100644 --- a/lib/conversation.js +++ b/lib/conversation.js @@ -18,7 +18,8 @@ import { RealtimeUtils } from './utils.js'; export class RealtimeConversation { defaultFrequency = 24_000; // 24,000 Hz - EventProcessors = { + /** @type { import('./types').EventProcessors} */ + eventProcessors = { 'conversation.item.created': (event) => { const { item } = event; // deep copy values @@ -240,7 +241,6 @@ export class RealtimeConversation { /** * Create a new RealtimeConversation instance - * @returns {RealtimeConversation} */ constructor() { this.clear(); @@ -275,7 +275,7 @@ export class RealtimeConversation { * Process an event from the WebSocket server and compose items * @param {Object} event * @param {...any} args - * @returns {item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null} + * @returns {{item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null}} */ processEvent(event, ...args) { if (!event.event_id) { @@ -286,7 +286,7 @@ export class RealtimeConversation { console.error(event); throw new Error(`Missing "type" on event`); } - const eventProcessor = this.EventProcessors[event.type]; + const eventProcessor = this.eventProcessors[event.type]; if (!eventProcessor) { throw new Error( `Missing conversation event processor for "${event.type}"`, diff --git a/lib/event_handler.js b/lib/event_handler.js index 8d37dbe..0a5bb02 100644 --- a/lib/event_handler.js +++ b/lib/event_handler.js @@ -1,6 +1,10 @@ /** - * EventHandler callback - * @typedef {(event: {[key: string]: any}): void} EventHandlerCallbackType + * @typedef {import('./types').Listener} Listener + * @typedef {import('./types').ListenerBool} ListenerBool + * @typedef {import('./types').WaitForNext} WaitForNext + * @typedef {import('./types').EventNames} EventNames + * @typedef {Object.} EventHandlers + */ const sleep = (t) => new Promise((r) => setTimeout(() => r(), t)); @@ -13,10 +17,11 @@ const sleep = (t) => new Promise((r) => setTimeout(() => r(), t)); export class RealtimeEventHandler { /** * Create a new RealtimeEventHandler instance - * @returns {RealtimeEventHandler} */ constructor() { + /** @type {EventHandlers} */ this.eventHandlers = {}; + /** @type {EventHandlers} */ this.nextEventHandlers = {}; } @@ -30,11 +35,9 @@ export class RealtimeEventHandler { return true; } - /** - * Listen to specific events - * @param {string} eventName The name of the event to listen to - * @param {EventHandlerCallbackType} callback Code to execute on event - * @returns {EventHandlerCallbackType} + /** + * Register an event listener + * @type {Listener} */ on(eventName, callback) { this.eventHandlers[eventName] = this.eventHandlers[eventName] || []; @@ -44,9 +47,7 @@ export class RealtimeEventHandler { /** * Listen for the next event of a specified type - * @param {string} eventName The name of the event to listen to - * @param {EventHandlerCallbackType} callback Code to execute on event - * @returns {EventHandlerCallbackType} + * @type {Listener} */ onNext(eventName, callback) { this.nextEventHandlers[eventName] = this.nextEventHandlers[eventName] || []; @@ -57,9 +58,7 @@ export class RealtimeEventHandler { /** * Turns off event listening for specific events * Calling without a callback will remove all listeners for the event - * @param {string} eventName - * @param {EventHandlerCallbackType} [callback] - * @returns {true} + * @type {ListenerBool} */ off(eventName, callback) { const handlers = this.eventHandlers[eventName] || []; @@ -80,9 +79,7 @@ export class RealtimeEventHandler { /** * Turns off event listening for the next event of a specific type * Calling without a callback will remove all listeners for the next event - * @param {string} eventName - * @param {EventHandlerCallbackType} [callback] - * @returns {true} + * @type {ListenerBool} */ offNext(eventName, callback) { const nextHandlers = this.nextEventHandlers[eventName] || []; @@ -102,9 +99,7 @@ export class RealtimeEventHandler { /** * Waits for next event of a specific type and returns the payload - * @param {string} eventName - * @param {number|null} [timeout] - * @returns {Promise<{[key: string]: any}|null>} + * @type {WaitForNext} */ async waitForNext(eventName, timeout = null) { const t0 = Date.now(); diff --git a/lib/types.ts b/lib/types.ts new file mode 100644 index 0000000..7b6d7c1 --- /dev/null +++ b/lib/types.ts @@ -0,0 +1,373 @@ +import { EventEmitter } from 'events'; +type BaseItem = { + id: string; + object: 'realtime.item'; + status: 'completed' | 'in_progress' | 'incomplete'; + role: 'user' | 'assistant' | 'system'; +} + +type TextContent = { + type: 'text' | 'input_text'; + text: string +} + +type AudioContent = { + type: 'audio' | 'input_audio'; + audio?: string; + transcript?: string; +} + + +type MessageItem = { + type: 'message', + content: (TextContent | AudioContent)[] +} + +type FunctionCallItem = { + type: 'function_call', + name: string; + call_id: string; + arguments: string; +} + +type FunctionCallOutputItem = { + type: 'function_call_output', + call_id: string; + output: string; +} + +export type ServerItem = BaseItem & (MessageItem | FunctionCallItem ); +export type ClientItem = Partial & (MessageItem | FunctionCallOutputItem ); + + +export type Error = { + type: string; + code: string; + message: string; + param: string; +} + +type Usasge = { + total_tokens: number; + input_tokens: number; + output_tokens: number; +} + +type Output = { + id: string; + object: 'realtime.item'; + type: string; + status: string; + role: 'user' | 'assistant' | 'system'; + content: [ + { + type: string; + text: string; + } + ] +} + +type Response = { + id: string; + object: 'realtime.response'; + status: T; + status_details: any; + output: Output[]; + usage: any; +} + +type AudioFormatType = "pcm16" | "g711_ulaw" | "g711_alaw"; + + +export type SessionConfig = Partial<{ + modalities: ('text' | 'audio')[]; + instructions: string; + voice: 'alloy' | 'echo' | 'shimmer'; + input_audio_format: AudioFormatType; + output_audio_format: AudioFormatType; + input_audio_transcriptions?: { + model: 'whisper-1' + }; + turn_detection?: { + type: 'server_vad'; + threshold?: number; + prefix_padding_ms?: number; + silence_duration_ms?: number; + }; + tools?: Array<{ + type: 'function'; + name: string; + description: string; + parameters: Record + + }>; + tool_choice?: 'auto' | 'none' | 'required' + temperature?: number; + max_output_tokens?: number; +}> + +type SessionUpdate = { + event_id?: string; + type?: 'session.update'; + session: SessionConfig +} + + +type ResponseContent = { + event_id: string; + response_id: string; + item_id: string; + output_index: number; + content_index: number; +} + +type ContentPart = { + type: 'text'; + text: string; +} | { + type: 'audio'; + audio: string; + transcript: string; +} + +export interface ClientEvent { + 'session.update': SessionUpdate; + 'input_audio_buffer.append': { + event_id?: string; + type?: 'input_audio_buffer.append'; + audio: string; + } + 'input_audio_buffer.commit': { + event_id?: string + type?: 'input_audio_buffer.commit'; + } | null; + + 'input_audio_buffer.clear': { + event_id?: string; + type?: 'input_audio_buffer.clear'; + } | null; + 'conversation.item.create': { + event_id?: string; + type?: 'conversation.item.create'; + previous_item_id?: string; + item: ClientItem; + } | { + type?: 'conversation.item.create'; + item: FunctionCallOutputItem + } + 'conversation.item.truncate': { + event_id?: string; + type?: 'conversation.item.truncate'; + item_id: string; + content_index: number; + audio_end_ms: number; + } + 'conversation.item.delete': { + event_id?: string; + type?: 'conversation.item.delete'; + item_id: string; + } + + 'response.create': { + event_id?: string; + type?: 'response.create'; + response?: SessionUpdate['session']; + } | null; + 'response.cancel': { + event_id?: string; + type: 'response.cancel'; + } | null; +} + +export interface ServerEvent { + 'conversation.created': { + event_id: string; + type: 'conversation.created', + conversation: { + id: string; + object: 'realtime.conversation'; + } + }; + 'conversation.item.created': { + event_id: string; + type: 'conversation.item.created'; + previous_item_id: string; + item: ServerItem; + }; + + 'conversation.item.completed': { + event_id: string; + type: 'conversation.item.completed'; + item: ServerItem; + }; + + 'conversation.item.input_audio_transcription.completed': { + event_id: string; + type: 'conversation.item.input_audio_transcription.completed', + item_id: string; + content_index: number; + transcript: string + } + 'conversation.item.input_audio_transcription.failed': { + event_id: string; + type: 'conversation.item.input_audio_transcription.failed', + item_id: string; + content_index: number; + error: Error + } + 'conversation.item.truncated': { + event_id: string; + type: 'conversation.item.truncated', + item_id: string; + content_index: number; + audio_end_ms: number; + } + 'conversation.item.deleted': { + event_id: string; + type: 'conversation.item.deleted', + item_id: string; + } + 'conversation.item.appended': { + event_id: string; + type: 'conversation.item.appended', + item: ServerItem; + + } + 'input_audio_buffer.committed': { + event_id: string; + type: 'input_audio_buffer.committed', + previous_item_id: string; + item_id: string + } + 'input_audio_buffer.cleared': { + event_id: string; + type: 'input_audio_buffer.cleared' + } + 'input_audio_buffer.speech_started': { + event_id: string; + type: 'input_audio_buffer.speech_started'; + item_id: string; + audio_start_ms: number; + } + 'input_audio_buffer.speech_stopped': { + event_id: string; + type: 'input_audio_buffer.speech_stopped'; + audio_end_ms: number; + item_id: string; + } + 'response.created' : { + event_id: string; + type: 'response.created'; + response: Response<'in_progress'>; + } + 'response.audio.delta': ResponseContent & { + type: 'response.audio.delta'; + delta: string; + } + 'response.audio_transcript.delta': ResponseContent & { + type: 'response.audio_transcript.delta'; + delta: string; + } + 'response.audio_transcript.done': ResponseContent & { + type: 'response.audio_transcript.done'; + transcript: string; + } + 'response.content_part.added': { + event_id?: string; + type: 'response.content_part.added'; + response_id: string; + item_id: string; + output_index: number; + content_index: number; + part: ContentPart; + } + 'response.done': { + event_id: string; + type: 'response.done'; + ressponse: Response<'completed' | 'cancelled' | 'failed' | 'incomplete'>; + } + 'response.output_item.added': { + event_id: string; + type: 'response.output_item.added'; + response_id: string; + output_index: number; + item: ServerItem; + } + 'response.output_item.done': { + event_id: string; + type: 'response.output_item.done'; + response_id: string; + output_index: number; + item: ServerItem; + } + 'response.function_call_arguments.delta': { + event_id: string; + type: 'response.function_call_arguments.delta'; + response_id: string; + item_id: string; + output_index: number; + call_id: string; + arguments: string; + delta: string; + } + 'response.function_call_arguments.done': { + event_id: string; + type: 'response.function_call_arguments.done'; + response_id: string; + item_id: string; + output_index: number; + call_id: string; + arguments: string; + } + 'response.text.delta': { + event_id; + type: 'response.text.delta'; + response_id: string; + item_id: string; + output_index: number; + content_index: number; + delta: string + } + + 'rate_limits.updated': { + event_id: string; + type: 'rate_limits.updated'; + rate_limits: { + name: string; + limit: number; + remaining: number; + reset_seconds: number; + } + } + 'session.updated': { + event_id: string; + type: 'session.updated'; + session: SessionUpdate['session']; + } + 'session.created': { + event_id: string; + type: 'session.created'; + session: SessionUpdate['session']; + } +} + +// Utility type to prefix keys with 'server.' +type PrefixedServerEvent = { + [K in keyof T as `server.${string & K}`]: T[K]; +}; + +// New type with prefixed keys +type ServerServerEvent = PrefixedServerEvent; + +type AllMap = ServerEvent & ServerServerEvent & { 'client.*': any, 'server.*': any }; + +type Exact = T extends U ? (U extends T ? T : never) : never; + +export type SendEvent = (event: K, data: Exact) => boolean; +export type Listener = (event: K, listener: (data: AllMap[K]) => void) => AllMap[K]; +export type ListenerBool = (event: K, listener: (data: AllMap[K]) => void) => boolean; +export type WaitForNext = (event: K, timeout?: number) => Promise; +export type EventNames = keyof AllMap; +export type EventProcessors = { + [K in keyof AllMap]?: (data: AllMap[K]) => void; +}; \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index cc88801..28ba12d 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -2,6 +2,7 @@ "include": ["index.js"], "compilerOptions": { "allowJs": true, + "checkJs": true, "declaration": true, "emitDeclarationOnly": true, "outDir": "dist",