|
| 1 | +<script lang="ts"> |
| 2 | + import { WavRecorder, WavStreamPlayer } from '$lib/realtime/wavtools/index.js'; |
| 3 | + import { RealtimeClient } from '@openai/realtime-api-beta'; |
| 4 | + import type { ItemType } from '@openai/realtime-api-beta/dist/lib/client'; |
| 5 | +
|
| 6 | + import { onDestroy, onMount } from 'svelte'; |
| 7 | +
|
// --- Component props -------------------------------------------------------

// Turn-detection mode: 'server_vad' lets the server decide when the user has
// finished speaking; 'none' enables manual push-to-talk.
export let turnDetection: 'server_vad' | 'none' = 'server_vad';
// OpenAI API key; only used when connecting directly (not via relay).
export let apiKey: string = '';
// When true, connect through relayServer (keeps the API key off the browser).
export let useRelayServer: boolean = false;
// WebSocket URL of the relay; defaulted from window.location in onMount when empty.
export let relayServer: string = '';

// System instructions applied to the session before connecting.
export let instructions: string = 'You are a great, upbeat friend. Speak fast.';

// Voice used for the model's audio responses.
export let voice: 'alloy' | 'echo' | 'shimmer' = 'shimmer';

/**
 * Type for all event logs
 */
interface RealtimeEvent {
	time: string;
	source: 'client' | 'server';
	// Number of consecutive same-type events, aggregated for display purposes.
	count?: number;
	event: { [key: string]: any };
}

// Realtime API client; created in setupClient() during onMount, undefined before that.
export let client: RealtimeClient | undefined = undefined;
// Microphone capture and speaker playback, both at the API's 24 kHz sample rate.
export let wavRecorder: WavRecorder = new WavRecorder({ sampleRate: 24000 });
export let wavStreamPlayer: WavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 });

// ISO timestamp of when the current conversation started.
let startTime = '';

// Connection / recording flags, exported so parents can bind to them.
export let isConnected = false;
export let isRecording = false;

// True while turn detection is 'none' (manual push-to-talk mode).
export let canPushToTalk = true;

// Conversation items and raw event log, maintained by the client's callbacks.
export let items: ItemType[] = [];
export let realtimeEvents: RealtimeEvent[] = [];
|
| 41 | + async function setupClient() { |
| 42 | + console.log('setting up client'); |
| 43 | +
|
| 44 | + client = new RealtimeClient( |
| 45 | + useRelayServer |
| 46 | + ? { url: relayServer } |
| 47 | + : { |
| 48 | + apiKey, |
| 49 | + dangerouslyAllowAPIKeyInBrowser: true |
| 50 | + } |
| 51 | + ); |
| 52 | +
|
| 53 | + // Can set parameters ahead of connecting, either separately or all at once |
| 54 | + client.updateSession({ instructions }); |
| 55 | + client.updateSession({ voice }); |
| 56 | + client.updateSession({ |
| 57 | + input_audio_transcription: { model: 'whisper-1' } |
| 58 | + }); |
| 59 | +
|
| 60 | + client.on('realtime.event', (realtimeEvent: RealtimeEvent) => { |
| 61 | + const lastEvent = realtimeEvents[realtimeEvents.length - 1]; |
| 62 | + if (lastEvent?.event.type === realtimeEvent.event.type) { |
| 63 | + // if we receive multiple events in a row, aggregate them for display purposes |
| 64 | + lastEvent.count = (lastEvent.count || 0) + 1; |
| 65 | + realtimeEvents = realtimeEvents.slice(0, -1).concat(lastEvent); |
| 66 | + } else { |
| 67 | + realtimeEvents = realtimeEvents.concat(realtimeEvent); |
| 68 | + } |
| 69 | + }); |
| 70 | + client.on('error', (event: any) => console.error(event)); |
| 71 | + client.on('conversation.interrupted', async () => { |
| 72 | + console.log('conversation interrupted'); |
| 73 | + const trackSampleOffset = await wavStreamPlayer.interrupt(); |
| 74 | + if (trackSampleOffset?.trackId) { |
| 75 | + const { trackId, offset } = trackSampleOffset; |
| 76 | + await client?.cancelResponse(trackId, offset); |
| 77 | + } |
| 78 | + }); |
| 79 | + client.on('conversation.updated', async ({ item, delta }: any) => { |
| 80 | + const newItems = client?.conversation.getItems(); |
| 81 | + if (delta?.audio) { |
| 82 | + wavStreamPlayer.add16BitPCM(delta.audio, item.id); |
| 83 | + } |
| 84 | + if (item.status === 'completed' && item.formatted.audio?.length) { |
| 85 | + const wavFile = await WavRecorder.decode(item.formatted.audio, 24000, 24000); |
| 86 | + item.formatted.file = wavFile; |
| 87 | + } |
| 88 | +
|
| 89 | + if (newItems) items = newItems; |
| 90 | + }); |
| 91 | + } |
| 92 | +
|
| 93 | + export async function startConversation(sendHello: boolean = true) { |
| 94 | + console.log('starting conversation'); |
| 95 | +
|
| 96 | + // Set state variables |
| 97 | + startTime = new Date().toISOString(); |
| 98 | +
|
| 99 | + isConnected = true; |
| 100 | +
|
| 101 | + realtimeEvents = []; |
| 102 | + items = []; |
| 103 | +
|
| 104 | + // Connect to microphone |
| 105 | + await wavRecorder.begin(); |
| 106 | +
|
| 107 | + // Connect to audio output |
| 108 | + await wavStreamPlayer.connect(); |
| 109 | +
|
| 110 | + // Connect to realtime API |
| 111 | + await client?.connect(); |
| 112 | +
|
| 113 | + if (sendHello) { |
| 114 | + console.log('sending user message'); |
| 115 | + client?.sendUserMessageContent([ |
| 116 | + { |
| 117 | + type: `input_text`, |
| 118 | + text: `Hello!` |
| 119 | + // text: `For testing purposes, I want you to list ten car brands. Number each item, e.g. "one (or whatever number you are on): the item name".` |
| 120 | + } |
| 121 | + ]); |
| 122 | + } |
| 123 | +
|
| 124 | + if (client?.getTurnDetectionType() === 'server_vad') { |
| 125 | + await wavRecorder.record((data) => client?.appendInputAudio(data.mono)); |
| 126 | + } |
| 127 | + } |
| 128 | +
|
| 129 | + export async function endConversation() { |
| 130 | + isConnected = false; |
| 131 | + realtimeEvents = []; |
| 132 | + items = []; |
| 133 | +
|
| 134 | + client?.disconnect(); |
| 135 | +
|
| 136 | + await wavRecorder.end(); |
| 137 | +
|
| 138 | + await wavStreamPlayer.interrupt(); |
| 139 | + } |
| 140 | +
|
| 141 | + /** |
| 142 | + * In push-to-talk mode, start recording |
| 143 | + * .appendInputAudio() for each sample |
| 144 | + */ |
| 145 | + export async function startRecording() { |
| 146 | + if (!canPushToTalk) { |
| 147 | + console.error('cannot start recording, if not in push-to-talk mode'); |
| 148 | + return; |
| 149 | + } |
| 150 | +
|
| 151 | + isRecording = true; |
| 152 | +
|
| 153 | + const trackSampleOffset = await wavStreamPlayer.interrupt(); |
| 154 | + if (trackSampleOffset?.trackId) { |
| 155 | + const { trackId, offset } = trackSampleOffset; |
| 156 | + await client?.cancelResponse(trackId, offset); |
| 157 | + } |
| 158 | + await wavRecorder.record((data) => client?.appendInputAudio(data.mono)); |
| 159 | + } |
| 160 | +
|
| 161 | + /** |
| 162 | + * In push-to-talk mode, stop recording |
| 163 | + */ |
| 164 | + export const stopRecording = async () => { |
| 165 | + if (!canPushToTalk) { |
| 166 | + console.error('cannot stop recording, if not in push-to-talk mode'); |
| 167 | + return; |
| 168 | + } |
| 169 | +
|
| 170 | + isRecording = false; |
| 171 | +
|
| 172 | + await wavRecorder.pause(); |
| 173 | + client?.createResponse(); |
| 174 | + }; |
| 175 | +
|
| 176 | + $: if (turnDetection && client) { |
| 177 | + changeTurnEndType(turnDetection); |
| 178 | + } |
| 179 | +
|
| 180 | + const changeTurnEndType = async (value: 'none' | 'server_vad') => { |
| 181 | + console.log('setting turn detection to', value); |
| 182 | + if (value === 'none' && wavRecorder.getStatus() === 'recording') { |
| 183 | + await wavRecorder.pause(); |
| 184 | + } |
| 185 | + client?.updateSession({ |
| 186 | + turn_detection: value === 'none' ? null : { type: 'server_vad' } |
| 187 | + }); |
| 188 | + if (value === 'server_vad' && client?.isConnected()) { |
| 189 | + await wavRecorder.record((data) => client?.appendInputAudio(data.mono)); |
| 190 | + } |
| 191 | +
|
| 192 | + canPushToTalk = value === 'none'; |
| 193 | + }; |
| 194 | +
|
| 195 | + onMount(() => { |
| 196 | + if (useRelayServer && !relayServer) { |
| 197 | + relayServer = `ws://${window.location.host}/api/realtime`; |
| 198 | + } |
| 199 | + setupClient(); |
| 200 | + }); |
| 201 | +
|
| 202 | + onDestroy(() => { |
| 203 | + endConversation(); |
| 204 | + }); |
| 205 | +</script> |
0 commit comments