Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions examples/browser-agent-live/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<!--
WARNING: This example is currently non-functional. You may encounter issues
with browser support during the beta release of the Voice Agent API.
-->
<!DOCTYPE html>
<html>
<head>
<script src="../../dist/umd/deepgram.js"></script>
</head>
<body>
Running test... check the developer console.
<button type="button">Start</button>
</body>
<script>
const { createClient, AgentEvents } = deepgram;
const _deepgram = createClient("put yo key here");

const audioContext = new AudioContext();

// How often (ms) the MediaRecorder hands us a chunk of microphone audio.
// Without a timeslice, `dataavailable` only fires once recording stops, so
// nothing would be streamed to the agent while the user is talking.
const MIC_TIMESLICE_MS = 250;

console.log("Deepgram Instance: ", _deepgram);

/**
 * Browser demo: opens a Voice Agent connection, configures it, streams
 * microphone audio to it while the button is in the "Stop" state, and tries
 * to play back the audio the agent returns.
 */
(async () => {
  const connection = _deepgram.agent();

  // Handle of the keep-alive timer so it can be stopped when the socket closes.
  let keepAliveTimer = null;

  connection.on(AgentEvents.Welcome, () => {
    console.log("WS Connected");
  });
  connection.on(AgentEvents.Open, async () => {
    console.log("Connection opened");

    await connection.configure({
      audio: {
        input: {
          encoding: "opus",
          container: "ogg",
        },
        output: {
          encoding: "linear16",
          bitrate: 48000,
          container: "none",
        },
      },
      agent: {
        listen: {
          model: "nova-2",
        },
        speak: {
          model: "aura-asteria-en",
        },
        think: {
          provider: {
            type: "anthropic",
          },
          model: "claude-3-haiku-20240307",
        },
      },
    });
    console.log("Deepgram Agent configured.");

    // Guard against stacking timers if Open fires more than once.
    clearInterval(keepAliveTimer);
    keepAliveTimer = setInterval(() => {
      console.log("Keep alive!");
      void connection.keepAlive();
    }, 5000);
  });

  connection.on(AgentEvents.Close, () => {
    console.log("Connection closed");
    // Stop pinging a socket that no longer exists.
    clearInterval(keepAliveTimer);
    keepAliveTimer = null;
  });

  connection.on(AgentEvents.UserStartedSpeaking, () => {
    console.log("Interrupting agent.");
  });

  connection.on(AgentEvents.AgentThinking, () => {
    console.log("Agent thinking.");
  });

  connection.on(AgentEvents.AgentStartedSpeaking, () => {
    console.log("Agent started speaking.");
  });

  connection.on(AgentEvents.ConversationText, (data) => {
    console.log(JSON.stringify(data, null, 2));
  });

  connection.on(AgentEvents.Metadata, (data) => {
    console.dir(data);
  });

  connection.on(AgentEvents.Audio, async (data) => {
    console.log("Playing audio.");
    try {
      // NOTE(review): decodeAudioData expects a containerised format; with
      // `container: "none"` above this may reject — confirm the payload format.
      const audioBuffer = await audioContext.decodeAudioData(data);
      const source = audioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(audioContext.destination);
      source.start();
    } catch (err) {
      // Surface decode failures instead of leaving an unhandled rejection.
      console.error("Failed to play agent audio:", err);
    }
  });

  connection.on(AgentEvents.Error, (err) => {
    console.error("Error!");
    console.error(err);
    console.error(err.message);
  });

  connection.on(AgentEvents.AgentAudioDone, async () => {
    console.log("Agent audio done.");
  });

  connection.on(AgentEvents.Unhandled, (data) => {
    console.dir(data);
  });

  const media = await navigator.mediaDevices.getUserMedia({
    audio: {
      sampleRate: 48000,
      channelCount: 1,
      echoCancellation: true,
      autoGainControl: true,
      noiseSuppression: false,
    },
    video: false,
  });
  const mic = new MediaRecorder(media, { mimeType: "audio/ogg" });
  const btn = document.querySelector("button");
  console.log(btn);
  btn.addEventListener("click", (event) => {
    // An AudioContext created before any user gesture starts out suspended
    // under browser autoplay policies; the click is our gesture to resume it.
    if (audioContext.state === "suspended") {
      void audioContext.resume();
    }

    if (mic.state === "recording") {
      mic.stop();
      event.target.innerText = "Start";
    } else {
      mic.start(MIC_TIMESLICE_MS);
      event.target.innerText = "Stop";
    }
  });

  mic.onerror = (event) => {
    console.error("Microphone Error:", event.error);
  };

  mic.ondataavailable = async (event) => {
    console.log(mic.mimeType);
    console.log("Data available.");
    await connection.send(event.data);
  };
})().catch((err) => {
  // Covers getUserMedia denial, unsupported MediaRecorder mime types, etc.
  console.error("Failed to start the agent example:", err);
});
// ...
</script>
</html>
2 changes: 2 additions & 0 deletions examples/node-agent-live/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chatlog.txt
output-*.wav
110 changes: 110 additions & 0 deletions examples/node-agent-live/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const { writeFile, appendFile } = require("fs/promises");
const { createClient, AgentEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");
const { join } = require("path");

const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

/**
 * Node demo for the Voice Agent API.
 *
 * Opens an agent connection, configures it once the server sends Welcome,
 * streams a remote WAV file to it as user audio, appends conversation text to
 * `chatlog.txt`, and writes each completed agent reply to `output-<n>.wav`
 * next to this script. The process exits when the connection closes.
 */
const agent = async () => {
  // Accumulates the audio chunks of the agent reply currently being received.
  let audioBuffer = Buffer.alloc(0);
  // Index of the next output file.
  let i = 0;
  const url = "https://dpgr.am/spacewalk.wav";
  const connection = deepgram.agent();

  connection.on(AgentEvents.Welcome, () => {
    console.log("Welcome to the Deepgram Voice Agent!");

    connection.configure({
      audio: {
        input: {
          encoding: "linear16",
          sampleRate: 44100,
        },
        output: {
          encoding: "linear16",
          sampleRate: 16000,
          container: "wav",
        },
      },
      agent: {
        listen: {
          model: "nova-2",
        },
        speak: {
          model: "aura-asteria-en",
        },
        think: {
          provider: {
            type: "open_ai",
          },
          model: "gpt-4o-mini",
        },
      },
    });

    console.log("Deepgram agent configured!");

    setInterval(() => {
      console.log("Keep alive!");
      connection.keepAlive();
    }, 5000);

    fetch(url)
      .then((r) => r.body)
      .then((res) => {
        res.on("readable", () => {
          // `read()` returns null once the internal buffer is drained (and at
          // end of stream), so loop until then and never forward a null chunk.
          let chunk = res.read();
          while (chunk !== null) {
            console.log("Sending audio chunk");
            connection.send(chunk);
            chunk = res.read();
          }
        });
      })
      .catch((err) => {
        // Don't leave a floating rejection if the download fails.
        console.error("Failed to stream input audio:", err);
      });
  });

  connection.on(AgentEvents.Open, () => {
    console.log("Connection opened");
  });

  connection.on(AgentEvents.Close, () => {
    console.log("Connection closed");
    process.exit(0);
  });

  connection.on(AgentEvents.ConversationText, async (data) => {
    try {
      await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
    } catch (err) {
      console.error("Failed to append to chatlog.txt:", err);
    }
  });

  connection.on(AgentEvents.UserStartedSpeaking, () => {
    if (audioBuffer.length) {
      console.log("Interrupting agent.");
      audioBuffer = Buffer.alloc(0);
    }
  });

  connection.on(AgentEvents.Metadata, (data) => {
    console.dir(data, { depth: null });
  });

  connection.on(AgentEvents.Audio, (data) => {
    console.log("Audio chunk received");
    // Concatenate the audio chunks into a single buffer
    const buffer = Buffer.from(data);
    audioBuffer = Buffer.concat([audioBuffer, buffer]);
  });

  connection.on(AgentEvents.Error, (err) => {
    console.error("Error!");
    console.error(JSON.stringify(err, null, 2));
    console.error(err.message);
  });

  connection.on(AgentEvents.AgentAudioDone, async () => {
    console.log("Agent audio done");
    // Snapshot and reset the shared state *before* awaiting the write, so
    // chunks of the next reply that arrive during the write are not discarded
    // and two overlapping writes can never pick the same file index.
    const finishedAudio = audioBuffer;
    const fileIndex = i;
    audioBuffer = Buffer.alloc(0);
    i++;
    try {
      await writeFile(join(__dirname, `output-${fileIndex}.wav`), finishedAudio);
    } catch (err) {
      console.error(`Failed to write output-${fileIndex}.wav:`, err);
    }
  });

  connection.on(AgentEvents.Unhandled, (data) => {
    console.dir(data, { depth: null });
  });
};

void agent();
11 changes: 11 additions & 0 deletions src/DeepgramClient.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DeepgramVersionError } from "./lib/errors";
import {
AbstractClient,
AgentLiveClient,
ListenClient,
ManageClient,
ReadClient,
Expand Down Expand Up @@ -80,6 +81,16 @@ export default class DeepgramClient extends AbstractClient {
return new SpeakClient(this.options);
}

/**
 * Creates an AgentLiveClient bound to this client's options, giving access to
 * Deepgram's Voice Agent API.
 *
 * @param {string} endpoint - Endpoint forwarded to the AgentLiveClient constructor; defaults to "/agent".
 * @returns {AgentLiveClient} A freshly constructed AgentLiveClient.
 * @beta
 */
public agent(endpoint: string = "/agent"): AgentLiveClient {
  const agentClient = new AgentLiveClient(this.options, endpoint);
  return agentClient;
}

/**
* @deprecated
* @see https://dpgr.am/js-v3
Expand Down
9 changes: 9 additions & 0 deletions src/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const DEFAULT_HEADERS = {
};

export const DEFAULT_URL = "https://api.deepgram.com";
// WebSocket base URL used as the websocket `url` in DEFAULT_AGENT_OPTIONS.
export const DEFAULT_AGENT_URL = "wss://agent.deepgram.com";

export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
Expand All @@ -44,8 +45,16 @@ export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
},
};

/**
 * Default options for the `agent` namespace.
 *
 * Fetch requests use DEFAULT_URL with DEFAULT_HEADERS; the websocket points at
 * DEFAULT_AGENT_URL and passes DEFAULT_HEADERS via `_nodeOnlyHeaders`.
 */
export const DEFAULT_AGENT_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
websocket: {
options: { url: DEFAULT_AGENT_URL, _nodeOnlyHeaders: DEFAULT_HEADERS },
},
};

/**
 * Default client options, keyed by namespace: `global` and `agent`.
 */
export const DEFAULT_OPTIONS: DefaultClientOptions = {
global: DEFAULT_GLOBAL_OPTIONS,
agent: DEFAULT_AGENT_OPTIONS,
};

export enum SOCKET_STATES {
Expand Down
78 changes: 78 additions & 0 deletions src/lib/enums/AgentEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
 * Names of the events that can be subscribed to on a Voice Agent connection.
 *
 * Every member's string value is identical to its name. Where a member's
 * comment shows an object literal, that is the documented shape of the message
 * associated with the event.
 */
export enum AgentEvents {
/**
 * Built in socket events.
 */
Open = "Open",
Close = "Close",
Error = "Error",
/**
 * Audio data received from the agent.
 * NOTE(review): the payload type is not visible here; the bundled examples
 * treat it as raw audio bytes to be buffered or decoded — confirm.
 */
Audio = "Audio",
/**
 * Confirms the successful connection to the websocket.
 * { type: "Welcome", session_id: "String"}
 */
Welcome = "Welcome",
/**
 * Confirms that your `configure` request was successful.
 * { type: "SettingsApplied" }
 */
SettingsApplied = "SettingsApplied",
/**
 * Triggered when the agent "hears" the user say something.
 * { type: "ConversationText", role: string, content: string }
 */
ConversationText = "ConversationText",
/**
 * Triggered when the agent begins receiving user audio.
 * { type: "UserStartedSpeaking" }
 */
UserStartedSpeaking = "UserStartedSpeaking",
/**
 * Triggered when the user has stopped speaking and the agent is processing the audio.
 * { type: "AgentThinking", content: string }
 */
AgentThinking = "AgentThinking",
/**
 * A request to call client-side functions.
 * { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
 */
FunctionCallRequest = "FunctionCallRequest",
/**
 * Debug message triggered when the agent is calling a function.
 * { type: "FunctionCalling" }
 */
FunctionCalling = "FunctionCalling",
/**
 * Triggered when the agent begins streaming an audio response.
 * { type: "AgentStartedSpeaking", total_latency: number, tts_latency: number, ttt_latency: number }
 */
AgentStartedSpeaking = "AgentStartedSpeaking",
/**
 * Triggered when the agent has finished streaming an audio response.
 * { type: "AgentAudioDone" }
 */
AgentAudioDone = "AgentAudioDone",
/**
 * This event is only emitted when you send an `InjectAgentMessage` request while
 * the user is currently speaking or the server is processing user audio.
 * { type: "InjectionRefused" }
 */
InjectionRefused = "InjectionRefused",
/**
 * A successful response to the `UpdateInstructions` request.
 * { type: "InstructionsUpdated" }
 */
InstructionsUpdated = "InstructionsUpdated",
/**
 * A successful response to the `UpdateSpeak` request.
 * { type: "SpeakUpdated" }
 */
SpeakUpdated = "SpeakUpdated",

/**
 * Catch all for any other message event
 */
Unhandled = "Unhandled",
}
1 change: 1 addition & 0 deletions src/lib/enums/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from "./AgentEvents";
export * from "./LiveConnectionState";
export * from "./LiveTranscriptionEvents";
export * from "./LiveTTSEvents";
Loading
Loading