Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: release voice agent #357

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions examples/browser-agent-live/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<!--
WARNING: This example is currently non-functional. You may encounter issues
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved
with browser support during the beta release of the Voice Agent API.
-->
<!DOCTYPE html>
<html>
<head>
<script src="../../dist/umd/deepgram.js"></script>
</head>
<body>
Running test... check the developer console.
<button type="button">Start</button>
</body>
<script>
const { createClient, AgentEvents } = deepgram;
const _deepgram = createClient("put yo key here");
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

const audioContext = new AudioContext();

console.log("Deepgram Instance: ", _deepgram);
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

(async () => {
const connection = _deepgram.agent();
connection.on(AgentEvents.Welcome, () => {
console.log("WS Connected");
});
connection.on(AgentEvents.Open, async () => {
console.log("Connection opened");

await connection.configure({
audio: {
input: {
encoding: "opus",
container: "ogg",
},
output: {
encoding: "linear16",
bitrate: 48000,
container: "none",
},
},
agent: {
listen: {
model: "nova-2",
},
speak: {
model: "aura-asteria-en",
},
think: {
provider: {
type: "anthropic",
},
model: "claude-3-haiku-20240307",
},
},
});
jpvajda marked this conversation as resolved.
Show resolved Hide resolved
console.log("Deepgram Agent configured.");

setInterval(() => {
console.log("Keep alive!");
void connection.keepAlive();
}, 5000);
});
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

connection.on(AgentEvents.Close, () => {
console.log("Connection closed");
});

connection.on(AgentEvents.UserStartedSpeaking, () => {
console.log("Interrupting agent.");
});

connection.on(AgentEvents.AgentThinking, () => {
console.log("Agent thinking.");
});

connection.on(AgentEvents.AgentStartedSpeaking, () => {
console.log("Agent started speaking.");
});

connection.on(AgentEvents.ConversationText, (data) => {
console.log(JSON.stringify(data, null, 2));
});

connection.on(AgentEvents.Metadata, (data) => {
console.dir(data);
});

connection.on(AgentEvents.Audio, async (data) => {
console.log("Playing audio.");
const audioBuffer = await audioContext.decodeAudioData(data);
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
source.start();
});

connection.on(AgentEvents.Error, (err) => {
console.error("Error!");
console.error(err);
console.error(err.message);
});

connection.on(AgentEvents.AgentAudioDone, async () => {
console.log("Agent audio done.");
});

connection.on(AgentEvents.Unhandled, (data) => {
console.dir(data);
});

const media = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: 48000,
channelCount: 1,
echoCancellation: true,
autoGainControl: true,
noiseSuppression: false,
},
video: false,
});
const mic = new MediaRecorder(media, { mimeType: "audio/ogg" });
const btn = document.querySelector("button");
console.log(btn);
btn.addEventListener("click", (event) => {
if (mic.state === "recording") {
mic.stop();
event.target.innerText = "Start";
} else {
mic.start();
event.target.innerText = "Stop";
}
});

mic.onerror = (event) => {
console.error("Microphone Error:", event.error);
};

mic.ondataavailable = async (event) => {
console.log(mic.mimeType);
console.log("Data available.");
await connection.send(event.data);
};
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved
})();
// ...
</script>
</html>
2 changes: 2 additions & 0 deletions examples/node-agent-live/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chatlog.txt
output-*.wav
110 changes: 110 additions & 0 deletions examples/node-agent-live/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const { writeFile, appendFile } = require("fs/promises");
const { createClient, AgentEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");
const { join } = require("path");

const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

/**
 * Streams a sample WAV file to the Deepgram Voice Agent, logs the
 * conversation to chatlog.txt, and writes each spoken agent reply to a
 * numbered output-<n>.wav file.
 */
const agent = async () => {
  // Accumulates the agent's audio until AgentAudioDone fires.
  let audioBuffer = Buffer.alloc(0);
  // Index used to name successive output files.
  let i = 0;
  const url = "https://dpgr.am/spacewalk.wav";
  const connection = deepgram.agent();

  connection.on(AgentEvents.Welcome, () => {
    console.log("Welcome to the Deepgram Voice Agent!");

    connection.configure({
      audio: {
        input: {
          encoding: "linear16",
          sampleRate: 44100,
        },
        output: {
          encoding: "linear16",
          sampleRate: 16000,
          container: "wav",
        },
      },
      agent: {
        listen: {
          model: "nova-2",
        },
        speak: {
          model: "aura-asteria-en",
        },
        think: {
          provider: {
            type: "open_ai",
          },
          model: "gpt-4o-mini",
        },
      },
    });

    console.log("Deepgram agent configured!");

    setInterval(() => {
      console.log("Keep alive!");
      connection.keepAlive();
    }, 5000);

    fetch(url)
      .then((r) => r.body)
      .then((res) => {
        res.on("readable", () => {
          // read() returns null once the internal buffer is drained;
          // only forward real chunks to the socket.
          const chunk = res.read();
          if (chunk) {
            console.log("Sending audio chunk");
            connection.send(chunk);
          }
        });
      })
      .catch((err) => {
        console.error("Failed to fetch sample audio:", err);
      });
  });

  connection.on(AgentEvents.Open, () => {
    console.log("Connection opened");
  });

  connection.on(AgentEvents.Close, () => {
    console.log("Connection closed");
    process.exit(0);
  });

  connection.on(AgentEvents.ConversationText, async (data) => {
    await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
  });

  connection.on(AgentEvents.UserStartedSpeaking, () => {
    // Discard any partially-buffered reply when the user barges in.
    if (audioBuffer.length) {
      console.log("Interrupting agent.");
      audioBuffer = Buffer.alloc(0);
    }
  });

  connection.on(AgentEvents.Metadata, (data) => {
    console.dir(data, { depth: null });
  });

  connection.on(AgentEvents.Audio, (data) => {
    console.log("Audio chunk received");
    // Concatenate the audio chunks into a single buffer
    const buffer = Buffer.from(data);
    audioBuffer = Buffer.concat([audioBuffer, buffer]);
  });

  connection.on(AgentEvents.Error, (err) => {
    console.error("Error!");
    console.error(JSON.stringify(err, null, 2));
    console.error(err.message);
  });

  connection.on(AgentEvents.AgentAudioDone, async () => {
    console.log("Agent audio done");
    await writeFile(join(__dirname, `output-${i}.wav`), audioBuffer);
    audioBuffer = Buffer.alloc(0);
    i++;
  });

  connection.on(AgentEvents.Unhandled, (data) => {
    console.dir(data, { depth: null });
  });
};

void agent();
11 changes: 11 additions & 0 deletions src/DeepgramClient.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DeepgramVersionError } from "./lib/errors";
import {
AbstractClient,
AgentLiveClient,
ListenClient,
ManageClient,
ReadClient,
Expand Down Expand Up @@ -80,6 +81,16 @@ export default class DeepgramClient extends AbstractClient {
return new SpeakClient(this.options);
}

/**
 * Returns a new instance of the AgentLiveClient, which provides access to Deepgram's Voice Agent API.
 *
 * @param {string} [endpoint] - Websocket path for the agent service; defaults to "/agent".
 * @returns {AgentLiveClient} A new instance of the AgentLiveClient.
 * @beta
 */
public agent(endpoint: string = "/agent"): AgentLiveClient {
  const client = new AgentLiveClient(this.options, endpoint);
  return client;
}

/**
* @deprecated
* @see https://dpgr.am/js-v3
Expand Down
9 changes: 9 additions & 0 deletions src/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const DEFAULT_HEADERS = {
};

export const DEFAULT_URL = "https://api.deepgram.com";
// The Voice Agent API is served from a dedicated websocket host, separate
// from the main REST API host above.
export const DEFAULT_AGENT_URL = "wss://agent.deepgram.com";

export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
Expand All @@ -44,8 +45,16 @@ export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
},
};

// Namespace defaults for the Voice Agent client: REST calls still target the
// main API host, while websocket traffic targets the dedicated agent host.
export const DEFAULT_AGENT_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
websocket: {
options: { url: DEFAULT_AGENT_URL, _nodeOnlyHeaders: DEFAULT_HEADERS },
},
};

// Top-level defaults; the `agent` namespace overrides the global websocket
// host for Voice Agent connections.
export const DEFAULT_OPTIONS: DefaultClientOptions = {
global: DEFAULT_GLOBAL_OPTIONS,
agent: DEFAULT_AGENT_OPTIONS,
};

export enum SOCKET_STATES {
Expand Down
78 changes: 78 additions & 0 deletions src/lib/enums/AgentEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
 * Events emitted by the Voice Agent websocket connection. Members whose
 * comments show a shape describe the JSON payload delivered with the event.
 */
export enum AgentEvents {
/**
 * Built in socket events.
 */
Open = "Open",
Close = "Close",
Error = "Error",
/**
 * A raw audio payload streamed from the agent.
 * NOTE(review): payload format presumably follows the configured output
 * encoding/container — confirm against the Voice Agent API docs.
 */
Audio = "Audio",
/**
 * Confirms the successful connection to the websocket.
 * { type: "Welcome", session_id: "String"}
 */
Welcome = "Welcome",
/**
 * Confirms that your `configure` request was successful.
 * { type: "SettingsApplied" }
 */
SettingsApplied = "SettingsApplied",
/**
 * Triggered when the agent "hears" the user say something.
 * { type: "ConversationText", role: string, content: string }
 */
ConversationText = "ConversationText",
/**
 * Triggered when the agent begins receiving user audio.
 * { type: "UserStartedSpeaking" }
 */
UserStartedSpeaking = "UserStartedSpeaking",
/**
 * Triggered when the user has stopped speaking and the agent is processing the audio.
 * { type: "AgentThinking", content: string }
 */
AgentThinking = "AgentThinking",
/**
 * A request to call client-side functions.
 * { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
 */
FunctionCallRequest = "FunctionCallRequest",
/**
 * Debug message triggered when the agent is calling a function.
 * { type: "FunctionCalling" }
 */
FunctionCalling = "FunctionCalling",
/**
 * Triggered when the agent begins streaming an audio response.
 * { type: "AgentStartedSpeaking", total_latency: number, tts_latency: number, ttt_latency: number }
 */
AgentStartedSpeaking = "AgentStartedSpeaking",
/**
 * Triggered when the agent has finished streaming an audio response.
 * { type: "AgentAudioDone" }
 */
AgentAudioDone = "AgentAudioDone",
/**
 * This event is only emitted when you send an `InjectAgentMessage` request while
 * the user is currently speaking or the server is processing user audio.
 * { type: "InjectionRefused" }
 */
InjectionRefused = "InjectionRefused",
/**
 * A successful response to the `UpdateInstructions` request.
 * { type: "InstructionsUpdated" }
 */
InstructionsUpdated = "InstructionsUpdated",
/**
 * A successful response to the `UpdateSpeak` request.
 * { type: "SpeakUpdated" }
 */
SpeakUpdated = "SpeakUpdated",

/**
 * Catch all for any other message event
 */
Unhandled = "Unhandled",
}
1 change: 1 addition & 0 deletions src/lib/enums/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from "./AgentEvents";
export * from "./LiveConnectionState";
export * from "./LiveTranscriptionEvents";
export * from "./LiveTTSEvents";
Loading
Loading