Skip to content

feat: release voice agent #357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/node-agent-live/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chatlog.txt
output-*.wav
110 changes: 110 additions & 0 deletions examples/node-agent-live/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const { writeFile, appendFile } = require("fs/promises");
const { createClient, AgentEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");
const { join } = require("path");

// Shared SDK client for this example; the API key is read from the
// DEEPGRAM_API_KEY environment variable.
const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

/**
 * Runs the Voice Agent example end to end.
 *
 * Opens an agent connection, configures it once the server sends `Welcome`,
 * streams the audio fetched from `url` into the connection, appends every
 * `ConversationText` payload to `chatlog.txt`, and writes each completed agent
 * response to `output-<n>.wav` next to this script.
 *
 * The process exits when the connection closes.
 */
const agent = async () => {
  const url = "https://dpgr.am/spacewalk.wav";
  const connection = deepgram.agent();

  // Audio received from the agent for the response currently in progress.
  let audioBuffer = Buffer.alloc(0);
  // Index used to name the next output file.
  let i = 0;
  // Handle of the keep-alive interval so it is started once and can be cleared.
  let keepAliveTimer;

  connection.on(AgentEvents.Welcome, () => {
    console.log("Welcome to the Deepgram Voice Agent!");

    connection.configure({
      audio: {
        input: {
          encoding: "linear16",
          sampleRate: 44100,
        },
        output: {
          encoding: "linear16",
          sampleRate: 16000,
          container: "wav",
        },
      },
      agent: {
        listen: {
          model: "nova-2",
        },
        speak: {
          model: "aura-asteria-en",
        },
        think: {
          provider: {
            type: "open_ai",
          },
          model: "gpt-4o-mini",
        },
      },
    });

    console.log("Deepgram agent configured!");

    // Guard against stacking intervals if Welcome is ever delivered twice.
    if (keepAliveTimer === undefined) {
      keepAliveTimer = setInterval(() => {
        console.log("Keep alive!");
        connection.keepAlive();
      }, 5000);
    }

    fetch(url)
      .then((r) => {
        if (!r.ok) {
          throw new Error(`Failed to fetch ${url}: ${r.status} ${r.statusText}`);
        }

        const res = r.body;
        res.on("readable", () => {
          // "readable" also fires at end-of-stream, where read() returns null;
          // drain until null so a null chunk is never sent to the agent.
          let chunk;
          while ((chunk = res.read()) !== null) {
            console.log("Sending audio chunk");
            connection.send(chunk);
          }
        });
      })
      .catch((err) => {
        // Without this handler a network failure is an unhandled rejection.
        console.error("Failed to stream input audio!");
        console.error(err);
      });
  });

  connection.on(AgentEvents.Open, () => {
    console.log("Connection opened");
  });

  connection.on(AgentEvents.Close, () => {
    console.log("Connection closed");
    clearInterval(keepAliveTimer);
    process.exit(0);
  });

  connection.on(AgentEvents.ConversationText, async (data) => {
    try {
      await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
    } catch (err) {
      console.error("Failed to append to chatlog.txt");
      console.error(err);
    }
  });

  connection.on(AgentEvents.UserStartedSpeaking, () => {
    // Discard any partially received agent audio when the user starts speaking.
    if (audioBuffer.length) {
      console.log("Interrupting agent.");
      audioBuffer = Buffer.alloc(0);
    }
  });

  connection.on(AgentEvents.Metadata, (data) => {
    console.dir(data, { depth: null });
  });

  connection.on(AgentEvents.Audio, (data) => {
    console.log("Audio chunk received");
    // Concatenate the audio chunks into a single buffer
    const buffer = Buffer.from(data);
    audioBuffer = Buffer.concat([audioBuffer, buffer]);
  });

  connection.on(AgentEvents.Error, (err) => {
    console.error("Error!");
    console.error(JSON.stringify(err, null, 2));
    console.error(err.message);
  });

  connection.on(AgentEvents.AgentAudioDone, async () => {
    console.log("Agent audio done");
    // Snapshot and reset state BEFORE awaiting: audio for the next response may
    // arrive while the file is being written, and a second AgentAudioDone must
    // not reuse the same file index.
    const finishedAudio = audioBuffer;
    const fileIndex = i;
    audioBuffer = Buffer.alloc(0);
    i++;

    try {
      await writeFile(join(__dirname, `output-${fileIndex}.wav`), finishedAudio);
    } catch (err) {
      console.error(`Failed to write output-${fileIndex}.wav`);
      console.error(err);
    }
  });

  connection.on(AgentEvents.Unhandled, (data) => {
    console.dir(data, { depth: null });
  });
};

// Start the example; `void` marks the returned promise as intentionally not awaited.
void agent();
11 changes: 11 additions & 0 deletions src/DeepgramClient.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DeepgramVersionError } from "./lib/errors";
import {
AbstractClient,
AgentLiveClient,
ListenClient,
ManageClient,
ReadClient,
Expand Down Expand Up @@ -80,6 +81,16 @@ export default class DeepgramClient extends AbstractClient {
return new SpeakClient(this.options);
}

/**
 * Creates a client for Deepgram's Voice Agent API.
 *
 * @param endpoint - Endpoint handed to the `AgentLiveClient` constructor
 *   together with this client's options. Defaults to `"/agent"`.
 * @returns A freshly constructed `AgentLiveClient`.
 * @beta
 */
public agent(endpoint: string = "/agent"): AgentLiveClient {
  const agentClient = new AgentLiveClient(this.options, endpoint);
  return agentClient;
}

/**
* @deprecated
* @see https://dpgr.am/js-v3
Expand Down
9 changes: 9 additions & 0 deletions src/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const DEFAULT_HEADERS = {
};

export const DEFAULT_URL = "https://api.deepgram.com";
/** Default WebSocket URL for the agent namespace (used by `DEFAULT_AGENT_OPTIONS`). */
export const DEFAULT_AGENT_URL = "wss://agent.deepgram.com";

export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
Expand All @@ -44,8 +45,16 @@ export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
},
};

/**
 * Default options for the `agent` namespace.
 *
 * The `fetch` settings use `DEFAULT_URL` and `DEFAULT_HEADERS`; the
 * `websocket` settings point at `DEFAULT_AGENT_URL` and pass
 * `DEFAULT_HEADERS` as `_nodeOnlyHeaders`.
 */
export const DEFAULT_AGENT_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
websocket: {
options: { url: DEFAULT_AGENT_URL, _nodeOnlyHeaders: DEFAULT_HEADERS },
},
};

/**
 * Default client options: `global` defaults plus the `agent` namespace
 * defaults, which carry their own websocket URL.
 */
export const DEFAULT_OPTIONS: DefaultClientOptions = {
global: DEFAULT_GLOBAL_OPTIONS,
agent: DEFAULT_AGENT_OPTIONS,
};

export enum SOCKET_STATES {
Expand Down
78 changes: 78 additions & 0 deletions src/lib/enums/AgentEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
 * Names of the events used with the Voice Agent connection.
 *
 * Each member's string value is identical to its name. Where a member's
 * comment shows an object literal, that is the documented shape of the
 * message associated with the event.
 */
export enum AgentEvents {
/**
* Built in socket events.
*/
Open = "Open",
Close = "Close",
Error = "Error",
/**
* Emitted with audio data as its payload.
* NOTE(review): presumably a chunk of the agent's spoken response — confirm
* against the AgentLiveClient implementation.
*/
Audio = "Audio",
/**
* Confirms the successful connection to the websocket.
* { type: "Welcome", session_id: "String"}
*/
Welcome = "Welcome",
/**
* Confirms that your `configure` request was successful.
* { type: "SettingsApplied" }
*/
SettingsApplied = "SettingsApplied",
/**
* Triggered when the agent "hears" the user say something.
* { type: "ConversationText", role: string, content: string }
*/
ConversationText = "ConversationText",
/**
* Triggered when the agent begins receiving user audio.
* { type: "UserStartedSpeaking" }
*/
UserStartedSpeaking = "UserStartedSpeaking",
/**
* Triggered when the user has stopped speaking and the agent is processing the audio.
* { type: "AgentThinking", content: string }
*/
AgentThinking = "AgentThinking",
/**
* A request to call client-side functions.
* { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
*/
FunctionCallRequest = "FunctionCallRequest",
/**
* Debug message triggered when the agent is calling a function.
* { type: "FunctionCalling" }
*/
FunctionCalling = "FunctionCalling",
/**
* Triggered when the agent begins streaming an audio response.
* { type: "AgentStartedSpeaking", total_latency: number, tts_latency: number, ttt_latency: number }
*/
AgentStartedSpeaking = "AgentStartedSpeaking",
/**
* Triggered when the agent has finished streaming an audio response.
* { type: "AgentAudioDone" }
*/
AgentAudioDone = "AgentAudioDone",
/**
* This event is only emitted when you send an `InjectAgentMessage` request while
* the user is currently speaking or the server is processing user audio.
* { type: "InjectionRefused" }
*/
InjectionRefused = "InjectionRefused",
/**
* A successful response to the `UpdateInstructions` request.
* { type: "InstructionsUpdated" }
*/
InstructionsUpdated = "InstructionsUpdated",
/**
* A successful response to the `UpdateSpeak` request.
* { type: "SpeakUpdated" }
*/
SpeakUpdated = "SpeakUpdated",

/**
* Catch all for any other message event
*/
Unhandled = "Unhandled",
}
1 change: 1 addition & 0 deletions src/lib/enums/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from "./AgentEvents";
export * from "./LiveConnectionState";
export * from "./LiveTranscriptionEvents";
export * from "./LiveTTSEvents";
Loading