deepgram · naomi-lgbt · Feb 3, 2025 · Oct 19, 2024 · Jan 29, 2025 · Jan 30, 2025
@@ -0,0 +1,2 @@
+chatlog.txt
+output-*.wav
@@ -0,0 +1,110 @@
+const { writeFile, appendFile } = require("fs/promises");
+const { createClient, AgentEvents } = require("../../dist/main/index");
+const fetch = require("cross-fetch");
+const { join } = require("path");
+
+const deepgram = createClient(process.env.DEEPGRAM_API_KEY);
+
+const agent = async () => {
+  let audioBuffer = Buffer.alloc(0);
+  let i = 0;
+  const url = "https://dpgr.am/spacewalk.wav";
+  const connection = deepgram.agent();
+  connection.on(AgentEvents.Welcome, () => {
+    console.log("Welcome to the Deepgram Voice Agent!");
+
+    connection.configure({
+      audio: {
+        input: {
+          encoding: "linear16",
+          sampleRate: 44100,
+        },
+        output: {
+          encoding: "linear16",
+          sampleRate: 16000,
+          container: "wav",
+        },
+      },
+      agent: {
+        listen: {
+          model: "nova-2",
+        },
+        speak: {
+          model: "aura-asteria-en",
+        },
+        think: {
+          provider: {
+            type: "open_ai",
+          },
+          model: "gpt-4o-mini",
+        },
+      },
+    });
+
+    console.log("Deepgram agent configured!");
+
+    setInterval(() => {
+      console.log("Keep alive!");
+      connection.keepAlive();
+    }, 5000);
+
+    fetch(url)
+      .then((r) => r.body)
+      .then((res) => {
+        res.on("readable", () => {
+          console.log("Sending audio chunk");
+          connection.send(res.read());
+        });
+      });
+  });
+
+  connection.on(AgentEvents.Open, () => {
+    console.log("Connection opened");
+  });
+
+  connection.on(AgentEvents.Close, () => {
+    console.log("Connection closed");
+    process.exit(0);
+  });
+
+  connection.on(AgentEvents.ConversationText, async (data) => {
+    await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
+  });
+
+  connection.on(AgentEvents.UserStartedSpeaking, () => {
+    if (audioBuffer.length) {
+      console.log("Interrupting agent.");
+      audioBuffer = Buffer.alloc(0);
+    }
+  });
+
+  connection.on(AgentEvents.Metadata, (data) => {
+    console.dir(data, { depth: null });
+  });
+
+  connection.on(AgentEvents.Audio, (data) => {
+    console.log("Audio chunk received");
+    // Concatenate the audio chunks into a single buffer
+    const buffer = Buffer.from(data);
+    audioBuffer = Buffer.concat([audioBuffer, buffer]);
+  });
+
+  connection.on(AgentEvents.Error, (err) => {
+    console.error("Error!");
+    console.error(JSON.stringify(err, null, 2));
+    console.error(err.message);
+  });
+
+  connection.on(AgentEvents.AgentAudioDone, async () => {
+    console.log("Agent audio done");
+    await writeFile(join(__dirname, `output-${i}.wav`), audioBuffer);
+    audioBuffer = Buffer.alloc(0);
+    i++;
+  });
+
+  connection.on(AgentEvents.Unhandled, (data) => {
+    console.dir(data, { depth: null });
+  });
+};
+
+void agent();
@@ -1,6 +1,7 @@
 import { DeepgramVersionError } from "./lib/errors";
 import {
   AbstractClient,
+  AgentLiveClient,
   ListenClient,
   ManageClient,
   ReadClient,
@@ -80,6 +81,16 @@ export default class DeepgramClient extends AbstractClient {
     return new SpeakClient(this.options);
   }
 
+  /**
+   * Creates a client for Deepgram's Voice Agent API.
+   *
+   * Each call constructs a fresh AgentLiveClient from this client's options.
+   *
+   * @param {string} endpoint - Endpoint passed through to the AgentLiveClient. Defaults to "/agent".
+   * @returns {AgentLiveClient} A new instance of the AgentLiveClient.
+   * @beta
+   */
+  public agent(endpoint: string = "/agent"): AgentLiveClient {
+    const agentClient = new AgentLiveClient(this.options, endpoint);
+    return agentClient;
+  }
+
   /**
    * @deprecated
    * @see https://dpgr.am/js-v3

@@ -36,6 +36,7 @@ export const DEFAULT_HEADERS = {
 };
 
 export const DEFAULT_URL = "https://api.deepgram.com";
+export const DEFAULT_AGENT_URL = "wss://agent.deepgram.com";
 
 export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
   fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
@@ -44,8 +45,16 @@ export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
   },
 };
 
+/**
+ * Default namespace options registered under the `agent` key of the client
+ * defaults. The fetch transport keeps DEFAULT_URL, while the websocket
+ * transport targets DEFAULT_AGENT_URL; both carry DEFAULT_HEADERS.
+ */
+export const DEFAULT_AGENT_OPTIONS: Partial<DefaultNamespaceOptions> = {
+  fetch: {
+    options: {
+      url: DEFAULT_URL,
+      headers: DEFAULT_HEADERS,
+    },
+  },
+  websocket: {
+    options: {
+      url: DEFAULT_AGENT_URL,
+      _nodeOnlyHeaders: DEFAULT_HEADERS,
+    },
+  },
+};
+
 export const DEFAULT_OPTIONS: DefaultClientOptions = {
   global: DEFAULT_GLOBAL_OPTIONS,
+  agent: DEFAULT_AGENT_OPTIONS,
 };
 
 export enum SOCKET_STATES {

@@ -0,0 +1,78 @@
+export enum AgentEvents {
+  /**
+   * Built-in socket events: connection opened, connection closed, and error.
+   */
+  Open = "Open",
+  Close = "Close",
+  Error = "Error",
+  /**
+   * Audio data received from the agent.
+   * NOTE(review): unlike the events below, no JSON payload shape is documented here;
+   * presumably this carries a raw binary audio chunk — confirm against the client implementation.
+   */
+  Audio = "Audio",
+  /**
+   * Confirms the successful connection to the websocket.
+   * { type: "Welcome", session_id: "String"}
+   */
+  Welcome = "Welcome",
+  /**
+   * Confirms that your `configure` request was successful.
+   * { type: "SettingsApplied" }
+   */
+  SettingsApplied = "SettingsApplied",
+  /**
+   * Triggered when the agent "hears" the user say something.
+   * NOTE(review): the `role` field suggests this may also be sent for the agent's own text — confirm.
+   * { type: "ConversationText", role: string, content: string }
+   */
+  ConversationText = "ConversationText",
+  /**
+   * Triggered when the agent begins receiving user audio.
+   * { type: "UserStartedSpeaking" }
+   */
+  UserStartedSpeaking = "UserStartedSpeaking",
+  /**
+   * Triggered when the user has stopped speaking and the agent is processing the audio.
+   * { type: "AgentThinking", content: string }
+   */
+  AgentThinking = "AgentThinking",
+  /**
+   * A request to call client-side functions.
+   * { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
+   */
+  FunctionCallRequest = "FunctionCallRequest",
+  /**
+   * Debug message triggered when the agent is calling a function.
+   * { type: "FunctionCalling" }
+   */
+  FunctionCalling = "FunctionCalling",
+  /**
+   * Triggered when the agent begins streaming an audio response.
+   * { type: "AgentStartedSpeaking", total_latency: number, tts_latency: number, ttt_latency: number }
+   */
+  AgentStartedSpeaking = "AgentStartedSpeaking",
+  /**
+   * Triggered when the agent has finished streaming an audio response.
+   * { type: "AgentAudioDone" }
+   */
+  AgentAudioDone = "AgentAudioDone",
+  /**
+   * This event is only emitted when you send an `InjectAgentMessage` request while
+   * the user is currently speaking or the server is processing user audio.
+   * { type: "InjectionRefused" }
+   */
+  InjectionRefused = "InjectionRefused",
+  /**
+   * A successful response to the `UpdateInstructions` request.
+   * { type: "InstructionsUpdated" }
+   */
+  InstructionsUpdated = "InstructionsUpdated",
+  /**
+   * A successful response to the `UpdateSpeak` request.
+   * { type: "SpeakUpdated" }
+   */
+  SpeakUpdated = "SpeakUpdated",
+
+  /**
+   * Catch-all for any message event that does not match one of the members above.
+   */
+  Unhandled = "Unhandled",
+}
@@ -1,3 +1,4 @@
+export * from "./AgentEvents";
 export * from "./LiveConnectionState";
 export * from "./LiveTranscriptionEvents";
 export * from "./LiveTTSEvents";