Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: release voice agent #357

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions examples/browser-agent-live/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<!--
WARNING: This example is currently non-functional. You may encounter issues
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved
with browser support during the beta release of the Voice Agent API.
-->
<!DOCTYPE html>
<html>
<head>
<script src="../../dist/umd/deepgram.js"></script>
</head>
<body>
Running test... check the developer console.
<button type="button">Start</button>
</body>
<script>
const { createClient, AgentEvents } = deepgram;
const _deepgram = createClient("put yo key here");
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

const audioContext = new AudioContext();

console.log("Deepgram Instance: ", _deepgram);
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

(async () => {
const connection = _deepgram.agent();
connection.on(AgentEvents.Welcome, () => {
console.log("WS Connected");
});
connection.on(AgentEvents.Open, async () => {
console.log("Connection opened");

await connection.configure({
audio: {
input: {
encoding: "opus",
container: "ogg",
},
output: {
encoding: "linear16",
bitrate: 48000,
container: "none",
},
},
agent: {
listen: {
model: "nova-2",
},
speak: {
model: "aura-asteria-en",
},
think: {
provider: {
type: "anthropic",
},
model: "claude-3-haiku-20240307",
},
},
});
jpvajda marked this conversation as resolved.
Show resolved Hide resolved
console.log("Deepgram Agent configured.");

setInterval(() => {
console.log("Keep alive!");
void connection.keepAlive();
}, 5000);
});
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved

connection.on(AgentEvents.Close, () => {
console.log("Connection closed");
});

connection.on(AgentEvents.UserStartedSpeaking, () => {
console.log("Interrupting agent.");
});

connection.on(AgentEvents.AgentThinking, () => {
console.log("Agent thinking.");
});

connection.on(AgentEvents.AgentStartedSpeaking, () => {
console.log("Agent started speaking.");
});

connection.on(AgentEvents.ConversationText, (data) => {
console.log(JSON.stringify(data, null, 2));
});

connection.on(AgentEvents.Metadata, (data) => {
console.dir(data);
});

connection.on(AgentEvents.Audio, async (data) => {
console.log("Playing audio.");
const audioBuffer = await audioContext.decodeAudioData(data);
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
source.start();
});

connection.on(AgentEvents.Error, (err) => {
console.error("Error!");
console.error(err);
console.error(err.message);
});

connection.on(AgentEvents.AgentAudioDone, async () => {
console.log("Agent audio done.");
});

connection.on(AgentEvents.Unhandled, (data) => {
console.dir(data);
});

const media = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: 48000,
channelCount: 1,
echoCancellation: true,
autoGainControl: true,
noiseSuppression: false,
},
video: false,
});
const mic = new MediaRecorder(media, { mimeType: "audio/ogg" });
const btn = document.querySelector("button");
console.log(btn);
btn.addEventListener("click", (event) => {
if (mic.state === "recording") {
mic.stop();
event.target.innerText = "Start";
} else {
mic.start();
event.target.innerText = "Stop";
}
});

mic.onerror = (event) => {
console.error("Microphone Error:", event.error);
};

mic.ondataavailable = async (event) => {
console.log(mic.mimeType);
console.log("Data available.");
await connection.send(event.data);
};
naomi-lgbt marked this conversation as resolved.
Show resolved Hide resolved
})();
// ...
</script>
</html>
2 changes: 2 additions & 0 deletions examples/node-agent-live/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chatlog.txt
output-*.wav
110 changes: 110 additions & 0 deletions examples/node-agent-live/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const { writeFile, appendFile } = require("fs/promises");
const { createClient, AgentEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");
const { join } = require("path");

const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

/**
 * Streams a sample WAV file to the Deepgram Voice Agent, logs the
 * conversation to chatlog.txt, and writes each spoken agent reply to a
 * numbered output-<n>.wav file.
 */
const agent = async () => {
  // Accumulates the agent's audio until AgentAudioDone fires.
  let audioBuffer = Buffer.alloc(0);
  // Index used to name successive output files.
  let i = 0;
  const url = "https://dpgr.am/spacewalk.wav";
  const connection = deepgram.agent();

  connection.on(AgentEvents.Welcome, () => {
    console.log("Welcome to the Deepgram Voice Agent!");

    connection.configure({
      audio: {
        input: {
          encoding: "linear16",
          sampleRate: 44100,
        },
        output: {
          encoding: "linear16",
          sampleRate: 16000,
          container: "wav",
        },
      },
      agent: {
        listen: {
          model: "nova-2",
        },
        speak: {
          model: "aura-asteria-en",
        },
        think: {
          provider: {
            type: "open_ai",
          },
          model: "gpt-4o-mini",
        },
      },
    });

    console.log("Deepgram agent configured!");

    setInterval(() => {
      console.log("Keep alive!");
      connection.keepAlive();
    }, 5000);

    fetch(url)
      .then((r) => r.body)
      .then((res) => {
        res.on("readable", () => {
          // read() returns null once the internal buffer is drained;
          // only forward real chunks to the socket.
          const chunk = res.read();
          if (chunk) {
            console.log("Sending audio chunk");
            connection.send(chunk);
          }
        });
      })
      .catch((err) => {
        console.error("Failed to fetch sample audio:", err);
      });
  });

  connection.on(AgentEvents.Open, () => {
    console.log("Connection opened");
  });

  connection.on(AgentEvents.Close, () => {
    console.log("Connection closed");
    process.exit(0);
  });

  connection.on(AgentEvents.ConversationText, async (data) => {
    await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
  });

  connection.on(AgentEvents.UserStartedSpeaking, () => {
    // Discard any partially-buffered reply when the user barges in.
    if (audioBuffer.length) {
      console.log("Interrupting agent.");
      audioBuffer = Buffer.alloc(0);
    }
  });

  connection.on(AgentEvents.Metadata, (data) => {
    console.dir(data, { depth: null });
  });

  connection.on(AgentEvents.Audio, (data) => {
    console.log("Audio chunk received");
    // Concatenate the audio chunks into a single buffer
    const buffer = Buffer.from(data);
    audioBuffer = Buffer.concat([audioBuffer, buffer]);
  });

  connection.on(AgentEvents.Error, (err) => {
    console.error("Error!");
    console.error(JSON.stringify(err, null, 2));
    console.error(err.message);
  });

  connection.on(AgentEvents.AgentAudioDone, async () => {
    console.log("Agent audio done");
    await writeFile(join(__dirname, `output-${i}.wav`), audioBuffer);
    audioBuffer = Buffer.alloc(0);
    i++;
  });

  connection.on(AgentEvents.Unhandled, (data) => {
    console.dir(data, { depth: null });
  });
};

void agent();
11 changes: 11 additions & 0 deletions src/DeepgramClient.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DeepgramVersionError } from "./lib/errors";
import {
AbstractClient,
AgentLiveClient,
ListenClient,
ManageClient,
ReadClient,
Expand Down Expand Up @@ -80,6 +81,16 @@ export default class DeepgramClient extends AbstractClient {
return new SpeakClient(this.options);
}

/**
 * Returns a new instance of the AgentLiveClient, which provides access to Deepgram's Voice Agent API.
 *
 * @param {string} [endpoint] - Websocket path for the agent service; defaults to "/agent".
 * @returns {AgentLiveClient} A new instance of the AgentLiveClient.
 * @beta
 */
public agent(endpoint: string = "/agent"): AgentLiveClient {
  const client = new AgentLiveClient(this.options, endpoint);
  return client;
}

/**
* @deprecated
* @see https://dpgr.am/js-v3
Expand Down
9 changes: 9 additions & 0 deletions src/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const DEFAULT_HEADERS = {
};

export const DEFAULT_URL = "https://api.deepgram.com";
// The Voice Agent API is served from a dedicated websocket host, separate
// from the main REST API host above.
export const DEFAULT_AGENT_URL = "wss://agent.deepgram.com";

export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
Expand All @@ -44,8 +45,16 @@ export const DEFAULT_GLOBAL_OPTIONS: Partial<DefaultNamespaceOptions> = {
},
};

// Namespace defaults for the Voice Agent client: REST calls still target the
// main API host, while websocket traffic targets the dedicated agent host.
export const DEFAULT_AGENT_OPTIONS: Partial<DefaultNamespaceOptions> = {
fetch: { options: { url: DEFAULT_URL, headers: DEFAULT_HEADERS } },
websocket: {
options: { url: DEFAULT_AGENT_URL, _nodeOnlyHeaders: DEFAULT_HEADERS },
},
};

// Top-level defaults; the `agent` namespace overrides the global websocket
// host for Voice Agent connections.
export const DEFAULT_OPTIONS: DefaultClientOptions = {
global: DEFAULT_GLOBAL_OPTIONS,
agent: DEFAULT_AGENT_OPTIONS,
};

export enum SOCKET_STATES {
Expand Down
78 changes: 78 additions & 0 deletions src/lib/enums/AgentEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
 * Events emitted by the Voice Agent websocket connection. Members whose
 * comments show a shape describe the JSON payload delivered with the event.
 */
export enum AgentEvents {
/**
 * Built in socket events.
 */
Open = "Open",
Close = "Close",
Error = "Error",
/**
 * A raw audio payload streamed from the agent.
 * NOTE(review): payload format presumably follows the configured output
 * encoding/container — confirm against the Voice Agent API docs.
 */
Audio = "Audio",
/**
 * Confirms the successful connection to the websocket.
 * { type: "Welcome", session_id: "String"}
 */
Welcome = "Welcome",
/**
 * Confirms that your `configure` request was successful.
 * { type: "SettingsApplied" }
 */
SettingsApplied = "SettingsApplied",
/**
 * Triggered when the agent "hears" the user say something.
 * { type: "ConversationText", role: string, content: string }
 */
ConversationText = "ConversationText",
/**
 * Triggered when the agent begins receiving user audio.
 * { type: "UserStartedSpeaking" }
 */
UserStartedSpeaking = "UserStartedSpeaking",
/**
 * Triggered when the user has stopped speaking and the agent is processing the audio.
 * { type: "AgentThinking", content: string }
 */
AgentThinking = "AgentThinking",
/**
 * A request to call client-side functions.
 * { type: "FunctionCallRequest", function_call_id: string, function_name: string, input: Record<string, any> }
 */
FunctionCallRequest = "FunctionCallRequest",
/**
 * Debug message triggered when the agent is calling a function.
 * { type: "FunctionCalling" }
 */
FunctionCalling = "FunctionCalling",
/**
 * Triggered when the agent begins streaming an audio response.
 * { type: "AgentStartedSpeaking", total_latency: number, tts_latency: number, ttt_latency: number }
 */
AgentStartedSpeaking = "AgentStartedSpeaking",
/**
 * Triggered when the agent has finished streaming an audio response.
 * { type: "AgentAudioDone" }
 */
AgentAudioDone = "AgentAudioDone",
/**
 * This event is only emitted when you send an `InjectAgentMessage` request while
 * the user is currently speaking or the server is processing user audio.
 * { type: "InjectionRefused" }
 */
InjectionRefused = "InjectionRefused",
/**
 * A successful response to the `UpdateInstructions` request.
 * { type: "InstructionsUpdated" }
 */
InstructionsUpdated = "InstructionsUpdated",
/**
 * A successful response to the `UpdateSpeak` request.
 * { type: "SpeakUpdated" }
 */
SpeakUpdated = "SpeakUpdated",

/**
 * Catch all for any other message event
 */
Unhandled = "Unhandled",
}
1 change: 1 addition & 0 deletions src/lib/enums/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from "./AgentEvents";
export * from "./LiveConnectionState";
export * from "./LiveTranscriptionEvents";
export * from "./LiveTTSEvents";
Loading
Loading