Commit

Add tts-to-webrtc example
tyohan committed Feb 2, 2025
1 parent addb95a commit 884c01a
Showing 3 changed files with 572 additions and 0 deletions.
79 changes: 79 additions & 0 deletions tts-to-webrtc/README.md
@@ -0,0 +1,79 @@
# WebRTC Text-to-Speech Example

This is an example app that combines WebRTC with OpenAI's Text-to-Speech API to stream synthesized speech to the browser in real time.

## Prerequisites

- Go 1.20 or later
- An OpenAI API key
- Web browser with WebRTC support (Chrome, Firefox, Safari, etc.)

## Installation

1. Clone the repository:
```bash
git clone https://github.com/pion/example-webrtc-applications.git
cd example-webrtc-applications/tts-to-webrtc
```

2. Install module dependencies:

The [resampler](https://github.com/dh1tw/gosamplerate) and [Opus encoder](https://github.com/hraban/opus) packages use cgo and need their native libraries installed. Follow the instructions below for your platform.

Linux, using apt (Ubuntu/Debian); package names may differ with yum (CentOS) and other package managers:
```bash
sudo apt install libsamplerate0-dev pkg-config libopus-dev libopusfile-dev
```

macOS, using Homebrew:
```bash
brew install libsamplerate pkg-config opus opusfile
```

3. Install the example and its Go dependencies (the native libraries above must be installed first):
```bash
go install github.com/pion/example-webrtc-applications/v4/tts-to-webrtc@latest
```

## Configuration

Set your OpenAI API key as an environment variable:

```bash
export OPENAI_API_KEY=your_api_key_here
```
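
The server's `main.go` is not shown in this excerpt, but it presumably reads this variable at startup. A minimal Go sketch of such a check (purely illustrative, not the example's actual code):

```go
package main

import (
	"log"
	"os"
)

func main() {
	// Fail early with a clear message if the key is missing,
	// rather than letting the first TTS request error out.
	key := os.Getenv("OPENAI_API_KEY")
	if key == "" {
		log.Fatal("OPENAI_API_KEY is not set")
	}
	log.Printf("OpenAI API key loaded (%d characters)", len(key))
}
```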

## Running the Application

1. Start the server from the `tts-to-webrtc` directory:
```bash
go run main.go
```

2. Open your web browser and navigate to:
```
http://localhost:8080
```

## Usage

1. Click the "Connect" button to establish a WebRTC connection
2. Wait for the connection status to show "connected"
3. Type some text in the textarea
4. Click "Convert to Speech" to hear the text being spoken (the page posts the text to the server's `/tts` endpoint; a minimal Go client for that request is sketched below)
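
The `/tts` path and the `{"text": ...}` payload shape are taken from `index.html`; what the handler returns is not shown in this diff, so the hedged Go sketch below just sends the same request and reports the response size:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Same JSON body the browser sends: a single "text" field.
	body, err := json.Marshal(map[string]string{"text": "Hello from Go"})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:8080/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The response format is not documented here, so just report its size.
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("status %s, %d bytes in response\n", resp.Status, len(data))
}
```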

## Technical Details

- The application uses OpenAI's Text-to-Speech API to convert text to speech
- Audio is streamed to the browser over WebRTC using the Opus codec
- Sample rate conversion from the TTS output (24 kHz) to the 48 kHz expected by Opus is handled automatically
- The server implements a simple audio buffer to handle streaming (a rough sketch of this pipeline follows below)
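
`main.go` itself is not included in this excerpt, so the following is only a rough sketch of how such a pipeline could look with the packages referenced above (gosamplerate, hraban/opus, and Pion's sample writer). The names `newOpusTrack` and `pushPCM`, the 20 ms framing, and the buffering scheme are assumptions for illustration, not the example's actual code:

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/dh1tw/gosamplerate"
	"github.com/pion/webrtc/v4"
	"github.com/pion/webrtc/v4/pkg/media"
	"gopkg.in/hraban/opus.v2"
)

const frameSize = 960 // 20 ms of mono audio at 48 kHz

// newOpusTrack creates the sampled audio track (to be added to the peer
// connection with pc.AddTrack) and an Opus encoder matching it.
func newOpusTrack() (*webrtc.TrackLocalStaticSample, *opus.Encoder, error) {
	track, err := webrtc.NewTrackLocalStaticSample(
		webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeOpus}, "audio", "tts")
	if err != nil {
		return nil, nil, err
	}
	enc, err := opus.NewEncoder(48000, 1, opus.AppVoIP)
	if err != nil {
		return nil, nil, err
	}
	return track, enc, nil
}

// pushPCM takes one chunk of mono 24 kHz float32 PCM (decoded TTS audio),
// resamples it to 48 kHz, appends it to the pending buffer, and writes out
// every complete 20 ms Opus frame. Leftover samples stay buffered for the
// next chunk, which is the "simple audio buffer" role described above.
func pushPCM(track *webrtc.TrackLocalStaticSample, enc *opus.Encoder,
	pending, pcm24k []float32) ([]float32, error) {
	// 24 kHz -> 48 kHz, mono, via libsamplerate (cgo).
	pcm48k, err := gosamplerate.Simple(pcm24k, 2.0, 1, gosamplerate.SRC_SINC_FASTEST)
	if err != nil {
		return pending, fmt.Errorf("resample: %w", err)
	}
	pending = append(pending, pcm48k...)

	packet := make([]byte, 4000) // comfortably larger than a single Opus frame
	for len(pending) >= frameSize {
		n, err := enc.EncodeFloat32(pending[:frameSize], packet)
		if err != nil {
			return pending, fmt.Errorf("opus encode: %w", err)
		}
		pending = pending[frameSize:]

		// Duration tells Pion how far to advance the RTP timestamp.
		if err := track.WriteSample(media.Sample{Data: packet[:n], Duration: 20 * time.Millisecond}); err != nil {
			return pending, fmt.Errorf("write sample: %w", err)
		}
	}
	return pending, nil
}

func main() {
	track, enc, err := newOpusTrack()
	if err != nil {
		log.Fatal(err)
	}
	// Smoke test: push 100 ms of silence through the pipeline. In the real
	// server the chunks come from the decoded TTS response and the track is
	// attached to an active peer connection.
	silence := make([]float32, 2400) // 100 ms at 24 kHz
	if _, err := pushPCM(track, enc, nil, silence); err != nil {
		log.Fatal(err)
	}
}
```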



## License

This project is licensed under the MIT License - see the LICENSE file for details.
172 changes: 172 additions & 0 deletions tts-to-webrtc/index.html
@@ -0,0 +1,172 @@
<!DOCTYPE html>
<html>
<head>
<title>WebRTC TTS Demo</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.form-group {
margin-bottom: 15px;
}
textarea {
width: 100%;
padding: 8px;
}
button {
padding: 10px 20px;
background-color: #4CAF50;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
button:hover {
background-color: #45a049;
}
button.connected {
background-color: #dc3545;
}
button.connected:hover {
background-color: #c82333;
}
</style>
</head>
<body>
<h1>Text to Speech with WebRTC</h1>
<div id="connectionStatus" style="margin-bottom: 20px; padding: 10px; background-color: #f0f0f0; border-radius: 4px;">
<div>Connection State: <span id="connectionState">new</span></div>
<div>ICE Connection: <span id="iceConnectionState">new</span></div>
<div>Signaling State: <span id="signalingState">new</span></div>
</div>
<div class="form-group">
<button id="connectButton" onclick="toggleConnection()">Connect</button>
</div>
<div class="form-group">
<textarea id="textInput" rows="4" placeholder="Enter text to convert to speech"></textarea>
</div>
<div class="form-group">
<button id="convertButton" onclick="submitText()" disabled>Convert to Speech</button>
</div>

<script>
let pc;
let isConnected = false;

async function toggleConnection() {
const connectButton = document.getElementById('connectButton');
if (!isConnected) {
connectButton.disabled = true;
await initWebRTC();
} else {
if (pc) {
pc.close();
pc = null;
}
document.getElementById('convertButton').disabled = true;
document.getElementById('connectionState').textContent = 'new';
document.getElementById('iceConnectionState').textContent = 'new';
document.getElementById('signalingState').textContent = 'new';
connectButton.textContent = 'Connect';
connectButton.classList.remove('connected');
isConnected = false;
}
}

async function initWebRTC() {
pc = new RTCPeerConnection({
iceServers: [{
urls: 'stun:stun.l.google.com:19302'
}]
});

// Add connection state monitoring
pc.onconnectionstatechange = () => {
const state = pc.connectionState;
const connectButton = document.getElementById('connectButton');
document.getElementById('connectionState').textContent = state;
if (state === 'connected') {
document.getElementById('convertButton').disabled = false;
connectButton.disabled = false;
connectButton.textContent = 'Disconnect';
connectButton.classList.add('connected');
isConnected = true;
} else {
document.getElementById('convertButton').disabled = true;
}
};

pc.oniceconnectionstatechange = () => {
document.getElementById('iceConnectionState').textContent = pc.iceConnectionState;
};

pc.onsignalingstatechange = () => {
document.getElementById('signalingState').textContent = pc.signalingState;
};

pc.ontrack = function(event) {
const audio = new Audio();
audio.srcObject = event.streams[0];
audio.play();
};

// Create promise to wait for ICE gathering
const iceCandidatesComplete = new Promise((resolve) => {
pc.onicegatheringstatechange = () => {
if (pc.iceGatheringState === 'complete') {
resolve();
}
};
});

pc.addTransceiver('audio', { direction: 'recvonly' });

const offer = await pc.createOffer({
offerToReceiveAudio: true
});
await pc.setLocalDescription(offer);

// Wait for ICE gathering to complete
await iceCandidatesComplete;

const response = await fetch('/webrtc', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
sdp: JSON.stringify(pc.localDescription)
})
});

const answer = await response.json();
await pc.setRemoteDescription(new RTCSessionDescription(answer));
}

async function submitText() {
const text = document.getElementById('textInput').value;
if (!text) return;

const response = await fetch('/tts', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
text: text
})
});

const audioBlob = await response.blob();
const audio = new Audio(URL.createObjectURL(audioBlob));
audio.play();
}

</script>
</body>
</html>
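
The `initWebRTC()` function above signals by POSTing the offer to `/webrtc`, with the offer JSON-stringified inside the `sdp` field and the answer expected back as a plain JSON object. The server's `main.go` is not part of this excerpt; the Pion-based handler below is only a hedged sketch of what the matching server side could look like (handler name, routing, and the omitted track setup are assumptions):

```go
package main

import (
	"encoding/json"
	"log"
	"net/http"

	"github.com/pion/webrtc/v4"
)

// handleWebRTC mirrors what index.html sends: the request body is
// {"sdp": "<JSON-encoded RTCSessionDescription>"} and the response is
// the answer session description as a JSON object.
func handleWebRTC(w http.ResponseWriter, r *http.Request) {
	var req struct {
		SDP string `json:"sdp"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// The sdp field itself is a JSON-encoded session description, because
	// the page does JSON.stringify(pc.localDescription).
	var offer webrtc.SessionDescription
	if err := json.Unmarshal([]byte(req.SDP), &offer); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	pc, err := webrtc.NewPeerConnection(webrtc.Configuration{})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// The real server would add the Opus audio track here (pc.AddTrack)
	// and keep the peer connection around for later /tts requests.

	if err := pc.SetRemoteDescription(offer); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	answer, err := pc.CreateAnswer(nil)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Block until ICE gathering finishes so the answer carries all candidates,
	// mirroring the client, which also waits for its own gathering to complete.
	gatherComplete := webrtc.GatheringCompletePromise(pc)
	if err := pc.SetLocalDescription(answer); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	<-gatherComplete

	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(pc.LocalDescription())
}

func main() {
	http.HandleFunc("/webrtc", handleWebRTC)
	http.Handle("/", http.FileServer(http.Dir(".")))
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```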