Skip to content

Commit 884c01a

Browse files
committed
Add tts-to-webrtc example
1 parent addb95a commit 884c01a

File tree

3 files changed

+572
-0
lines changed

3 files changed

+572
-0
lines changed

tts-to-webrtc/README.md

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# WebRTC Text-to-Speech Example
2+
3+
This is an example app that combines WebRTC with OpenAI's Text-to-Speech API to stream audio in real-time.
4+
5+
## Prerequisites
6+
7+
- Go 1.20 or later
8+
- An OpenAI API key
9+
- Web browser with WebRTC support (Chrome, Firefox, Safari, etc.)
10+
11+
## Installation
12+
13+
1. Clone the repository:
14+
```bash
15+
git clone https://github.com/pion/example-webrtc-applications
16+
cd example-webrtc-applications/tts-to-webrtc
17+
```
18+
19+
2. Install module dependencies:
20+
21+
The [resampler](https://github.com/dh1tw/gosamplerate) and [Opus encoder](https://github.com/hraban/opus) packages use cgo and require native libraries to be installed. Follow the instructions below to install the required packages.
22+
23+
Linux:
24+
using apt (Ubuntu), yum (Centos)...etc.
25+
```bash
26+
$ sudo apt install libsamplerate0 pkg-config libopus-dev libopusfile-dev
27+
```
28+
29+
MacOS
30+
using Homebrew:
31+
```bash
32+
$ brew install libsamplerate pkg-config opus opusfile
33+
```
34+
35+
3. Install Go dependencies:
36+
```bash
37+
export GO111MODULE=on
38+
go install github.com/pion/example-webrtc-applications/v4/tts-to-webrtc@latest
39+
```
40+
41+
## Configuration
42+
43+
Set your OpenAI API key as an environment variable:
44+
45+
```bash
46+
export OPENAI_API_KEY=your_api_key_here
47+
```
48+
49+
## Running the Application
50+
51+
1. Start the server:
52+
```bash
53+
go run main.go
54+
```
55+
56+
2. Open your web browser and navigate to:
57+
```
58+
http://localhost:8080
59+
```
60+
61+
## Usage
62+
63+
1. Click the "Connect" button to establish a WebRTC connection
64+
2. Wait for the connection status to show "connected"
65+
3. Type some text in the textarea
66+
4. Click "Convert to Speech" to hear the text being spoken
67+
68+
## Technical Details
69+
70+
- The application uses OpenAI's TTS API to convert text to speech
71+
- Audio is streamed using WebRTC with Opus codec
72+
- Sample rate conversion is handled automatically (24kHz to 48kHz)
73+
- The server implements a simple audio buffer to handle streaming
74+
75+
76+
77+
## License
78+
79+
This project is licensed under the MIT License - see the LICENSE file for details.

tts-to-webrtc/index.html

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<title>WebRTC TTS Demo</title>
5+
<meta name="viewport" content="width=device-width, initial-scale=1">
6+
<style>
7+
body {
8+
font-family: Arial, sans-serif;
9+
max-width: 800px;
10+
margin: 0 auto;
11+
padding: 20px;
12+
}
13+
.form-group {
14+
margin-bottom: 15px;
15+
}
16+
textarea {
17+
width: 100%;
18+
padding: 8px;
19+
}
20+
button {
21+
padding: 10px 20px;
22+
background-color: #4CAF50;
23+
color: white;
24+
border: none;
25+
border-radius: 4px;
26+
cursor: pointer;
27+
}
28+
button:hover {
29+
background-color: #45a049;
30+
}
31+
button.connected {
32+
background-color: #dc3545;
33+
}
34+
button.connected:hover {
35+
background-color: #c82333;
36+
}
37+
</style>
38+
</head>
39+
<body>
40+
<h1>Text to Speech with WebRTC</h1>
41+
<div id="connectionStatus" style="margin-bottom: 20px; padding: 10px; background-color: #f0f0f0; border-radius: 4px;">
42+
<div>Connection State: <span id="connectionState">new</span></div>
43+
<div>ICE Connection: <span id="iceConnectionState">new</span></div>
44+
<div>Signaling State: <span id="signalingState">new</span></div>
45+
</div>
46+
<div class="form-group">
47+
<button id="connectButton" onclick="toggleConnection()">Connect</button>
48+
</div>
49+
<div class="form-group">
50+
<textarea id="textInput" rows="4" placeholder="Enter text to convert to speech"></textarea>
51+
</div>
52+
<div class="form-group">
53+
<button id="convertButton" onclick="submitText()" disabled>Convert to Speech</button>
54+
</div>
55+
56+
<script>
57+
// Shared WebRTC state used by the UI handlers below.
// `pc` holds the active RTCPeerConnection (undefined/null when disconnected);
// `isConnected` mirrors the button/UI state.
let pc;
let isConnected = false;
59+
60+
// Toggles the WebRTC session from the Connect/Disconnect button.
// Connect path: disables the button while initWebRTC() negotiates; on failure
// the button is re-enabled so the user can retry (the original left it stuck).
// Disconnect path: closes the peer connection and resets all status displays.
async function toggleConnection() {
    const connectButton = document.getElementById('connectButton');
    if (!isConnected) {
        connectButton.disabled = true;
        try {
            await initWebRTC();
        } catch (err) {
            // Without this catch, a signaling/server failure would leave the
            // button permanently disabled and surface only as an unhandled
            // promise rejection.
            console.error('WebRTC connection failed:', err);
            connectButton.disabled = false;
        }
    } else {
        if (pc) {
            pc.close(); // RTCPeerConnection.close() is synchronous; no await needed
            pc = null;
        }
        document.getElementById('convertButton').disabled = true;
        document.getElementById('connectionState').textContent = 'new';
        document.getElementById('iceConnectionState').textContent = 'new';
        document.getElementById('signalingState').textContent = 'new';
        connectButton.textContent = 'Connect';
        connectButton.classList.remove('connected');
        isConnected = false;
    }
}
79+
80+
// Creates the RTCPeerConnection, wires connection-state indicators, performs
// the offer/answer exchange with the /webrtc endpoint, and plays the remote
// audio track the server sends. Uses non-trickle ICE: the full offer
// (including all candidates) is posted once gathering completes.
// Throws on signaling failure so the caller can restore the UI.
async function initWebRTC() {
    pc = new RTCPeerConnection({
        iceServers: [{
            urls: 'stun:stun.l.google.com:19302'
        }]
    });

    // Mirror connection state into the status panel and flip the UI into
    // "connected" mode once the transport is up.
    pc.onconnectionstatechange = () => {
        const state = pc.connectionState;
        const connectButton = document.getElementById('connectButton');
        document.getElementById('connectionState').textContent = state;
        if (state === 'connected') {
            document.getElementById('convertButton').disabled = false;
            connectButton.disabled = false;
            connectButton.textContent = 'Disconnect';
            connectButton.classList.add('connected');
            isConnected = true;
        } else {
            document.getElementById('convertButton').disabled = true;
        }
    };

    pc.oniceconnectionstatechange = () => {
        document.getElementById('iceConnectionState').textContent = pc.iceConnectionState;
    };

    pc.onsignalingstatechange = () => {
        document.getElementById('signalingState').textContent = pc.signalingState;
    };

    // Play the server's audio track as soon as it arrives.
    pc.ontrack = function(event) {
        const audio = new Audio();
        audio.srcObject = event.streams[0];
        // play() can reject under browser autoplay policies; log instead of
        // leaving an unhandled rejection.
        audio.play().catch((err) => console.warn('Audio autoplay blocked:', err));
    };

    // Resolve once ICE gathering finishes. Also resolve immediately if
    // gathering already completed before the listener was attached, so the
    // await below can never hang.
    const iceCandidatesComplete = new Promise((resolve) => {
        if (pc.iceGatheringState === 'complete') {
            resolve();
            return;
        }
        pc.onicegatheringstatechange = () => {
            if (pc.iceGatheringState === 'complete') {
                resolve();
            }
        };
    });

    // Receive-only: the browser never sends audio, only the server does.
    pc.addTransceiver('audio', { direction: 'recvonly' });

    const offer = await pc.createOffer({
        offerToReceiveAudio: true
    });
    await pc.setLocalDescription(offer);

    // Wait for ICE gathering to complete (non-trickle ICE).
    await iceCandidatesComplete;

    const response = await fetch('/webrtc', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        // The server expects the SDP as a JSON-encoded string field,
        // hence the nested stringify.
        body: JSON.stringify({
            sdp: JSON.stringify(pc.localDescription)
        })
    });
    // Surface HTTP failures explicitly instead of letting response.json()
    // fail with an opaque parse error.
    if (!response.ok) {
        throw new Error(`Signaling request failed with status ${response.status}`);
    }

    const answer = await response.json();
    await pc.setRemoteDescription(new RTCSessionDescription(answer));
}
149+
150+
// Posts the textarea contents to the /tts endpoint and plays the audio the
// server returns. No-op when the textarea is empty.
async function submitText() {
    const text = document.getElementById('textInput').value;
    if (!text) return;

    const response = await fetch('/tts', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            text: text
        })
    });
    // Don't try to play an error body as audio.
    if (!response.ok) {
        console.error('TTS request failed with status', response.status);
        return;
    }

    const audioBlob = await response.blob();
    const objectUrl = URL.createObjectURL(audioBlob);
    const audio = new Audio(objectUrl);
    // Revoke the blob URL after playback so repeated conversions don't leak
    // memory (the original never released these URLs).
    audio.onended = () => URL.revokeObjectURL(objectUrl);
    audio.play().catch((err) => console.error('Audio playback failed:', err));
}
168+
169+
// The connection is established only when the user clicks Connect;
// initWebRTC() is intentionally not called on page load.
170+
</script>
171+
</body>
172+
</html>

0 commit comments

Comments
 (0)