-
Notifications
You must be signed in to change notification settings - Fork 259
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
572 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# WebRTC Text-to-Speech Example | ||
|
||
This is an example app that combines WebRTC with OpenAI's Text-to-Speech API to stream audio in real-time. | ||
|
||
## Prerequisites | ||
|
||
- Go 1.20 or later | ||
- An OpenAI API key | ||
- Web browser with WebRTC support (Chrome, Firefox, Safari, etc.) | ||
|
||
## Installation | ||
|
||
1. Clone the repository: | ||
```bash | ||
git clone https://github.com/pion/example-webrtc-applications.git | ||
cd example-webrtc-applications/tts-to-webrtc | ||
``` | ||
|
||
2. Install module dependencies: | ||
|
||
The [Resampler](https://github.com/dh1tw/gosamplerate) and [opus encoder](https://github.com/hraban/opus) packages use cgo and depend on native libraries that must be installed first. Follow the instructions below to install the required packages. | ||
|
||
Linux: | ||
using your distribution's package manager, e.g. apt (Ubuntu) or yum (CentOS): | ||
```bash | ||
$ sudo apt install libsamplerate0 pkg-config libopus-dev libopusfile-dev | ||
``` | ||
|
||
macOS: | ||
using Homebrew: | ||
```bash | ||
$ brew install libsamplerate pkg-config opus opusfile | ||
``` | ||
|
||
3. Install Go dependencies: | ||
```bash | ||
export GO111MODULE=on | ||
go install github.com/pion/example-webrtc-applications/v4/tts-to-webrtc@latest | ||
``` | ||
|
||
## Configuration | ||
|
||
Set your OpenAI API key as an environment variable: | ||
|
||
```bash | ||
export OPENAI_API_KEY=your_api_key_here | ||
``` | ||
|
||
## Running the Application | ||
|
||
1. Start the server: | ||
```bash | ||
go run main.go | ||
``` | ||
|
||
2. Open your web browser and navigate to: | ||
``` | ||
http://localhost:8080 | ||
``` | ||
|
||
## Usage | ||
|
||
1. Click the "Connect" button to establish a WebRTC connection | ||
2. Wait for the connection status to show "connected" | ||
3. Type some text in the textarea | ||
4. Click "Convert to Speech" to hear the text being spoken | ||
|
||
## Technical Details | ||
|
||
- The application uses OpenAI's TTS API to convert text to speech | ||
- Audio is streamed using WebRTC with Opus codec | ||
- Sample rate conversion is handled automatically (24kHz to 48kHz) | ||
- The server implements a simple audio buffer to handle streaming | ||
|
||
|
||
|
||
## License | ||
|
||
This project is licensed under the MIT License - see the LICENSE file for details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
<!DOCTYPE html> | ||
<html> | ||
<head> | ||
<title>WebRTC TTS Demo</title> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
<style> | ||
body { | ||
font-family: Arial, sans-serif; | ||
max-width: 800px; | ||
margin: 0 auto; | ||
padding: 20px; | ||
} | ||
.form-group { | ||
margin-bottom: 15px; | ||
} | ||
textarea { | ||
width: 100%; | ||
padding: 8px; | ||
} | ||
button { | ||
padding: 10px 20px; | ||
background-color: #4CAF50; | ||
color: white; | ||
border: none; | ||
border-radius: 4px; | ||
cursor: pointer; | ||
} | ||
button:hover { | ||
background-color: #45a049; | ||
} | ||
button.connected { | ||
background-color: #dc3545; | ||
} | ||
button.connected:hover { | ||
background-color: #c82333; | ||
} | ||
</style> | ||
</head> | ||
<body> | ||
<h1>Text to Speech with WebRTC</h1> | ||
<div id="connectionStatus" style="margin-bottom: 20px; padding: 10px; background-color: #f0f0f0; border-radius: 4px;"> | ||
<div>Connection State: <span id="connectionState">new</span></div> | ||
<div>ICE Connection: <span id="iceConnectionState">new</span></div> | ||
<div>Signaling State: <span id="signalingState">new</span></div> | ||
</div> | ||
<div class="form-group"> | ||
<button id="connectButton" onclick="toggleConnection()">Connect</button> | ||
</div> | ||
<div class="form-group"> | ||
<textarea id="textInput" rows="4" placeholder="Enter text to convert to speech"></textarea> | ||
</div> | ||
<div class="form-group"> | ||
<button id="convertButton" onclick="submitText()" disabled>Convert to Speech</button> | ||
</div> | ||
|
||
<script> | ||
// Active RTCPeerConnection. Assigned by initWebRTC() and reset to null by
// toggleConnection() when the user disconnects.
let pc; | ||
// Set to true once the connection state reaches 'connected'; toggleConnection()
// reads it to decide between connecting and disconnecting.
let isConnected = false; | ||
|
||
/**
 * Click handler for the Connect/Disconnect button.
 *
 * While disconnected it disables the button and starts a session through
 * initWebRTC(). While connected it closes the peer connection and restores
 * the status display and buttons to their initial state.
 *
 * A failed connection attempt is logged and the UI is reset, so the user can
 * retry instead of being left with a permanently disabled button.
 *
 * @returns {Promise<void>} Resolves once the connect attempt or the teardown
 *   has finished.
 */
async function toggleConnection() {
  const connectButton = document.getElementById('connectButton');

  if (isConnected) {
    resetConnection(connectButton);
    return;
  }

  // Block repeated clicks while negotiation is in flight; the button is
  // re-enabled by the connection-state handler or by the failure path below.
  connectButton.disabled = true;
  try {
    await initWebRTC();
  } catch (err) {
    console.error('Failed to establish WebRTC connection:', err);
    resetConnection(connectButton);
  }
}

/**
 * Closes the current peer connection (if any) and puts every status label
 * and button back into the "not connected" state.
 *
 * @param {HTMLButtonElement} connectButton - The Connect/Disconnect button.
 */
function resetConnection(connectButton) {
  if (pc) {
    // close() is synchronous, so there is nothing to await.
    pc.close();
    pc = null;
  }

  document.getElementById('convertButton').disabled = true;
  for (const id of ['connectionState', 'iceConnectionState', 'signalingState']) {
    document.getElementById(id).textContent = 'new';
  }

  connectButton.textContent = 'Connect';
  connectButton.classList.remove('connected');
  connectButton.disabled = false;
  isConnected = false;
}
|
||
/**
 * Creates the peer connection and negotiates a receive-only audio session
 * with the server.
 *
 * Steps: create the RTCPeerConnection, mirror its connection / ICE /
 * signaling state into the status panel, play any incoming track, create an
 * offer, wait until ICE gathering is complete (so the posted SDP already
 * contains all candidates), POST the offer to /webrtc and apply the answer.
 *
 * On any failure the new connection is closed, the Connect button is
 * re-enabled, and the error is rethrown to the caller.
 *
 * @returns {Promise<void>} Resolves once the remote description is applied.
 * @throws {Error} If /webrtc responds with a non-2xx status, or if any
 *   WebRTC or network step fails.
 */
async function initWebRTC() {
  // Drop an earlier attempt so a retry does not leak its connection.
  if (pc) {
    pc.close();
  }

  const peer = new RTCPeerConnection({
    iceServers: [{ urls: 'stun:stun.l.google.com:19302' }],
  });
  pc = peer;

  // Handlers capture `peer` instead of reading the global `pc`, which may be
  // null or point at a newer connection by the time an event fires.
  const isCurrent = () => pc === peer;

  peer.onconnectionstatechange = () => {
    if (!isCurrent()) return;

    const state = peer.connectionState;
    const connectButton = document.getElementById('connectButton');
    const convertButton = document.getElementById('convertButton');
    document.getElementById('connectionState').textContent = state;

    if (state === 'connected') {
      convertButton.disabled = false;
      connectButton.disabled = false;
      connectButton.textContent = 'Disconnect';
      connectButton.classList.add('connected');
      isConnected = true;
      return;
    }

    convertButton.disabled = true;
    if (state === 'failed') {
      // Let the user retry (or tear down) instead of leaving the button stuck.
      connectButton.disabled = false;
    }
  };

  peer.oniceconnectionstatechange = () => {
    if (!isCurrent()) return;
    document.getElementById('iceConnectionState').textContent = peer.iceConnectionState;
  };

  peer.onsignalingstatechange = () => {
    if (!isCurrent()) return;
    document.getElementById('signalingState').textContent = peer.signalingState;
  };

  peer.ontrack = (event) => {
    const audio = new Audio();
    audio.srcObject = event.streams[0];
    // play() returns a promise that rejects e.g. when autoplay is blocked.
    audio.play().catch((err) => {
      console.warn('Unable to start playback of the remote track:', err);
    });
  };

  // Installed before setLocalDescription() so the 'complete' transition
  // cannot be missed.
  const iceGatheringComplete = new Promise((resolve) => {
    peer.onicegatheringstatechange = () => {
      if (peer.iceGatheringState === 'complete') {
        resolve();
      }
    };
  });

  try {
    // The recvonly transceiver already requests audio, so the legacy
    // offerToReceiveAudio option is not needed.
    peer.addTransceiver('audio', { direction: 'recvonly' });

    const offer = await peer.createOffer();
    await peer.setLocalDescription(offer);
    await iceGatheringComplete;

    const response = await fetch('/webrtc', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      // The description is sent as a JSON string nested in the `sdp` field,
      // exactly as before.
      body: JSON.stringify({ sdp: JSON.stringify(peer.localDescription) }),
    });
    if (!response.ok) {
      throw new Error(`Signaling request failed: ${response.status} ${response.statusText}`);
    }

    const answer = await response.json();
    await peer.setRemoteDescription(new RTCSessionDescription(answer));
  } catch (err) {
    peer.close();
    if (isCurrent()) {
      pc = null;
    }
    document.getElementById('connectButton').disabled = false;
    throw err;
  }
}
|
||
/**
 * Click handler for "Convert to Speech".
 *
 * Posts the textarea content to /tts as JSON. A non-empty response body is
 * played as audio through a temporary object URL, which is released once
 * playback ends or fails. Request failures are logged rather than left as
 * unhandled rejections.
 *
 * @returns {Promise<void>} Resolves when the request has been handled; does
 *   nothing if the textarea is empty.
 */
async function submitText() {
  const text = document.getElementById('textInput').value;
  if (!text) return;

  let audioBlob;
  try {
    const response = await fetch('/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text }),
    });
    if (!response.ok) {
      // Do not hand an error page to the audio element.
      throw new Error(`TTS request failed: ${response.status} ${response.statusText}`);
    }
    audioBlob = await response.blob();
  } catch (err) {
    console.error('Text-to-speech request failed:', err);
    return;
  }

  if (audioBlob.size === 0) {
    // Nothing to play locally.
    // NOTE(review): presumably the speech then arrives over the WebRTC audio
    // track instead — confirm against the server's /tts handler.
    return;
  }

  const audioUrl = URL.createObjectURL(audioBlob);
  const releaseUrl = () => URL.revokeObjectURL(audioUrl);
  const audio = new Audio(audioUrl);
  // Object URLs keep the blob alive until revoked.
  audio.onended = releaseUrl;
  audio.onerror = releaseUrl;

  try {
    await audio.play();
  } catch (err) {
    releaseUrl();
    console.warn('Could not play the /tts response as audio:', err);
  }
}
|
||
// initWebRTC() is not invoked automatically on page load; the Connect button starts it via toggleConnection(). | ||
</script> | ||
</body> | ||
</html> |
Oops, something went wrong.