kantord · neilruaro-camb · Apr 14, 2026
diff --git a/src/librelingo_audios/tests/test_cambai_provider.py b/src/librelingo_audios/tests/test_cambai_provider.py
@@ -0,0 +1,160 @@
+import io
+import json
+
+import pytest
+
+from librelingo_fakes import fakes
+from librelingo_types import AudioSettings, Settings, TextToSpeechSettings
+
+from librelingo_audios import cli
+from librelingo_audios.update_audios import update_audios_for_course
+
+# Variant of fakes.course1 whose audio settings are enabled with a single
+# CambAI text-to-speech entry; shared by the tests in this module.
+_cambai_tts_settings = TextToSpeechSettings(
+    provider="CambAI", voice="147320", engine="mars-pro"
+)
+_cambai_audio_settings = AudioSettings(
+    enabled=True,
+    text_to_speech_settings_list=[_cambai_tts_settings],
+)
+cambai_course = fakes.customize(
+    fakes.course1,
+    settings=Settings(audio_settings=_cambai_audio_settings),
+)
+
+
+@pytest.fixture
+def cambai_http(mocker, monkeypatch):
+    """Stub CambAI's HTTP endpoint; return the list of recorded requests."""
+    monkeypatch.setenv("CAMB_API_KEY", "test-key")
+    recorded = []
+
+    # Context-manager stand-in for the object urlopen returns.
+    class FakeResponse:
+        def __init__(self, payload):
+            self._buf = io.BytesIO(payload)
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *_):
+            return False
+
+        def read(self, size=-1):
+            if size == -1:
+                return self._buf.read()
+            return self._buf.read(size)
+
+    def fake_urlopen(request, *args, **kwargs):
+        entry = {
+            "url": request.full_url,
+            "method": request.get_method(),
+            "headers": dict(request.header_items()),
+            "body": json.loads(request.data.decode("utf-8")),
+        }
+        recorded.append(entry)
+        return FakeResponse(b"fake-mp3-bytes")
+
+    target = "librelingo_audios.update_audios.urllib.request.urlopen"
+    mocker.patch(target, side_effect=fake_urlopen)
+    return recorded
+
+
+def test_cambai_provider_calls_api_and_writes_file(
+    cambai_http, tmp_path, index_file
+):
+    """Expect four POSTs to the CambAI endpoint and four MP3s of stubbed bytes."""
+    run_settings = cli.Settings(dry_run=False, destructive=False)
+    update_audios_for_course(tmp_path, "test", cambai_course, run_settings)
+
+    assert len(cambai_http) == 4
+    for recorded in cambai_http:
+        payload = recorded["body"]
+        assert recorded["url"] == "https://client.camb.ai/apis/tts-stream"
+        assert recorded["method"] == "POST"
+        assert recorded["headers"]["X-api-key"] == "test-key"
+        assert payload["voice_id"] == 147320
+        assert payload["language"] == "de-de"
+        assert payload["speech_model"] == "mars-pro"
+        assert payload["output_configuration"] == {"format": "mp3"}
+
+    mp3_paths = [path for path in tmp_path.iterdir() if path.suffix == ".mp3"]
+    assert len(mp3_paths) == 4
+    assert all(path.read_bytes() == b"fake-mp3-bytes" for path in mp3_paths)
+
+
+def test_cambai_provider_records_provider_in_index(
+    cambai_http, tmp_path, index_file
+):
+    """Every entry of the written index must carry the CambAI TTS settings."""
+    run_settings = cli.Settings(dry_run=False, destructive=False)
+    update_audios_for_course(tmp_path, "test", cambai_course, run_settings)
+
+    index_entries = json.loads((tmp_path / "test.json").read_text())
+    assert len(index_entries) == 4
+    for item in index_entries:
+        assert item["ttsProvider"] == "CambAI"
+        assert item["ttsVoice"] == "147320"
+        assert item["ttsEngine"] == "mars-pro"
+
+
+def test_cambai_provider_missing_api_key_raises(mocker, monkeypatch, tmp_path):
+    """Expect a RuntimeError naming CAMB_API_KEY when the variable is unset.
+
+    No HTTP stub is installed: the key check must fail before any request.
+    """
+    monkeypatch.delenv("CAMB_API_KEY", raising=False)
+    run_settings = cli.Settings(dry_run=False, destructive=False)
+    with pytest.raises(RuntimeError, match="CAMB_API_KEY"):
+        update_audios_for_course(tmp_path, "test", cambai_course, run_settings)
+
+
+def test_cambai_provider_non_numeric_voice_raises(
+    mocker, monkeypatch, tmp_path
+):
+    """Expect a RuntimeError when the CambAI voice is not a numeric id.
+
+    "Lupe" cannot be converted to an int, so no HTTP stub is needed.
+    """
+    monkeypatch.setenv("CAMB_API_KEY", "test-key")
+    named_voice = TextToSpeechSettings(
+        provider="CambAI", voice="Lupe", engine="mars-pro"
+    )
+    bad_course = fakes.customize(
+        fakes.course1,
+        settings=Settings(
+            audio_settings=AudioSettings(
+                enabled=True,
+                text_to_speech_settings_list=[named_voice],
+            )
+        ),
+    )
+    run_settings = cli.Settings(dry_run=False, destructive=False)
+
+    with pytest.raises(RuntimeError, match="numeric voice_id"):
+        update_audios_for_course(tmp_path, "test", bad_course, run_settings)
+
+
+def test_unknown_provider_raises(monkeypatch, tmp_path):
+    """Expect a RuntimeError for a provider name that is not supported.
+
+    "SomeOtherTTS" matches neither the Polly nor the CambAI branch.
+    """
+    monkeypatch.setenv("CAMB_API_KEY", "test-key")
+    unsupported = TextToSpeechSettings(
+        provider="SomeOtherTTS", voice="X", engine="y"
+    )
+    bad_course = fakes.customize(
+        fakes.course1,
+        settings=Settings(
+            audio_settings=AudioSettings(
+                enabled=True,
+                text_to_speech_settings_list=[unsupported],
+            )
+        ),
+    )
+    run_settings = cli.Settings(dry_run=False, destructive=False)
+
+    with pytest.raises(RuntimeError, match="Unknown TTS provider"):
+        update_audios_for_course(tmp_path, "test", bad_course, run_settings)
diff --git a/src/librelingo_audios/update_audios.py b/src/librelingo_audios/update_audios.py
@@ -1,6 +1,9 @@
 import json
+import os
 import random
 import subprocess
+import urllib.error
+import urllib.request
 from pathlib import Path
 from typing import Set, Union
 
@@ -9,6 +12,29 @@
 
 from librelingo_audios.functions import list_required_audios
 
+CAMBAI_TTS_STREAM_URL = "https://client.camb.ai/apis/tts-stream"
+
+# Maps LibreLingo's target_language codes (ISO-639-1) to the BCP-47 style locale
+# strings CambAI requests carry as "language"; unmapped codes raise RuntimeError.
+_CAMBAI_LANGUAGE_CODES = {  # extend as new courses are added
+    "en": "en-us",
+    "es": "es-es",
+    "fr": "fr-fr",
+    "de": "de-de",
+    "it": "it-it",
+    "pt": "pt-br",
+    "nl": "nl-nl",
+    "ru": "ru-ru",
+    "ja": "ja-jp",
+    "ko": "ko-kr",
+    "zh": "zh-cn",
+    "hi": "hi-in",
+    "ar": "ar-sa",
+    "ta": "ta-in",
+    "te": "te-in",
+    "bn": "bn-in",
+}
+
 
 def update_audios_for_course(
     output_path: str, course_name: str, course: Course, settings
@@ -116,38 +142,121 @@ def _generate_audio_with_tts(
             f"Generating {destination_path} "
             f"using {chosen_tts_settings.voice} {chosen_tts_settings.engine}"
         )
-        # This is where more more TTS providers would be added with an if statement.
-        # For now there is only Polly.
-        tts_provider = "polly"
-        subprocess.run(
-            [
-                "aws",
-                tts_provider,
-                "synthesize-speech",
-                "--output-format",
-                "mp3",
-                "--voice-id",
-                chosen_tts_settings.voice,
-                "--engine",
-                chosen_tts_settings.engine,
-                "--text",
-                phrase_identity.text,
-                destination_path,
-            ],
-            stdout=subprocess.DEVNULL,
-        )
+        provider = (chosen_tts_settings.provider or "Polly").lower()
+        if provider == "polly":
+            _synthesize_with_polly(
+                phrase_identity, chosen_tts_settings, destination_path
+            )
+        elif provider == "cambai":
+            _synthesize_with_cambai(
+                phrase_identity, chosen_tts_settings, destination_path, course
+            )
+        else:
+            raise RuntimeError(
+                f"Unknown TTS provider '{chosen_tts_settings.provider}'. "
+                f"Supported providers: Polly, CambAI."
+            )
 
     return {
         "id": file_name,
         "text": phrase_identity.text,
         "source": "TTS",
         "license": course.license.full_name,
-        "ttsProvider": "Polly",
+        "ttsProvider": chosen_tts_settings.provider,
         "ttsVoice": chosen_tts_settings.voice,
         "ttsEngine": chosen_tts_settings.engine,
     }
 
 
+def _synthesize_with_polly(
+    phrase_identity: PhraseIdentity,
+    chosen_tts_settings,
+    destination_path: Path,
+):
+    """Synthesize the phrase with Amazon Polly through the AWS CLI.
+
+    Runs `aws polly synthesize-speech` requesting MP3 output with the voice
+    and engine from chosen_tts_settings, passing destination_path as the
+    output file. The CLI's stdout is discarded and its exit status is not
+    inspected.
+    """
+    polly_command = ["aws", "polly", "synthesize-speech"]
+    polly_command += ["--output-format", "mp3"]
+    polly_command += ["--voice-id", chosen_tts_settings.voice]
+    polly_command += ["--engine", chosen_tts_settings.engine]
+    polly_command += ["--text", phrase_identity.text]
+    # The output file is the final positional argument.
+    polly_command.append(destination_path)
+
+    # Only stdout is silenced; stderr from the CLI is left untouched.
+    subprocess.run(polly_command, stdout=subprocess.DEVNULL)
+
+
+def _synthesize_with_cambai(
+    phrase_identity: PhraseIdentity,
+    chosen_tts_settings,
+    destination_path: Path,
+    course: Course,
+):
+    """Synthesize the phrase through CambAI's tts-stream HTTP endpoint.
+
+    The MP3 is written to destination_path only after the full response has
+    arrived, so a failed request never leaves a truncated file behind.
+    Raises RuntimeError for a missing CAMB_API_KEY, an unmapped course
+    language, a non-numeric voice, or a failed request.
+    """
+    api_key = os.environ.get("CAMB_API_KEY")
+    if not api_key:
+        raise RuntimeError(
+            "CAMB_API_KEY environment variable is required for the CambAI TTS provider. "
+            "Get a key at https://studio.camb.ai and export CAMB_API_KEY=<your key>."
+        )
+
+    language_code = course.target_language.code
+    language = _CAMBAI_LANGUAGE_CODES.get(language_code)
+    if language is None:
+        raise RuntimeError(
+            f"CambAI provider does not yet have a BCP-47 mapping for target_language "
+            f"code '{language_code}'. Add it to _CAMBAI_LANGUAGE_CODES in "
+            f"update_audios.py."
+        )
+
+    try:
+        voice_id = int(chosen_tts_settings.voice)
+    except (TypeError, ValueError) as exc:
+        raise RuntimeError(
+            f"CambAI voice must be a numeric voice_id (e.g. 147320); got "
+            f"'{chosen_tts_settings.voice}'. List voices at "
+            f"https://client.camb.ai/apis/list-voices."
+        ) from exc
+
+    payload = {
+        "text": phrase_identity.text,
+        "voice_id": voice_id,
+        "language": language,
+        "speech_model": chosen_tts_settings.engine or "mars-pro",
+        "output_configuration": {"format": "mp3"},
+    }
+    request = urllib.request.Request(
+        CAMBAI_TTS_STREAM_URL,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"x-api-key": api_key, "Content-Type": "application/json"},
+        method="POST",
+    )
+
+    try:
+        # Bounded wait: without a timeout urlopen may block indefinitely.
+        with urllib.request.urlopen(request, timeout=60) as response:
+            audio = response.read()
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"CambAI TTS request failed ({exc.code}): {body}") from exc
+    except OSError as exc:  # URLError, socket timeouts, connection resets
+        raise RuntimeError(f"CambAI TTS request failed: {exc}") from exc
+
+    Path(destination_path).write_bytes(audio)
+
+
 def _delete_phrases(
     phrases: Set[PhraseIdentity], output_path: str, existing_index, settings
 ):