Skip to content

Commit 5d829fb

Browse files
authored
Merge pull request #1474 from pipecat-ai/khk/mem0-changelog
Changelog entry for mem0 service
2 parents acddddc + a978a5c commit 5d829fb

File tree

4 files changed

+26
-16
lines changed

4 files changed

+26
-16
lines changed

CHANGELOG.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM
13+
applications. Learn more at: https://mem0.ai/.
14+
1215
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
13-
- Created two examples in `p2p-webrtc`:
14-
- **video-transform**: Demonstrates sending and receiving audio/video with `SmallWebRTCTransport` using `TypeScript`.
15-
Includes video frame processing with OpenCV.
16-
- **voice-agent**: A minimal example of creating a voice agent with `SmallWebRTCTransport`.
1716

18-
- Added support to `ProtobufFrameSerializer` to send the messages from `TransportMessageFrame` and `TransportMessageUrgentFrame`.
17+
- Created two examples in `p2p-webrtc`:
18+
- **video-transform**: Demonstrates sending and receiving audio/video with
19+
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
20+
processing with OpenCV.
21+
- **voice-agent**: A minimal example of creating a voice agent with
22+
`SmallWebRTCTransport`.
23+
24+
- Added support to `ProtobufFrameSerializer` to send the messages from
25+
`TransportMessageFrame` and `TransportMessageUrgentFrame`.
1926

2027
- Added support for a new TTS service, `PiperTTSService`.
2128
(see https://github.com/rhasspy/piper/)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ pip install "pipecat-ai[option,...]"
6363
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
6464
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
6565
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
66+
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
6667
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
6768
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
6869
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ langchain = [ "langchain~=0.3.20", "langchain-community~=0.3.20", "langchain-ope
6464
livekit = [ "livekit~=0.22.0", "livekit-api~=0.8.2", "tenacity~=9.0.0" ]
6565
lmnt = [ "websockets~=13.1" ]
6666
local = [ "pyaudio~=0.2.14" ]
67+
mem0 = [ "mem0ai~=0.1.76" ]
6768
mlx-whisper = [ "mlx-whisper~=0.4.2" ]
6869
moondream = [ "einops~=0.8.0", "timm~=1.0.13", "transformers~=4.48.0" ]
6970
nim = []
@@ -86,7 +87,6 @@ ultravox = [ "transformers~=4.48.0", "vllm~=0.7.3" ]
8687
webrtc = [ "aiortc~=1.10.1", "opencv-python~=4.11.0.86" ]
8788
websocket = [ "websockets~=13.1", "fastapi~=0.115.6" ]
8889
whisper = [ "faster-whisper~=1.1.1" ]
89-
mem0 = [ "mem0ai~=0.1.76" ]
9090

9191
[tool.setuptools.packages.find]
9292
# All the following settings are optional:

src/pipecat/services/whisper.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
import asyncio
1010
from enum import Enum
1111
from typing import AsyncGenerator, Optional
12-
from typing_extensions import TYPE_CHECKING, override
1312

1413
import numpy as np
1514
from loguru import logger
15+
from typing_extensions import TYPE_CHECKING, override
1616

1717
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
1818
from pipecat.services.ai_services import SegmentedSTTService
@@ -26,7 +26,7 @@
2626
logger.error(f"Exception: {e}")
2727
logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.")
2828
raise Exception(f"Missing module: {e}")
29-
29+
3030
try:
3131
import mlx_whisper
3232
except ModuleNotFoundError as e:
@@ -332,6 +332,7 @@ def _load(self):
332332
"""
333333
try:
334334
from faster_whisper import WhisperModel
335+
335336
logger.debug("Loading Whisper model...")
336337
self._model = WhisperModel(
337338
self.model_name, device=self._device, compute_type=self._compute_type
@@ -414,22 +415,22 @@ def __init__(
414415
):
415416
# Skip WhisperSTTService.__init__ and call its parent directly
416417
SegmentedSTTService.__init__(self, **kwargs)
417-
418+
418419
self.set_model_name(model if isinstance(model, str) else model.value)
419420
self._no_speech_prob = no_speech_prob
420421
self._temperature = temperature
421422

422423
self._settings = {
423424
"language": language,
424425
}
425-
426+
426427
# No need to call _load() as MLX Whisper loads models on demand
427428

428429
@override
429430
def _load(self):
430431
"""MLX Whisper loads models on demand, so this is a no-op."""
431432
pass
432-
433+
433434
@override
434435
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
435436
"""Transcribes given audio using MLX Whisper.
@@ -447,7 +448,7 @@ async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
447448
"""
448449
try:
449450
import mlx_whisper
450-
451+
451452
await self.start_processing_metrics()
452453
await self.start_ttfb_metrics()
453454

@@ -456,10 +457,11 @@ async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
456457

457458
whisper_lang = self.language_to_service_language(self._settings["language"])
458459
chunk = await asyncio.to_thread(
459-
mlx_whisper.transcribe, audio_float,
460+
mlx_whisper.transcribe,
461+
audio_float,
460462
path_or_hf_repo=self.model_name,
461463
temperature=self._temperature,
462-
language=whisper_lang
464+
language=whisper_lang,
463465
)
464466
text: str = ""
465467
for segment in chunk.get("segments", []):
@@ -475,11 +477,11 @@ async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
475477

476478
await self.stop_ttfb_metrics()
477479
await self.stop_processing_metrics()
478-
480+
479481
if text:
480482
logger.debug(f"Transcription: [{text}]")
481483
yield TranscriptionFrame(text, "", time_now_iso8601(), self._settings["language"])
482-
484+
483485
except Exception as e:
484486
logger.exception(f"MLX Whisper transcription error: {e}")
485487
yield ErrorFrame(f"MLX Whisper transcription error: {str(e)}")

0 commit comments

Comments
 (0)