Start adding some quantization.

LaurentMazare · LaurentMazare · commit 998adca5c274 · 2025-02-20T14:46:10.000+01:00
diff --git a/moshi_mlx/moshi_mlx/modules/__init__.py b/moshi_mlx/moshi_mlx/modules/__init__.py
@@ -5,6 +5,7 @@
 """Modules used for building the models."""
 
 from .conv import Conv1d, ConvTranspose1d, StreamableConv1d, StreamableConvTranspose1d, NormConv1d, NormConvTranspose1d, ConvDownsample1d, ConvTrUpsample1d
-from .seanet import SeanetConfig, Seanet
+from .quantization import SplitResidualVectorQuantizer
+from .seanet import SeanetConfig, SeanetEncoder, SeanetDecoder
 from .kv_cache import KVCache, RotatingKVCache
 from .transformer import Transformer, TransformerConfig
diff --git a/moshi_mlx/moshi_mlx/modules/quantization.py b/moshi_mlx/moshi_mlx/modules/quantization.py
@@ -0,0 +1,36 @@
+# Copyright (c) Kyutai, all rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import mlx.core as mx
+import mlx.nn as nn
+
+class EuclideanCodebook(nn.Module):  # stub: calling the module currently returns its input unchanged
+    def __init__(self, dim: int, codebook_size: int):  # codebook_size is accepted but not stored or used yet
+        super().__init__()
+        self._epsilon = 1e-5  # NOTE(review): presumably a numerical-stability constant; not read anywhere yet
+        self._dim = dim  # stored, but not read by __call__ in this version
+
+    def __call__(self, xs: mx.array) -> mx.array:
+        return xs  # no codebook lookup implemented yet: identity pass-through
+
+class VectorQuantization(nn.Module):  # stub: no parameters or sub-modules are created yet
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, xs: mx.array) -> mx.array:
+        return xs  # placeholder: input is returned unchanged, nothing is quantized
+
+class ResidualVectorQuantization(nn.Module):  # stub: holds no quantization layers yet
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, xs: mx.array) -> mx.array:
+        return xs  # placeholder: identity pass-through, no residual quantization performed
+
+class SplitResidualVectorQuantizer(nn.Module):  # stub re-exported by modules/__init__.py; no state yet
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, xs: mx.array) -> mx.array:
+        return xs  # placeholder: returns the input as-is until quantization is implemented
diff --git a/moshi_mlx/moshi_mlx/modules/seanet.py b/moshi_mlx/moshi_mlx/modules/seanet.py
@@ -27,6 +27,7 @@ class SeanetConfig:
 
 class SeanetResnetBlock(nn.Module):
     def __init__(self, cfg: SeanetConfig, dim: int, ksizes_and_dilations: list):
+        super().__init__()
         block = []
         hidden = dim // cfg.compress
         for i, (ksize, dilation) in enumerate(ksizes_and_dilations):
@@ -80,6 +81,7 @@ def __call__(self, xs: mx.array) -> mx.array:
 
 class EncoderLayer(nn.Module):
     def __init__(self, cfg: SeanetConfig, ratio: int, mult: int):
+        super().__init__()
         residuals = []
         dilation = 1
         for _ in range(cfg.nresidual_layers):
@@ -115,6 +117,7 @@ def __call__(self, xs: mx.array) -> mx.array:
 
 class SeanetEncoder(nn.Module):
     def __init__(self, cfg: SeanetConfig):
+        super().__init__()
         mult = 1
         self.init_conv1d = StreamableConv1d(
             in_channels=cfg.channels,
@@ -159,6 +162,7 @@ def __call__(self, xs: mx.array) -> mx.array:
 
 class DecoderLayer(nn.Module):
     def __init__(self, cfg: SeanetConfig, ratio: int, mult: int):
+        super().__init__()
         self.upsample = StreamableConvTranspose1d(
             in_channels=mult * cfg.nfilters,
             out_channels=mult * cfg.nfilters // 2,
@@ -183,6 +187,7 @@ def __call__(self, xs: mx.array) -> mx.array:
 
 class SeanetDecoder(nn.Module):
     def __init__(self, cfg: SeanetConfig):
+        super().__init__()
         mult = 1 << len(cfg.ratios)
         self.init_conv1d = StreamableConv1d(
             in_channels=cfg.dimension,
@@ -227,5 +232,6 @@ def __call__(self, xs: mx.array) -> mx.array:
 
 class Seanet(nn.Module):
     def __init__(self, cfg: SeanetConfig):
+        super().__init__()  # run nn.Module's initializer before the encoder/decoder attributes are assigned
         self.encoder = SeanetEncoder(cfg)
         self.decoder = SeanetDecoder(cfg)