Mark the FP8 quantization flag as BETA and document that it is currently not the fastest option.

Narsil · Narsil · commit 3e26028245c1 · 2024-02-01T15:30:02.000Z
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
@@ -47,8 +47,11 @@ enum Quantization {
     /// Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better
     /// perplexity performance for your model
     BitsandbytesFP4,
+    /// [BETA]
     /// [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above
     /// This dtype has native ops and should, in principle, be the fastest if available.
+    /// This is currently not the fastest because of local unpacking + padding to satisfy matrix
+    /// multiplication limitations.
     Fp8,
 }