17 changes: 13 additions & 4 deletions pyvene/models/basic_utils.py
@@ -43,14 +43,23 @@ def create_directory(path):

 def embed_to_distrib(model, embed, log=False, logits=False):
     """Convert an embedding to a distribution over the vocabulary"""
-    if "gpt2" in model.config.architectures[0].lower():
+    arch = (model.config.architectures or [None])[0]
+    if arch is None:
+        arch = type(model).__name__
+    arch_lower = (arch or "").lower()
+    if "gpt2" in arch_lower:
         with torch.inference_mode():
-            vocab = torch.matmul(embed, model.wte.weight.t())
+            wte = model.wte if hasattr(model, "wte") else model.transformer.wte
+            vocab = torch.matmul(embed, wte.weight.t())
Comment on lines +52 to +53
P2: Use GPT-2 LM head for vocab projection

This branch now handles GPT2LMHeadModel, but it always projects through wte.weight. That produces incorrect logits whenever the output embeddings are not tied to the input embeddings (e.g., tie_word_embeddings=False, or checkpoints where lm_head has diverged during fine-tuning). In those valid GPT-2 configurations, embed_to_distrib will silently return the wrong distribution, so the projection should prefer model.lm_head when present and fall back to wte only for models without an LM head.
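A minimal sketch of that preference order (the helper name `project_to_vocab` is hypothetical, not the merged code; `getattr` covers models that expose no `lm_head` attribute):

```python
import torch

# Hypothetical helper sketching the suggested fix: prefer the LM head so
# untied or fine-tuned output embeddings are respected, and only fall back
# to the input embedding matrix when no head exists.
def project_to_vocab(model, embed):
    lm_head = getattr(model, "lm_head", None)
    if lm_head is not None:
        return lm_head(embed)
    wte = model.wte if hasattr(model, "wte") else model.transformer.wte
    return torch.matmul(embed, wte.weight.t())
```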


         if logits:
             return vocab
         return lsm(vocab) if log else sm(vocab)
-    elif "llama" in model.config.architectures[0].lower():
-        assert False, "Support for LLaMA is not here yet"
+    elif "llama" in arch_lower:
+        with torch.inference_mode():
+            vocab = model.lm_head(embed)
+        if logits:
+            return vocab
+        return lsm(vocab) if log else sm(vocab)


 def set_seed(seed: int):
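For context, the new lookup means embed_to_distrib no longer assumes config.architectures is populated. A quick illustration of the class-name fallback, written as a sketch against the code above with an untrained toy config:

```python
import torch
from transformers import GPT2Config, GPT2LMHeadModel

from pyvene.models.basic_utils import embed_to_distrib

# A config whose `architectures` field is unset: the old code would fail on
# `model.config.architectures[0]`; the new code falls back to the class name
# ("GPT2LMHeadModel"), which still matches the "gpt2" branch.
config = GPT2Config(n_embd=24, n_layer=2, n_head=4, vocab_size=10)
config.architectures = None
model = GPT2LMHeadModel(config)

embed = torch.randn(1, 3, config.n_embd)
dist = embed_to_distrib(model, embed)  # softmax over the vocabulary
print(dist.shape)                      # torch.Size([1, 3, 10])
print(torch.allclose(dist.sum(-1), torch.ones(1, 3)))  # True
```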
86 changes: 86 additions & 0 deletions tests/unit_tests/BasicUtilsTestCase.py
@@ -0,0 +1,86 @@
import unittest
import torch
from ..utils import (
create_gpt2_lm,
create_llama,
embed_to_distrib,
GPT2Config,
LlamaConfig,
)


class BasicUtilsTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
print("=== Test Suite: BasicUtilsTestCase ===")
cls.gpt2_config, cls.gpt2_tokenizer, cls.gpt2 = create_gpt2_lm(
config=GPT2Config(
n_embd=24,
attn_pdrop=0.0,
embd_pdrop=0.0,
resid_pdrop=0.0,
summary_first_dropout=0.0,
n_layer=2,
bos_token_id=0,
eos_token_id=0,
n_positions=128,
vocab_size=10,
)
)
cls.llama_config, cls.llama_tokenizer, cls.llama = create_llama(
config=LlamaConfig(
bos_token_id=1,
eos_token_id=2,
hidden_size=64,
intermediate_size=128,
max_position_embeddings=128,
num_attention_heads=4,
num_hidden_layers=2,
num_key_value_heads=4,
rms_norm_eps=1e-5,
vocab_size=100,
)
)
cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cls.gpt2 = cls.gpt2.to(cls.device)
cls.llama = cls.llama.to(cls.device)

def test_embed_to_distrib_gpt2_logits(self):
batch, seq, _ = 2, 5, self.gpt2.config.n_embd
embed = torch.randn(batch, seq, self.gpt2.config.n_embd).to(self.device)
out = embed_to_distrib(self.gpt2, embed, logits=True)
self.assertEqual(out.shape, (batch, seq, self.gpt2.config.vocab_size))

def test_embed_to_distrib_gpt2_softmax(self):
batch, seq, _ = 2, 5, self.gpt2.config.n_embd
embed = torch.randn(batch, seq, self.gpt2.config.n_embd).to(self.device)
out = embed_to_distrib(self.gpt2, embed, log=False, logits=False)
self.assertEqual(out.shape, (batch, seq, self.gpt2.config.vocab_size))
self.assertTrue(torch.allclose(out.sum(dim=-1), torch.ones(batch, seq).to(self.device)))

def test_embed_to_distrib_llama_logits(self):
batch, seq, _ = 2, 5, self.llama.config.hidden_size
embed = torch.randn(batch, seq, self.llama.config.hidden_size).to(self.device)
out = embed_to_distrib(self.llama, embed, logits=True)
self.assertEqual(out.shape, (batch, seq, self.llama.config.vocab_size))

def test_embed_to_distrib_llama_softmax(self):
batch, seq, _ = 2, 5, self.llama.config.hidden_size
embed = torch.randn(batch, seq, self.llama.config.hidden_size).to(self.device)
out = embed_to_distrib(self.llama, embed, log=False, logits=False)
self.assertEqual(out.shape, (batch, seq, self.llama.config.vocab_size))
self.assertTrue(torch.allclose(out.sum(dim=-1), torch.ones(batch, seq).to(self.device)))


def suite():
s = unittest.TestSuite()
s.addTest(BasicUtilsTestCase("test_embed_to_distrib_gpt2_logits"))
s.addTest(BasicUtilsTestCase("test_embed_to_distrib_gpt2_softmax"))
s.addTest(BasicUtilsTestCase("test_embed_to_distrib_llama_logits"))
s.addTest(BasicUtilsTestCase("test_embed_to_distrib_llama_softmax"))
return s


if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite())
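Note on running the suite: the relative import (`from ..utils import ...`) means the file must be executed in package context, e.g. `python -m unittest tests.unit_tests.BasicUtilsTestCase` from the repository root (assuming `tests/` and `tests/unit_tests/` contain `__init__.py`), rather than invoked directly as a script.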