
Commit 2739023

Authored by: vmpuri, Jack-Khuu
LLaMA 2 Chat Formatter List Message Content Fix (#1272)
* Initial test
* Pad causal mask with zeroes and set decoder max_seq_len to the max sequence length so their shapes both match max_seq_len
* Fix control bug for image inputs
* Clear image input after submitting a chat
* Include empty assistant message for chat
* Pipe image input from CLI
* List-type message content parsing in LLaMA 2 Chat Formatter

---------

Co-authored-by: Jack-Khuu <[email protected]>
Co-authored-by: vmpuri <[email protected]>
Parent: eca477b

File tree

2 files changed (+6, -2 lines)

torchchat/generate.py

Lines changed: 5 additions & 1 deletion
@@ -103,7 +103,11 @@ def encode_dialog_prompt(self, dialog) -> List[int]:
         tokens = self.tokenizer.encode(f"{B_INST} ")
         first_message = True  # Bool to handle placing the B_INST token. Behavior is weird - the system prompt should have the B_INST, but not the first user message. All following user messages *should* have it. Also, if there is no system prompt, then the user message should have it.
         for message in dialog:
-            content = message["content"].strip()
+            if isinstance(message["content"], list):
+                content = message["content"][0]["text"]
+            else:
+                content = message["content"]
+            content = content.strip()
             if message["role"] == "system":
                 encoded = self.tokenizer.encode(f"{B_SYS}\n{content}\n{E_SYS}")
                 first_message = False
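
For context, OpenAI-style chat messages can carry content either as a plain string or as a list of typed parts (e.g. for multimodal requests); the patch above takes the text of the first part in the list case. Below is a minimal sketch of that normalization — the helper name normalize_content is hypothetical, not part of torchchat:

    from typing import Dict, List, Union

    # Message content is either a plain string or a list of typed parts,
    # e.g. [{"type": "text", "text": "..."}].
    Content = Union[str, List[Dict[str, str]]]

    def normalize_content(content: Content) -> str:
        # Mirrors the patched branch: list-type content contributes only
        # the text of its first part; string content is used as-is.
        if isinstance(content, list):
            return content[0]["text"]
        return content

    # Both message shapes yield the same stripped text.
    assert normalize_content(" Hello ").strip() == "Hello"
    assert normalize_content([{"type": "text", "text": " Hello "}]).strip() == "Hello"

Note that this drops any parts beyond the first, which matches the minimal fix in the diff.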

torchchat/usages/openai_api.py

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ def chunked_completion(self, completion_request: CompletionRequest):
             encoded_prompt=encoded,
             temperature=float(completion_request.temperature),
             chat_mode=False,
-            sequential_prefill=False,
+            sequential_prefill=True,
         )

         def callback(x, *, done_generating=False):
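
For illustration, sequential prefill feeds the prompt into the model one position at a time (filling the KV cache step by step) instead of in a single batched forward pass. A sketch under that assumption — the model call signature here is hypothetical, not torchchat's actual API:

    import torch

    def prefill(model, tokens: torch.Tensor, sequential: bool) -> torch.Tensor:
        # Assumed interface: model(x, input_pos=...) returns logits for x
        # and updates an internal KV cache at the given positions.
        if sequential:
            # One token per forward pass, exercising the same code path
            # as incremental decoding.
            for pos in range(tokens.size(0)):
                logits = model(tokens[pos : pos + 1].unsqueeze(0),
                               input_pos=torch.tensor([pos]))
        else:
            # Whole prompt in one batched forward pass.
            logits = model(tokens.unsqueeze(0),
                           input_pos=torch.arange(tokens.size(0)))
        return logits[:, -1]  # next-token logits after the prompt

Either path should leave the cache in the same state; the flag trades prefill speed for a simpler, decode-like execution path.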
