Refactor chat_adapter (somewhat working)

isaacbmiller · isaacbmiller · commit 37f8c04181b9 · 2024-11-15T02:31:00.000Z
diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py
@@ -254,8 +254,7 @@ def parse_value(value, annotation):
     return TypeAdapter(annotation).validate_python(parsed_value)
 
 
-def format_turn(signature, values, role, incomplete=False): 
-    fields_to_collapse = []      
+def format_turn(signature, values, role, incomplete=False):
     """
     Constructs a new message ("turn") to append to a chat thread. The message is carefully formatted
     so that it can instruct an LLM to generate responses conforming to the specified DSPy signature.
@@ -271,90 +270,77 @@ def format_turn(signature, values, role, incomplete=False):
       A chat message that can be appended to a chat thread. The message contains two string fields:
       ``role`` ("user" or "assistant") and ``content`` (the message text).
     """
-    content = []
-
     if role == "user":
-        fields: Dict[str, FieldInfo] = signature.input_fields
-        if incomplete:
-            fields_to_collapse.append({"type": "text", "text": "This is an example of the task, though some input or output fields are not supplied."})
+        fields = signature.input_fields
+        message_prefix = "This is an example of the task, though some input or output fields are not supplied." if incomplete else ""
     else:
-        fields: Dict[str, FieldInfo] = signature.output_fields
-        # Add the built-in field indicating that the chat turn has been completed
-        fields[BuiltInCompletedOutputFieldInfo.name] = BuiltInCompletedOutputFieldInfo.info
+        # Add the completed field for the assistant turn
+        fields = {**signature.output_fields, BuiltInCompletedOutputFieldInfo.name: BuiltInCompletedOutputFieldInfo.info}
         values = {**values, BuiltInCompletedOutputFieldInfo.name: ""}
-    field_names: KeysView = fields.keys()
-    if not incomplete:
-        if not set(values).issuperset(set(field_names)):
-            raise ValueError(f"Expected {field_names} but got {values.keys()}")
-    
-    fields_to_collapse.extend(format_fields(
-        fields_with_values={
-            FieldInfoWithName(name=field_name, info=field_info): values.get(
-                field_name, "Not supplied for this particular example."
-            )
-            for field_name, field_info in fields.items()
-        },
-        assume_text=False
-    ))
+        message_prefix = ""
 
-    if role == "user":
-        output_fields = list(signature.output_fields.keys())
-        def type_info(v):
-            return f" (must be formatted as a valid Python {get_annotation_name(v.annotation)})" \
-                if v.annotation is not str else ""
-        if output_fields:
-            fields_to_collapse.append({
-                "type": "text",
-                "text":  "Respond with the corresponding output fields, starting with the field "
-                + ", then ".join(f"`[[ ## {f} ## ]]`{type_info(v)}" for f, v in signature.output_fields.items())
-                + ", and then ending with the marker for `[[ ## completed ## ]]`."
-            })
-        
-    # flatmap the list if any items are lists otherwise keep the item
-    flattened_list = list(chain.from_iterable(
-        item if isinstance(item, list) else [item] for item in fields_to_collapse
-    ))
-    final_list = []
-    while flattened_list:
-        item = flattened_list.pop(0)
-        image_tag_regex = r'"?<DSPY_IMAGE_START>(.*?)<DSPY_IMAGE_END>"?'
-        if re.search(image_tag_regex, item.get("text")):
-            image_tag = re.search(image_tag_regex, item.get("text")).group(1)
-            # get the prefix and suffix
-            prefix, suffix = item.get("text").split('"<DSPY_IMAGE_START>', 1)[0], "".join(item.get("text").split('<DSPY_IMAGE_END>"', 1)[1:])
-            final_list.append({"type": "text", "text": prefix})
-            final_list.append({"type": "image_url", "image_url": {"url": image_tag}})
-            flattened_list.insert(0, {"type": "text", "text": suffix})
-        else:
-            final_list.append({"type": "text", "text": item.get("text")})
-
-    if all(message.get("type", None) == "text" for message in final_list):
-        content = "\n\n".join(message.get("text") for message in final_list)
-        return {"role": role, "content": content}
-
-    # Collapse all consecutive text messages into a single message.
-    collapsed_messages = []
-    for item in final_list:
-        # First item is always added
-        if not collapsed_messages:
-            collapsed_messages.append(item)
-            continue
-        
-        # If current item is image, add to collapsed_messages
-        if item.get("type") == "image_url":
-            if collapsed_messages[-1].get("type") == "text":
-                collapsed_messages[-1]["text"] += "\n"
-            collapsed_messages.append(item)
-        # If previous item is text and current item is text, append to previous item
-        elif collapsed_messages[-1].get("type") == "text":
-            collapsed_messages[-1]["text"] += "\n\n" + item["text"]
-        # If previous item is not text(aka image), add current item as a new item
-        else:
-            item["text"] = "\n\n" + item["text"]
-            collapsed_messages.append(item)
+    if not incomplete and not set(values).issuperset(fields.keys()):
+        raise ValueError(f"Expected {fields.keys()} but got {values.keys()}")
+
+    messages = []
+    if message_prefix:
+        messages.append({"type": "text", "text": message_prefix})
 
-    return {"role": role, "content": collapsed_messages}
+    field_messages = format_fields(
+        {FieldInfoWithName(name=k, info=v): values.get(k, "Not supplied for this particular example.")
+         for k, v in fields.items()},
+        assume_text=False
+    )
+    messages.extend(field_messages)
+
+    # Add output field instructions for user messages
+    if role == "user" and signature.output_fields:
+        type_info = lambda v: f" (must be formatted as a valid Python {get_annotation_name(v.annotation)})" if v.annotation is not str else ""
+        field_instructions = "Respond with the corresponding output fields, starting with the field " + \
+            ", then ".join(f"`[[ ## {f} ## ]]`{type_info(v)}" for f, v in signature.output_fields.items()) + \
+            ", and then ending with the marker for `[[ ## completed ## ]]`."
+        messages.append({"type": "text", "text": field_instructions})
+
+    # Process messages to handle image tags and collapse text
+    processed_messages = process_messages(messages)
+    
+    if all(msg.get("type") == "text" for msg in processed_messages):
+        return {"role": role, "content": "\n\n".join(msg["text"] for msg in processed_messages)}
+    return {"role": role, "content": processed_messages}
+
+def process_messages(messages):
+    """Turn image tags into ``image_url`` messages and merge consecutive text messages.
+
+    Double quotes wrapping a tag are dropped with it; text fragments are joined by blank lines.
+    """
+    tag = re.compile(r'"?<DSPY_IMAGE_START>(.*?)<DSPY_IMAGE_END>"?')
+    processed, current_text = [], []
+    def flush_text():
+        # Emit the pending text fragments as one message ahead of a non-text item.
+        if current_text:
+            processed.append({"type": "text", "text": "\n\n".join(current_text)})
+            current_text.clear()
+
+    for msg in flatten_messages(messages):
+        if msg["type"] != "text":
+            flush_text()
+            processed.append(msg)
+            continue
+        # With one capture group, split() alternates text (even index) and image URL (odd index).
+        for index, part in enumerate(tag.split(msg["text"])):
+            if index % 2:
+                flush_text()
+                processed.append({"type": "image_url", "image_url": {"url": part}})
+            elif part.strip():
+                current_text.append(part)
+    flush_text()
+    return processed
 
+def flatten_messages(messages):
+    """Expand list entries of ``messages`` one level deep, keeping other entries as-is."""
+    # Wrap non-list entries so every entry can be unpacked uniformly.
+    wrapped = (entry if isinstance(entry, list) else [entry] for entry in messages)
+    return [leaf for group in wrapped for leaf in group]
 
 def get_annotation_name(annotation):
     origin = get_origin(annotation)
diff --git a/dspy/predict/predict.py b/dspy/predict/predict.py
@@ -47,11 +47,7 @@ def dump_state(self, save_verbose=None):
 
             for field in demo:
                 # FIXME: Saving BaseModels as strings in examples doesn't matter because you never re-access as an object
-                # It does matter for images
-                if isinstance(demo[field], Image):
-                    demo[field] = demo[field].model_dump()
-                elif isinstance(demo[field], BaseModel):
-                    demo[field] = demo[field].model_dump_json()
+                demo[field] = serialize_object(demo[field])
 
             state["demos"].append(demo)
 
@@ -296,6 +292,26 @@ def v2_5_generate(lm, lm_kwargs, signature, demos, inputs, _parse_values=True):
         lm, lm_kwargs=lm_kwargs, signature=signature, demos=demos, inputs=inputs, _parse_values=_parse_values
     )
 
+def serialize_object(obj):
+    """
+    Recursively convert ``obj``, replacing any Pydantic model it contains.
+
+    Pydantic models become the JSON string returned by ``model_dump_json()``; lists,
+    tuples and dict values are converted element by element; anything else is returned as-is.
+    """
+    if isinstance(obj, BaseModel):
+        # model_dump_json() yields a JSON *string*, not a dict.
+        return obj.model_dump_json()
+    if isinstance(obj, dict):
+        # Keys are kept untouched; only the values are converted.
+        return {name: serialize_object(item) for name, item in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        # Rebuild the same kind of container around the converted elements.
+        converted = [serialize_object(element) for element in obj]
+        return converted if isinstance(obj, list) else tuple(converted)
+    # Neither a model nor a handled container: hand it back unchanged.
+    return obj
+
 # TODO: get some defaults during init from the context window?
 # # TODO: FIXME: Hmm, I guess expected behavior is that contexts can
 # affect execution. Well, we need to determine whether context dominates, __init__ demoninates, or forward dominates.
diff --git a/tests/signatures/test_adapter_image.py b/tests/signatures/test_adapter_image.py
@@ -246,17 +246,38 @@ def test_predictor_save_load(sample_url, sample_pil_image):
     print(result)
     assert messages_contain_image_url_pattern(lm.history[-1]["messages"])
     print(lm.history[-1]["messages"])
-    assert False
+    assert "<DSPY_IMAGE_START>" not in str(lm.history[-1]["messages"])
 
 def test_save_load_complex_types():
-    pass
-    # class ComplexTypeSignature(dspy.Signature):
-    #     image_list: List[dspy.Image] = dspy.InputField(desc="A list of images")
-    #     caption: str = dspy.OutputField(desc="A caption for the image list")
+    examples = [
+        dspy.Example(image_list=[dspy.Image.from_url("https://example.com/dog.jpg"), dspy.Image.from_url("https://example.com/cat.jpg")], caption="Example 1").with_inputs("image_list"),
+    ]
+
+    class ComplexTypeSignature(dspy.Signature):
+        image_list: List[dspy.Image] = dspy.InputField(desc="A list of images")
+        caption: str = dspy.OutputField(desc="A caption for the image list")
+
+    lm = DummyLM([{"caption": "A list of images"}, {"caption": "A list of images"}])
+    dspy.settings.configure(lm=lm)
 
-    # lm = DummyLM([{"caption": "A list of images"}])
-    # dspy.settings.configure(lm=lm)
+    predictor = dspy.Predict(ComplexTypeSignature)
+    result = predictor(**examples[0].inputs())
+    
+    print(lm.history[-1]["messages"])
+    assert "<DSPY_IMAGE_START>" not in str(lm.history[-1]["messages"])
+    assert str(lm.history[-1]["messages"]).count("'url'") == 2
 
-    # predictor = dspy.Predict(ComplexTypeSignature)
-    # result = predictor(image_list=[dspy.Image.from_url("https://example.com/dog.jpg")])
-    # assert isinstance(result.caption, str)
+    optimizer = dspy.teleprompt.LabeledFewShot(k=1)
+    compiled_predictor = optimizer.compile(student=predictor, trainset=examples, sample=False)
+    print(compiled_predictor.demos)
+
+    with tempfile.NamedTemporaryFile(mode='w+', delete=True) as temp_file:
+        print("compiled_predictor state: ", compiled_predictor.dump_state())
+        compiled_predictor.save(temp_file.name)
+        loaded_predictor = dspy.Predict(ComplexTypeSignature)
+        loaded_predictor.load(temp_file.name)
+    
+    print("loaded_predictor state: ", loaded_predictor.dump_state())
+    result = loaded_predictor(**examples[0].inputs())
+    assert result.caption == "A list of images"
+    assert str(lm.history[-1]["messages"]).count("'url'") == 4