|
import json
import os
import re
import traceback
import urllib.request
import uuid
from pathlib import Path
from typing import Optional

import numpy as np
from pydantic import BaseModel

import evals.metrics
from evals.api import CompletionFn
from evals.elsuite.rag_match import get_rag_dataset
from evals.elsuite.utils import ReactionDictMatching, ReactionDictMatchingSimple
from evals.record import RecorderBase, record_match
| 17 | + |
# Regexes for pulling a structured answer out of a model completion.
# Generic fenced code block: group 1 is the fence body.
code_pattern = r"```[\s\S]*?\n([\s\S]+?)\n```"
# ```json fenced block: group 1 is the JSON text.
json_pattern = r"```json[\s\S]*?\n([\s\S]+?)\n```"
# ```csv fenced block: group 1 is the CSV text.
csv_pattern = r"```csv[\s\S]*?\n([\s\S]+?)\n```"
# NOTE(review): "{index0}" looks like a str.format placeholder to be filled in
# with the table's first header cell before compiling — confirm at the call site.
table_pattern = r"\n({index0}[\s\S]+)\n[`]*"
# Markdown "[Download …](https://…)" link: group 1 is the https URL.
outlink_pattern = r"\[Download[a-zA-Z0-9 ]+?\]\((https://[a-zA-Z0-9_. /]+?)\)"
| 23 | + |
| 24 | + |
class FileSampleWithInput(BaseModel):
    """Schema for one eval sample: a prompt plus input/answer file names and links.

    All fields are optional; `eval_sample` below reads samples as plain dicts,
    so this model presumably documents/validates the expected sample shape —
    TODO confirm where it is instantiated.
    """

    input: Optional[str]  # prompt text, or a pre-built chat transcript in the raw dict
    file_name: Optional[str]  # local path of the input file
    file_link: Optional[str]  # URL of the input file
    answerfile_name: Optional[str]  # local path of the reference answer JSON
    answerfile_link: Optional[str]  # URL of the reference answer JSON
| 31 | + |
| 32 | + |
class ReactionExtract(evals.Eval):
    """Eval that asks a completion fn to extract reaction data from a file and
    scores the parsed JSON answer against a reference answer file.

    Per-sample metric: ``accuracy_leaves`` — leaf-level match rate between the
    model's parsed reaction dict and the reference ``inputs`` dict, as computed
    by ``ReactionDictMatchingSimple``.
    """

    def __init__(
        self,
        completion_fns: list[CompletionFn],
        samples_jsonl: str,
        *args,
        instructions: Optional[str] = "",
        **kwargs,
    ):
        """Store the samples path and optional system instructions.

        Raises:
            AssertionError: if more than 2 completion fns are supplied.
        """
        super().__init__(completion_fns, *args, **kwargs)
        # BUGFIX: the condition admits at most 2 fns, but the original message
        # claimed "supports 3" — the message now matches the check.
        assert len(completion_fns) < 3, "ReactionExtract supports at most 2 completion fns"
        self.samples_jsonl = samples_jsonl
        self.instructions = instructions

    def eval_sample(self, sample, rng):
        """Run one sample: query the completion fn, parse its answer, record a match.

        Returns a dict with the ``accuracy_leaves`` score (0 on parse failure).
        """
        assert isinstance(sample, dict)

        # Accept either a pre-built chat transcript or a bare user string.
        if isinstance(sample["input"], list):
            input_formatted = sample["input"]
        else:
            input_formatted = [{"role": "user", "content": sample["input"]}]
        if self.instructions:
            prompt = [{"role": "system", "content": self.instructions}] + input_formatted
        else:
            prompt = input_formatted

        result = self.completion_fn(
            prompt=prompt,
            temperature=0.0,
            file_name=sample["file_name"],
            file_link=sample["file_link"],
        )
        sampled = result.get_completions()[0]

        # Reference answer: the "inputs" sub-dict of the answer file.
        with open(sample["answerfile_name"], "r") as f:
            correct_answer = json.load(f)["inputs"]
        correct_str = json.dumps(correct_answer, indent=4)

        try:
            outlink_match = re.search(outlink_pattern, sampled)
            if outlink_match is not None:
                # The model returned a download link: fetch the JSON answer file.
                # BUGFIX: urlretrieve replaces os.system(f"wget {link} ..."),
                # which was open to shell injection via model-controlled text.
                link = outlink_match.group(1)
                fname = f"/tmp/LLMEvals_{uuid.uuid4()}.json"
                urllib.request.urlretrieve(link, fname)
                with open(fname, "r") as f:
                    answer = json.load(f)
            elif "json" in self.instructions:
                # The model returned an inline ```json fenced block.
                code = re.search(json_pattern, sampled).group()
                code_content = re.sub(json_pattern, r"\1", code)
                # Strip // comments, which are not valid JSON.
                code_content = re.sub(r"//.*", "", code_content)
                answer = json.loads(code_content)
            else:
                answer = {}
            picked_str = json.dumps(answer, indent=4)
            # Persist the parsed answer next to the reference file for inspection.
            with open(sample["answerfile_name"].replace(".json", "_out.json"), "w") as f:
                f.write(picked_str)
        except Exception:
            # Download/regex/JSON parsing failed: record a miss against the raw
            # completion and score this sample 0.
            print(Path(sample["file_name"]).stem)
            traceback.print_exc()
            record_match(
                prompt=prompt,
                correct=False,
                expected=correct_str,
                picked=sampled,
                file_name=sample["file_name"],
                jobtype="match_all",
            )
            return {"accuracy_leaves": 0}

        accuracy_leaves, df = ReactionDictMatchingSimple(correct_answer, answer, content="raw")
        record_match(
            prompt=prompt,
            correct=(accuracy_leaves == 1.0),
            expected=correct_str,
            picked=picked_str,
            file_name=sample["file_name"],
            jobtype="match_all",
        )
        return {"accuracy_leaves": accuracy_leaves}

    def run(self, recorder: RecorderBase):
        """Evaluate every sample and return the mean of each per-sample metric."""
        samples = get_rag_dataset(self._prefix_registry_path(self.samples_jsonl).as_posix())
        metrics_all_sample = self.eval_all_samples(recorder, samples)
        # Guard: indexing [0] would raise on an empty sample set.
        if not metrics_all_sample:
            return {}
        return {
            key: np.mean([sample_metrics[key] for sample_metrics in metrics_all_sample])
            for key in metrics_all_sample[0]
        }