updated vqa examples (lm-sys#3378)

lisadunlap · web-flow · commit d2bc0933c273 · 2024-06-06T10:07:18.000-07:00
diff --git a/fastchat/serve/vision/create_vqa_examples_dir.py b/fastchat/serve/vision/create_vqa_examples_dir.py
@@ -12,6 +12,10 @@
 
 np.random.seed(0)
 
+"""
+Creates a directory of images and per-dataset data.json files for VQA examples. The final JSON, randomly sampled from each dataset, is written to metadata_sampled.json in the output directory.
+"""
+
 
 def download_images_and_create_json(
     dataset_info, cache_dir="~/vqa_examples_cache", base_dir="./vqa_examples"
@@ -22,32 +26,31 @@ def download_images_and_create_json(
 
         if info["subset"]:
             dataset = load_dataset(
-                info["path"], info["subset"], cache_dir=dataset_cache_dir, split="test"
+                info["path"],
+                info["subset"],
+                cache_dir=dataset_cache_dir,
+                split=info["split"],
             )
         else:
             dataset = load_dataset(
-                info["path"], cache_dir=dataset_cache_dir, split="test"
+                info["path"], cache_dir=dataset_cache_dir, split=info["split"]
             )
         dataset_dir = os.path.join(base_dir, dataset_name)
         os.makedirs(dataset_dir, exist_ok=True)
 
         json_data = []
-        # add tqdm to show progress bar
         for i, item in enumerate(tqdm.tqdm(dataset)):
             id_key = i if info["id_key"] == "index" else item[info["id_key"]]
             image_pil = item[info["image_key"]].convert("RGB")
             image_path = os.path.join(dataset_dir, f"{id_key}.jpg")
-            # save the image
             image_pil.save(image_path)
-            # Append data to JSON list
             json_entry = {
                 "dataset": dataset_name,
                 "question": item[info["question_key"]],
                 "path": image_path,
             }
             json_data.append(json_entry)
 
-        # Save the JSON data to a file
         with open(os.path.join(dataset_dir, "data.json"), "w") as json_file:
             json.dump(json_data, json_file, indent=4)
         # Delete the cache directory for the dataset
@@ -60,35 +63,54 @@ def download_images_and_create_json(
     parser.add_argument("--output_dir", type=str, default="./vqa_examples")
     args = parser.parse_args()
 
-    # Define the dataset information
     datasets_info = {
         "DocVQA": {
             "path": "lmms-lab/DocVQA",
             "image_key": "image",
             "question_key": "question",
             "id_key": "questionId",
             "subset": "DocVQA",
+            "split": "test",
         },
         "ChartQA": {
             "path": "HuggingFaceM4/ChartQA",
             "image_key": "image",
             "question_key": "query",
             "id_key": "index",
             "subset": False,
+            "split": "test",
         },
         "realworldqa": {
             "path": "visheratin/realworldqa",
             "image_key": "image",
             "question_key": "question",
             "id_key": "index",
             "subset": False,
+            "split": "test",
+        },
+        "NewYorker": {
+            "path": "jmhessel/newyorker_caption_contest",
+            "image_key": "image",
+            "question_key": "questions",
+            "id_key": "index",
+            "subset": "explanation",
+            "split": "train",
+        },
+        "WikiArt": {
+            "path": "huggan/wikiart",
+            "image_key": "image",
+            "question_key": "artist",
+            "id_key": "index",
+            "subset": False,
+            "split": "train",
         },
-        "VizWiz-VQA": {
-            "path": "lmms-lab/VizWiz-VQA",
+        "TextVQA": {
+            "path": "facebook/textvqa",
             "image_key": "image",
             "question_key": "question",
             "id_key": "question_id",
             "subset": False,
+            "split": "train",
         },
     }
 
@@ -99,8 +121,7 @@ def download_images_and_create_json(
     for dataset_name in datasets_info.keys():
         with open(f"{args.output_dir}/{dataset_name}/data.json") as f:
             data = json.load(f)
-            dataset_json.extend(np.random.choice(data, 765))
+            dataset_json.extend(np.random.choice(data, 500))
 
-    # save dataset_json to ../vqa_examples/metadata.json
     with open(f"{args.output_dir}/metadata_sampled.json", "w") as f:
         json.dump(dataset_json, f, indent=4)