Skip to content

Commit d2bc093

Browse files
authored
updated vqa examples (lm-sys#3378)
1 parent 9a9a6ef commit d2bc093

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

fastchat/serve/vision/create_vqa_examples_dir.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212

1313
np.random.seed(0)
1414

15+
"""
16+
Creates a directory with images and JSON files for VQA examples. The final sampled JSON is written to metadata_sampled.json in the output directory.
17+
"""
18+
1519

1620
def download_images_and_create_json(
1721
dataset_info, cache_dir="~/vqa_examples_cache", base_dir="./vqa_examples"
@@ -22,32 +26,31 @@ def download_images_and_create_json(
2226

2327
if info["subset"]:
2428
dataset = load_dataset(
25-
info["path"], info["subset"], cache_dir=dataset_cache_dir, split="test"
29+
info["path"],
30+
info["subset"],
31+
cache_dir=dataset_cache_dir,
32+
split=info["split"],
2633
)
2734
else:
2835
dataset = load_dataset(
29-
info["path"], cache_dir=dataset_cache_dir, split="test"
36+
info["path"], cache_dir=dataset_cache_dir, split=info["split"]
3037
)
3138
dataset_dir = os.path.join(base_dir, dataset_name)
3239
os.makedirs(dataset_dir, exist_ok=True)
3340

3441
json_data = []
35-
# add tqdm to show progress bar
3642
for i, item in enumerate(tqdm.tqdm(dataset)):
3743
id_key = i if info["id_key"] == "index" else item[info["id_key"]]
3844
image_pil = item[info["image_key"]].convert("RGB")
3945
image_path = os.path.join(dataset_dir, f"{id_key}.jpg")
40-
# save the image
4146
image_pil.save(image_path)
42-
# Append data to JSON list
4347
json_entry = {
4448
"dataset": dataset_name,
4549
"question": item[info["question_key"]],
4650
"path": image_path,
4751
}
4852
json_data.append(json_entry)
4953

50-
# Save the JSON data to a file
5154
with open(os.path.join(dataset_dir, "data.json"), "w") as json_file:
5255
json.dump(json_data, json_file, indent=4)
5356
# Delete the cache directory for the dataset
@@ -60,35 +63,54 @@ def download_images_and_create_json(
6063
parser.add_argument("--output_dir", type=str, default="./vqa_examples")
6164
args = parser.parse_args()
6265

63-
# Define the dataset information
6466
datasets_info = {
6567
"DocVQA": {
6668
"path": "lmms-lab/DocVQA",
6769
"image_key": "image",
6870
"question_key": "question",
6971
"id_key": "questionId",
7072
"subset": "DocVQA",
73+
"split": "test",
7174
},
7275
"ChartQA": {
7376
"path": "HuggingFaceM4/ChartQA",
7477
"image_key": "image",
7578
"question_key": "query",
7679
"id_key": "index",
7780
"subset": False,
81+
"split": "test",
7882
},
7983
"realworldqa": {
8084
"path": "visheratin/realworldqa",
8185
"image_key": "image",
8286
"question_key": "question",
8387
"id_key": "index",
8488
"subset": False,
89+
"split": "test",
90+
},
91+
"NewYorker": {
92+
"path": "jmhessel/newyorker_caption_contest",
93+
"image_key": "image",
94+
"question_key": "questions",
95+
"id_key": "index",
96+
"subset": "explanation",
97+
"split": "train",
98+
},
99+
"WikiArt": {
100+
"path": "huggan/wikiart",
101+
"image_key": "image",
102+
"question_key": "artist",
103+
"id_key": "index",
104+
"subset": False,
105+
"split": "train",
85106
},
86-
"VizWiz-VQA": {
87-
"path": "lmms-lab/VizWiz-VQA",
107+
"TextVQA": {
108+
"path": "facebook/textvqa",
88109
"image_key": "image",
89110
"question_key": "question",
90111
"id_key": "question_id",
91112
"subset": False,
113+
"split": "train",
92114
},
93115
}
94116

@@ -99,8 +121,7 @@ def download_images_and_create_json(
99121
for dataset_name in datasets_info.keys():
100122
with open(f"{args.output_dir}/{dataset_name}/data.json") as f:
101123
data = json.load(f)
102-
dataset_json.extend(np.random.choice(data, 765))
124+
dataset_json.extend(np.random.choice(data, 500))
103125

104-
# save dataset_json to ../vqa_examples/metadata.json
105126
with open(f"{args.output_dir}/metadata_sampled.json", "w") as f:
106127
json.dump(dataset_json, f, indent=4)

0 commit comments

Comments
 (0)