12
12
13
13
np .random .seed (0 )
14
14
15
+ """
16
+ Creates a directory of images and JSON files for VQA examples. The final sampled JSON is written to metadata_sampled.json in the output directory.
17
+ """
18
+
15
19
16
20
def download_images_and_create_json (
17
21
dataset_info , cache_dir = "~/vqa_examples_cache" , base_dir = "./vqa_examples"
@@ -22,32 +26,31 @@ def download_images_and_create_json(
22
26
23
27
if info ["subset" ]:
24
28
dataset = load_dataset (
25
- info ["path" ], info ["subset" ], cache_dir = dataset_cache_dir , split = "test"
29
+ info ["path" ],
30
+ info ["subset" ],
31
+ cache_dir = dataset_cache_dir ,
32
+ split = info ["split" ],
26
33
)
27
34
else :
28
35
dataset = load_dataset (
29
- info ["path" ], cache_dir = dataset_cache_dir , split = "test"
36
+ info ["path" ], cache_dir = dataset_cache_dir , split = info [ "split" ]
30
37
)
31
38
dataset_dir = os .path .join (base_dir , dataset_name )
32
39
os .makedirs (dataset_dir , exist_ok = True )
33
40
34
41
json_data = []
35
- # add tqdm to show progress bar
36
42
for i , item in enumerate (tqdm .tqdm (dataset )):
37
43
id_key = i if info ["id_key" ] == "index" else item [info ["id_key" ]]
38
44
image_pil = item [info ["image_key" ]].convert ("RGB" )
39
45
image_path = os .path .join (dataset_dir , f"{ id_key } .jpg" )
40
- # save the image
41
46
image_pil .save (image_path )
42
- # Append data to JSON list
43
47
json_entry = {
44
48
"dataset" : dataset_name ,
45
49
"question" : item [info ["question_key" ]],
46
50
"path" : image_path ,
47
51
}
48
52
json_data .append (json_entry )
49
53
50
- # Save the JSON data to a file
51
54
with open (os .path .join (dataset_dir , "data.json" ), "w" ) as json_file :
52
55
json .dump (json_data , json_file , indent = 4 )
53
56
# Delete the cache directory for the dataset
@@ -60,35 +63,54 @@ def download_images_and_create_json(
60
63
parser .add_argument ("--output_dir" , type = str , default = "./vqa_examples" )
61
64
args = parser .parse_args ()
62
65
63
- # Define the dataset information
64
66
datasets_info = {
65
67
"DocVQA" : {
66
68
"path" : "lmms-lab/DocVQA" ,
67
69
"image_key" : "image" ,
68
70
"question_key" : "question" ,
69
71
"id_key" : "questionId" ,
70
72
"subset" : "DocVQA" ,
73
+ "split" : "test" ,
71
74
},
72
75
"ChartQA" : {
73
76
"path" : "HuggingFaceM4/ChartQA" ,
74
77
"image_key" : "image" ,
75
78
"question_key" : "query" ,
76
79
"id_key" : "index" ,
77
80
"subset" : False ,
81
+ "split" : "test" ,
78
82
},
79
83
"realworldqa" : {
80
84
"path" : "visheratin/realworldqa" ,
81
85
"image_key" : "image" ,
82
86
"question_key" : "question" ,
83
87
"id_key" : "index" ,
84
88
"subset" : False ,
89
+ "split" : "test" ,
90
+ },
91
+ "NewYorker" : {
92
+ "path" : "jmhessel/newyorker_caption_contest" ,
93
+ "image_key" : "image" ,
94
+ "question_key" : "questions" ,
95
+ "id_key" : "index" ,
96
+ "subset" : "explanation" ,
97
+ "split" : "train" ,
98
+ },
99
+ "WikiArt" : {
100
+ "path" : "huggan/wikiart" ,
101
+ "image_key" : "image" ,
102
+ "question_key" : "artist" ,
103
+ "id_key" : "index" ,
104
+ "subset" : False ,
105
+ "split" : "train" ,
85
106
},
86
- "VizWiz-VQA " : {
87
- "path" : "lmms-lab/VizWiz-VQA " ,
107
+ "TextVQA " : {
108
+ "path" : "facebook/textvqa " ,
88
109
"image_key" : "image" ,
89
110
"question_key" : "question" ,
90
111
"id_key" : "question_id" ,
91
112
"subset" : False ,
113
+ "split" : "train" ,
92
114
},
93
115
}
94
116
@@ -99,8 +121,7 @@ def download_images_and_create_json(
99
121
for dataset_name in datasets_info .keys ():
100
122
with open (f"{ args .output_dir } /{ dataset_name } /data.json" ) as f :
101
123
data = json .load (f )
102
- dataset_json .extend (np .random .choice (data , 765 ))
124
+ dataset_json .extend (np .random .choice (data , 500 ))
103
125
104
- # save dataset_json to ../vqa_examples/metadata.json
105
126
with open (f"{ args .output_dir } /metadata_sampled.json" , "w" ) as f :
106
127
json .dump (dataset_json , f , indent = 4 )
0 commit comments