diff --git a/create_input_files.py b/create_input_files.py
index 34abc406e..42b00c849 100644
--- a/create_input_files.py
+++ b/create_input_files.py
@@ -3,9 +3,9 @@
 if __name__ == '__main__':
     # Create input files (along with word map)
     create_input_files(dataset='coco',
-                       karpathy_json_path='../caption data/dataset_coco.json',
-                       image_folder='/media/ssd/caption data/',
+                       karpathy_json_path='./data_sets/dataset_coco.json',
+                       image_folder='./data_sets/training/',
                        captions_per_image=5,
                        min_word_freq=5,
-                       output_folder='/media/ssd/caption data/',
+                       output_folder='./data_sets/output/',
                        max_len=50)
diff --git a/utils.py b/utils.py
index 121242d0d..a7ebf99fb 100644
--- a/utils.py
+++ b/utils.py
@@ -3,7 +3,10 @@
 import h5py
 import json
 import torch
-from scipy.misc import imread, imresize
+import numpy as np
+# scipy.misc.imread/imresize were removed in SciPy 1.3; use imageio and Pillow instead
+import imageio
+from PIL import Image
 from tqdm import tqdm
 from collections import Counter
 from random import seed, choice, sample
@@ -110,12 +113,14 @@ def create_input_files(dataset, karpathy_json_path, image_folder, captions_per_i
                 # Sanity check
                 assert len(captions) == captions_per_image
 
                 # Read images
-                img = imread(impaths[i])
+                img = imageio.imread(impaths[i])
+                # Expand grayscale images to three identical channels
                 if len(img.shape) == 2:
                     img = img[:, :, np.newaxis]
                     img = np.concatenate([img, img, img], axis=2)
-                img = imresize(img, (256, 256))
+                # Pillow's resize replaces the removed scipy.misc.imresize
+                img = np.array(Image.fromarray(img).resize((256, 256)))
                 img = img.transpose(2, 0, 1)
                 assert img.shape == (3, 256, 256)
                 assert np.max(img) <= 255
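
For reference, the new read/resize path can be exercised on its own before rebuilding the HDF5 files. The sketch below is a minimal standalone version of the pipeline in the `utils.py` hunk above; the function name `load_as_rgb_256` and the sample image path are hypothetical, introduced only for illustration.

```python
# Standalone sketch of the imageio + Pillow pipeline that replaces the
# removed scipy.misc.imread/imresize calls. Names and paths are illustrative.
import imageio
import numpy as np
from PIL import Image


def load_as_rgb_256(path):
    """Read an image, expand grayscale to 3 channels, resize to 256x256,
    and return a channels-first uint8 array of shape (3, 256, 256)."""
    img = imageio.imread(path)
    if img.ndim == 2:
        # Grayscale: replicate the single channel three times (same effect
        # as the newaxis + concatenate lines in the diff)
        img = np.stack([img, img, img], axis=2)
    # Pillow resize, then back to a NumPy array
    img = np.array(Image.fromarray(img).resize((256, 256)))
    # HWC -> CHW, matching what the HDF5 dataset stores
    img = img.transpose(2, 0, 1)
    assert img.shape == (3, 256, 256)
    assert np.max(img) <= 255
    return img


if __name__ == '__main__':
    arr = load_as_rgb_256('./data_sets/training/example.jpg')  # hypothetical file
    print(arr.shape, arr.dtype)
```

One caveat on the swap: `scipy.misc.imresize` defaulted to bilinear interpolation, while Pillow's `resize` does not, so pass `resample=Image.BILINEAR` explicitly if close parity with the old preprocessing matters.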