-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
471 lines (388 loc) · 18.2 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
from PIL import Image
import re
import os
import utils
from utils import gaussian_kernel, gaussian_blur
from torchvision.transforms import functional as tvF
from clickme import process_clickmaps, make_heatmap
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import random
# Transform pipelines shared by the datasets in this module.
#   'aug'  - random geometric augmentations (crop, flips, rotation)
#   'norm' - per-channel normalisation with the ImageNet mean / std
data_transforms = dict(
    aug=transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomVerticalFlip(0.5),
        transforms.RandomRotation((-15, 15)),
    ]),
    norm=transforms.Compose([
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]),
)

# Size (and derived sigma) handed to utils.gaussian_kernel when the
# click maps are turned into heatmaps.
BRUSH_SIZE = 11
BRUSH_SIZE_SIGMA = np.sqrt(BRUSH_SIZE)
class ClickMe(Dataset):
    """
    A custom Dataset class for the ClickMe dataset.

    Every sample is a dict with the keys ``'image'`` (path to the image
    file), ``'heatmap'``, ``'category_label'`` (integer class index) and
    ``'has_heatmap'``. In training mode the dataset holds rendered CO3D
    frames without click maps (all-zero placeholder heatmap,
    ``has_heatmap=False``) followed by the ClickMe training images with
    heatmaps. In validation mode it holds only the ClickMe validation
    images with heatmaps.

    Attributes:
        image_folder: Value passed to the constructor; stored but not read
            by this class.
        label_to_category_map (dict): Category name -> integer index. The
            dict passed to the constructor is updated IN PLACE whenever an
            unseen category is encountered.
        is_training (bool): Flag to indicate if the dataset is for training.
        data (list): The sample dicts described above.
        data_dictionary (dict): Kept for backward compatibility; never
            filled by this class.
    """

    # Locations of the ClickMe images and click-map CSV files.
    TRAIN_IMAGE_DIR = "data/CO3D_ClickMe_Training/"
    TRAIN_CSV = "data/CO3D_ClickMe_Training.csv"
    VAL_IMAGE_DIR = "data/CO3D_ClickMe_Validation/"
    VAL_CSV = "data/CO3D_ClickMe_Validation.csv"

    # Directories the constructor makes sure exist.
    OUTPUT_DIRS = ("clickme_test_images", "assets")

    # TODO: Change recursive "projects"
    # (alternative mount point: /media/data_cifs/projects/...)
    RENDER_LIST_FILE = "/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/CausalVisionModeling/data_lists/filtered_binocular_renders_test.txt"
    RENDER_ROOT_DIR = "/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/PeRFception/data/co3d_v2/"

    # Positions of the frames taken from every rendered sequence.
    FRAME_INDICES = range(0, 50, 9)
    # Shape requested from make_heatmap and used for placeholder heatmaps.
    HEATMAP_SHAPE = (256, 256)
    # Height/width of the centre crop applied to image and heatmap.
    CROP_SIZE = (224, 224)

    def __init__(self, image_folder, label_to_category_map, is_training=True):
        """
        Initialize the ClickMe dataset.

        Args:
            image_folder: Stored on the instance; the data locations
                actually used are the class-level constants.
            label_to_category_map (dict): Category name -> integer index.
                Updated in place with categories not yet present.
            is_training (bool, optional): Flag to indicate if the dataset
                is for training. Defaults to True.
        """
        super().__init__()
        self.image_folder = image_folder
        self.label_to_category_map = label_to_category_map
        self.is_training = is_training
        self.data = []
        self.data_dictionary = {}

        for directory in self.OUTPUT_DIRS:
            os.makedirs(directory, exist_ok=True)

        if is_training:
            self._add_render_samples()
            print("Done processing training images WITHOUT ClickMaps.")
            heatmap_count = self._add_clickmap_samples(self.TRAIN_CSV, self.TRAIN_IMAGE_DIR)
            print("Done processing training images WITH ClickMaps. There are ", heatmap_count, "heatmaps.")
        else:
            self._add_clickmap_samples(self.VAL_CSV, self.VAL_IMAGE_DIR)

    def _category_index(self, label):
        """
        Return the integer index of ``label``, registering it if unseen.

        A new label receives the largest index already in the map plus one
        (0 for an empty map), so it can never collide with an entry of a
        pre-populated map.
        """
        mapping = self.label_to_category_map
        if label not in mapping:
            mapping[label] = max(mapping.values(), default=-1) + 1
        return mapping[label]

    def _add_render_samples(self):
        """
        Append the rendered frames (no click maps) listed in RENDER_LIST_FILE.

        Each non-blank line is split on '/': the second component is the
        category name, the first four components form the sequence
        directory and the fifth holds whitespace-separated frame file
        names, of which those at FRAME_INDICES are used.
        """
        # The placeholder is only ever read (cropping and scaling create
        # new tensors), so one shared instance serves every sample instead
        # of allocating a fresh 256x256 tensor per frame.
        placeholder = torch.zeros(self.HEATMAP_SHAPE, dtype=torch.float32)
        with open(self.RENDER_LIST_FILE, 'r') as file:
            for line in file:
                if not line.strip():
                    continue  # tolerate blank lines in the list file
                parts = line.split('/')
                sequence_path = '/'.join(parts[0:4])
                files = parts[4].split()
                category = self._category_index(parts[1])
                for i in self.FRAME_INDICES:
                    self.data.append({
                        'image': os.path.join(self.RENDER_ROOT_DIR, sequence_path, files[i]),
                        'heatmap': placeholder,
                        'category_label': category,
                        'has_heatmap': False,
                    })

    def _add_clickmap_samples(self, csv_path, image_dir):
        """
        Append one sample per image for which a heatmap could be built.

        Args:
            csv_path (str): CSV file with the raw click maps.
            image_dir (str): Directory the image names are relative to.

        Returns:
            int: Number of samples appended.
        """
        clickmaps = pd.read_csv(csv_path)
        processed_maps, _ = process_clickmaps(clickmaps, is_training=self.is_training)
        kernel = utils.gaussian_kernel(size=BRUSH_SIZE, sigma=BRUSH_SIZE_SIGMA)
        added = 0
        for image, maps in processed_maps.items():
            full_path = os.path.join(image_dir, image)
            image_name, _, heatmap = make_heatmap(
                full_path,
                maps,
                kernel,
                image_shape=list(self.HEATMAP_SHAPE),
                exponential_decay=False,
            )
            # Images without a name are skipped. The check must come before
            # image_name is used, otherwise None raises AttributeError.
            if image_name is None:
                continue
            # The category is the third '/'-separated component of the name.
            label = image_name.split("/")[2]
            self.data.append({
                "image": full_path,
                "heatmap": heatmap,
                "category_label": self._category_index(label),
                "has_heatmap": True,
            })
            added += 1
        return added

    def __getitem__(self, index):
        """
        Get an item from the dataset.

        Args:
            index (int): Index of the item to retrieve.

        Returns:
            tuple: ``(image, heatmap, label, image_path)``; the image is
            centre-cropped, scaled to [0, 1] and normalised with the
            ImageNet statistics, the heatmap is centre-cropped and divided
            by 255, the label is an int64 tensor and ``image_path`` is the
            path the image was read from. In training mode image and
            heatmap are augmented together.
        """
        sample = self.data[index]
        image_name = sample['image']
        crop = list(self.CROP_SIZE)

        # Close the file as soon as the pixels are read, and force three
        # channels so grayscale / RGBA files fit the 3-channel pipeline.
        with Image.open(image_name) as raw:
            img = raw.convert('RGB')
        img = tvF.center_crop(img, crop)
        img = self._preprocess_image(img)

        hmp = tvF.center_crop(sample['heatmap'], crop)
        hmp = self._preprocess_heatmap(hmp)
        label = self._preprocess_label(sample['category_label'])

        if self.is_training:
            img, hmp = self._apply_augmentation(img, hmp)
        img = data_transforms['norm'](img)  # Apply ImageNet mean and std
        return img, hmp, label, image_name

    def __len__(self):
        """
        Get the length of the dataset.

        Returns:
            int: The number of items in the dataset.
        """
        return len(self.data)

    def _preprocess_image(self, img):
        """
        Convert an image to a float tensor in [C, H, W] layout.

        Args:
            img: Image convertible with ``np.array`` to an array laid out
                as [H, W, C] (e.g. a PIL image).

        Returns:
            torch.Tensor: float32 tensor holding the input values / 255.
        """
        img_tensor = torch.from_numpy(np.array(img))
        # Rearrange dimensions from [H, W, C] to [C, H, W]
        img_tensor = img_tensor.permute(2, 0, 1)
        return img_tensor.to(torch.float32) / 255.0

    def _preprocess_heatmap(self, hmp):
        """
        Preprocess the heatmap.

        Args:
            hmp (torch.Tensor or array-like): The input heatmap.

        Returns:
            torch.Tensor: New float32 tensor holding the input values / 255.
        """
        # as_tensor avoids the warning-emitting copy torch.tensor() makes
        # for tensor inputs; the division still returns a new tensor, so
        # the stored heatmap is never modified.
        return torch.as_tensor(hmp).to(torch.float32) / 255.0

    def _preprocess_label(self, label):
        """
        Preprocess the label.

        Args:
            label (int): The category index.

        Returns:
            torch.Tensor: The label as a squeezed int64 tensor.
        """
        return torch.squeeze(torch.tensor(label).to(torch.int64))

    def _apply_augmentation(self, img, hmp):
        """
        Apply data augmentation to the image and heatmap.

        The heatmap is stacked onto the image as an extra channel so that a
        single call of the random transforms affects both identically.

        Args:
            img (torch.Tensor): The input image tensor, [C, H, W].
            hmp (torch.Tensor): The input heatmap tensor, [H, W].

        Returns:
            tuple: The augmented image ([C, H', W']) and heatmap ([H', W']).
        """
        stacked_img = torch.cat((img, hmp.unsqueeze(0)), dim=0)
        stacked_img = data_transforms['aug'](stacked_img)
        # The last channel is the heatmap; indexing drops the extra dim.
        return stacked_img[:-1, :, :], stacked_img[-1, :, :]
def build_co3d_eval_loader(args, transform=None, return_all=False, label_to_index_map=None):
    """
    Build the CO3D evaluation data loader(s).

    Args:
        args: Object providing ``num_workers`` for the data loaders.
        transform (callable, optional): Transform applied to every image.
            Defaults to resize(256) -> center-crop(224) -> tensor ->
            ImageNet normalisation.
        return_all (bool, optional): If True, also build the train and
            validation loaders. Defaults to False.
        label_to_index_map (dict, optional): Category name -> index mapping
            handed to ``Co3dTestDataset``.

    Returns:
        The test ``DataLoader`` (batch size 1), or the tuple
        ``(train_loader, val_loader, test_loader)`` when ``return_all`` is
        True (train/val batch size 256, no shuffling).
    """
    # Fall back to the nested "projects/projects" mount when the primary
    # location is not present on this machine.
    cifs = "/cifs/data/tserre_lrs/projects/prj_video_imagenet/"
    if not os.path.exists(cifs):
        cifs = "/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/"
    data_root = os.path.join(cifs, 'PeRFception/data/co3d_v2/binocular_trajectory/')
    data_path = os.path.join(cifs, 'Evaluation/')
    test_data_path = './assets/co3d_clickmaps_normalized.npy'
    test_human_results = './assets/human_ceiling_results.npz'

    if transform is None:
        transform = transforms.Compose([
            # NOTE(review): 3 is the integer code PIL uses for bicubic
            # resampling; kept as an int to match the original call.
            transforms.Resize(256, interpolation=3),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    co3d_dataset_test = Co3dTestDataset(
        numpy_file=test_data_path,
        human_results_file=test_human_results,
        label_to_index=label_to_index_map,
        transform=transform,
    )
    co3d_dataloader_test = DataLoader(
        co3d_dataset_test, batch_size=1, num_workers=args.num_workers, shuffle=False
    )
    if not return_all:
        return co3d_dataloader_test

    # Both splits use the same frame ranges and differ only in the `train`
    # flag and in the sequence list they read.
    frame_ranges = dict(
        train_start_frame=0,
        train_end_frame=40,
        val_start_frame=41,
        val_end_frame=49,
    )
    co3d_dataset_train = Co3dLpDataset(
        root=data_root,
        train=True,
        transform=transform,
        datapath=os.path.join(data_path, "filtered_co3d_train.txt"),
        **frame_ranges,
    )
    co3d_dataset_val = Co3dLpDataset(
        root=data_root,
        train=False,
        transform=transform,
        datapath=os.path.join(data_path, "filtered_co3d_test.txt"),
        **frame_ranges,
    )
    co3d_dataloader_train = DataLoader(
        co3d_dataset_train, batch_size=256, shuffle=False, num_workers=args.num_workers
    )
    co3d_dataloader_val = DataLoader(
        co3d_dataset_val, batch_size=256, shuffle=False, num_workers=args.num_workers
    )
    return co3d_dataloader_train, co3d_dataloader_val, co3d_dataloader_test
from sklearn import preprocessing
class Co3dLpDataset(torch.utils.data.Dataset):
    """
    Dataset yielding one randomly chosen frame per CO3D sequence.

    The sequence list file (``datapath``) holds one sequence per line:
    the sequence directory relative to ``root`` followed by its frame file
    names, all whitespace-separated. The class label is the first path
    component of the sequence directory, encoded with a ``LabelEncoder``
    fitted on the entries of ``root``.
    """

    def __init__(self,
                 root,
                 train=True,
                 datapath="",
                 transform=None,
                 lazy_init=False,
                 reverse_sequence=False,
                 train_start_frame=0,
                 train_end_frame=39,
                 val_start_frame=40,
                 val_end_frame=49):
        """
        Args:
            root (str): Directory containing one sub-directory per class.
            train (bool): Sample from the training frame range if True,
                otherwise from the validation frame range.
            datapath (str): Path to the sequence list file.
            transform (callable, optional): Applied to the loaded image.
            lazy_init (bool): If True, the sequence list is read on first
                use instead of in the constructor.
            reverse_sequence (bool): Stored; not read by this class.
            train_start_frame, train_end_frame (int): Slice bounds
                (end exclusive) of the frames used when ``train`` is True.
            val_start_frame, val_end_frame (int): Slice bounds
                (end exclusive) of the frames used when ``train`` is False.

        Raises:
            RuntimeError: If the list file contains no sequences (only
                checked when ``lazy_init`` is False).
        """
        super().__init__()
        self.root = root
        self.train = train
        # Stored as a plain string (the original trailing comma turned it
        # into a 1-tuple that make_dataset_samples had to unpack).
        self.datapath = datapath
        self.transform = transform
        self.lazy_init = lazy_init
        self.reverse_sequence = reverse_sequence
        self.train_start_frame = train_start_frame
        self.train_end_frame = train_end_frame
        self.val_start_frame = val_start_frame
        self.val_end_frame = val_end_frame

        self.clips = None
        if not self.lazy_init:
            self.clips = self.make_dataset_samples()
            if len(self.clips) == 0:
                raise RuntimeError("Found 0 video clips in subfolders of: " + root + "\n"
                                   "Check your data directory (opt.data-dir).")

        # Preprocessing the label
        all_classes = os.listdir(self.root)
        self.label_preprocessing = preprocessing.LabelEncoder()
        self.label_preprocessing.fit(np.array(all_classes))

    def _loaded_clips(self):
        """Return the sequence list, reading it first if still unloaded."""
        if getattr(self, "clips", None) is None:
            self.clips = self.make_dataset_samples()
        return self.clips

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sequence at ``index`` (wraps around)."""
        clips = self._loaded_clips()
        sample = clips[index % len(clips)]
        image = self.load_frame(sample)

        # Adding the labels: the class is the first path component.
        label = sample[0].split('/')[0]
        label = torch.tensor(self.label_preprocessing.transform([label]))

        if self.transform:
            image = self.transform(image)
        return image, label[0]

    def __len__(self):
        """Return the number of sequences."""
        return len(self._loaded_clips())

    def load_frame(self, sample):
        """
        Load one random frame of a sequence as an RGB PIL image.

        Args:
            sample (list): ``[sequence_dir, frame_0, frame_1, ...]``.

        Returns:
            PIL.Image.Image: The chosen frame, or ``[]`` (after printing a
            message) when the sequence directory does not exist.
        """
        fname = os.path.join(self.root, sample[0])
        frames_list = sample[1:]
        if not os.path.exists(fname):
            print(f"Frame of {fname} does not exist")
            return []

        if self.train:
            # Train on only training frames
            frames_list = frames_list[self.train_start_frame: self.train_end_frame]
        else:
            # Validate on remaining frames
            frames_list = frames_list[self.val_start_frame: self.val_end_frame]
        selected_random_frame = random.choice(frames_list)
        # convert() reads the pixel data, so the file can be closed at once.
        with Image.open(os.path.join(fname, selected_random_frame)) as raw:
            return raw.convert('RGB')

    def make_dataset_samples(self):
        """
        Read the sequence list file.

        Safe to call repeatedly. A legacy 1-tuple ``datapath`` is unpacked
        once; blank lines are skipped.

        Returns:
            list: One ``[sequence_dir, frame, ...]`` list per non-blank line.
        """
        if isinstance(self.datapath, (tuple, list)):
            self.datapath = self.datapath[0]
        with open(self.datapath, 'r') as fopen:
            return [line.split() for line in fopen if line.strip()]
class EmbeddingDataset(Dataset):
    """Pairs precomputed embeddings with their labels by position."""

    def __init__(self, embeddings, labels):
        """Keep references to the two parallel, indexable collections."""
        self.embeddings, self.labels = embeddings, labels

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair stored at ``idx``."""
        embedding = self.embeddings[idx]
        target = self.labels[idx]
        return embedding, target

    def __len__(self):
        """Return the number of labels."""
        return len(self.labels)
class Co3dTestDataset(Dataset):
    """
    CO3D click-map test set restricted to images that have human results.

    ``numpy_file`` is a pickled dict ``image_name -> {'image': ...,
    'heatmap': ...}`` saved with ``np.save``. Image names have the form
    ``"<category>_<rest>"``; an image is kept only if ``"<category>/<rest>"``
    is a key of the ``'final_clickmaps_thresholded'`` dict stored in
    ``human_results_file``.
    """

    def __init__(self, numpy_file, human_results_file, label_to_index, transform=None):
        """
        Args:
            numpy_file (str): Path to the ``.npy`` file with images/heatmaps.
            human_results_file (str): Path to the ``.npz`` file with the
                human results.
            label_to_index (dict): Category name -> integer class index.
            transform (callable, optional): Applied to each image.
        """
        super().__init__()
        self.image_names = []
        self.image_files = []
        self.heatmaps = []
        self.categories = []
        self.transform = transform
        self.label_to_index = label_to_index

        # NOTE(security): allow_pickle=True unpickles arbitrary objects;
        # only load files from a trusted source.
        # The dict was saved directly with np.save, so it comes back as a
        # 0-d object array; .item() unwraps it.
        all_data = np.load(numpy_file, allow_pickle=True).item()
        # The context manager closes the .npz archive once the keys are read.
        with np.load(human_results_file, allow_pickle=True) as human_data:
            filtered_imgs = set(human_data['final_clickmaps_thresholded'].tolist())

        for image_name, entry in all_data.items():
            # Only the FIRST underscore separates category and remainder;
            # later occurrences of "<category>_" must stay untouched.
            cat, sep, rest = image_name.partition('_')
            lookup_name = f'{cat}/{rest}' if sep else image_name
            if lookup_name in filtered_imgs:
                self.image_files.append(entry['image'])
                self.heatmaps.append(entry['heatmap'])
                self.image_names.append(image_name)
                self.categories.append(cat)

    def __len__(self):
        """Return the number of retained images."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """
        Returns:
            tuple: ``(image, heatmap, numeric_label, image_name, category)``
            where ``image`` is transformed if a transform was given and
            ``numeric_label`` is a long tensor looked up in ``label_to_index``.
        """
        img_name = self.image_names[idx]
        cat = self.categories[idx]
        numeric_label = torch.tensor(self.label_to_index[cat], dtype=torch.long)
        heatmap = self.heatmaps[idx]

        image = self.image_files[idx]
        if self.transform:
            image = self.transform(image)
        return image, heatmap, numeric_label, img_name, cat