
Commit 96c4fb6

update code
add class "Data3" in Data.py to support triple input; fix bug: when input with an 'int' dtype is passed to the norm_with_l2 function in data_prepare.py, it is now converted to float first to prevent overflow during the power operation
1 parent 7a03cfa commit 96c4fb6
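
As a quick illustration of the overflow the fix guards against (a minimal sketch, not part of the commit): squaring large int32 values wraps around silently, while multiplying by 1.0 first promotes them to float64, which is exactly what the `* 1.0` in norm_with_l2 does.

import numpy as np

row = np.array([50000, 60000], dtype=np.int32)
# 50000**2 = 2.5e9 exceeds the int32 maximum (~2.147e9), so the square wraps around
print(np.sum(np.square(row)))        # typically a large negative number from int32 wrap-around
# multiplying by 1.0 promotes to float64 before squaring, as in the fix
print(np.sum(np.square(row * 1.0)))  # 6100000000.0, as expected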

File tree

Data.py
data_prepare.py

2 files changed: +213 -0 lines changed

Data.py

+110
@@ -98,3 +98,113 @@ def next_batch(self, batch_size, shuffle=True):
            self._index_in_step += batch_size
            end = self._index_in_step
            return self._images[start:end], self._labels[start:end]


class Data3(object):
    def __init__(self, images, labels1, labels2):
        self._num_examples = images.shape[0]
        self._images = images
        self._labels1 = labels1
        self._labels2 = labels2
        self._steps_completed = 0
        self._index_in_step = 0

    @property
    def images(self):
        return self._images

    @property
    def labels1(self):
        return self._labels1

    @property
    def labels2(self):
        return self._labels2

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def steps_completed(self):
        return self._steps_completed

    def next_batch(self, batch_size, shuffle=True):
        """Return the next `batch_size` examples from this data set."""
        # go through all the data
        start = self._index_in_step
        # shuffle the data before the first step
        if self._steps_completed == 0 and start == 0 and shuffle:
            # index array 0..num_examples-1 with step 1
            perm0 = np.arange(self._num_examples)
            # shuffle the indices
            np.random.shuffle(perm0)
            self._images = self.images[perm0]
            self._labels1 = self.labels1[perm0]
            self._labels2 = self.labels2[perm0]
        # before entering the next step, handle the examples left over in this one
        if start + batch_size > self._num_examples:
            if start + batch_size < 2 * self._num_examples:
                # mark this step as completed
                self._steps_completed += 1
                # grab the examples remaining in this step
                rest_num_examples = self._num_examples - start
                images_rest_part = self._images[start:self._num_examples]
                labels_rest_part1 = self._labels1[start:self._num_examples]
                labels_rest_part2 = self._labels2[start:self._num_examples]
                # reshuffle the data
                if shuffle:
                    perm = np.arange(self._num_examples)
                    np.random.shuffle(perm)
                    self._images = self._images[perm]
                    self._labels1 = self._labels1[perm]
                    self._labels2 = self._labels2[perm]
                # start the next step and top up the batch
                start = 0
                self._index_in_step = batch_size - rest_num_examples
                end = self._index_in_step
                images_new_part = self._images[start:end]
                labels_new_part1 = self._labels1[start:end]
                labels_new_part2 = self._labels2[start:end]
                return np.concatenate((images_rest_part, images_new_part), axis=0), \
                       np.concatenate((labels_rest_part1, labels_new_part1), axis=0), \
                       np.concatenate((labels_rest_part2, labels_new_part2), axis=0)
            else:
                # batch_size spans the whole data set more than once: reuse the full set
                reuse_times = int(np.floor((start + batch_size) / self._num_examples) - 1)
                self._steps_completed += reuse_times + 1
                images_rest_part = self._images[start:self._num_examples]
                labels_rest_part1 = self._labels1[start:self._num_examples]
                labels_rest_part2 = self._labels2[start:self._num_examples]
                batch_images = images_rest_part
                batch_labels1 = labels_rest_part1
                batch_labels2 = labels_rest_part2
                for ind_reuse in range(reuse_times):
                    if shuffle:
                        perm = np.arange(self._num_examples)
                        np.random.shuffle(perm)
                        self._images = self._images[perm]
                        self._labels1 = self._labels1[perm]
                        self._labels2 = self._labels2[perm]
                    batch_images = np.concatenate((batch_images, self._images), axis=0)
                    batch_labels1 = np.concatenate((batch_labels1, self._labels1), axis=0)
                    batch_labels2 = np.concatenate((batch_labels2, self._labels2), axis=0)
                if (start + batch_size) % self._num_examples == 0:
                    self._index_in_step = 0
                    return batch_images, batch_labels1, batch_labels2
                else:
                    if shuffle:
                        perm = np.arange(self._num_examples)
                        np.random.shuffle(perm)
                        self._images = self._images[perm]
                        self._labels1 = self._labels1[perm]
                        self._labels2 = self._labels2[perm]
                    self._index_in_step = (start + batch_size) % self._num_examples
                    end = self._index_in_step
                    batch_images = np.concatenate((batch_images, self._images[0:end]), axis=0)
                    batch_labels1 = np.concatenate((batch_labels1, self._labels1[0:end]), axis=0)
                    batch_labels2 = np.concatenate((batch_labels2, self._labels2[0:end]), axis=0)
                    return batch_images, batch_labels1, batch_labels2
        else:
            self._index_in_step += batch_size
            end = self._index_in_step
            return self._images[start:end], self._labels1[start:end], self._labels2[start:end]
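
For reference, a minimal smoke test for the new class (illustrative only; it assumes Data.py imports numpy as np, which the diff's use of np implies). The wrap-around branch fires on the fourth call, since 10 examples don't divide evenly into batches of 3:

import numpy as np
from Data import Data3

# synthetic data: 10 examples, 4 features, two label sets (shapes are illustrative)
images = np.random.rand(10, 4).astype(np.float32)
labels1 = np.arange(10)
labels2 = np.arange(10) * 2

data = Data3(images, labels1, labels2)
for _ in range(4):
    x, y1, y2 = data.next_batch(batch_size=3)
    # each batch keeps the images and both label sets aligned
    print(x.shape, y1.shape, y2.shape)   # (3, 4) (3,) (3,)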

data_prepare.py

+103
@@ -0,0 +1,103 @@
## data_prepare.py


import os
# from libtiff import TIFF
import numpy as np
import cv2


def random_perm3(data_x, data_y, data_z):
    """
    Apply one shared random permutation to x, y and z; x and z are list objects, y is an ndarray.
    :param data_x: x data, list object
    :param data_y: label data, ndarray object
    :param data_z: x data sizes, list object
    :return: shuffled x, y, z
    """
    data_size = data_y.shape[0]
    rand_perm = np.arange(data_size)
    np.random.shuffle(rand_perm)
    random_data_x = []
    random_data_z = []
    for indices in rand_perm:  # 'list' objects cannot be fancy-indexed, so loop explicitly
        random_data_x.append(data_x[indices])
        random_data_z.append(data_z[indices])
    # random_data_x = data_x[rand_perm]
    random_data_y = data_y[rand_perm]
    # random_data_z = data_z[rand_perm]
    return random_data_x, random_data_y, random_data_z
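
A small usage sketch (illustrative, not part of the commit; it relies on the module's numpy import): the three inputs are shuffled with one shared permutation, so element i of each output still refers to the same example.

xs = [np.zeros((2, 2)), np.ones((3, 3)), np.full((4, 4), 2.0)]   # list of arrays
ys = np.array([0, 1, 2])                                          # ndarray labels
zs = [(2, 2), (3, 3), (4, 4)]                                     # list of sizes
sx, sy, sz = random_perm3(xs, ys, zs)
# sy[i] still labels sx[i], and sz[i] is still sx[i]'s size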

# def generate_dataset_multisize(source_path, file_extension='tif'):
#     """
#     load multisize source images and generate datasets
#     :param source_path: the storage path of the source images, source_path/category/image files
#     :param file_extension: image file extension, defaults to 'tif'; supports tif, png, npy
#     :return: x, y, x_shape
#     """
#     if not os.path.exists(source_path):
#         raise FileNotFoundError('file not found! : %s' % source_path)
#     number_of_categories = 0
#     for category in os.scandir(source_path):
#         if category.is_dir():
#             number_of_categories += 1
#     number_of_image_per_category = np.zeros(number_of_categories, dtype=np.int32)
#     category_name = []
#     dataset_x = []
#     dataset_x_shape = []
#     index_category = 0
#     for category in os.scandir(source_path):
#         if category.is_dir():
#             index_category += 1
#             number_of_images = 0
#             category_name.append(category.name)
#             image = []
#             for img_file in os.scandir(category.path):
#                 extension = os.path.splitext(img_file.path)[1][1:]
#                 if file_extension == extension:
#                     number_of_images += 1
#                     if extension == 'tif':
#                         tif = TIFF.open(img_file.path, mode='r')
#                         image = tif.read_image()
#                         this_x = np.reshape(np.sqrt(np.power(image[:, :, 0], 2) + np.power(image[:, :, 1], 2)),
#                                             (1, -1), order='C')
#                     elif extension == 'png':
#                         image = cv2.imread(img_file.path, -1)
#                         this_x = image
#                     elif extension == 'npy':
#                         image = np.load(img_file.path)
#                         this_x = image
#                     else:
#                         raise ValueError('''unsupported image file's extension: %s''' % file_extension)
#                     dataset_x_shape.append([image.shape[0], image.shape[1]])
#                     this_x_norml2 = (this_x * 1.0) / np.sqrt(np.sum(np.square(this_x)))
#                     dataset_x.append(this_x_norml2)
#             number_of_image_per_category[index_category-1] = number_of_images
#     # print(number_of_image_per_category)
#     dataset_y = np.zeros(
#         [sum(number_of_image_per_category), number_of_categories],
#         dtype=np.int32)
#     for index_category in range(number_of_categories):
#         dataset_y[sum(number_of_image_per_category[0:index_category]):
#                   sum(number_of_image_per_category[0:index_category+1]),
#                   index_category] = 1
#     # print(len(dataset_x))
#     return dataset_x, dataset_y, dataset_x_shape

def norm_with_l2(original_mat):
    """
    Divide each row by its L2 norm to get unit-length rows.
    Each row is a data point.
    :param original_mat: matrix to normalize, one data point per row
    :return: normalized matrix
    """
    normed_mat = np.zeros(original_mat.shape, dtype=np.float32)
    if len(original_mat.shape) == 2:
        for ind_r in range(original_mat.shape[0]):
            # multiply by 1.0 to promote int dtypes to float before squaring,
            # preventing overflow in the power operation (the bug this commit fixes)
            a = np.square(original_mat[ind_r] * 1.0)
            b = np.sum(a)
            c = np.sqrt(b)
            normed_mat[ind_r] = (original_mat[ind_r] * 1.0) / c
            # normed_mat[ind_r] = (original_mat[ind_r] * 1.0) / np.sqrt(np.sum(np.square(original_mat[ind_r]) * 1.0))
    # note: non-2-D input falls through and returns the all-zero matrix
    return normed_mat
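
And a quick sanity check (illustrative): even with int input, each row comes back with unit L2 norm.

mat = np.array([[3, 4], [6, 8]], dtype=np.int32)
out = norm_with_l2(mat)
print(out)                          # [[0.6 0.8] [0.6 0.8]]
print(np.linalg.norm(out, axis=1))  # ~[1. 1.], up to float32 rounding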
