diff --git a/generate_tfrecord_for_py3.8.x.py b/generate_tfrecord_for_py3.8.x.py new file mode 100644 index 00000000..eb7438e3 --- /dev/null +++ b/generate_tfrecord_for_py3.8.x.py @@ -0,0 +1,104 @@ +""" +Usage: + # From tensorflow/models/ + # Create train data: + python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record + + # Create test data: + python generate_tfrecord.py --csv_input=images/test_labels.csv --image_dir=images/test --output_path=test.record +""" +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import + +import os +import io +import pandas as pd +import tensorflow as tf + +import argparse + +from PIL import Image +from object_detection.utils import dataset_util +from collections import namedtuple, OrderedDict + +# construct the argument parse and parse the arguments +ap = argparse.ArgumentParser() +ap.add_argument("-c", "--csv_input", required=True,help="csv dosyasının yeri (csv file)") +ap.add_argument("-i", "--image_dir", required=True,help="gorsellerin olduğu yer (get img files from)") +ap.add_argument("-o", "--output_path", required=True,help="nereye kaydedelim (save img files to)") +args = vars(ap.parse_args()) + + +# TO-DO replace this with label map +def class_text_to_int(row_label): + if row_label == 'maske': + return 1 + else: + None + + +def split(df, group): + data = namedtuple('data', ['filename', 'object']) + gb = df.groupby(group) + return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)] + + +def create_tf_example(group, path): + with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: + encoded_jpg = fid.read() + encoded_jpg_io = io.BytesIO(encoded_jpg) + image = Image.open(encoded_jpg_io) + width, height = image.size + + filename = group.filename.encode('utf8') + image_format = b'jpg' + xmins = [] + xmaxs = [] + ymins = [] + ymaxs = [] + classes_text = [] + classes = [] + + for index, row in group.object.iterrows(): + xmins.append(row['xmin'] / width) + xmaxs.append(row['xmax'] / width) + ymins.append(row['ymin'] / height) + ymaxs.append(row['ymax'] / height) + classes_text.append(row['class'].encode('utf8')) + classes.append(class_text_to_int(row['class'])) + + tf_example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': dataset_util.int64_feature(height), + 'image/width': dataset_util.int64_feature(width), + 'image/filename': dataset_util.bytes_feature(filename), + 'image/source_id': dataset_util.bytes_feature(filename), + 'image/encoded': dataset_util.bytes_feature(encoded_jpg), + 'image/format': dataset_util.bytes_feature(image_format), + 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), + 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), + 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), + 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), + 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), + 'image/object/class/label': dataset_util.int64_list_feature(classes), + })) + return tf_example + + +def main(): + writer = tf.io.TFRecordWriter(args["output_path"]) + path = os.path.join(os.getcwd(), args["image_dir"]) + examples = pd.read_csv(args["csv_input"]) + grouped = split(examples, 'filename') + for group in grouped: + tf_example = create_tf_example(group, path) + writer.write(tf_example.SerializeToString()) + + writer.close() + output_path = os.path.join(os.getcwd(), args["output_path"]) + print('Successfully created the TFRecords: {}'.format(output_path)) + + +if __name__ == '__main__': + main() + #tf.app.run()