
Commit 10ae80b

Add files via upload
Notebooks for training and testing added; config file added.
1 parent fbec380

File tree (3 files changed: +167, −0)

- mask_rcnn_eval.ipynb (+1)
- mask_rcnn_inception_v2_coco.config (+165)
- mask_rcnn_train.ipynb (+1)

mask_rcnn_eval.ipynb

# UE ROLL Blue speaker detection

A model trained to detect the blue UE ROLL Bluetooth speaker. Only 16 images have been used for the current training; performance could be improved by using more images.

# Environment Setup

```python
!apt-get install protobuf-compiler python-pil python-lxml python-tk
!pip install Cython
!pip install jupyter
!pip install matplotlib

!git clone https://github.com/tensorflow/models.git

%cd /content/models/research

!protoc object_detection/protos/*.proto --python_out=.

%set_env PYTHONPATH=/content/models/research:/content/models/research/slim

!python object_detection/builders/model_builder_test.py
```

# Imports

```python
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# This is needed to display the images.
%matplotlib inline
```

# Model preparation

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file. Here it points to the Mask R-CNN Inception V2 graph exported after training; see the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for other models that can be run out of the box with varying speeds and accuracies.

```python
# Path to the frozen detection graph. This is the actual model that is used
# for the object detection.
PATH_TO_FROZEN_GRAPH = '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/IG/frozen_inference_graph.pb'

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/label.pbtxt'

# Number of classes
NUM_CLASSES = 2
```

## Load a (frozen) TensorFlow model into memory

```python
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
```

## Loading label map

Label maps map indices to category names, so that when the convolutional network predicts `5`, we know this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

```python
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
```

## Helper code

```python
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)
```

# Detection

```python
# To test the code with your own images, add their paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/testImages'
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
                    for i in range(1, 7)]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
```

```python
def run_inference_for_single_image(image, graph):
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors.
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for a single image.
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframing is required to translate the masks from box coordinates
        # to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0],
                                   [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                   [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension.
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference.
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # All outputs are float32 numpy arrays, so convert types as appropriate.
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict
```

```python
for image_path in TEST_IMAGE_PATHS:
  image = Image.open(image_path)
  # The array-based representation of the image will be used later to prepare
  # the result image with boxes and labels on it.
  image_np = load_image_into_numpy_array(image)
  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
  image_np_expanded = np.expand_dims(image_np, axis=0)
  # Actual detection.
  output_dict = run_inference_for_single_image(image_np, detection_graph)
  # Visualization of the results of a detection.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
  fig = plt.figure(figsize=IMAGE_SIZE)
  ax = fig.gca()
  ax.grid(False)
  plt.imshow(image_np)
```
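The `label.pbtxt` referenced above is not part of this commit, but its format is fixed by the TF Object Detection API: a pbtxt file with one `item` per class, with ids starting at 1. A minimal sketch consistent with `NUM_CLASSES = 2` (the class names here are hypothetical, not taken from the repo):

```
item {
  id: 1
  name: 'ue_roll'       # hypothetical class name
}
item {
  id: 2
  name: 'other_object'  # hypothetical class name
}
```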

mask_rcnn_inception_v2_coco.config

# Mask R-CNN with Inception V2
# Configured for the MSCOCO dataset and fine-tuned here on the UE ROLL speaker data.
# The fine_tune_checkpoint field in the train config, as well as the
# label_map_path and input_path fields in the train_input_reader and
# eval_input_reader, have already been set to this project's Google Drive paths.

model {
  faster_rcnn {
    # Note: stock COCO value; the label map for this project defines only
    # 2 classes, so this would normally be reduced to 2 when fine-tuning.
    num_classes: 90
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 800
        max_dimension: 1365
      }
    }
    number_of_stages: 3
    feature_extractor {
      type: 'faster_rcnn_inception_v2'
      first_stage_features_stride: 16
    }
    first_stage_anchor_generator {
      grid_anchor_generator {
        scales: [0.25, 0.5, 1.0, 2.0]
        aspect_ratios: [0.5, 1.0, 2.0]
        height_stride: 16
        width_stride: 16
      }
    }
    first_stage_box_predictor_conv_hyperparams {
      op: CONV
      regularizer {
        l2_regularizer {
          weight: 0.0
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.01
        }
      }
    }
    first_stage_nms_score_threshold: 0.0
    first_stage_nms_iou_threshold: 0.7
    first_stage_max_proposals: 300
    first_stage_localization_loss_weight: 2.0
    first_stage_objectness_loss_weight: 1.0
    initial_crop_size: 14
    maxpool_kernel_size: 2
    maxpool_stride: 2
    second_stage_box_predictor {
      mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        predict_instance_masks: true
        mask_height: 15
        mask_width: 15
        mask_prediction_conv_depth: 0
        mask_prediction_num_conv_layers: 2
        fc_hyperparams {
          op: FC
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            variance_scaling_initializer {
              factor: 1.0
              uniform: true
              mode: FAN_AVG
            }
          }
        }
        conv_hyperparams {
          op: CONV
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.01
            }
          }
        }
      }
    }
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 300
      }
      score_converter: SOFTMAX
    }
    second_stage_localization_loss_weight: 2.0
    second_stage_classification_loss_weight: 1.0
    second_stage_mask_prediction_loss_weight: 4.0
  }
}

train_config: {
  batch_size: 1
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        manual_step_learning_rate {
          initial_learning_rate: 0.0002
          schedule {
            step: 900000
            learning_rate: .00002
          }
          schedule {
            step: 1200000
            learning_rate: .000002
          }
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  gradient_clipping_by_norm: 10.0
  fine_tune_checkpoint: '/content/drive/learning/DL/pre_trained_models/mask_rcnn_inception_v2_coco/model.ckpt'
  from_detection_checkpoint: true
  # Note: the line below limits training to 2,000 steps, which for this small
  # dataset also effectively bypasses the learning rate schedule above (the
  # learning rate will never decay). Remove it to train until the schedule
  # completes.
  num_steps: 2000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/train.record'
  }
  label_map_path: '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/label.pbtxt'
  load_instance_masks: true
  mask_type: PNG_MASKS
}

eval_config: {
  num_examples: 8000
  # Note: the line below limits the evaluation process to 10 evaluations.
  # Remove it to evaluate indefinitely.
  max_evals: 10
}

# Note: evaluation reads the same train.record as training; a held-out eval
# record would give a more honest measure of generalization.
eval_input_reader: {
  tf_record_input_reader {
    input_path: '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/train.record'
  }
  label_map_path: '/content/drive/learning/DL/projects/object_detection/multiple_object_mask/dataset/label.pbtxt'
  load_instance_masks: true
  mask_type: PNG_MASKS
  shuffle: false
  num_readers: 1
}
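Because the input readers set `load_instance_masks: true` with `mask_type: PNG_MASKS`, every example in `train.record` must carry one encoded PNG mask per annotated instance in addition to the usual box fields. A minimal sketch of how one such record could be assembled, assuming TF 1.x and the standard feature keys used by the Object Detection API's dataset tools (the file names, class name, and box values here are hypothetical):

```python
import io
import tensorflow as tf
from PIL import Image

def bytes_feature(values):
  # Accepts a list of byte strings.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

def float_feature(values):
  return tf.train.Feature(float_list=tf.train.FloatList(value=values))

def int64_feature(values):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

# Hypothetical single-instance example: one image, one box, one binary PNG mask.
with tf.gfile.GFile('image1.jpg', 'rb') as f:       # hypothetical image file
  encoded_jpg = f.read()
width, height = Image.open(io.BytesIO(encoded_jpg)).size

with tf.gfile.GFile('image1_mask.png', 'rb') as f:  # hypothetical mask file
  encoded_mask_png = f.read()

example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': int64_feature([height]),
    'image/width': int64_feature([width]),
    'image/filename': bytes_feature([b'image1.jpg']),
    'image/source_id': bytes_feature([b'image1.jpg']),
    'image/encoded': bytes_feature([encoded_jpg]),
    'image/format': bytes_feature([b'jpeg']),
    # Normalized box coordinates, one entry per instance.
    'image/object/bbox/xmin': float_feature([0.1]),
    'image/object/bbox/xmax': float_feature([0.9]),
    'image/object/bbox/ymin': float_feature([0.2]),
    'image/object/bbox/ymax': float_feature([0.8]),
    'image/object/class/text': bytes_feature([b'ue_roll']),  # hypothetical name
    'image/object/class/label': int64_feature([1]),
    # One encoded PNG per instance, as required by mask_type: PNG_MASKS.
    'image/object/mask': bytes_feature([encoded_mask_png]),
}))

with tf.python_io.TFRecordWriter('train.record') as writer:
  writer.write(example.SerializeToString())
```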

mask_rcnn_train.ipynb

# Environment Setup

```python
!apt-get install protobuf-compiler python-pil python-lxml python-tk
!pip install Cython
!pip install jupyter
!pip install matplotlib

!git clone https://github.com/tensorflow/models.git

%cd /content/models/research

!protoc object_detection/protos/*.proto --python_out=.

%set_env PYTHONPATH=/content/models/research:/content/models/research/slim

!python object_detection/builders/model_builder_test.py
```

# Train Model

```python
!python object_detection/legacy/train.py \
  --train_dir=/content/drive/learning/DL/projects/object_detection/multiple_object_mask/CP \
  --pipeline_config_path=/content/drive/learning/DL/projects/object_detection/multiple_object_mask/mask_rcnn_inception_v2_coco.config
```

# Save inference graph

```python
# The pipeline config must match the one used for training above.
!python object_detection/export_inference_graph.py \
  --input_type=image_tensor \
  --pipeline_config_path=/content/drive/learning/DL/projects/object_detection/multiple_object_mask/mask_rcnn_inception_v2_coco.config \
  --trained_checkpoint_prefix=/content/drive/learning/DL/projects/object_detection/multiple_object_mask/CP/model.ckpt-2000 \
  --output_directory=/content/drive/learning/DL/projects/object_detection/multiple_object_mask/IG
```
