This repository has been archived by the owner on May 11, 2024. It is now read-only.

Merge branch 'gm_r1.1' into 'master'
Merge r1.1 code into master branch

See merge request intelai/tools!42
chuanqi129 committed Aug 5, 2020
2 parents d88dede + 4c5d8ff commit 00d591f
Showing 22 changed files with 1,120 additions and 316 deletions.
2 changes: 1 addition & 1 deletion api/README.md
@@ -46,7 +46,7 @@ of specific models in Model Zoo as examples.

```bash
$ cd ~
-$ git clone https://github.com/IntelAI/tools.git quantization && cd quantization
+$ git clone https://github.com/IntelAI/tools.git quantization && cd quantization
```

## Step-by-step Procedure for ResNet-50 Quantization
6 changes: 3 additions & 3 deletions api/examples/quantize_cmd.py
@@ -41,12 +41,12 @@ def main(_):
    outputs = []

    if args.excluded_ops:
-        excluded_ops = args.exclude_ops.split(',')
+        excluded_ops = args.excluded_ops.split(',')
    else:
        excluded_ops = []

    if args.excluded_nodes:
-        excluded_nodes = args.exclude_nodes.split(',')
+        excluded_nodes = args.excluded_nodes.split(',')
    else:
        excluded_nodes = []

@@ -61,7 +61,7 @@ def main(_):
        callback_cmd = prefix + 'input_graph={} ' + postfix
    else:
        callback_cmd = args.callback
-    qt.gen_calib_data_cmds = args.callback
+    qt.gen_calib_data_cmds = callback_cmd
    qt.convert()
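For context on the two renames above: argparse stores each option on the namespace under its declared flag name, so reading `args.exclude_ops` when the flag was registered as `--excluded_ops` raises `AttributeError` at runtime. A minimal reproduction (hypothetical, mirroring the script's flags):

```python
import argparse

parser = argparse.ArgumentParser()
# Registered as --excluded_ops, so the parsed value lives at args.excluded_ops.
parser.add_argument('--excluded_ops', type=str, default='')
args = parser.parse_args(['--excluded_ops', 'ConcatV2,MaxPool'])

# args.exclude_ops.split(',')               # AttributeError: the pre-fix lookup
excluded_ops = args.excluded_ops.split(',')  # the fixed lookup
print(excluded_ops)                          # ['ConcatV2', 'MaxPool']
```

The third hunk matters for the same reason: without it, the freshly built `callback_cmd` template (with its `input_graph={}` placeholder) was discarded and the raw `args.callback` string ran instead.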
1 change: 1 addition & 0 deletions api/intel_quantization/__init__.py
@@ -15,3 +15,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+__version__="1.1"
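With `__version__` defined, the package version becomes discoverable at runtime; assuming the `api` directory is on `PYTHONPATH` so the package imports cleanly:

```python
import intel_quantization

print(intel_quantization.__version__)  # 1.1
```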
162 changes: 121 additions & 41 deletions api/intel_quantization/graph_converter.py
@@ -25,21 +25,22 @@
# from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

# from intel_quantization.quantize_graph import GraphRewriter
-from intel_quantization.transform_graph.strip_unused import StripUnusedNodes
-from intel_quantization.transform_graph.fold_batch_norm import FoldBatchNormNodes
-from intel_quantization.transform_graph.insert_logging import InsertLogging
-from intel_quantization.transform_graph.freeze_max_min import freeze_max
-from intel_quantization.transform_graph.freeze_max_min import freeze_min
-from intel_quantization.transform_graph.freeze_max_min import freeze_requantization_range
-from intel_quantization.transform_graph.fuse_quantized_conv_and_requantize import fuse_quantized_conv_and_requantize
-from intel_quantization.transform_graph.fuse_column_wise_mul import FuseColumnWiseMul
-from intel_quantization.transform_graph.rerange_quantized_concat import RerangeQuantizedConcat
-from intel_quantization.util import read_graph, write_graph
-from intel_quantization.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel
+from .transform_graph.strip_unused import StripUnusedNodes
+from .transform_graph.fold_batch_norm import FoldBatchNormNodes
+from .transform_graph.insert_logging import InsertLogging
+from .transform_graph.freeze_max_min import freeze_max
+from .transform_graph.freeze_max_min import freeze_min
+from .transform_graph.freeze_max_min import freeze_requantization_range
+from .transform_graph.freeze_max_min import get_all_fp32_data, get_tensor_histogram, combine_histogram
+from .transform_graph.fuse_quantized_conv_and_requantize import fuse_quantized_conv_and_requantize
+from .transform_graph.fuse_column_wise_mul import FuseColumnWiseMul
+from .transform_graph.rerange_quantized_concat import RerangeQuantizedConcat
+from .util import read_graph, write_graph
+from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel
+from .quantize_graph.quantize_graph_common import QuantizeGraphHelper
import os
import shlex
import subprocess
import sys
import logging

logging.getLogger().setLevel(level=logging.INFO)
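The wholesale switch from absolute `intel_quantization.*` imports to package-relative imports (here and in every `quantize_graph_*` module below) means the code no longer hard-codes its own top-level package name. A small self-contained sketch of why that matters, using throwaway module names:

```python
# Demo (hypothetical file names): a relative import survives renaming the package.
import os
import sys
import tempfile
import textwrap

root = tempfile.mkdtemp()
pkg = os.path.join(root, 'renamed_pkg')      # any top-level name works
os.makedirs(pkg)
open(os.path.join(pkg, '__init__.py'), 'w').close()
with open(os.path.join(pkg, 'util.py'), 'w') as f:
    f.write('def read_graph(path):\n    return "graph from " + path\n')
with open(os.path.join(pkg, 'converter.py'), 'w') as f:
    f.write(textwrap.dedent('''\
        from .util import read_graph   # no hard-coded package name

        def load():
            return read_graph('model.pb')
        '''))

sys.path.insert(0, root)
from renamed_pkg.converter import load
print(load())  # graph from model.pb
```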
@@ -52,7 +53,7 @@

class GraphConverter:
    def __init__(self, input_graph, output_graph, inputs=[], outputs=[], excluded_ops=[], excluded_nodes=[],
-                 per_channel=False, input_graph_is_binary=True):
+                 per_channel=False, input_graph_is_binary=True, algo='DIRECT'):
        """Convert graph.

        :param input_graph: input graph pb file.
@@ -73,13 +74,18 @@ def __init__(self, input_graph, output_graph, inputs=[], outputs=[], excluded_op
        self.per_channel = per_channel
        self.excluded_ops = excluded_ops
        self.excluded_nodes = excluded_nodes
+        self.algo = algo
        self._low_precision_mode = 'eightbit'

+        self._calibration_data = []
+        self._fp32_print_data = []
        self.gen_calib_data_cmds = None
        self.debug = False
        self._check_tf_version()
        self._check_args()
        self._gen_tmp_filenames()
+        self._kl_op_dict = {}
+        self._kl_keys = []
+        self._print_node_mapping = {}

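The new `algo` argument selects how calibration ranges are derived: `'DIRECT'` (raw min/max, the default), `'MA'` (moving average across batches), or `'KL'` (histogram-based, feeding the new `_kl_*` state above). A hypothetical end-to-end invocation, with placeholder paths and node names:

```python
from intel_quantization.graph_converter import GraphConverter

# All paths and node names below are illustrative placeholders.
converter = GraphConverter(input_graph='resnet50_fp32.pb',
                           output_graph='resnet50_int8.pb',
                           inputs=['input'],
                           outputs=['predict'],
                           per_channel=False,
                           algo='KL')
# Shell command template that runs inference on the logged graph;
# '{}' is substituted with the graph path by _generate_calibration_data().
converter.gen_calib_data_cmds = 'python eval.py --input_graph={} --num_batches=10'
converter.convert()
```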
    def _check_tf_version(self):
        is_supported_version = False
@@ -113,7 +119,7 @@ def _gen_tmp_filenames(self):
        self._fp32_optimized_graph = os.path.join(self._output_path, 'fp32_optimized_graph.pb')
        self._int8_dynamic_range_graph = os.path.join(self._output_path, 'int8_dynamic_range_graph.pb')
        self._int8_logged_graph = os.path.join(self._output_path, 'int8_logged_graph.pb')
-        self._requant_min_max_log = os.path.join(self._output_path, 'requant_min_max_log.txt')
+        self._fp32_logged_graph = os.path.join(self._output_path, 'fp32_logged_graph.pb')
        self._int8_frozen_range_graph = os.path.join(self._output_path, 'int8_frozen_range_graph.pb')
        if not self.output_graph:
            self.output_graph = os.path.join(self._output_path, 'int8_final_fused_graph.pb')
@@ -137,6 +143,58 @@ def convert(self):
        else:
            self.quantize()

+    def _get_fp32_print_node_names(self):
+        offset_map = {
+            "QuantizedConv2DWithBiasSumAndRelu": 3,
+            "QuantizedConv2DWithBiasAndRelu": 2,
+            "QuantizedConv2DWithBias": 1,
+        }
+        target_conv_op = []
+        sorted_graph = QuantizeGraphHelper().get_sorted_graph(
+            self._fp32_origin_graph, self.outputs)
+
+        node_name_mapping = {
+            node.name: node
+            for node in self._tmp_graph_def.node if node.op != "Const"
+        }
+
+        for node in self._tmp_graph_def.node:
+            if node.op in offset_map:
+                target_conv_op.append(node.name.split('_eightbit_')[0])
+        fp32_node_name_mapping = {
+            node.name: node
+            for node in sorted_graph.node if node.op != "Const"
+        }
+        sorted_node_names = [i.name for i in sorted_graph.node if i.op != "Const"]
+
+        output_node_names = []
+        for i in target_conv_op:
+            if node_name_mapping[
+                    i + "_eightbit_quantized_conv"].op == 'QuantizedConv2DWithBiasSumAndRelu':
+                start_index = sorted_node_names.index(i)
+                for index, value in enumerate(sorted_node_names[start_index:]):
+                    if fp32_node_name_mapping[value].op.startswith(
+                            "Add") and fp32_node_name_mapping[
+                                sorted_node_names[start_index + index + 1]].op == "Relu":
+                        output_node_names.append(
+                            sorted_node_names[start_index + index + 1])
+                        self._print_node_mapping[sorted_node_names[start_index + index + 1]] = i
+            elif i in sorted_node_names:
+                start_index = sorted_node_names.index(i)
+                end_index = start_index + offset_map[node_name_mapping[
+                    i + "_eightbit_quantized_conv"].op]
+                output_node_names.append(sorted_node_names[end_index])
+                self._print_node_mapping[sorted_node_names[end_index]] = i
+
+        for i in output_node_names:
+            self._kl_keys.append(';' + i + '__print__;__KL')
+
+        InsertLogging(self._fp32_origin_graph,
+                      node_name_list=output_node_names,
+                      message="__KL:",
+                      summarize=-1, dump_fp32=True).do_transformation()
+        write_graph(self._fp32_origin_graph, self._fp32_logged_graph)

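Each entry appended to `_kl_keys` has the shape `;<fp32 node name>__print__;__KL`, matching the prefix that `InsertLogging` stamps on each dumped tensor line, so later log lines can be traced back to the convolution that produced them. A sketch with a fabricated log line:

```python
# Fabricated log line in the InsertLogging dump format.
line = ';resnet/conv1/Relu__print__;__KL:[0.12 0.5 0.33]'

key = line.rsplit(':')[0]            # ';resnet/conv1/Relu__print__;__KL'
node = line[1:].split('__print')[0]  # 'resnet/conv1/Relu'
print(key, '->', node)
```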
    def quantize(self):
        """Quantize graph only (without optimizing fp32 graph), including:
            1) quantize graph,
@@ -150,9 +208,14 @@ def quantize(self):
                             'to generate calibration data.')
        try:
            self._quantize_graph()
+            if self.algo == "KL":
+                self._get_fp32_print_node_names()
+                self._generate_calibration_data(self._fp32_logged_graph,
+                                                self._fp32_print_data, True)
+
            self._insert_logging()
-            self._generate_calibration_data()
-            self._freeze_requantization_ranges()
+            self._generate_calibration_data(self._int8_logged_graph, self._calibration_data)
+            self._freeze_requantization_ranges(self._kl_op_dict)
            self._fuse_requantize_with_fused_quantized_conv()
        except Exception as e:
            logging.error('Failed to quantize graph due to: %s', str(e))
@@ -172,6 +235,7 @@ def _optimize_frozen_fp32_graph(self):
        self._tmp_graph_def = graph_util.remove_training_nodes(self._tmp_graph_def, self.outputs)
        self._tmp_graph_def = FoldBatchNormNodes(self._tmp_graph_def).do_transform()
        write_graph(self._tmp_graph_def, self._fp32_optimized_graph)
+        self._fp32_origin_graph = self._tmp_graph_def

    def _quantize_graph(self):
        """quantize graph."""
@@ -199,32 +263,50 @@ def _insert_logging(self):
            ops=["RequantizationRange{}".format("PerChannel" if self.per_channel else "")],
            message="__requant_min_max:").do_transformation()
        InsertLogging(self._tmp_graph_def, ops=["Min"], message="__min:").do_transformation()
-        InsertLogging(self._tmp_graph_def, ops=["Max"], message="__max:").do_transformation()
+        InsertLogging(self._tmp_graph_def, ops=["Max"],
+                      message="__max:").do_transformation()
+        # InsertLogging(
+        #     self._tmp_graph_def,
+        #     ops=["QuantizedConv2DWithBiasAndRelu",
+        #          "QuantizedConv2DWithBias"
+        #          ],
+        #     message="__KL:",
+        #     summarize=-1).do_transformation()

        write_graph(self._tmp_graph_def, self._int8_logged_graph)
        self._tmp_graph_def.CopyFrom(int8_dynamic_range_graph_def)
-    def _generate_calibration_data(self):
+    def _generate_calibration_data(self, graph, output, enable_kl_algo=False):
        cmd = self.gen_calib_data_cmds
-        cmd = cmd.format(self._int8_logged_graph)
-        f = open(self._requant_min_max_log, 'w', buffering=1)
-        p = subprocess.Popen(shlex.split(cmd), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
-        try:
-            for line in p.stdout:
-                line_str = line.decode(sys.stdout.encoding)
-                sys.stdout.write(line_str)
-                f.write(line_str)
-            p.communicate()
-        except Exception:
-            p.kill()
-            p.wait()
-            raise
-        if p.poll():
-            raise SystemExit('ERROR generating calibration data, command: \n{}'.format(cmd))
+        cmd = cmd.format(graph)
+        p = subprocess.Popen(shlex.split(cmd),
+                             stderr=subprocess.STDOUT,
+                             stdout=subprocess.PIPE)
+        while p.poll() is None:
+            line = p.stdout.readline().strip().decode()
+            if line and line.startswith(';'):
+                if not enable_kl_algo:
+                    output.append(line)
+
+                if enable_kl_algo and line.rsplit(':')[0] in self._kl_keys:
+                    fp32_data = get_all_fp32_data(line.rsplit(':')[-1])
+                    key = self._print_node_mapping[line[1:].split('__print')[0]] + '_eightbit_requant_range'
+                    if key not in self._kl_op_dict:
+                        self._kl_op_dict[key] = get_tensor_histogram(fp32_data)
+                    else:
+                        self._kl_op_dict[key] = combine_histogram(self._kl_op_dict[key], fp32_data)

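Instead of spilling the subprocess output into `requant_min_max_log.txt` and re-reading it, the rewritten method filters the stream in-process: only lines starting with `';'` (the `InsertLogging` dumps) are kept. Fabricated examples of the line flavors that end up in `self._calibration_data`:

```python
# Fabricated samples; real lines carry actual node names and tensor values.
calibration_data = [
    ';conv1_eightbit_requant_range__print__;__requant_min_max:[-12.7][15.4]',
    ';conv1_eightbit_min__print__;__min:[-3.2]',
    ';conv1_eightbit_max__print__;__max:[3.9]',
]
# freeze_min/freeze_max/freeze_requantization_range later parse these
# in-memory lines to hard-code the ranges into the int8 graph.
```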
-    def _freeze_requantization_ranges(self):
-        self._tmp_graph_def = freeze_max(self._tmp_graph_def, self._requant_min_max_log)
-        self._tmp_graph_def = freeze_min(self._tmp_graph_def, self._requant_min_max_log)
-        self._tmp_graph_def = freeze_requantization_range(self._tmp_graph_def, self._requant_min_max_log)
+    def _freeze_requantization_ranges(self, additional_data=None):
+        use_moving_average = self.algo == "MA"
+        self._tmp_graph_def = freeze_max(self._tmp_graph_def,
+                                         self._calibration_data,
+                                         use_moving_average)
+        self._tmp_graph_def = freeze_min(self._tmp_graph_def,
+                                         self._calibration_data,
+                                         use_moving_average)
+        self._tmp_graph_def = freeze_requantization_range(
+            self._tmp_graph_def, self._calibration_data, use_moving_average,
+            additional_data)
        if self.debug:
            write_graph(self._tmp_graph_def, self._int8_frozen_range_graph)

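`use_moving_average` flips the freeze helpers from taking the raw extremum over all calibration batches to smoothing it batch by batch. The exact update rule lives in `freeze_max_min.py`; a common exponential-moving-average form, shown purely for intuition and not as the verbatim library code, is:

```python
def moving_average(prev, cur, c=0.01):
    # Assumed EMA form for quantization ranges (illustrative only).
    return prev * (1 - c) + cur * c

r = 10.0
for batch_max in (12.0, 9.5, 11.0):
    r = moving_average(r, batch_max)
print(r)  # stays near 10.0; single-batch outliers barely move the range
```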
@@ -256,5 +338,3 @@ def _post_clean(self):
        """
        if gfile.Exists(self._int8_logged_graph):
            os.remove(self._int8_logged_graph)
-        if gfile.Exists(self._requant_min_max_log):
-            os.remove(self._requant_min_max_log)
12 changes: 9 additions & 3 deletions api/intel_quantization/quantize_graph/quantize_graph_concatv2.py
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
from tensorflow.python.framework import dtypes
from tensorflow.core.framework import node_def_pb2
-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeNodeBase
-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper as helper
+from .quantize_graph_base import QuantizeNodeBase
+from .quantize_graph_common import QuantizeGraphHelper as helper

import re

@@ -53,9 +53,15 @@ def _apply_concatv2_transform(self, original_node):
        self._add_dequantize_result_node(quantized_concat_name,
                                         original_node.name)

+    def _quantizable_concat(self, node):
+        for input_node_name in node.input[:node.attr['N'].i]:
+            if self.node_name_mapping[helper.node_name_from_input(input_node_name)].node.op != "Dequantize":
+                return False
+        return True
+
    def _apply_concatv2_quantization(self):
        for _, v in self.node_name_mapping.items():
-            if v.node.op in ("ConcatV2") and not re.search(
+            if v.node.op in ("ConcatV2") and self._quantizable_concat(v.node) and not re.search(
                    r'map(_\d+)?/while', v.node.name) and dtypes.as_dtype(
                    v.node.attr["T"].type) == dtypes.float32:
                self._apply_concatv2_transform(v.node)
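`_quantizable_concat` inspects the first `N` inputs of the ConcatV2 node (the data tensors; the final input is the axis) and requires each producer to be a `Dequantize`, i.e. already on the quantized path; concatenating a quantized tensor with a raw fp32 one would otherwise force extra conversions. A toy version of the check, with hypothetical stand-in types:

```python
class Node:  # hypothetical stand-in for a NodeDef wrapper
    def __init__(self, op):
        self.op = op

def quantizable_concat(inputs, n, producers):
    """True only if all n data inputs come from Dequantize nodes."""
    return all(producers[name].op == 'Dequantize' for name in inputs[:n])

producers = {'a': Node('Dequantize'), 'b': Node('Relu')}
print(quantizable_concat(['a', 'b', 'axis'], 2, producers))  # False: 'b' is fp32
```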
6 changes: 3 additions & 3 deletions api/intel_quantization/quantize_graph/quantize_graph_conv.py
@@ -3,8 +3,8 @@
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import dtypes

-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper as helper
-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeNodeBase
+from .quantize_graph_common import QuantizeGraphHelper as helper
+from .quantize_graph_base import QuantizeNodeBase

import logging

@@ -232,7 +232,7 @@ def apply_conv_biasadd_fusion(self, match_node_name):
        helper.set_attr_dtype(quantized_conv_node, "out_type",
                              dtypes.qint32)
        self.add_output_graph_node(quantized_conv_node)
-        requantize_type = dtypes.qint8 if self.per_channel else dtypes.quint8
+        requantize_type = dtypes.qint8

        quantize_down_name = self._add_quantize_down_nodes(
            node, quantized_node_name, requantize_type, False)
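After this change the requantized output is always signed `qint8` rather than `quint8` on the non-per-channel path too, making the signed type unconditional instead of per-channel-only. The two ranges, for reference:

```python
import numpy as np

print(np.iinfo(np.int8).min, np.iinfo(np.int8).max)    # -128 127  (qint8)
print(np.iinfo(np.uint8).min, np.iinfo(np.uint8).max)  # 0 255     (quint8)
```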
api/intel_quantization/quantize_graph/quantize_graph_for_intel_cpu.py
@@ -3,13 +3,13 @@
from tensorflow.python.platform import gfile
from tensorflow.python.framework import graph_util

-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeGraphBase
-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper
-from intel_quantization.quantize_graph.quantize_graph_conv import FuseNodeStartWithConv2d
-from intel_quantization.quantize_graph.quantize_graph_concatv2 import FuseNodeStartWithConcatV2
-from intel_quantization.quantize_graph.quantize_graph_matmul import FuseNodeStartWithMatmul
-from intel_quantization.quantize_graph.quantize_graph_pooling import FuseNodeStartWithPooling
-from intel_quantization.quantize_graph.quantize_graph_pad import FuseNodeStartWithPad
+from .quantize_graph_base import QuantizeGraphBase
+from .quantize_graph_common import QuantizeGraphHelper
+from .quantize_graph_conv import FuseNodeStartWithConv2d
+from .quantize_graph_concatv2 import FuseNodeStartWithConcatV2
+from .quantize_graph_matmul import FuseNodeStartWithMatmul
+from .quantize_graph_pooling import FuseNodeStartWithPooling
+from .quantize_graph_pad import FuseNodeStartWithPad


class QuantizeGraphForIntel(QuantizeGraphBase):
api/intel_quantization/quantize_graph/quantize_graph_matmul.py
@@ -3,8 +3,8 @@
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import dtypes

-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper as helper
-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeNodeBase
+from .quantize_graph_common import QuantizeGraphHelper as helper
+from .quantize_graph_base import QuantizeNodeBase

import logging
4 changes: 2 additions & 2 deletions api/intel_quantization/quantize_graph/quantize_graph_pad.py
@@ -3,8 +3,8 @@
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import tensor_util

-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeNodeBase
-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper as helper
+from .quantize_graph_base import QuantizeNodeBase
+from .quantize_graph_common import QuantizeGraphHelper as helper


class FuseNodeStartWithPad(QuantizeNodeBase):
api/intel_quantization/quantize_graph/quantize_graph_pooling.py
@@ -2,8 +2,8 @@
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import dtypes

-from intel_quantization.quantize_graph.quantize_graph_base import QuantizeNodeBase
-from intel_quantization.quantize_graph.quantize_graph_common import QuantizeGraphHelper as helper
+from .quantize_graph_base import QuantizeNodeBase
+from .quantize_graph_common import QuantizeGraphHelper as helper


class FuseNodeStartWithPooling(QuantizeNodeBase):
@@ -24,7 +24,8 @@ def _add_pool_function(self, original_node, quantized_op_node):

    def _apply_pool_quantization(self):
        for _, v in self.node_name_mapping.items():
-            if v.node.op in ("AvgPool", "MaxPool"):
+            if v.node.op in ("AvgPool", "MaxPool") and self._find_relu_node(
+                    v.node):
                self.eightbitize_single_input_tensor_node(
                    v.node, self._add_pool_function)
            else:
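Pooling ops are now quantized only when `_find_relu_node` can trace the pool's input back to a ReLU, i.e. when the tensor already flows through the quantized activation path; a bare fp32 input would otherwise pay quantize/dequantize overhead for no gain. A toy rendering of that guard (hypothetical graph structures, not the real helper):

```python
class Node:  # hypothetical stand-in for a NodeDef
    def __init__(self, name, op, inputs=()):
        self.name, self.op, self.inputs = name, op, list(inputs)

def find_relu_node(node, producers):
    while True:
        if node.op in ('Relu', 'Relu6'):
            return True
        if not node.inputs:
            return False
        node = producers[node.inputs[0]]  # follow the first (data) input

producers = {
    'in': Node('in', 'Placeholder'),
    'relu': Node('relu', 'Relu', ['in']),
}
pool_after_relu = Node('p1', 'MaxPool', ['relu'])
pool_on_fp32 = Node('p2', 'MaxPool', ['in'])
print(find_relu_node(pool_after_relu, producers))  # True  -> quantize
print(find_relu_node(pool_on_fp32, producers))     # False -> keep fp32
```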