Merged
Changes from 1 commit
Commits
25 commits
c2a14b8
Improve UT Coverage for TF 3x
zehao-intel Jun 6, 2024
40a1e2e
fix depthconv and sepconv
zehao-intel Jun 6, 2024
1cd24d2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 6, 2024
eea3029
set qdq instancenorm as no cover
zehao-intel Jun 6, 2024
d1802b0
Merge branch 'zehao/utc' of https://github.com/intel/neural-compresso…
zehao-intel Jun 6, 2024
09ee46c
fix test keras layers
zehao-intel Jun 6, 2024
1f4996b
fix test keras layers
zehao-intel Jun 6, 2024
42076c7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 6, 2024
42ed3c8
fix test keras layer
zehao-intel Jun 6, 2024
84db7fd
fix tf.py
zehao-intel Jun 6, 2024
85d477a
remove set_tensor ut
zehao-intel Jun 6, 2024
148752f
improve keras layer and kl algo
zehao-intel Jun 6, 2024
917f192
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 6, 2024
f457216
update graph_converter
zehao-intel Jun 7, 2024
1edcc0c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 7, 2024
8744714
Merge branch 'master' into zehao/utc
chensuyue Jun 12, 2024
5e43c59
collect tf new API coverage
chensuyue Jun 12, 2024
0a5003e
add pt omit path
chensuyue Jun 12, 2024
b3257cf
fix the issue
chensuyue Jun 12, 2024
90d4012
use sv param
zehao-intel Jun 13, 2024
c048cd8
run single case for pytest
chensuyue Jun 13, 2024
4a8152d
update test status show case
chensuyue Jun 13, 2024
dd7a4b5
add comments
chensuyue Jun 13, 2024
12f8628
for debug
chensuyue Jun 13, 2024
e38ae03
for test
chensuyue Jun 13, 2024
Improve UT Coverage for TF 3x
Signed-off-by: zehao-intel <[email protected]>
zehao-intel committed Jun 6, 2024
commit c2a14b833a26c79d5e8f9135f7f163eb4cc5da6a
104 changes: 0 additions & 104 deletions neural_compressor/adaptor/tensorflow.py
@@ -1030,110 +1030,6 @@ def check_match(patterns, input_pattern):

return capability

def set_tensor(self, model, tensor_dict):
"""Quantize the bias and weight tensors in tensor_dict."""
from .tf_utils.graph_util import GraphAnalyzer

g = GraphAnalyzer()
g.graph = model.graph_def
graph_info = g.parse_graph()

def _get_fp32_op_name(model, tensor_name):
is_weight = False
is_biasadd = False
last_node_name = None
current_node_name = None
for each_node in model.graph_def.node:
if tensor_name in each_node.input:
tensor_index = list(each_node.input).index(tensor_name)
if each_node.op.find("Quantized") != -1 and tensor_index == 2:
is_biasadd = True
last_node_name = each_node.input[0]
current_node_name = each_node.name

if tensor_name + "_qint8_const" in each_node.input:
pass

return is_weight, is_biasadd, current_node_name, last_node_name

from tensorflow.core.framework import attr_value_pb2
from tensorflow.python.framework import dtypes, tensor_util

from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper

qint32_type = dtypes.qint32.as_datatype_enum

for tensor_name, tensor_content in tensor_dict.items():
is_weight, is_biasadd, current_node_name, last_node_name = _get_fp32_op_name(model, tensor_name)

if is_biasadd:
is_biasadd_dtype_is_fp32 = graph_info[current_node_name].node.attr["Tbias"] == attr_value_pb2.AttrValue(
type=dtypes.float32.as_datatype_enum
)
current_node = graph_info[current_node_name].node
bias_add_node = graph_info[current_node.input[2]].node
if is_biasadd_dtype_is_fp32:
bias_add_node.attr["value"].CopyFrom(
attr_value_pb2.AttrValue(
tensor=tensor_util.make_tensor_proto(tensor_content, dtypes.float32, tensor_content.shape)
)
)
else:
last_node = graph_info[last_node_name].node
min_input = graph_info[last_node.input[-2]].node.attr["value"].tensor.float_val[0]
max_input = graph_info[last_node.input[-1]].node.attr["value"].tensor.float_val[0]
channel_size = tensor_content.shape[0]
max_filter_node = graph_info[current_node.input[6]].node
min_filter_node = graph_info[current_node.input[5]].node
if max_filter_node.attr["value"].tensor.float_val:
max_filter_tensor = []
min_filter_tensor = []
max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0])
min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0])
else:
max_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor)
min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor)
activation_range = 127.0 if current_node.attr["Tinput"].type == dtypes.qint8 else 255.0
updated_bias = Helper.generate_int32_bias_for_conv(
tensor_content,
channel_size,
max_input,
min_input,
max_filter_tensor,
min_filter_tensor,
activation_range,
)

bias_add_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type))
bias_add_node.attr["value"].CopyFrom(
attr_value_pb2.AttrValue(
tensor=tensor_util.make_tensor_proto(updated_bias, dtypes.int32, tensor_content.shape)
)
)
bias_add_node.attr["value"].tensor.dtype = qint32_type
current_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type))

if is_weight:
tmp_const_node = Helper.create_constant_node(
current_node.name + "_weights_tmp", tensor_content.transpose(2, 3, 1, 0), dtypes.float32
)
min_filter_node = graph_info[current_node.input[5]].node
per_channel = True if min_filter_node.attr["value"].tensor.tensor_shape else False
from .tf_utils.quantize_graph_common import QuantizeGraphHelper

original_fp32_op = current_node.op.split("With")[0].split("Quantized")[-1]
if original_fp32_op.find("Depthwise") != -1:
original_fp32_op = "DepthwiseConv2dNative"
qint8_const_node, min_node, max_node = QuantizeGraphHelper.generate_quantized_weight_node(
original_fp32_op, tmp_const_node, per_channel
)
g.add_node(qint8_const_node, [], [current_node.name])
g.add_node(min_node, [], [current_node.name])
g.add_node(max_node, [], [current_node.name])
g.replace_constant_graph_with_constant_node(qint8_const_node, tensor_name)
g.replace_constant_graph_with_constant_node(min_node, current_node.input[5])
g.replace_constant_graph_with_constant_node(max_node, current_node.input[6])

def inspect_weight_and_bias(self, node_list, graph_def, graph_info, graph_node_name_mapping):
"""Inspect the weights and biases."""
import tensorflow as tf
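The deleted set_tensor path rebuilt int32 bias constants from the calibrated input range and the filter min/max via Helper.generate_int32_bias_for_conv. A rough, self-contained sketch of the per-tensor scaling such a helper applies, assuming symmetric qint8 filters (illustrative only; the real helper also covers per-channel filter ranges and the exact rounding expected by the quantized kernels):

import numpy as np

def int32_bias_sketch(bias_fp32, max_input, min_input, max_filter, min_filter, activation_range=255.0):
    # activation scale: 255 levels for uint8 inputs, 127 for qint8 (matches the Tinput check above)
    input_scale = activation_range / (max_input - min_input)
    # symmetric qint8 filter scale; a per-channel variant would use arrays here
    filter_scale = 127.0 / max(abs(max_filter), abs(min_filter))
    # the int32 accumulator holds x_q * w_q, so the bias must carry both scales
    return np.around(np.asarray(bias_fp32) * input_scale * filter_scale).astype(np.int32)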
131 changes: 6 additions & 125 deletions neural_compressor/tensorflow/algorithms/static_quant/keras.py
@@ -90,46 +90,13 @@ def __init__(self, framework_specific_info):
os.mkdir(DEFAULT_WORKSPACE)
self.tmp_dir = (DEFAULT_WORKSPACE + "tmp_model.keras") if self.keras3 else (DEFAULT_WORKSPACE + "tmp_model")

def _check_itex(self):
"""Check if the Intel® Extension for TensorFlow has been installed."""
try:
import intel_extension_for_tensorflow
except:
raise ImportError(
"The Intel® Extension for TensorFlow is not installed. "
"Please install it to run models on ITEX backend"
)

def convert_bf16(self):
"""Execute the BF16 conversion."""
tf.keras.mixed_precision.set_global_policy("mixed_bfloat16")
model = self.pre_optimized_model

for layer in model.layers:
if layer.name in self.bf16_ops:
layer.dtype = "mixed_bfloat16"

model.save(self.tmp_dir)
converted_model = tf.keras.models.load_model(self.tmp_dir)
tf.keras.mixed_precision.set_global_policy("float32")

return converted_model

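The removed convert_bf16 helper relied on Keras mixed-precision policies. A minimal sketch of that mechanism, not of the deleted helper itself (the layer and shape are arbitrary): under the mixed_bfloat16 policy a layer computes in bfloat16 while its variables stay in float32.

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("mixed_bfloat16")
inputs = tf.keras.Input(shape=(8,))
outputs = tf.keras.layers.Dense(16, activation="relu")(inputs)
model = tf.keras.Model(inputs, outputs)
print(model.layers[1].compute_dtype)  # bfloat16: the matmul runs in bfloat16
print(model.layers[1].dtype)          # float32: variables keep full precision
tf.keras.mixed_precision.set_global_policy("float32")  # restore the default policy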
# (TODO) choose the proper quantize mode
def _check_quantize_mode(self, model):
"""Check what quantize mode to use."""
for layer in model.layers:
if "ReLU" in layer.__class__.__name__:
return "MIN_FIRST"
return "SCALED"

def _set_weights(self, qmodel, layer_weights):
"""Set fp32 weights to qmodel."""
for qlayer in qmodel.layers:
if qlayer.get_weights():
if qlayer.name in layer_weights:
qlayer.set_weights(layer_weights[qlayer.name])
else:
else: # pragma: no cover
hit_layer = False
for sub_layer in qlayer.submodules:
if sub_layer.name in layer_weights:
@@ -164,7 +131,7 @@ def _check_quantize_format(self, model):
self.conv_format[layer.name] = "u8"
break

def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers):
def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): # pragma: no cover
fuse_layers = []
fused_bn_name = ""
for idx, layer in enumerate(fp32_layers):
@@ -211,7 +178,7 @@ def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers):

return fuse_layers

def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers):
def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover
fuse_layers = []
for idx, layer in enumerate(fp32_layers):
if hasattr(layer, "_inbound_nodes"):
@@ -272,7 +239,7 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers):

return fuse_layers

def _fuse_bn(self, model):
def _fuse_bn(self, model): # pragma: no cover
"""Fusing Batch Normalization."""
model.save(self.tmp_dir)
fuse_bn_model = tf.keras.models.load_model(self.tmp_dir)
@@ -362,14 +329,6 @@ def quantize(self, quant_config, model, dataloader, iteration, q_func=None):
tune_cfg = converter.parse_to_tune_cfg()
self.tuning_cfg_to_fw(tune_cfg)

# just convert the input model to mixed_bfloat16
if self.bf16_ops and not self.quantize_config["op_wise_config"]:
converted_model = self.convert_bf16()
return converted_model

# if self.backend == "itex":
# self._check_itex()

logger.debug("Dump quantization configurations:")
logger.debug(self.quantize_config)
calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
Expand Down Expand Up @@ -469,59 +428,6 @@ def _calibrate(self, model, dataloader, calib_interation):

return quantized_model

@dump_elapsed_time(customized_msg="Model inference")
def evaluate(
self,
model,
dataloader,
postprocess=None,
metrics=None,
measurer=None,
iteration=-1,
fp32_baseline=False,
):
"""The function is used to run evaluation on validation dataset.

Args:
model (object): The model to do calibration.
dataloader (generator): generate the data and labels.
postprocess (object, optional): process the result from the model
metric (object, optional): Depends on model category. Defaults to None.
measurer (object, optional): for precise benchmark measurement.
iteration(int, optional): control steps of mini-batch
fp32_baseline (boolean, optional): only for compare_label=False pipeline
"""
# use keras object
keras_model = model.model
logger.info("Start to evaluate the Keras model.")
results = []
for idx, (inputs, labels) in enumerate(dataloader):
# use predict on batch
if measurer is not None:
measurer.start()
predictions = keras_model.predict_on_batch(inputs)
measurer.end()
else:
predictions = keras_model.predict_on_batch(inputs)

if self.fp32_preds_as_label:
self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions)

if postprocess is not None:
predictions, labels = postprocess((predictions, labels))
if metrics:
for metric in metrics:
if not hasattr(metric, "compare_label") or (
hasattr(metric, "compare_label") and metric.compare_label
):
metric.update(predictions, labels)
if idx + 1 == iteration:
break

acc = 0 if metrics is None else [metric.result() for metric in metrics]

return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0]

def query_fw_capability(self, model):
"""The function is used to return framework tuning capability.

@@ -621,7 +527,7 @@ def tuning_cfg_to_fw(self, tuning_cfg):
for each_op_info in tuning_cfg["op"]:
op_name = each_op_info[0]

if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16":
if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": # pragma: no cover
if each_op_info[1] in bf16_type:
bf16_ops.append(op_name)
continue
Expand Down Expand Up @@ -693,31 +599,6 @@ def _get_specified_version_cfg(self, data):

return default_config

def get_version(self):
"""Get the current backend version information.

Returns:
[string]: version string.
"""
return self.cur_config["version"]["name"]

def get_precisions(self):
"""Get supported precisions for current backend.

Returns:
[string list]: the precisions' name.
"""
return self.cur_config["precisions"]["names"]

def get_op_types(self):
"""Get the supported op types by all precisions.

Returns:
[dictionary list]: A list composed of dictionary which key is precision
and value is the op types.
"""
return self.cur_config["ops"]

def get_quantization_capability(self):
"""Get the supported op types' quantization capability.

Expand Down Expand Up @@ -846,7 +727,7 @@ def _parse_inputs(self, BN_fused_layers=None, conv_names=None):

try:
model_input = self.model.input
except ValueError:
except ValueError: # pragma: no cover
model_input = self.model.inputs[0]

return input_layer_dict, model_input
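Several hard-to-exercise branches above are tagged with # pragma: no cover rather than covered by new tests. A minimal sketch of how coverage.py treats the marker (the function is illustrative): lines matching its default exclude_lines pattern are omitted from the report, and when the excluded line opens a branch the whole branch body is skipped, so environment-specific paths do not lower the coverage percentage.

def pick_backend(itex_available=False):
    if itex_available:  # pragma: no cover  (reachable only when ITEX is installed)
        import intel_extension_for_tensorflow  # noqa: F401
        return "itex"
    return "default"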