Fix act_algo: default to minmax and use MinMaxObserver for activations

Signed-off-by: Cheng, Zixuan <[email protected]>
intel · violetch24 · Jun 14, 2024 · Jun 9, 2024 · Jun 11, 2024 · Jun 11, 2024
commit 959170d961a2b8eca1b9bbf95c13080059f00f03
diff --git a/neural_compressor/torch/algorithms/static_quant/static_quant.py b/neural_compressor/torch/algorithms/static_quant/static_quant.py
@@ -85,7 +85,15 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
             from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig
 
             if ipex_ver.release >= Version("2.1").release:
-                static_qconfig = ipex.quantization.default_static_qconfig_mapping
+                # Avoid the default mapping: its HistogramObserver causes a performance issue; use MinMaxObserver.
+                # static_qconfig = ipex.quantization.default_static_qconfig_mapping
+                qconfig = QConfig(
+                    activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),
+                    weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric),
+                )
+                from torch.ao.quantization import QConfigMapping
+
+                static_qconfig = QConfigMapping().set_global(qconfig)
             else:
                 static_qconfig = QConfig(
                     activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),

diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
@@ -980,7 +980,7 @@ def __init__(
         act_dtype: str = "uint8",
         act_sym: bool = False,
         act_granularity: str = "per_tensor",
-        act_algo: str = "kl",
+        act_algo: str = "minmax",
         excluded_precisions: list = [],
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):