fix for sq
Signed-off-by: Cheng, Zixuan <[email protected]>
violetch24 committed Jun 12, 2024
commit 7236eb2a6df9d16f7ab602ca5b0bc74b418bfbab
@@ -82,13 +82,14 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
             model.output_tensor_id_op_name,
         )
 
-        # Update json file in ipex_config_path
-        cfg_to_qconfig(self.quant_config, cfgs, op_infos_from_cfgs, output_tensor_id_op_name)
-        model.eval()
 
         # check smoothquant alpha and act_algo value
         recipe_cfgs = self.quant_config.get("recipe_cfgs", None)
         alpha = recipe_cfgs["smooth_quant_args"]["alpha"]
 
+        # Update json file in ipex_config_path
+        cfg_to_qconfig(self.quant_config, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, alpha, smooth_quant=True)
+        model.eval()
+
         for op, _ in self.quant_config["op"].items():
             act_algo = self.quant_config["op"][op]["activation"]["algorithm"]
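Reviewer note: the fix above reorders prepare() so that the SmoothQuant alpha is read from recipe_cfgs before the IPEX qconfig JSON is written, because cfg_to_qconfig now embeds that alpha per op. A minimal standalone sketch of the intended flow, using a hypothetical quant_config dict (the real call is left commented out since its other arguments come from the surrounding method):

# Hypothetical config mirroring the structure prepare() expects.
quant_config = {
    "recipe_cfgs": {"smooth_quant_args": {"alpha": 0.6}},
}

# Resolve alpha first; it may be a float or the string "auto".
recipe_cfgs = quant_config.get("recipe_cfgs", None)
alpha = recipe_cfgs["smooth_quant_args"]["alpha"]

# Then write the qconfig JSON with that alpha:
# cfg_to_qconfig(quant_config, cfgs, op_infos_from_cfgs,
#                output_tensor_id_op_name, alpha, smooth_quant=True)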
6 changes: 3 additions & 3 deletions neural_compressor/torch/algorithms/smooth_quant/utility.py
@@ -164,7 +164,7 @@ def get_quantizable_ops_recursively(model, example_inputs, alpha, act_algo, inpl


 def check_cfg_and_qconfig(
-    tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name, smooth_quant=False
+    tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name, alpha, smooth_quant=True
 ): # pragma: no cover
     """Check configs and quantization configs.

@@ -205,7 +205,7 @@ def check_cfg_and_qconfig(
                 else:
                     smooth_quant_enable = False
                 activation_observer = generate_activation_observer(
-                    inc_scheme, inc_algorithm, smooth_quant, smooth_quant_enable
+                    inc_scheme, inc_algorithm, smooth_quant, smooth_quant_enable, alpha
                 )
                 if not smooth_quant:
                     if inc_scheme == "sym":
@@ -241,7 +241,7 @@ def check_cfg_and_qconfig(
 
 
 def cfg_to_qconfig(
-    tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, smooth_quant=False
+    tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, alpha, smooth_quant=True
 ): # pragma: no cover
     assert cfgs is not None, "No configure for IPEX int8 model..."
     op_infos = copy.deepcopy(op_infos_from_cfgs)
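Reviewer note: alpha is now a required positional parameter on both check_cfg_and_qconfig and cfg_to_qconfig, and smooth_quant defaults to True instead of False, so any out-of-tree caller still using the old four-argument form will break. A small sketch of the effect, with a stub standing in for the real function:

def cfg_to_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, alpha, smooth_quant=True):
    ...  # stub with the new signature; the real body rewrites the IPEX qconfig

try:
    cfg_to_qconfig({}, {}, {}, {})  # old four-argument call shape
except TypeError as err:
    print(err)  # missing 1 required positional argument: 'alpha'

cfg_to_qconfig({}, {}, {}, {}, alpha=0.5)  # updated call shape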
Expand Down
8 changes: 5 additions & 3 deletions neural_compressor/torch/algorithms/static_quant/utility.py
@@ -157,7 +157,9 @@ def check_cfg_and_qconfig(user_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_
     return cfgs, user_cfg
 
 
-def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_quant_enable=False): # pragma: no cover
+def generate_activation_observer(
+    scheme, algorithm, smooth_quant=False, smooth_quant_enable=False, alpha=0.5
+): # pragma: no cover
     """This is a helper method to generate an activation observer.
 
     Args:
@@ -193,7 +195,7 @@ def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_q
         "reduce_range": False,
         "quant_min": 0,
         "quant_max": 255,
-        "alpha": 0.5,
+        "alpha": 0.5 if alpha == "auto" else alpha,
         "act_observer": kl_activation_observer,
         "act_ic_observer": {
             "name": "PerChannelMinMaxObserver",
@@ -213,7 +215,7 @@ def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_q
         "reduce_range": False,
         "quant_min": 0,
         "quant_max": 255,
-        "alpha": 0.5,
+        "alpha": 0.5 if alpha == "auto" else alpha,
         "act_observer": minmax_activation_observer,
         "act_ic_observer": {
             "name": "PerChannelMinMaxObserver",