update config
Signed-off-by: Kaihui-intel <[email protected]>
Kaihui-intel committed Jul 23, 2024
commit 650d4356fc01b38ede2463835b695e2e41016cea
`neural_compressor/adaptor/pytorch.py` (4 changes: 2 additions & 2 deletions)

```diff
@@ -4926,7 +4926,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
         act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
         act_sym = self.recipes["autoround_args"].get("act_sym", None)
         act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
-        multimodal = self.recipes["autoround_args"].get("multimodal", False)
+        quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
         use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)

         if dataloader is not None:
@@ -4959,7 +4959,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             dynamic_max_gap=dynamic_max_gap,
             data_type=data_type,
             scale_dtype=scale_dtype,
-            multimodal=multimodal,
+            quant_block_list=quant_block_list,
             act_bits=act_bits,
             act_group_size=act_group_size,
             act_sym=act_sym,
```
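On the legacy 2.x adaptor path shown above, the new key is read out of the `autoround_args` recipe dict, so callers opt in by adding it there. A minimal sketch of how that dict might be populated, assuming the usual `PostTrainingQuantConfig` recipes flow and invented module names:

```python
from neural_compressor import PostTrainingQuantConfig

# Hypothetical grouping: each inner list names the modules of one
# transformer block that AutoRound should treat as a quantization unit.
quant_block_list = [
    ["model.layers.0", "model.layers.1"],  # assumed module names
    ["model.layers.2", "model.layers.3"],
]

conf = PostTrainingQuantConfig(
    approach="weight_only",
    recipes={
        "autoround_args": {
            # Replaces the removed `multimodal` flag; leaving it as None
            # presumably falls back to AutoRound's own block detection.
            "quant_block_list": quant_block_list,
        }
    },
)
```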
`neural_compressor/adaptor/torch_utils/weight_only.py` (6 changes: 3 additions & 3 deletions)

```diff
@@ -706,7 +706,7 @@ def autoround_quantize(
     dynamic_max_gap: int = -1,
     data_type: str = "int",  ##only support int for now
     scale_dtype: str = "fp16",
-    multimodal: bool = False,
+    quant_block_list: list = None,
     act_bits: int = 32,
     act_group_size: int = None,
     act_sym: bool = None,
@@ -761,7 +761,7 @@ def autoround_quantize(
         data_type (str): The data type to be used (default is "int").
         scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
             have different choices.
-        multimodal(bool): Enable multimodal model quantization, (default is "False").
+        quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
         act_bits (int): Number of bits for activation quantization. Default is 32.
         act_group_size (int): Group size for activation quantization. Default is None.
         act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@ def autoround_quantize(
         dynamic_max_gap=dynamic_max_gap,
         data_type=data_type,  ## only support data_type
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         act_bits=act_bits,
         act_group_size=act_group_size,
         act_sym=act_sym,
```
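The docstring's list-of-lists shape is easy to misread, so here is an illustration of a well-formed value, grounded only in the docstring above (the module names are hypothetical):

```python
# Each inner list holds the layer/module names that make up one block;
# per the docstring, only the listed blocks are quantized.
# The names below are invented placeholders.
quant_block_list = [
    ["model.layers.0"],
    ["model.layers.1"],
]
```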
`neural_compressor/torch/algorithms/weight_only/autoround.py` (8 changes: 4 additions & 4 deletions)

```diff
@@ -53,7 +53,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         data_type: str = "int",
         scale_dtype: str = "fp16",
-        multimodal: bool = False,
+        quant_block_list: list = None,
         act_bits: int = 32,
         act_group_size: int = None,
         act_sym: bool = None,
@@ -112,7 +112,7 @@ def __init__(
             data_type (str): The data type to be used (default is "int").
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                 have different choices.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+            quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
             act_bits (int): Number of bits for activation quantization. Default is 32.
             act_group_size (int): Group size for activation quantization. Default is None.
             act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -144,7 +144,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.data_type = data_type
         self.scale_dtype = scale_dtype
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self.act_bits = act_bits
         self.act_group_size = act_group_size
         self.act_sym = act_sym
@@ -191,7 +191,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
             dynamic_max_gap=self.dynamic_max_gap,
             data_type=self.data_type,
             scale_dtype=self.scale_dtype,
-            multimodal=self.multimodal,
+            quant_block_list=self.quant_block_list,
             act_bits=self.act_bits,
             act_group_size=self.act_group_size,
             act_sym=self.act_sym,
```
`neural_compressor/torch/quantization/algorithm_entry.py` (4 changes: 2 additions & 2 deletions)

```diff
@@ -600,7 +600,7 @@ def autoround_quantize_entry(
     not_use_best_mse = quant_config.not_use_best_mse
     dynamic_max_gap = quant_config.dynamic_max_gap
     scale_dtype = quant_config.scale_dtype
-    multimodal = quant_config.multimodal
+    quant_block_list = quant_config.quant_block_list
     low_cpu_mem_usage = quant_config.use_layer_wise

     kwargs.pop("example_inputs")
@@ -627,7 +627,7 @@ def autoround_quantize_entry(
         not_use_best_mse=not_use_best_mse,
         dynamic_max_gap=dynamic_max_gap,
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         low_cpu_mem_usage=low_cpu_mem_usage,
     )
     model = quantizer.execute(model=model, mode=mode, *args, **kwargs)
```
`neural_compressor/torch/quantization/config.py` (6 changes: 3 additions & 3 deletions)

```diff
@@ -773,7 +773,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         scale_dtype: str = "fp16",
         use_layer_wise: bool = False,
-        multimodal: bool = False,
+        quant_block_list: list = None,
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):
         """Init AUTOROUND weight-only quantization config.
@@ -807,7 +807,7 @@ def __init__(
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                 have different choices.
             use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+            quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
         """
         super().__init__(white_list=white_list)
         self.dtype = dtype
@@ -837,7 +837,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.scale_dtype = scale_dtype
         self.use_layer_wise = use_layer_wise
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self._post_init()

     @classmethod
```
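End to end, the renamed option flows from `AutoRoundConfig` through `autoround_quantize_entry` into the quantizer. A minimal usage sketch on the 3.x PyTorch API, assuming the standard prepare/convert flow, an already-loaded `model`, and hypothetical layer names:

```python
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

# `model` is assumed to be a loaded torch.nn.Module, e.g. a Hugging Face
# causal LM. `quant_block_list` replaces the old `multimodal` flag: rather
# than a boolean switch, callers name the blocks to quantize explicitly.
# The layer names below are hypothetical.
quant_config = AutoRoundConfig(quant_block_list=[["model.layers.0", "model.layers.1"]])

model = prepare(model, quant_config)  # wrap the model for calibration
# ... feed a few calibration batches through `model` here ...
model = convert(model)                # apply AutoRound weight-only quantization
```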