update config
Signed-off-by: Kaihui-intel <[email protected]>
Kaihui-intel committed Jul 23, 2024
commit 650d4356fc01b38ede2463835b695e2e41016cea
`neural_compressor/adaptor/pytorch.py` (4 changes: 2 additions & 2 deletions)

```diff
@@ -4926,7 +4926,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
         act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
         act_sym = self.recipes["autoround_args"].get("act_sym", None)
         act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
-        multimodal = self.recipes["autoround_args"].get("multimodal", False)
+        quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
         use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)

         if dataloader is not None:
@@ -4959,7 +4959,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             dynamic_max_gap=dynamic_max_gap,
             data_type=data_type,
             scale_dtype=scale_dtype,
-            multimodal=multimodal,
+            quant_block_list=quant_block_list,
             act_bits=act_bits,
             act_group_size=act_group_size,
             act_sym=act_sym,
```
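On the legacy 2.x adaptor path shown above, the new key is read out of the `autoround_args` recipe dict, so callers opt in by adding it there. A minimal sketch of how that dict might be populated, assuming the usual `PostTrainingQuantConfig` recipes flow and invented module names:

```python
from neural_compressor import PostTrainingQuantConfig

# Hypothetical grouping: each inner list names the modules of one
# transformer block that AutoRound should treat as a quantization unit.
quant_block_list = [
    ["model.layers.0", "model.layers.1"],  # assumed module names
    ["model.layers.2", "model.layers.3"],
]

conf = PostTrainingQuantConfig(
    approach="weight_only",
    recipes={
        "autoround_args": {
            # Replaces the removed `multimodal` flag; leaving it as None
            # presumably falls back to AutoRound's own block detection.
            "quant_block_list": quant_block_list,
        }
    },
)
```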
`neural_compressor/adaptor/torch_utils/weight_only.py` (6 changes: 3 additions & 3 deletions)

```diff
@@ -706,7 +706,7 @@ def autoround_quantize(
     dynamic_max_gap: int = -1,
     data_type: str = "int",  ##only support int for now
     scale_dtype: str = "fp16",
-    multimodal: bool = False,
+    quant_block_list: list = None,
     act_bits: int = 32,
     act_group_size: int = None,
     act_sym: bool = None,
@@ -761,7 +761,7 @@ def autoround_quantize(
         data_type (str): The data type to be used (default is "int").
         scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
             have different choices.
-        multimodal(bool): Enable multimodal model quantization, (default is "False").
+        quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
         act_bits (int): Number of bits for activation quantization. Default is 32.
         act_group_size (int): Group size for activation quantization. Default is None.
         act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@ def autoround_quantize(
         dynamic_max_gap=dynamic_max_gap,
         data_type=data_type,  ## only support data_type
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         act_bits=act_bits,
         act_group_size=act_group_size,
         act_sym=act_sym,
```
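The docstring's list-of-lists shape is easy to misread, so here is an illustration of a well-formed value, grounded only in the docstring above (the module names are hypothetical):

```python
# Each inner list holds the layer/module names that make up one block;
# per the docstring, only the listed blocks are quantized.
# The names below are invented placeholders.
quant_block_list = [
    ["model.layers.0"],
    ["model.layers.1"],
]
```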
`neural_compressor/torch/algorithms/weight_only/autoround.py` (8 changes: 4 additions & 4 deletions)

```diff
@@ -53,7 +53,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         data_type: str = "int",
         scale_dtype: str = "fp16",
-        multimodal: bool = False,
+        quant_block_list: list = None,
         act_bits: int = 32,
         act_group_size: int = None,
         act_sym: bool = None,
@@ -112,7 +112,7 @@ def __init__(
             data_type (str): The data type to be used (default is "int").
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                 have different choices.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+            quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
             act_bits (int): Number of bits for activation quantization. Default is 32.
             act_group_size (int): Group size for activation quantization. Default is None.
             act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -144,7 +144,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.data_type = data_type
         self.scale_dtype = scale_dtype
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self.act_bits = act_bits
         self.act_group_size = act_group_size
         self.act_sym = act_sym
@@ -191,7 +191,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
             dynamic_max_gap=self.dynamic_max_gap,
             data_type=self.data_type,
             scale_dtype=self.scale_dtype,
-            multimodal=self.multimodal,
+            quant_block_list=self.quant_block_list,
             act_bits=self.act_bits,
             act_group_size=self.act_group_size,
             act_sym=self.act_sym,
```
`neural_compressor/torch/quantization/algorithm_entry.py` (4 changes: 2 additions & 2 deletions)

```diff
@@ -600,7 +600,7 @@ def autoround_quantize_entry(
     not_use_best_mse = quant_config.not_use_best_mse
     dynamic_max_gap = quant_config.dynamic_max_gap
     scale_dtype = quant_config.scale_dtype
-    multimodal = quant_config.multimodal
+    quant_block_list = quant_config.quant_block_list
     low_cpu_mem_usage = quant_config.use_layer_wise

     kwargs.pop("example_inputs")
@@ -627,7 +627,7 @@ def autoround_quantize_entry(
         not_use_best_mse=not_use_best_mse,
         dynamic_max_gap=dynamic_max_gap,
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         low_cpu_mem_usage=low_cpu_mem_usage,
     )
     model = quantizer.execute(model=model, mode=mode, *args, **kwargs)
```
`neural_compressor/torch/quantization/config.py` (6 changes: 3 additions & 3 deletions)

```diff
@@ -773,7 +773,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         scale_dtype: str = "fp16",
         use_layer_wise: bool = False,
-        multimodal: bool = False,
+        quant_block_list: list = None,
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):
         """Init AUTOROUND weight-only quantization config.
@@ -807,7 +807,7 @@ def __init__(
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                 have different choices.
             use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+            quant_block_list (list): A list whose elements are lists of layer names for the blocks to be quantized.
         """
         super().__init__(white_list=white_list)
         self.dtype = dtype
@@ -837,7 +837,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.scale_dtype = scale_dtype
         self.use_layer_wise = use_layer_wise
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self._post_init()

     @classmethod
```
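End to end, the renamed option flows from `AutoRoundConfig` through `autoround_quantize_entry` into the quantizer. A minimal usage sketch on the 3.x PyTorch API, assuming the standard prepare/convert flow, an already-loaded `model`, and hypothetical layer names:

```python
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

# `model` is assumed to be a loaded torch.nn.Module, e.g. a Hugging Face
# causal LM. `quant_block_list` replaces the old `multimodal` flag: rather
# than a boolean switch, callers name the blocks to quantize explicitly.
# The layer names below are hypothetical.
quant_config = AutoRoundConfig(quant_block_list=[["model.layers.0", "model.layers.1"]])

model = prepare(model, quant_config)  # wrap the model for calibration
# ... feed a few calibration batches through `model` here ...
model = convert(model)                # apply AutoRound weight-only quantization
```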