update entry
Signed-off-by: Kaihui-intel <[email protected]>
Kaihui-intel committed Jul 15, 2024
commit cc61136b8d42a853339f854e3164a8bca3a4824a
25 changes: 16 additions & 9 deletions neural_compressor/torch/algorithms/weight_only/awq.py
@@ -89,7 +89,7 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={}
     return block_absorb_dict, absorb_layer_dict
 
 
-def _get_block_absorb_dict(model, absorb_layer_dict):
+def _get_absorb_dict(model, absorb_layer_dict):
     """Get absorbed layer per block from absorbed layer dict.
 
     Args:
@@ -101,15 +101,22 @@ def _get_block_absorb_dict(model, absorb_layer_dict):
     """
     block_absorb_dict = {}
     block_prefix, block_num = get_block_prefix(model)
+    new_absorb_layer_dict = {}
     for i in range(block_num):
         block_absorb_dict[i] = []
         block_name = block_prefix + "." + str(i) + "."
 
         for k, v in absorb_layer_dict.items():
-            if all(block_name in elem for elem in k):
-                block_absorb_dict[i].append(k)
+            if isinstance(v, str):
+                name_list = (block_name + v, )
+            else:
+                name_list = tuple(block_name + vv for vv in v)
+            block_absorb_dict[i].append(name_list)
+            new_absorb_layer_dict[name_list] = block_name + k
     logger.debug(f"The absorbed layers per block: {block_absorb_dict}")
-    return block_absorb_dict
+    logger.debug(f"The absorb_layer_dict: {absorb_layer_dict}")
+    return block_absorb_dict, new_absorb_layer_dict
 
 
 @torch.no_grad()
@@ -198,7 +205,7 @@ def quantize(self, use_auto_scale=True, use_mse_search=True, folding=False, retu
                 weight_config=self.weight_config,
             )
         else:
-            self.block_absorb_dict = _get_block_absorb_dict(self.model, self.absorb_layer_dict)
+            self.block_absorb_dict, self.absorb_layer_dict = _get_absorb_dict(self.model, self.absorb_layer_dict)
         # process per block
         for i, module_list in self.block_absorb_dict.items():
             logger.info(f"Processing block: {i+1}/{self.block_num}")
@@ -519,15 +526,15 @@ def module_inference(self, model, inputs):
 
 
 class AWQQuantizer(Quantizer):
-    def __init__(self, quant_config: OrderedDict = {}, absorb_to_layer: dict = {}):
+    def __init__(self, quant_config: OrderedDict = {}, absorb_layer_dict: dict = {}):
         """Init an AWQQuantizer object.
 
         Args:
             quant_config (OrderedDict, optional): quantization config for ops. Defaults to {}.
-            absorb_to_layer (dict): The layer dict that scale can be absorbed, default is {}.
+            absorb_layer_dict (dict): The layer dict that scale can be absorbed, default is {}.
         """
         super().__init__(quant_config)
-        self.absorb_to_layer = absorb_to_layer
+        self.absorb_layer_dict = absorb_layer_dict
 
     @torch.no_grad()
     def prepare(self, model, *args, **kwargs):
@@ -596,7 +603,7 @@ def convert(
             weight_config=self.quant_config,
             total_block_args=total_block_args,
             total_block_kwargs=total_block_kwargs,
-            absorb_layer_dict=self.absorb_to_layer,
+            absorb_layer_dict=self.absorb_layer_dict,
         )
         qdq_model = awq.quantize(
             use_auto_scale=use_auto_scale,
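For reference, a minimal standalone sketch (not part of this diff) of the per-block name expansion that the new _get_absorb_dict performs. The block prefix and layer names below are made-up placeholders; in the real code block_prefix and block_num come from get_block_prefix(model).

absorb_layer_dict = {
    "input_layernorm": ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"],
    "post_attention_layernorm": "mlp.gate_proj",
}
block_prefix, block_num = "model.layers", 2  # stand-in for get_block_prefix(model)

block_absorb_dict, new_absorb_layer_dict = {}, {}
for i in range(block_num):
    block_absorb_dict[i] = []
    block_name = block_prefix + "." + str(i) + "."
    for k, v in absorb_layer_dict.items():
        # a str value is a single absorbed layer; a list holds several layers sharing one scale
        name_list = (block_name + v,) if isinstance(v, str) else tuple(block_name + vv for vv in v)
        block_absorb_dict[i].append(name_list)
        new_absorb_layer_dict[name_list] = block_name + k

print(block_absorb_dict[0])
# [('model.layers.0.self_attn.q_proj', 'model.layers.0.self_attn.k_proj', 'model.layers.0.self_attn.v_proj'),
#  ('model.layers.0.mlp.gate_proj',)]
print(new_absorb_layer_dict[("model.layers.0.mlp.gate_proj",)])
# model.layers.0.post_attention_layernorm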
4 changes: 2 additions & 2 deletions neural_compressor/torch/quantization/algorithm_entry.py
@@ -347,15 +347,15 @@ def awq_quantize_entry(
     use_mse_search = quant_config.use_auto_clip  # for awq clip
     folding = quant_config.folding
     use_full_range = quant_config.use_full_range
-    absorb_to_layer = quant_config.absorb_to_layer
+    absorb_layer_dict = quant_config.absorb_layer_dict
 
     run_fn = kwargs.get("run_fn", None)
     run_args = kwargs.get("run_args", None)
     example_inputs = kwargs.get("example_inputs", None)
     assert example_inputs is not None, "Please provide example_inputs for AWQ quantization."
 
     quantizer = get_quantizer(
-        model, quantizer_cls=AWQQuantizer, quant_config=weight_config, absorb_to_layer=absorb_to_layer
+        model, quantizer_cls=AWQQuantizer, quant_config=weight_config, absorb_layer_dict=absorb_layer_dict
     )
     model = quantizer.execute(
         model,
8 changes: 4 additions & 4 deletions neural_compressor/torch/quantization/config.py
@@ -425,7 +425,7 @@ class AWQConfig(BaseConfig):
         "use_auto_scale",
         "use_auto_clip",
         "folding",
-        "absorb_to_layer",
+        "absorb_layer_dict",
     ]
     name = AWQ
 
@@ -452,7 +452,7 @@ def __init__(
         use_auto_clip: bool = True,
         folding: bool = False,
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
-        absorb_to_layer: dict = {},
+        absorb_layer_dict: dict = {},
     ):
         """Init AWQ weight-only quantization config.
 
@@ -475,7 +475,7 @@ def __init__(
             use_auto_clip (bool): Enables clip range search. Defaults to True.
             folding(bool): Allow insert mul before linear when the scale cannot be absorbed by last layer,
                 default is False.
-            absorb_to_layer (dict): The layer dict that scale can be absorbed, default is {}.
+            absorb_layer_dict (dict): The layer dict that scale can be absorbed, default is {}.
         """
         super().__init__(white_list=white_list)
         self.dtype = dtype
@@ -496,7 +496,7 @@ def __init__(
         self.use_auto_scale = use_auto_scale
         self.use_auto_clip = use_auto_clip
         self.folding = folding
-        self.absorb_to_layer = absorb_to_layer
+        self.absorb_layer_dict = absorb_layer_dict
        self._post_init()
 
     @classmethod
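End to end, the renamed option is what users pass through AWQConfig. A hedged usage sketch, not taken from this PR: the import path follows this repo's 3.x torch API, while model, run_fn, example_inputs and the layer names are user-supplied placeholders.

from neural_compressor.torch.quantization import AWQConfig, convert, prepare

quant_config = AWQConfig(
    use_auto_scale=True,
    use_auto_clip=True,
    folding=False,
    # keys: the layer the scale is folded into; values: the linear layer(s) it feeds
    absorb_layer_dict={
        "input_layernorm": ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"],
        "post_attention_layernorm": "mlp.gate_proj",
    },
)

model = prepare(model, quant_config, example_inputs=example_inputs)
run_fn(model)  # user-provided calibration loop
model = convert(model)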