Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jul 15, 2024
commit 1bf8db0d1fb5881bdc535c9d97cdf4fe90ad344d
2 changes: 2 additions & 0 deletions neural_compressor/torch/algorithms/weight_only/awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={}
logger.debug(f"The absorb_layer_dict: {absorb_layer_dict}")
return block_absorb_dict, absorb_layer_dict


def _get_block_absorb_dict(model, absorb_layer_dict):
"""Get absorbed layer per block from absorbed layer dict.

Expand All @@ -110,6 +111,7 @@ def _get_block_absorb_dict(model, absorb_layer_dict):
logger.debug(f"The absorb_layer_dict: {absorb_layer_dict}")
return block_absorb_dict


@torch.no_grad()
def _get_weight_scale(weight, q_group_size=-1):
org_shape = weight.shape
Expand Down
4 changes: 3 additions & 1 deletion neural_compressor/torch/quantization/algorithm_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,9 @@ def awq_quantize_entry(
example_inputs = kwargs.get("example_inputs", None)
assert example_inputs is not None, "Please provide example_inputs for AWQ quantization."

quantizer = get_quantizer(model, quantizer_cls=AWQQuantizer, quant_config=weight_config, absorb_to_layer=absorb_to_layer)
quantizer = get_quantizer(
model, quantizer_cls=AWQQuantizer, quant_config=weight_config, absorb_to_layer=absorb_to_layer
)
model = quantizer.execute(
model,
mode=mode,
Expand Down
63 changes: 43 additions & 20 deletions test/3x/torch/quantization/weight_only/test_awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,46 @@ def test_quant_lm_head(self):

def test_awq_absorb_to_layer(self):
absorb_to_layer = {
('transformer.h.0.attn.q_proj', 'transformer.h.0.attn.k_proj', 'transformer.h.0.attn.v_proj', 'transformer.h.0.mlp.fc_in'): 'transformer.h.0.ln_1',
('transformer.h.0.attn.out_proj',): 'transformer.h.0.attn.out_proj',
('transformer.h.0.mlp.fc_out',): 'transformer.h.0.mlp.fc_out',
('transformer.h.1.attn.q_proj', 'transformer.h.1.attn.k_proj', 'transformer.h.1.attn.v_proj', 'transformer.h.1.mlp.fc_in'): 'transformer.h.1.ln_1',
('transformer.h.1.attn.out_proj',): 'transformer.h.1.attn.out_proj',
('transformer.h.1.mlp.fc_out',): 'transformer.h.1.mlp.fc_out',
('transformer.h.2.attn.q_proj', 'transformer.h.2.attn.k_proj', 'transformer.h.2.attn.v_proj', 'transformer.h.2.mlp.fc_in'): 'transformer.h.2.ln_1',
('transformer.h.2.attn.out_proj',): 'transformer.h.2.attn.out_proj',
('transformer.h.2.mlp.fc_out',): 'transformer.h.2.mlp.fc_out',
('transformer.h.3.attn.q_proj', 'transformer.h.3.attn.k_proj', 'transformer.h.3.attn.v_proj', 'transformer.h.3.mlp.fc_in'): 'transformer.h.3.ln_1',
('transformer.h.3.attn.out_proj',): 'transformer.h.3.attn.out_proj',
('transformer.h.3.mlp.fc_out',): 'transformer.h.3.mlp.fc_out',
('transformer.h.4.attn.q_proj', 'transformer.h.4.attn.k_proj', 'transformer.h.4.attn.v_proj', 'transformer.h.4.mlp.fc_in'): 'transformer.h.4.ln_1',
('transformer.h.4.attn.out_proj',): 'transformer.h.4.attn.out_proj',
('transformer.h.4.mlp.fc_out',): 'transformer.h.4.mlp.fc_out'
(
"transformer.h.0.attn.q_proj",
"transformer.h.0.attn.k_proj",
"transformer.h.0.attn.v_proj",
"transformer.h.0.mlp.fc_in",
): "transformer.h.0.ln_1",
("transformer.h.0.attn.out_proj",): "transformer.h.0.attn.out_proj",
("transformer.h.0.mlp.fc_out",): "transformer.h.0.mlp.fc_out",
(
"transformer.h.1.attn.q_proj",
"transformer.h.1.attn.k_proj",
"transformer.h.1.attn.v_proj",
"transformer.h.1.mlp.fc_in",
): "transformer.h.1.ln_1",
("transformer.h.1.attn.out_proj",): "transformer.h.1.attn.out_proj",
("transformer.h.1.mlp.fc_out",): "transformer.h.1.mlp.fc_out",
(
"transformer.h.2.attn.q_proj",
"transformer.h.2.attn.k_proj",
"transformer.h.2.attn.v_proj",
"transformer.h.2.mlp.fc_in",
): "transformer.h.2.ln_1",
("transformer.h.2.attn.out_proj",): "transformer.h.2.attn.out_proj",
("transformer.h.2.mlp.fc_out",): "transformer.h.2.mlp.fc_out",
(
"transformer.h.3.attn.q_proj",
"transformer.h.3.attn.k_proj",
"transformer.h.3.attn.v_proj",
"transformer.h.3.mlp.fc_in",
): "transformer.h.3.ln_1",
("transformer.h.3.attn.out_proj",): "transformer.h.3.attn.out_proj",
("transformer.h.3.mlp.fc_out",): "transformer.h.3.mlp.fc_out",
(
"transformer.h.4.attn.q_proj",
"transformer.h.4.attn.k_proj",
"transformer.h.4.attn.v_proj",
"transformer.h.4.mlp.fc_in",
): "transformer.h.4.ln_1",
("transformer.h.4.attn.out_proj",): "transformer.h.4.attn.out_proj",
("transformer.h.4.mlp.fc_out",): "transformer.h.4.mlp.fc_out",
}
quant_config = AWQConfig(absorb_to_layer=absorb_to_layer)
logger.info(f"Test AWQ with config {quant_config}")
Expand All @@ -187,7 +212,7 @@ def test_awq_absorb_to_layer(self):
calib_func(model)
model = convert(model)
out1 = model(self.example_inputs)

quant_config = AWQConfig()
logger.info(f"Test AWQ with config {quant_config}")

Expand All @@ -200,7 +225,5 @@ def test_awq_absorb_to_layer(self):
calib_func(model)
model = convert(model)
out2 = model(self.example_inputs)

assert torch.all(
out1[0].eq(out2[0])
), "The results should be equal."

assert torch.all(out1[0].eq(out2[0])), "The results should be equal."