Merged

Changes from 1 commit. All 29 commits in this pull request:

db16753  enable auto_round format export (WeiweiZhang1, Sep 12, 2024)
1eceb6d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
26fe175  Update auto_round dependency to commit 5dd16fc34a974a8c2f5a4288ce72e6… (XuehaoSun, Sep 12, 2024)
2e67cd5  fix docscan issues (WeiweiZhang1, Sep 12, 2024)
b99140c  Merge branch 'enable_autoround_format_quantization' of https://github… (WeiweiZhang1, Sep 12, 2024)
a7d1431  fixtypos (WeiweiZhang1, Sep 12, 2024)
8e78efc  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
0adc4ef  fix self.quantization_config (Kaihui-intel, Sep 12, 2024)
73d8c2e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
dc49120  Merge branch 'master' into enable_autoround_format_quantization (xin3he, Sep 13, 2024)
27b4f43  rm ar ut (Kaihui-intel, Sep 13, 2024)
46f3c76  fixtypos (WeiweiZhang1, Sep 13, 2024)
28e4878  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 13, 2024)
8bb25c9  Merge branch 'enable_autoround_format_quantization' of https://github… (Kaihui-intel, Sep 13, 2024)
c744130  revert ar ut (WeiweiZhang1, Sep 14, 2024)
39d66e0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
79f44f4  refine UT (WeiweiZhang1, Sep 14, 2024)
16a296e  refine UT (WeiweiZhang1, Sep 14, 2024)
91f7985  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
01136d7  fix unit test (XuehaoSun, Sep 14, 2024)
07ae762  against code coverage issue (WeiweiZhang1, Sep 14, 2024)
d3c3f39  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
461379a  fixtypo (WeiweiZhang1, Sep 14, 2024)
7fbf186  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
41bfca5  fixtypo (WeiweiZhang1, Sep 14, 2024)
7a72f52  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
f3bf7fb  fixtypo (WeiweiZhang1, Sep 14, 2024)
a280b10  fixtypo (WeiweiZhang1, Sep 14, 2024)
7f41ff0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
fixtypos
Signed-off-by: Zhang, Weiwei1 <[email protected]>
WeiweiZhang1 committed Sep 12, 2024
commit a7d1431902ea385d949888b3bd38e9f9493e7fa4
neural_compressor/torch/algorithms/weight_only/save_load.py (14 changes: 7 additions & 7 deletions)

@@ -40,7 +40,7 @@
 device_woqlinear_mapping = {"cpu": INCWeightOnlyLinear, "hpu": HPUWeightOnlyLinear}
 
 
-def save(model, output_dir="./saved_results", format="default", **kwargs):
+def save(model, output_dir="./saved_results", format=LoadFormat.DEFAULT, **kwargs):
     """Save the quantized model and config to the output path.
 
     Args:
@@ -56,7 +56,7 @@ def save(model, output_dir="./saved_results", format="default", **kwargs):
     if format == "huggingface":
         config = model.config
         quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
-        if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
+        if "backend" in quantization_config and 'auto_round' in quantization_config['backend']:
             safe_serialization = kwargs.get("safe_serialization", True)
             tokenizer = kwargs.get("tokenizer", None)
             max_shard_size = kwargs.get("max_shard_size", "5GB")

@@ -65,7 +65,7 @@ def save(model, output_dir="./saved_results", format="default", **kwargs):
             del model.save
             model.save_pretrained(output_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)
             return
 
     qmodel_weight_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME)
     qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME)
     # saving process
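
For reference, the huggingface branch of save() reads all of its export options out of **kwargs. Below is a minimal usage sketch, not code from the PR: the import path is inferred from the file location in this diff, and the quantized model object is assumed to come from an earlier auto_round quantization run.

    # Usage sketch: export a weight-only quantized model in HuggingFace layout.
    # `model` is assumed to be an INC-quantized model whose
    # config.quantization_config lists an auto_round backend.
    from neural_compressor.torch.algorithms.weight_only.save_load import save

    save(
        model,
        output_dir="./saved_results",
        format="huggingface",     # routes into the save_pretrained branch above
        safe_serialization=True,  # default, per kwargs.get("safe_serialization", True)
        max_shard_size="5GB",     # default, per kwargs.get("max_shard_size", "5GB")
    )
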
@@ -140,7 +140,7 @@ def load_woq_model(self):
         """
         if self.format == LoadFormat.HUGGINGFACE:
             assert self.model_name_or_path is not None, "'model_name_or_path' can't be None."
 
             model = self.load_hf_format_woq_model()
             logger.info("Loading HuggingFace weight-only quantization model successfully.")
         elif self.format == LoadFormat.DEFAULT:
@@ -213,7 +213,7 @@ def load_hf_format_woq_model(self):
         """
         # check required package
         from neural_compressor.torch.utils import is_package_available
 
         if not is_package_available("transformers"):
             raise ImportError("Loading huggingface model requires transformers: `pip install transformers`")
         if not is_package_available("accelerate"):
@@ -222,10 +222,9 @@ def load_hf_format_woq_model(self):
         # get model class and config
         model_class, config = self._get_model_class_and_config()
         quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
-        if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
+        if "backend" in quantization_config and 'auto_round' in quantization_config['backend']:
             # load autoround format quantized model
             from auto_round import AutoRoundConfig
-
             model = model_class.from_pretrained(self.model_name_or_path)
             return model
         # get loaded state_dict
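
The auto_round branch above imports AutoRoundConfig without using the name directly, presumably for its import-time side effect of making the auto_round quantization backend visible to transformers before from_pretrained() is called. A standalone sketch of the same loading pattern (the checkpoint path is a placeholder, and AutoModelForCausalLM stands in for whatever model_class resolves to):

    # Loading sketch (not from the PR), mirroring the branch above.
    from auto_round import AutoRoundConfig  # noqa: F401  # kept for its side effect, as in the diff
    from transformers import AutoModelForCausalLM

    # Placeholder path; point this at a checkpoint whose quantization_config
    # declares an auto_round backend.
    model = AutoModelForCausalLM.from_pretrained("path/to/auto_round_model")
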
@@ -889,3 +888,4 @@ def _use_hpu_module(self): # pragma: no cover
         if os.path.exists(os.path.join(self._model_local_dir, HPU_WEIGHT_NAME)):
             return True
         return False
+
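
A note on the signature change in the first hunk: save() now defaults to LoadFormat.DEFAULT instead of the bare string "default", matching the LoadFormat.HUGGINGFACE / LoadFormat.DEFAULT comparisons already used in load_woq_model(). The diff does not show the definition of LoadFormat; a hypothetical sketch, assuming string-valued constants so that comparisons like format == "huggingface" keep working:

    # Hypothetical sketch of a LoadFormat constants holder; the actual
    # definition lives elsewhere in neural_compressor and may differ.
    class LoadFormat:
        DEFAULT = "default"
        HUGGINGFACE = "huggingface"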