Merged

Changes from 1 commit. All 29 commits in this pull request:

db16753  enable auto_round format export (WeiweiZhang1, Sep 12, 2024)
1eceb6d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
26fe175  Update auto_round dependency to commit 5dd16fc34a974a8c2f5a4288ce72e6… (XuehaoSun, Sep 12, 2024)
2e67cd5  fix docscan issues (WeiweiZhang1, Sep 12, 2024)
b99140c  Merge branch 'enable_autoround_format_quantization' of https://github… (WeiweiZhang1, Sep 12, 2024)
a7d1431  fixtypos (WeiweiZhang1, Sep 12, 2024)
8e78efc  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
0adc4ef  fix self.quantization_config (Kaihui-intel, Sep 12, 2024)
73d8c2e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 12, 2024)
dc49120  Merge branch 'master' into enable_autoround_format_quantization (xin3he, Sep 13, 2024)
27b4f43  rm ar ut (Kaihui-intel, Sep 13, 2024)
46f3c76  fixtypos (WeiweiZhang1, Sep 13, 2024)
28e4878  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 13, 2024)
8bb25c9  Merge branch 'enable_autoround_format_quantization' of https://github… (Kaihui-intel, Sep 13, 2024)
c744130  revert ar ut (WeiweiZhang1, Sep 14, 2024)
39d66e0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
79f44f4  refine UT (WeiweiZhang1, Sep 14, 2024)
16a296e  refine UT (WeiweiZhang1, Sep 14, 2024)
91f7985  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
01136d7  fix unit test (XuehaoSun, Sep 14, 2024)
07ae762  against code coverage issue (WeiweiZhang1, Sep 14, 2024)
d3c3f39  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
461379a  fixtypo (WeiweiZhang1, Sep 14, 2024)
7fbf186  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
41bfca5  fixtypo (WeiweiZhang1, Sep 14, 2024)
7a72f52  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
f3bf7fb  fixtypo (WeiweiZhang1, Sep 14, 2024)
a280b10  fixtypo (WeiweiZhang1, Sep 14, 2024)
7f41ff0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 14, 2024)
fixtypos
Signed-off-by: Zhang, Weiwei1 <[email protected]>
WeiweiZhang1 committed Sep 12, 2024
commit a7d1431902ea385d949888b3bd38e9f9493e7fa4
neural_compressor/torch/algorithms/weight_only/save_load.py (14 changes: 7 additions & 7 deletions)

@@ -40,7 +40,7 @@
 device_woqlinear_mapping = {"cpu": INCWeightOnlyLinear, "hpu": HPUWeightOnlyLinear}
 
 
-def save(model, output_dir="./saved_results", format="default", **kwargs):
+def save(model, output_dir="./saved_results", format=LoadFormat.DEFAULT, **kwargs):
     """Save the quantized model and config to the output path.
 
     Args:
@@ -56,7 +56,7 @@ def save(model, output_dir="./saved_results", format="default", **kwargs):
     if format == "huggingface":
         config = model.config
         quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
-        if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
+        if "backend" in quantization_config and 'auto_round' in quantization_config['backend']:
             safe_serialization = kwargs.get("safe_serialization", True)
             tokenizer = kwargs.get("tokenizer", None)
             max_shard_size = kwargs.get("max_shard_size", "5GB")

@@ -65,7 +65,7 @@ def save(model, output_dir="./saved_results", format="default", **kwargs):
             del model.save
             model.save_pretrained(output_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)
             return
 
     qmodel_weight_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME)
     qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME)
     # saving process
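
For reference, the huggingface branch of save() reads all of its export options out of **kwargs. Below is a minimal usage sketch, not code from the PR: the import path is inferred from the file location in this diff, and the quantized model object is assumed to come from an earlier auto_round quantization run.

    # Usage sketch: export a weight-only quantized model in HuggingFace layout.
    # `model` is assumed to be an INC-quantized model whose
    # config.quantization_config lists an auto_round backend.
    from neural_compressor.torch.algorithms.weight_only.save_load import save

    save(
        model,
        output_dir="./saved_results",
        format="huggingface",     # routes into the save_pretrained branch above
        safe_serialization=True,  # default, per kwargs.get("safe_serialization", True)
        max_shard_size="5GB",     # default, per kwargs.get("max_shard_size", "5GB")
    )
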
@@ -140,7 +140,7 @@ def load_woq_model(self):
         """
         if self.format == LoadFormat.HUGGINGFACE:
             assert self.model_name_or_path is not None, "'model_name_or_path' can't be None."
 
             model = self.load_hf_format_woq_model()
             logger.info("Loading HuggingFace weight-only quantization model successfully.")
         elif self.format == LoadFormat.DEFAULT:
@@ -213,7 +213,7 @@ def load_hf_format_woq_model(self):
         """
         # check required package
         from neural_compressor.torch.utils import is_package_available
 
         if not is_package_available("transformers"):
             raise ImportError("Loading huggingface model requires transformers: `pip install transformers`")
         if not is_package_available("accelerate"):
@@ -222,10 +222,9 @@ def load_hf_format_woq_model(self):
         # get model class and config
         model_class, config = self._get_model_class_and_config()
         quantization_config = config.quantization_config if hasattr(config, "quantization_config") else None
-        if "backend" in quantization_config and "auto_round" in quantization_config["backend"]:
+        if "backend" in quantization_config and 'auto_round' in quantization_config['backend']:
             # load autoround format quantized model
             from auto_round import AutoRoundConfig
-
             model = model_class.from_pretrained(self.model_name_or_path)
             return model
         # get loaded state_dict
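
The auto_round branch above imports AutoRoundConfig without using the name directly, presumably for its import-time side effect of making the auto_round quantization backend visible to transformers before from_pretrained() is called. A standalone sketch of the same loading pattern (the checkpoint path is a placeholder, and AutoModelForCausalLM stands in for whatever model_class resolves to):

    # Loading sketch (not from the PR), mirroring the branch above.
    from auto_round import AutoRoundConfig  # noqa: F401  # kept for its side effect, as in the diff
    from transformers import AutoModelForCausalLM

    # Placeholder path; point this at a checkpoint whose quantization_config
    # declares an auto_round backend.
    model = AutoModelForCausalLM.from_pretrained("path/to/auto_round_model")
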
@@ -889,3 +888,4 @@ def _use_hpu_module(self): # pragma: no cover
         if os.path.exists(os.path.join(self._model_local_dir, HPU_WEIGHT_NAME)):
             return True
         return False
+
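
A note on the signature change in the first hunk: save() now defaults to LoadFormat.DEFAULT instead of the bare string "default", matching the LoadFormat.HUGGINGFACE / LoadFormat.DEFAULT comparisons already used in load_woq_model(). The diff does not show the definition of LoadFormat; a hypothetical sketch, assuming string-valued constants so that comparisons like format == "huggingface" keep working:

    # Hypothetical sketch of a LoadFormat constants holder; the actual
    # definition lives elsewhere in neural_compressor and may differ.
    class LoadFormat:
        DEFAULT = "default"
        HUGGINGFACE = "huggingface"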