Merged
Changes from 1 commit
update docstring
Signed-off-by: Kaihui-intel <[email protected]>
Kaihui-intel committed Aug 14, 2024
commit 0894f0e4b3b133342328190966299b987c32f4df
1 change: 1 addition & 0 deletions neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -213,6 +213,7 @@ def __init__(
dataloader: an iterable containing calibration datasets, contains (inputs, targets)
use_layer_wise (bool): Enables quantizing the model per layer. Defaults to False.
model_path (str): Model path that is used to load state_dict per layer.
quant_lm_head (bool): Indicates whether to quantize the lm_head layer in transformers. Defaults to False.
device (str): cpu or cuda.
"""
# model
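The `quant_lm_head` behavior documented above can be pictured with a small hedged sketch (toy code, not the neural_compressor implementation; the function and layer names are hypothetical): when the flag is False, the `lm_head` layer is simply left out of the set of layers selected for weight-only quantization.

```python
# Hypothetical sketch of the quant_lm_head gate (not the actual
# neural_compressor code): select which named layers get quantized.

def select_layers_to_quantize(layer_names, quant_lm_head=False):
    """Return the layer names that should be weight-only quantized.

    When quant_lm_head is False (the documented default), the lm_head
    layer is excluded from quantization.
    """
    if quant_lm_head:
        return list(layer_names)
    return [name for name in layer_names if name != "lm_head"]

layers = ["model.layers.0.self_attn.q_proj", "model.layers.0.mlp.down_proj", "lm_head"]
print(select_layers_to_quantize(layers))                      # lm_head excluded
print(select_layers_to_quantize(layers, quant_lm_head=True))  # lm_head included
```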
2 changes: 1 addition & 1 deletion neural_compressor/torch/quantization/config.py
@@ -604,7 +604,7 @@ def __init__(
double_quant_bits (int): Number of bits used to represent double_quant scale, default is 4.
double_quant_use_sym (bool): Indicates whether double_quant scales are symmetric, default is True.
double_quant_group_size (int): Size of double_quant groups, default is 32.
quant_lm_head (bool): Indicates whether quantize the lm_head layer in transformers。 Default is False.
quant_lm_head (bool): Indicates whether to quantize the lm_head layer in transformers, default is False.
use_auto_scale (bool): Enables best scales search based on activation distribution, default is True.
use_auto_clip (bool): Enables clip range search. Defaults to True.
folding (bool): Allows inserting a mul before a linear layer when the scale cannot be absorbed by the last layer,
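The `double_quant_*` parameters in the docstring above describe quantizing the per-group weight scales a second time. A minimal hedged sketch of that idea (toy code, not the neural_compressor implementation; `double_quant_scales` is a hypothetical helper), assuming simple asymmetric rounding to `double_quant_bits` levels:

```python
# Hypothetical sketch of double quantization (not the actual
# neural_compressor code): the per-group float scales are themselves
# quantized to low-bit integers plus one float "super scale".

def double_quant_scales(scales, bits=4):
    """Quantize float scales to `bits`-bit codes with one float
    super-scale, then return the reconstructed (dequantized) scales."""
    levels = (1 << bits) - 1                       # 15 levels for 4 bits
    super_scale = max(scales) / levels             # one float kept per group of scales
    codes = [round(s / super_scale) for s in scales]   # low-bit integer codes
    return [c * super_scale for c in codes]

scales = [0.11, 0.52, 0.33, 0.75]
print(double_quant_scales(scales))  # close to the originals, stored in 4 bits each
```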