add save and load
Signed-off-by: changwangss <[email protected]>
changwangss committed Aug 26, 2024
commit f7dc2ef5b9ea586730ad2eab3f6f6e36efbb405a
4 changes: 4 additions & 0 deletions neural_compressor/__init__.py
@@ -25,6 +25,10 @@
QuantizationAwareTrainingConfig,
MixedPrecisionConfig,
)
from .transformers import (
    GPTQConfig,
    RtnConfig,
)
from .contrib import *
from .model import *
from .metric import *
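A minimal sketch of what this re-export enables: the weight-only quantization configs can now be imported from the package root rather than from the transformers submodule. The constructor keywords shown are assumptions for illustration, not taken from this diff.

from neural_compressor import GPTQConfig, RtnConfig

rtn_config = RtnConfig(bits=4)                    # assumed keyword
gptq_config = GPTQConfig(bits=4, group_size=128)  # assumed keywords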
1 change: 1 addition & 0 deletions neural_compressor/transformers/__init__.py
@@ -0,0 +1 @@
from .utils.quantization_config import GPTQConfig, RtnConfig
1 change: 1 addition & 0 deletions neural_compressor/transformers/models/__init__.py
@@ -0,0 +1 @@
from .modeling_auto import _BaseQBitsAutoModelClass
711 changes: 711 additions & 0 deletions neural_compressor/transformers/models/modeling_auto.py

Large diffs are not rendered by default.
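A hedged sketch of the save/load round trip this commit targets. The public wrapper class AutoModelForCausalLM and its from_pretrained/save_pretrained behavior are assumptions, since the 711-line modeling_auto.py (which defines _BaseQBitsAutoModelClass) is not rendered in this diff.

from neural_compressor.transformers import RtnConfig
from neural_compressor.transformers import AutoModelForCausalLM  # assumed public subclass of _BaseQBitsAutoModelClass

woq_config = RtnConfig(bits=4)  # 4-bit round-to-nearest weight-only quantization
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", quantization_config=woq_config)
model.save_pretrained("./opt-125m-int4")                            # save the low-bit checkpoint
reloaded = AutoModelForCausalLM.from_pretrained("./opt-125m-int4")  # load it back without re-quantizing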

1 change: 1 addition & 0 deletions neural_compressor/transformers/quantization/__init__.py
@@ -0,0 +1 @@
from .utils import convert_to_quantized_model, save_low_bit
552 changes: 321 additions & 231 deletions neural_compressor/transformers/quantization/utils.py

Large diffs are not rendered by default.
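A hedged sketch of the lower-level helpers re-exported from quantization/__init__.py. The signatures of convert_to_quantized_model and save_low_bit are assumptions, since the 552-line utils.py diff is not rendered here.

import transformers
from neural_compressor.transformers import RtnConfig
from neural_compressor.transformers.quantization import convert_to_quantized_model, save_low_bit

fp32_model = transformers.AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
q_model = convert_to_quantized_model(fp32_model, RtnConfig(bits=4))  # assumed (model, config) signature
save_low_bit(q_model, "./opt-125m-int4")                             # assumed (model, save_directory) signature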

16 changes: 15 additions & 1 deletion neural_compressor/transformers/utils/quantization_config.py
@@ -86,8 +86,15 @@ def post_init_cpu(self):
if self.scale_dtype is not None and self.scale_dtype not in [
    "fp32",
    "fp16",
    "bf16",
]:
    raise ValueError("scale_dtype must be one of 'fp32', 'fp16', 'bf16'")
elif self.scale_dtype is None:
    self.scale_dtype = "fp32"
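A small sketch of the validation above: "fp16" is now accepted alongside "fp32" and "bf16", and anything else raises ValueError. Calling post_init_cpu() directly, and the bits keyword, are assumptions for illustration.

from neural_compressor import RtnConfig

config = RtnConfig(bits=4, scale_dtype="fp16")
config.post_init_cpu()  # passes after this change

bad = RtnConfig(bits=4, scale_dtype="int8")
try:
    bad.post_init_cpu()  # rejected: not one of 'fp32', 'fp16', 'bf16'
except ValueError as err:
    print(err)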

@@ -274,7 +281,10 @@ def __init__(
"modules_to_not_convert", ["lm_head", "transformer.output_layer", "embed_out"]
)
self.device = kwargs.get("device", "auto")

if self.use_layer_wise:
    self.model_path = kwargs.get("model_path", None)
    if self.model_path is None:
        raise AssertionError("model_path is required when use_layer_wise is enabled for weight-only quantization.")

def to_diff_dict(self) -> Dict[str, Any]:
"""Removes all attributes from config which correspond to the default config attributes
for better readability and serializes to a Python dictionary.
@@ -344,6 +354,10 @@ def __init__(
)
self.device = kwargs.get("device", "auto")
self.scheme = "sym" if self.sym else "asym"
if self.use_layer_wise:
    self.model_path = kwargs.get("model_path", None)
    if self.model_path is None:
        raise AssertionError("model_path is required when use_layer_wise is enabled for weight-only quantization.")

if isinstance(compute_dtype, torch.dtype):
self.compute_dtype = compute_dtype
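A sketch of the new use_layer_wise guard added to both GPTQConfig and RtnConfig: layer-wise quantization needs a local model_path so weights can be loaded from disk layer by layer. Keywords other than use_layer_wise and model_path are assumptions.

from neural_compressor import GPTQConfig

config = GPTQConfig(bits=4, use_layer_wise=True, model_path="/path/to/local/fp32/model")

try:
    GPTQConfig(bits=4, use_layer_wise=True)  # no model_path -> AssertionError
except AssertionError as err:
    print(err)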