Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0658a83
add inc woq and remove itrex dependency
changwangss Aug 27, 2024
4955b8a
Update optimum/intel/neural_compressor/modeling_base.py
changwangss Aug 29, 2024
7fe5ac5
Update optimum/intel/neural_compressor/modeling_base.py
changwangss Aug 29, 2024
1d6797c
Update optimum/intel/neural_compressor/modeling_base.py
changwangss Aug 29, 2024
ab178e9
Update optimum/intel/neural_compressor/modeling_base.py
changwangss Aug 29, 2024
c078ca2
fix code according to review comments
changwangss Aug 29, 2024
c257101
add logger setting
changwangss Aug 29, 2024
d55004b
improve ut
changwangss Aug 29, 2024
fcadbac
move woq quantization to quantization.py
changwangss Sep 5, 2024
8cf22de
Update examples/neural_compressor/language-modeling/run_clm.py
changwangss Sep 5, 2024
a31fc6a
Update examples/neural_compressor/language-modeling/run_clm.py
changwangss Sep 5, 2024
3b5f228
remove dependency
changwangss Sep 5, 2024
7f8c2a2
Update examples/neural_compressor/language-modeling/run_clm.py
IlyasMoutawwakil Sep 5, 2024
6eba7c4
add woq saving and loading ut and logger info
changwangss Sep 5, 2024
2683608
Merge branch 'main' into wangchang/inc_woq
changwangss Sep 5, 2024
1401c89
set transformers version limit
changwangss Sep 5, 2024
bc3b95a
fix installation neural_compressor[pt]
changwangss Sep 6, 2024
99f797d
improve ut
changwangss Sep 6, 2024
8321a24
refactoring
echarlaix Sep 6, 2024
08091bc
Refactor
echarlaix Sep 6, 2024
09acbd9
revert
echarlaix Sep 6, 2024
28a10d9
fix datasets loading issue
changwangss Sep 9, 2024
1ad67f1
fix
echarlaix Sep 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix code according to review comments
Signed-off-by: changwangss <[email protected]>
  • Loading branch information
changwangss committed Aug 29, 2024
commit c078ca2a4902ea3e2b0c5aa689436c5792d484b3
6 changes: 4 additions & 2 deletions examples/neural_compressor/language-modeling/run_clm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@
from datasets import load_dataset
from neural_compressor import (
DistillationConfig,
GPTQConfig,
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
RtnConfig,
WeightPruningConfig,
)
from neural_compressor.transformers import (
GPTQConfig,
RtnConfig,
)
from transformers import (
CONFIG_MAPPING,
MODEL_FOR_CAUSAL_LM_MAPPING,
Expand Down
38 changes: 24 additions & 14 deletions optimum/intel/neural_compressor/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from neural_compressor.transformers import GPTQConfig, RtnConfig
from neural_compressor.transformers.quantization import convert_to_quantized_model, save_low_bit
from neural_compressor.utils.pytorch import load
from packaging import version
from transformers import (
AutoConfig,
AutoModel,
Expand All @@ -52,17 +51,19 @@

from ...modeling_base import OptimizedModel
from ..utils.import_utils import (
_ipex_version,
_neural_compressor_version,
_torch_version,
is_ipex_version,
is_neural_compressor_version,
is_torch_version,
)
from .configuration import INCConfig
from .utils import QUANTIZATION_CONFIG_NAME


logger = logging.getLogger(__name__)

handler = logging.StreamHandler()
formatter = logging.Formatter("%(levelname)s - %(message)s")
handler.setFormatter(formatter)

MODEL_START_DOCSTRING = r"""
This model checks the superclass documentation for the generic methods the
Expand Down Expand Up @@ -126,9 +127,13 @@ def _from_pretrained(
device_map = kwargs.get("device_map", "cpu")
use_xpu = True if device_map == torch.device("xpu") or device_map == "xpu" else False

config = kwargs.pop("config", None)

quantization_config = kwargs.pop("quantization_config", None)
if not isinstance(config, PretrainedConfig):
config, _ = AutoConfig.from_pretrained(
model_id,
return_unused_kwargs=True,
**kwargs,
)
if hasattr(config, "quantization_config"):
if config.quantization_config is None:
logger.warning(
Expand All @@ -137,13 +142,14 @@ def _from_pretrained(
)
else:
logger.info("quantization_config: {}".format(config.quantization_config))

try:
from neural_compressor.transformers.models.modeling_auto import (
_BaseQBitsAutoModelClass,
_BaseINCAutoModelClass,
)

_BaseQBitsAutoModelClass.ORIG_MODEL = cls.auto_model_class
model = _BaseQBitsAutoModelClass.load_low_bit(
_BaseINCAutoModelClass.ORIG_MODEL = cls.auto_model_class
model = _BaseINCAutoModelClass.load_low_bit(
model_id,
subfolder=subfolder,
revision=revision,
Expand All @@ -163,10 +169,14 @@ def _from_pretrained(
exit(0)
if isinstance(quantization_config, (RtnConfig, GPTQConfig)):
logger.info("Applying Weight Only Quantization.")
if version.parse(_neural_compressor_version) <= version.parse("2.6"):
raise AssertionError("Please use neural_compressor version > 2.6.")
if version.parse(_ipex_version) < version.parse("2.3.1"):
raise AssertionError("Please use intel_extension_for_pytorch version > 2.3.1.")
warnings.warn(
"Weight only quantization provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
DeprecationWarning,
)
if is_neural_compressor_version("<=", "3.0"):
raise AssertionError("Please use neural_compressor version > 3.0.")
if is_ipex_version("<", "2.3.1") and use_xpu:
raise AssertionError("Please use intel_extension_for_pytorch version >= 2.3.1.")

if use_xpu:
# TODO: if low_cpu_mem_usage is True, gptj will have accuracy issue on CPU device.
Expand Down Expand Up @@ -207,7 +217,6 @@ def _from_pretrained(
model.eval()

if use_xpu:

assert hasattr(torch, "xpu") and torch.xpu.is_available(), "There is no xpu device in this system!"
quantization_config.update(**{"device": "xpu"})
quantization_config.post_init_xpu()
Expand Down Expand Up @@ -275,6 +284,7 @@ def _from_pretrained(
)

model_save_dir = Path(model_cache_path).parent

try:
inc_config = INCConfig.from_pretrained(model_id, subfolder=subfolder, revision=revision)
if not is_torch_version("==", inc_config.torch_version):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]

EXTRAS_REQUIRE = {
"neural-compressor": ["neural-compressor>=2.2.0,<3.0", "accelerate", "transformers<4.43.0"],
"neural-compressor": ["neural-compressor>3.0", "accelerate", "transformers<4.43.0"],
"openvino": ["openvino>=2023.3", "nncf>=2.11.0", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.11.0"],
"ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<4.44.0"],
Expand Down