Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Aug 26, 2024
commit 5304d4192de71afb58f56aaac055243efd175c83
5 changes: 1 addition & 4 deletions neural_compressor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@
QuantizationAwareTrainingConfig,
MixedPrecisionConfig,
)
from .transformers import(
GPTQConfig,
RtnConfig
)
from .transformers import GPTQConfig, RtnConfig
from .contrib import *
from .model import *
from .metric import *
Expand Down
14 changes: 14 additions & 0 deletions neural_compressor/transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,15 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .utils.quantization_config import GPTQConfig, RtnConfig
14 changes: 14 additions & 0 deletions neural_compressor/transformers/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,15 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .modeling_auto import _BaseQBitsAutoModelClass
32 changes: 15 additions & 17 deletions neural_compressor/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,30 @@
import torch
import transformers
from accelerate import init_empty_weights

from transformers import AutoConfig
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_utils import load_state_dict
from transformers.utils import has_file, is_safetensors_available
from neural_compressor.transformers.quantization.utils import save_low_bit, replace_linear
from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear

from ..quantization.utils import (
convert_dtype_torch2str,
replace_linear,
from transformers.utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
has_file,
is_safetensors_available,
)

from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear
from neural_compressor.torch.utils import is_ipex_available
from neural_compressor.transformers import GPTQConfig, RtnConfig
from neural_compressor.utils.utility import LazyImport, CpuInfo
from neural_compressor.transformers.quantization.utils import replace_linear, save_low_bit
from neural_compressor.utils import logger
from neural_compressor.torch.utils import is_ipex_available
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME
from neural_compressor.utils.utility import CpuInfo, LazyImport

from ..quantization.utils import convert_dtype_torch2str, replace_linear

torch = LazyImport("torch")



def build_woq_model(model, quantization_config):
from neural_compressor.adaptor.torch_utils.util import set_module

Expand Down Expand Up @@ -523,9 +524,7 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
else:
logger.warning("bits number only supports 4, 8.")
quantization_config.weight_dtype = "int4"
logger.warning(
"int4 weight_dtype is used, please change the config.json if you don't want to use it."
)
logger.warning("int4 weight_dtype is used, please change the config.json if you don't want to use it.")
else:
if quantization_config.weight_dtype not in [
"int4_fullrange",
Expand Down Expand Up @@ -554,7 +553,6 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):

model = build_woq_model(model, quantization_config)


if is_sharded:
loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"]
else:
Expand Down Expand Up @@ -708,4 +706,4 @@ class AutoModel(_BaseQBitsAutoModelClass):


class AutoModelForSeq2SeqLM(_BaseQBitsAutoModelClass):
ORIG_MODEL = transformers.AutoModelForSeq2SeqLM
ORIG_MODEL = transformers.AutoModelForSeq2SeqLM
14 changes: 14 additions & 0 deletions neural_compressor/transformers/quantization/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,15 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .utils import convert_to_quantized_model, save_low_bit
26 changes: 12 additions & 14 deletions neural_compressor/transformers/quantization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,25 @@
import logging
import math
import os

<<<<<<< Updated upstream

from accelerate import init_empty_weights
from datasets import load_dataset

=======
import types
import json
import types

from datasets import load_dataset

from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear
from neural_compressor.torch.quantization import (
GPTQConfig,
RTNConfig,
convert,
prepare,
)
from neural_compressor.utils.utility import LazyImport, CpuInfo
from neural_compressor.torch.quantization import GPTQConfig, RTNConfig, convert, prepare
from neural_compressor.utils.utility import CpuInfo, LazyImport

>>>>>>> Stashed changes
from transformers import AutoTokenizer
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME

from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear as WeightOnlyLinear
from neural_compressor.torch.quantization import GPTQConfig, RTNConfig, convert, prepare
Expand Down Expand Up @@ -298,9 +297,7 @@ def _replace_linear(
or device == "auto"
>>>>>>> Stashed changes
):
from intel_extension_for_pytorch.nn.modules import (
WeightOnlyQuantizedLinear as ipex_linear,
)
from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear as ipex_linear
from intel_extension_for_pytorch.utils.weight_only_quantization import (
_convert_optimum_format_to_desired,
)
Expand Down Expand Up @@ -355,8 +352,9 @@ def _replace_linear(
)

elif device == "xpu" or device == torch.device("xpu"):
from intel_extension_for_pytorch.nn.utils._quantize_convert import \
WeightOnlyQuantizedLinear as ipex_linear # pylint: disable=E0401
from intel_extension_for_pytorch.nn.utils._quantize_convert import (
WeightOnlyQuantizedLinear as ipex_linear, # pylint: disable=E0401
)
model._modules[name] = ipex_linear(
in_features,
out_features,
Expand Down