Fix layer-wise WOQ (weight-only quantization) forward

Signed-off-by: Kaihui-intel <[email protected]>
intel · Kaihui-intel · Jul 16, 2024 · Jun 25, 2024 · Jun 25, 2024 · Jun 25, 2024
commit 4ce74db461e8eb6d34462084e388b8ba113773a6
diff --git a/neural_compressor/torch/algorithms/layer_wise/utils.py b/neural_compressor/torch/algorithms/layer_wise/utils.py
@@ -25,12 +25,13 @@
 from accelerate.utils import set_module_tensor_to_device
 from transformers import AutoConfig, AutoModelForCausalLM
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
+from neural_compressor.torch.algorithms.weight_only.modules import WeightOnlyLinear
 
 from neural_compressor.common import options
 
 from .load import load
 
-LWQ_WORKSPACE = os.path.join(options.workspace, "layer_wise_tmp")
+LWQ_WORKSPACE = os.path.join(options.workspace, "lwq_tmpdir")
 
 
 class QDQLayer(torch.nn.Module):
@@ -250,13 +251,17 @@ def hook(module, input):
             state_dict = None
             if os.path.exists(os.path.join(LWQ_WORKSPACE, f"{name}.pt")):
                 state_dict = torch.load(os.path.join(LWQ_WORKSPACE, f"{name}.pt"))
-            for n, p in module.named_parameters():
-                param_name = name + "." + n
-                if state_dict:
-                    value = state_dict[n]
-                else:
-                    value = load_value(model, param_name, path)
-                set_module_tensor_to_device(model, param_name, device, value)
+            if isinstance(module, WeightOnlyLinear):
+                for n, p in module._buffers.items():
+                    setattr(module, n, state_dict[n]) 
+            else:
+                for n, p in module.named_parameters():
+                    param_name = name + "." + n
+                    if state_dict:
+                        value = state_dict[n]
+                    else:
+                        value = load_value(model, param_name, path)
+                    set_module_tensor_to_device(model, param_name, device, value)
 
         return hook
 
@@ -278,13 +283,13 @@ def hook(module, input, output):
     return handle
 
 
-def clean_module_weight(module, woq_type=False):
+def clean_module_weight(module):
     if isinstance(module, QDQLayer):
         submodule = module.module
     else:
         submodule = module
 
-    if woq_type is True:
+    if isinstance(module, WeightOnlyLinear):
         for n, m in submodule._buffers.items():
             old_value = getattr(submodule, n)
             with torch.no_grad():

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -228,7 +228,7 @@ def convert(
                 from neural_compressor.torch.algorithms.layer_wise.utils import clean_module_weight
 
                 torch.save(new_module.state_dict(), os.path.join(lwq_workspace, f"{name}.pt"))
-                clean_module_weight(new_module, woq_type=True)
+                clean_module_weight(new_module)
                 clean_module_weight(m)
                 del m
                 gc.collect()