Add failing HF tests

Signed-off-by: Peter St. John <[email protected]>
NVIDIA · pstjohn · Oct 16, 2025 · Oct 16, 2025 · 48dbeadc9698c673bf3c3979bf8849df91a0d14f
commit 48dbeadc9698c673bf3c3979bf8849df91a0d14f
diff --git a/tests/pytorch/test_hf_integration.py b/tests/pytorch/test_hf_integration.py
@@ -2,11 +2,15 @@
 #
 # See LICENSE for license information.
 
+from unittest import mock
 import pytest
-from transformers.configuration_utils import PretrainedConfig
-from transformers.modeling_utils import PreTrainedModel
+from transformer_engine.pytorch import TransformerLayer, quantized_model_init, QuantizedTensor
 
-from transformer_engine.pytorch import TransformerLayer
+# This API seems to be changing quickly: top-of-tree (ToT) torchao and transformers appear to be
+# incompatible, so treat torchao as unavailable while importing transformers. (pstjohn, 10/16/2025)
+with mock.patch("transformers.utils.is_torchao_available", return_value=False):
+    from transformers.configuration_utils import PretrainedConfig
+    from transformers.modeling_utils import PreTrainedModel
 
 
 class SimpleTEModel(PreTrainedModel):
@@ -19,21 +23,77 @@ def __init__(self, config: PretrainedConfig):
             num_attention_heads=16,
             ffn_hidden_size=1024,
             layer_number=None,
+            fuse_qkv_params=True,
+            qkv_weight_interleaved=True,
         )
 
     def forward(self, hidden_states, attention_mask):
         return self.my_layer(hidden_states, attention_mask)
 
 
-def test_save_hf_model(tmp_path):
-    model = SimpleTEModel(PretrainedConfig())
+class SimpleTEModelNoQKVFusion(PreTrainedModel):
+    config_class = PretrainedConfig
+
+    def __init__(self, config: PretrainedConfig):
+        super().__init__(config)
+        self.my_layer = TransformerLayer(
+            hidden_size=320,
+            num_attention_heads=16,
+            ffn_hidden_size=1024,
+            layer_number=None,
+            fuse_qkv_params=False,
+        )
+
+    def forward(self, hidden_states, attention_mask):
+        return self.my_layer(hidden_states, attention_mask)
+
+
+@pytest.fixture
+def checkpoint_path(tmp_path):
+    config = PretrainedConfig()
+    model = SimpleTEModel(config)
     model.save_pretrained(tmp_path / "simple_te_model")
+    return tmp_path / "simple_te_model"
 
 
-@pytest.mark.xfail(reason="This test is failing until huggingface/transformers#38155 is merged.")
-def test_save_and_load_hf_model(tmp_path):
-    model = SimpleTEModel(PretrainedConfig())
+def test_save_hf_model(tmp_path):
+    config = PretrainedConfig()
+    model = SimpleTEModel(config)
     model.save_pretrained(tmp_path / "simple_te_model")
-    del model
-    model = SimpleTEModel.from_pretrained(tmp_path / "simple_te_model")
+
+
+def test_save_and_load_hf_model(checkpoint_path):
+    model = SimpleTEModel.from_pretrained(checkpoint_path)
     assert model is not None
+
+
+def test_quantized_model_init_context_manager(checkpoint_path):
+    config = PretrainedConfig()
+    with quantized_model_init(enabled=True):
+        model = SimpleTEModel(config)
+    assert isinstance(model.my_layer.layernorm_mlp.fc1_weight, QuantizedTensor)
+
+
+def test_quantized_model_init_context_manager_no_qkv_fusion(checkpoint_path):
+    # Currently fails with RuntimeError: Splitting QuantizedTensor into multiple params is not supported
+    config = PretrainedConfig()
+    with quantized_model_init(enabled=True):
+        model = SimpleTEModelNoQKVFusion(config)
+    assert isinstance(model.my_layer.layernorm_mlp.fc1_weight, QuantizedTensor)
+
+
+def test_from_pretrained_with_quantized_model_init(checkpoint_path):
+    # Currently fails with TypeError: Float8TensorStorage.__new__() missing 3 required keyword-only
+    # arguments: 'data', 'fp8_scale_inv', and 'fp8_dtype'
+    with quantized_model_init(enabled=True):
+        model = SimpleTEModel.from_pretrained(checkpoint_path)
+    assert isinstance(model.my_layer.layernorm_mlp.fc1_weight, QuantizedTensor)
+
+
+def test_save_pretrained_with_quantized_model_init(tmp_path):
+    # Currently fails with RuntimeError: Attempted to access the data pointer on an invalid python storage.
+    config = PretrainedConfig()
+    with quantized_model_init(enabled=True):
+        model = SimpleTEModel(config)
+
+    model.save_pretrained(tmp_path / "simple_te_model")