We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b10c17c · commit 1c53561 — Copy full SHA for 1c53561
flash_rl/vllm_patch.py
@@ -108,7 +108,12 @@ def hacked_process_weights_after_loading(
108
quant_method = getattr(module, "quant_method", None)
109
if isinstance(quant_method, QuantizeMethodBase):
110
111
- if isinstance(quant_method, Fp8LinearMethod) or isinstance(quant_method, CompressedTensorsW8A8Int8):
+ if isinstance(quant_method, Fp8LinearMethod):
112
+ # for fast processing, we will do manual processing later
113
+ assert not quant_method.use_marlin, 'marlin (w8a16) does not support fp8_fast processing'
114
+ continue
115
+
116
+ if isinstance(quant_method, CompressedTensorsW8A8Int8):
117
# for fast processing, we will do manual processing later
118
continue
119
0 commit comments