We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent cc798b2 · commit 3e4eb53 — Copy full SHA for 3e4eb53
flash_rl/flash_quantization.py
@@ -115,15 +115,20 @@ def flash_quantize_fp8_tensor(weights, profile):
115
else:
116
yield (name, tensor)
117
118
def flash_noquantize(weights, profile):
    """Pass-through weight handler: yield every (name, tensor) pair unchanged.

    Registered in ``quant_fn_map`` for the fp8 / fp8_vllm profile variants,
    where no transformation is applied at this stage (presumably the actual
    fp8 conversion happens downstream — confirm against the vLLM loading
    path).

    Args:
        weights: iterable of ``(name, tensor)`` pairs.
        profile: quantization profile; unused here, kept only so the
            signature matches the other ``quant_fn_map`` entries.

    Yields:
        ``(name, tensor)`` pairs, unmodified.
    """
    # Fix: the original message read "flash_rl quantization is called",
    # which is misleading for this deliberate no-op path.
    logger.debug("flash_rl no-quantization (pass-through) is called")
    for name, tensor in weights:
        yield (name, tensor)
123
# Dispatch table: quantization profile name -> generator implementing it.
# Note the fp8 / fp8_vllm variants route through flash_noquantize (a
# logged pass-through) rather than a bare identity lambda.
quant_fn_map = {
    # int8 family
    'int8': flash_quantize,
    'int8_fast': flash_quantize,
    'int8_wo_prune': flash_quantize,
    'int8_prune': flash_quantize_with_prune,
    # fp8 pass-through family (no transformation at this stage)
    'fp8': flash_noquantize,
    'fp8_vllm': flash_noquantize,
    'fp8_fast': flash_noquantize,
    'fp8_vllm_fast': flash_noquantize,
    # fp8 profiles with dedicated quantizers
    'fp8_tensor': flash_quantize_fp8_tensor,
    'fp8_channel': flash_quantize_fp8_channel,
}
0 commit comments