diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py index 6d3410bf3c2..ac869d765a5 100644 --- a/tensorrt_llm/llmapi/llm.py +++ b/tensorrt_llm/llmapi/llm.py @@ -491,8 +491,8 @@ def generate_async( elif 'multi_modal_embeddings' in inputs: mm_embedding_info = inputs['multi_modal_embeddings'] prompt_token_ids, extra_processed_inputs = cast( - self.input_processor, - BaseMultimodalInputProcessor).attach_multimodal_embeddings( + BaseMultimodalInputProcessor, + self.input_processor).attach_multimodal_embeddings( inputs, mm_embedding_info, sampling_params) else: with nvtx_range_debug("input_processor"):