Remove logs and add helper func
Signed-off-by: Indrajit Bhosale <[email protected]>
indrajit96 committed Dec 2, 2025
commit 1f152c076ecc8896cb3abff8d12e6b95a08af9c2
2 changes: 1 addition & 1 deletion components/src/dynamo/trtllm/constants.py
@@ -8,4 +8,4 @@ class DisaggregationMode(Enum):
     AGGREGATED = "prefill_and_decode"
     PREFILL = "prefill"
     DECODE = "decode"
-    ENCODE = "encode"
+    ENCODE = "encode"
6 changes: 4 additions & 2 deletions components/src/dynamo/trtllm/engine.py
@@ -51,7 +51,7 @@ async def initialize(self):
             self._llm = MultimodalEncoder(
                 model=model,
                 max_batch_size=max_batch_size,
Contributor: Out of curiosity, why not forward the rest of the self.engine_args? MultimodalEncoder is also an LLM class: https://github.com/NVIDIA/TensorRT-LLM/blob/v1.2.0rc4/tensorrt_llm/llmapi/mm_encoder.py#L16

-            )
+            )
         else:
             self._llm = self._llm_cls(**self.engine_args)

@@ -106,7 +106,9 @@ def _warn_about_unsupported_field(field_name: str) -> None:


@asynccontextmanager
-async def get_llm_engine(engine_args, disaggregation_mode: DisaggregationMode) -> AsyncGenerator[TensorRTLLMEngine, None]:
+async def get_llm_engine(
+    engine_args, disaggregation_mode: DisaggregationMode
+) -> AsyncGenerator[TensorRTLLMEngine, None]:
     engine = TensorRTLLMEngine(engine_args, disaggregation_mode)
     try:
         await engine.initialize()
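A minimal sketch of the reviewer's suggestion above, assuming MultimodalEncoder accepts the same keyword arguments as the LLM class it extends (the import path follows the linked mm_encoder.py; the surrounding class is simplified for illustration and is not the actual TensorRTLLMEngine):

# Hypothetical variant of initialize(): forward all engine args instead of
# cherry-picking model and max_batch_size, so the remaining options are not
# silently dropped. Assumes MultimodalEncoder(**kwargs) mirrors LLM(**kwargs).
from tensorrt_llm.llmapi.mm_encoder import MultimodalEncoder

class EncoderEngineSketch:
    def __init__(self, engine_args: dict):
        self.engine_args = engine_args

    async def initialize(self):
        # Previously: MultimodalEncoder(model=model, max_batch_size=max_batch_size)
        self._llm = MultimodalEncoder(**self.engine_args)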
2 changes: 1 addition & 1 deletion components/src/dynamo/trtllm/main.py
@@ -33,7 +33,7 @@
from tensorrt_llm.llmapi.tokenizer import tokenizer_factory
from tensorrt_llm.metrics import MetricsCollector
from torch.cuda import device_count
-from transformers import AutoConfig, GenerationConfig
+from transformers import AutoConfig

import dynamo.nixl_connect as nixl_connect
from dynamo.common.config_dump import dump_config
4 changes: 2 additions & 2 deletions components/src/dynamo/trtllm/utils/disagg_utils.py
@@ -46,7 +46,7 @@ def decode(
             multimodal_embedding_handles=getattr(
Contributor: Maybe a stupid question: these already default to None in the DisaggregatedParams definition from TRTLLM: https://github.com/NVIDIA/TensorRT-LLM/blob/v1.2.0rc4/tensorrt_llm/disaggregated_params.py#L37

Why not use:

dataclasses.replace(disaggregated_params, opaque_state=opaque_state)

since opaque_state seems to be the only actual difference between the input disaggregated_params and what we're returning here?

Similar question for the encode method below.
disaggregated_params, "multimodal_embedding_handles", None
),
multimodal_hashes=getattr(disaggregated_params, "multimodal_hashes", None),
multimodal_hashes=getattr(disaggregated_params, "multimodal_hashes", None),
)

     @staticmethod
@@ -73,5 +73,5 @@ def encode(
             draft_tokens=disaggregated_params.draft_tokens,
             # E-P Disaggregated Params (for full EPD flow)
             multimodal_embedding_handles=mm_handles,
-            multimodal_hashes=mm_hashes,
+            multimodal_hashes=mm_hashes,
         )
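A minimal sketch of the dataclasses.replace() approach the reviewer suggests, assuming DisaggregatedParams is a dataclass whose multimodal fields already default to None (per the linked disaggregated_params.py); the helper name is hypothetical:

import dataclasses

def with_opaque_state(disaggregated_params, opaque_state):
    # Copy every field of the input dataclass unchanged and override only
    # opaque_state, instead of rebuilding the object field by field with
    # getattr(..., None) fallbacks.
    return dataclasses.replace(disaggregated_params, opaque_state=opaque_state)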