1 change: 1 addition & 0 deletions tensorrt_llm/_torch/auto_deploy/custom_ops/__init__.py
@@ -8,6 +8,7 @@
 from .mla import *
 from .quant import *
 from .torch_attention import *
+from .torch_backend_attention import *
 from .torch_moe import *
 from .torch_rope import *
 from .triton_attention import *
2 changes: 2 additions & 0 deletions tensorrt_llm/_torch/auto_deploy/custom_ops/torch_attention.py
@@ -7,6 +7,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
+# TODO (chenghao): Remove related kernels once we have a backend-specific implementation for attention.
+
 
 @torch.library.custom_op("auto_deploy::torch_attention_repeat_kv", mutates_args=())
 def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
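For context, the hunk above shows the auto_deploy custom-op registration pattern that `torch_backend_attention` builds on: an eager implementation registered via `torch.library.custom_op`, typically paired with a shape-only "fake" for tracing and compilation. The sketch below illustrates that pattern with a `repeat_kv`-style op that expands grouped KV heads (the usual GQA trick of repeating each KV head `n_rep` times to match the query heads). The `example::` namespace, the `repeat_kv_sketch` name, and the body are illustrative assumptions, not the PR's actual implementation.

```python
import torch


# Hypothetical sketch (not the PR's code): a repeat_kv-style custom op
# registered under an illustrative "example" namespace so it does not
# collide with the real auto_deploy::torch_attention_repeat_kv op.
@torch.library.custom_op("example::repeat_kv", mutates_args=())
def repeat_kv_sketch(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # hidden_states: (batch, num_kv_heads, seq_len, head_dim)
    batch, num_kv_heads, seq_len, head_dim = hidden_states.shape
    if n_rep == 1:
        # Custom ops must not return (a view of) an input, so clone.
        return hidden_states.clone()
    # Insert a repeat dimension and fold it into the head dimension.
    expanded = hidden_states[:, :, None, :, :].expand(
        batch, num_kv_heads, n_rep, seq_len, head_dim
    )
    return expanded.reshape(batch, num_kv_heads * n_rep, seq_len, head_dim)


@repeat_kv_sketch.register_fake
def _(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # Shape-only implementation so the op can be traced/compiled without
    # running the real kernel.
    batch, num_kv_heads, seq_len, head_dim = hidden_states.shape
    return hidden_states.new_empty(batch, num_kv_heads * n_rep, seq_len, head_dim)


if __name__ == "__main__":
    kv = torch.randn(2, 4, 16, 64)            # 4 KV heads
    out = torch.ops.example.repeat_kv(kv, 2)  # expanded to 8 heads
    print(out.shape)                          # torch.Size([2, 8, 16, 64])
```

Registering the op through `torch.library.custom_op` (rather than calling a plain Python function) is what lets graph transforms like auto_deploy's pattern matchers recognize and replace the node with a backend-specific attention implementation later, which is also why the TODO above anticipates removing these reference kernels once such a backend exists.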