1 change: 1 addition & 0 deletions tensorrt_llm/_torch/auto_deploy/custom_ops/__init__.py
@@ -8,6 +8,7 @@
 from .mla import *
 from .quant import *
 from .torch_attention import *
+from .torch_backend_attention import *
 from .torch_moe import *
 from .torch_rope import *
 from .triton_attention import *
2 changes: 2 additions & 0 deletions tensorrt_llm/_torch/auto_deploy/custom_ops/torch_attention.py
@@ -7,6 +7,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
+# TODO (chenghao): Remove related kernels once we have a backend-specific implementation for attention.
+
 
 @torch.library.custom_op("auto_deploy::torch_attention_repeat_kv", mutates_args=())
 def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
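For context, the hunk above shows the auto_deploy custom-op registration pattern that `torch_backend_attention` builds on: an eager implementation registered via `torch.library.custom_op`, typically paired with a shape-only "fake" for tracing and compilation. The sketch below illustrates that pattern with a `repeat_kv`-style op that expands grouped KV heads (the usual GQA trick of repeating each KV head `n_rep` times to match the query heads). The `example::` namespace, the `repeat_kv_sketch` name, and the body are illustrative assumptions, not the PR's actual implementation.

```python
import torch


# Hypothetical sketch (not the PR's code): a repeat_kv-style custom op
# registered under an illustrative "example" namespace so it does not
# collide with the real auto_deploy::torch_attention_repeat_kv op.
@torch.library.custom_op("example::repeat_kv", mutates_args=())
def repeat_kv_sketch(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # hidden_states: (batch, num_kv_heads, seq_len, head_dim)
    batch, num_kv_heads, seq_len, head_dim = hidden_states.shape
    if n_rep == 1:
        # Custom ops must not return (a view of) an input, so clone.
        return hidden_states.clone()
    # Insert a repeat dimension and fold it into the head dimension.
    expanded = hidden_states[:, :, None, :, :].expand(
        batch, num_kv_heads, n_rep, seq_len, head_dim
    )
    return expanded.reshape(batch, num_kv_heads * n_rep, seq_len, head_dim)


@repeat_kv_sketch.register_fake
def _(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # Shape-only implementation so the op can be traced/compiled without
    # running the real kernel.
    batch, num_kv_heads, seq_len, head_dim = hidden_states.shape
    return hidden_states.new_empty(batch, num_kv_heads * n_rep, seq_len, head_dim)


if __name__ == "__main__":
    kv = torch.randn(2, 4, 16, 64)            # 4 KV heads
    out = torch.ops.example.repeat_kv(kv, 2)  # expanded to 8 heads
    print(out.shape)                          # torch.Size([2, 8, 16, 64])
```

Registering the op through `torch.library.custom_op` (rather than calling a plain Python function) is what lets graph transforms like auto_deploy's pattern matchers recognize and replace the node with a backend-specific attention implementation later, which is also why the TODO above anticipates removing these reference kernels once such a backend exists.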