-
Notifications
You must be signed in to change notification settings - Fork 2k
[None][feat] Add C++ RequestSpecificException #6362
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4c15b47
0433b3e
d2f20b0
d58290d
cfef7aa
0cc4731
a61c7aa
d9478fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,15 +18,33 @@ | |
|
|
||
| #include <array> | ||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <stdexcept> | ||
| #include <string> | ||
|
|
||
| #define NEW_TLLM_EXCEPTION(...) \ | ||
| tensorrt_llm::common::TllmException(__FILE__, __LINE__, tensorrt_llm::common::fmtstr(__VA_ARGS__).c_str()) | ||
|
|
||
| #define NEW_TLLM_REQUEST_SPECIFIC_EXCEPTION_WITH_ERROR_CODE(requestID, errorCode, ...) \ | ||
| tensorrt_llm::common::RequestSpecificException( \ | ||
| __FILE__, __LINE__, tensorrt_llm::common::fmtstr(__VA_ARGS__).c_str(), requestID, errorCode) | ||
|
|
||
| namespace tensorrt_llm::common | ||
| { | ||
|
|
||
| /// @brief Error codes carried by request-specific exceptions; each category is assigned its own block of 1000 values | ||
| enum class RequestErrorCode : uint32_t | ||
| { | ||
| // General errors (0-999); kUNKNOWN_ERROR is the only value declared in this range here | ||
| kUNKNOWN_ERROR = 0, | ||
|
|
||
| // Network and communication errors (1000-1999); kNETWORK_ERROR is the first value of the range | ||
| kNETWORK_ERROR = 1000, | ||
| }; | ||
|
|
||
| /// @brief Constant for unknown request ID | ||
| static constexpr uint64_t kUNKNOWN_REQUEST_ID = static_cast<uint64_t>(-1); | ||
|
|
||
| class TllmException : public std::runtime_error | ||
| { | ||
| public: | ||
|
|
@@ -45,4 +63,21 @@ class TllmException : public std::runtime_error | |
| int mNbFrames; | ||
| }; | ||
|
|
||
| class RequestSpecificException : public std::runtime_error | ||
| { | ||
| public: | ||
| explicit RequestSpecificException( | ||
| char const* file, std::size_t line, char const* msg, uint64_t requestID, RequestErrorCode errorCode); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use |
||
|
|
||
| ~RequestSpecificException() noexcept override; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please confirm the behavior of |
||
|
|
||
| [[nodiscard]] uint64_t getRequestId() const noexcept; | ||
|
|
||
| [[nodiscard]] RequestErrorCode getErrorCode() const noexcept; | ||
|
|
||
| private: | ||
| uint64_t mRequestID; | ||
| RequestErrorCode mErrorCode; | ||
| }; | ||
|
|
||
| } // namespace tensorrt_llm::common | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| namespace tensorrt_llm::executor | ||
| { | ||
|
|
||
| // Forward declaration | ||
| class Serialization; | ||
|
|
||
| namespace kv_cache | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,117 @@ | ||||||||||||||||||||||||||||||||
| /* | ||||||||||||||||||||||||||||||||
| * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||||||||||||||||||||||
| * you may not use this file except in compliance with the License. | ||||||||||||||||||||||||||||||||
| * You may obtain a copy of the License at | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * Unless required by applicable law or agreed to in writing, software | ||||||||||||||||||||||||||||||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||||||||||||||||||||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||||||||||||||||||||||
| * See the License for the specific language governing permissions and | ||||||||||||||||||||||||||||||||
| * limitations under the License. | ||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| #pragma once | ||||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Missing include guard. Header files must use include guards named after the file (here `TRTLLM_DATATRANSCEIVERSTATE_UTILS_H`). Apply this diff to add the required include guard:

```diff
-#pragma once
+#ifndef TRTLLM_DATATRANSCEIVERSTATE_UTILS_H
+#define TRTLLM_DATATRANSCEIVERSTATE_UTILS_H
```

And add the closing endif at the end of the file:

```diff
 } // namespace tensorrt_llm::executor
+
+#endif // TRTLLM_DATATRANSCEIVERSTATE_UTILS_H
```

🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| #include "tensorrt_llm/executor/dataTransceiverState.h" | ||||||||||||||||||||||||||||||||
| #include "tensorrt_llm/executor/serialization.h" | ||||||||||||||||||||||||||||||||
| #include <vector> | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| namespace tensorrt_llm::executor | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Utility functions for creating and serializing DataTransceiverState | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||
| * @brief Create a serialized DataTransceiverState with socket communication state | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * @param nbKvHeadsPerLayer Vector of number of KV heads per layer | ||||||||||||||||||||||||||||||||
| * @param sizePerHead Size of each attention head | ||||||||||||||||||||||||||||||||
| * @param tokensPerBlock Number of tokens per block | ||||||||||||||||||||||||||||||||
| * @param tensorParallelism Tensor parallelism size | ||||||||||||||||||||||||||||||||
| * @param pipelineParallelism Pipeline parallelism size | ||||||||||||||||||||||||||||||||
| * @param dataType Data type for the cache | ||||||||||||||||||||||||||||||||
| * @param socketAddresses Vector of socket addresses for communication | ||||||||||||||||||||||||||||||||
| * @param attentionType Attention type (DEFAULT or MLA) | ||||||||||||||||||||||||||||||||
| * @param kvFactor KV factor (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param enableAttentionDP Whether to enable attention data parallelism | ||||||||||||||||||||||||||||||||
| * @param dpRank Data parallelism rank (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param dpSize Data parallelism size (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param rank Current rank | ||||||||||||||||||||||||||||||||
| * @return std::vector<char> The serialized DataTransceiverState as bytes | ||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||
| inline std::vector<char> createDataTransceiverStateSocket(std::vector<SizeType32> const& nbKvHeadsPerLayer, | ||||||||||||||||||||||||||||||||
| SizeType32 sizePerHead, SizeType32 tokensPerBlock, SizeType32 tensorParallelism, SizeType32 pipelineParallelism, | ||||||||||||||||||||||||||||||||
| nvinfer1::DataType dataType, std::vector<std::string> const& socketAddresses, | ||||||||||||||||||||||||||||||||
| kv_cache::CacheState::AttentionType attentionType, int kvFactor, bool enableAttentionDP, int dpRank, int dpSize, | ||||||||||||||||||||||||||||||||
| int rank) | ||||||||||||||||||||||||||||||||
|
Comment on lines
+46
to
+50
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Consider adding input validation for critical parameters. Both functions accept multiple parameters that could be invalid (e.g., empty vectors, zero values for critical dimensions). Consider adding basic validation to prevent runtime errors downstream. Add validation at the beginning of both functions: inline std::vector<char> createDataTransceiverStateSocket(std::vector<SizeType32> const& nbKvHeadsPerLayer,
SizeType32 sizePerHead, SizeType32 tokensPerBlock, SizeType32 tensorParallelism, SizeType32 pipelineParallelism,
nvinfer1::DataType dataType, std::vector<std::string> const& socketAddresses,
kv_cache::CacheState::AttentionType attentionType, int kvFactor, bool enableAttentionDP, int dpRank, int dpSize,
int rank)
{
+ if (nbKvHeadsPerLayer.empty() || socketAddresses.empty() || sizePerHead == 0 || tokensPerBlock == 0)
+ {
+ throw std::invalid_argument("Invalid parameters: vectors cannot be empty and dimensions must be positive");
+ }Also applies to: 91-95 🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| // Create CacheState using the simpler constructor | ||||||||||||||||||||||||||||||||
| kv_cache::CacheState cacheState(nbKvHeadsPerLayer, sizePerHead, tokensPerBlock, tensorParallelism, | ||||||||||||||||||||||||||||||||
| pipelineParallelism, dataType, attentionType, kvFactor, enableAttentionDP, dpRank, dpSize); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Create Socket CommState | ||||||||||||||||||||||||||||||||
| std::vector<kv_cache::SocketState> socketStates; | ||||||||||||||||||||||||||||||||
| for (size_t i = 0; i < socketAddresses.size(); ++i) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| kv_cache::SocketState socketState{static_cast<uint16_t>(8000 + i), socketAddresses[i]}; | ||||||||||||||||||||||||||||||||
| socketStates.emplace_back(std::move(socketState)); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
Comment on lines
+58
to
+62
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Potential integer overflow and type safety concerns. There are several issues with the socket state creation; the one addressed by the suggested diff is that `8000 + i` is narrowed to `uint16_t` without a range check, so a sufficiently large index silently wraps the port number.

Apply this diff to add proper bounds checking and type safety:

```diff
 for (size_t i = 0; i < socketAddresses.size(); ++i)
 {
-    kv_cache::SocketState socketState{static_cast<uint16_t>(8000 + i), socketAddresses[i]};
+    if (8000 + i > std::numeric_limits<uint16_t>::max())
+    {
+        throw std::runtime_error("Port number exceeds uint16_t range");
+    }
+    kv_cache::SocketState socketState{static_cast<uint16_t>(8000 + i), socketAddresses[i]};
     socketStates.emplace_back(std::move(socketState));
 }
```

📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| kv_cache::CommState commState(std::move(socketStates), rank); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Create DataTransceiverState | ||||||||||||||||||||||||||||||||
| DataTransceiverState state(std::move(cacheState), std::move(commState)); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Serialize and return the serialized data | ||||||||||||||||||||||||||||||||
| return Serialization::serialize(state); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||
| * @brief Create a serialized DataTransceiverState with agent communication state | ||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||
| * @param nbKvHeadsPerLayer Vector of number of KV heads per layer | ||||||||||||||||||||||||||||||||
| * @param sizePerHead Size of each attention head | ||||||||||||||||||||||||||||||||
| * @param tokensPerBlock Number of tokens per block | ||||||||||||||||||||||||||||||||
| * @param tensorParallelism Tensor parallelism size | ||||||||||||||||||||||||||||||||
| * @param pipelineParallelism Pipeline parallelism size | ||||||||||||||||||||||||||||||||
| * @param dataType Data type for the cache | ||||||||||||||||||||||||||||||||
| * @param agentNames Vector of agent names for communication | ||||||||||||||||||||||||||||||||
| * @param attentionType Attention type (DEFAULT or MLA) | ||||||||||||||||||||||||||||||||
| * @param kvFactor KV factor (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param enableAttentionDP Whether to enable attention data parallelism | ||||||||||||||||||||||||||||||||
| * @param dpRank Data parallelism rank (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param dpSize Data parallelism size (required argument; the signature declares no default) | ||||||||||||||||||||||||||||||||
| * @param rank Current rank | ||||||||||||||||||||||||||||||||
| * @return std::vector<char> The serialized DataTransceiverState as bytes | ||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||
| inline std::vector<char> createDataTransceiverStateAgent(std::vector<SizeType32> const& nbKvHeadsPerLayer, | ||||||||||||||||||||||||||||||||
| SizeType32 sizePerHead, SizeType32 tokensPerBlock, SizeType32 tensorParallelism, SizeType32 pipelineParallelism, | ||||||||||||||||||||||||||||||||
| nvinfer1::DataType dataType, std::vector<std::string> const& agentNames, | ||||||||||||||||||||||||||||||||
| kv_cache::CacheState::AttentionType attentionType, int kvFactor, bool enableAttentionDP, int dpRank, int dpSize, | ||||||||||||||||||||||||||||||||
| int rank) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| // Create CacheState using the simpler constructor | ||||||||||||||||||||||||||||||||
| kv_cache::CacheState cacheState(nbKvHeadsPerLayer, sizePerHead, tokensPerBlock, tensorParallelism, | ||||||||||||||||||||||||||||||||
| pipelineParallelism, dataType, attentionType, kvFactor, enableAttentionDP, dpRank, dpSize); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Create Agent CommState | ||||||||||||||||||||||||||||||||
| std::vector<kv_cache::AgentState> agentStates; | ||||||||||||||||||||||||||||||||
| for (size_t i = 0; i < agentNames.size(); ++i) | ||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||
| agentStates.emplace_back(agentNames[i], "127.0.0.1:" + std::to_string(8000 + i)); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
Comment on lines
+103
to
+106
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Potential integer overflow in agent connection info. Similar to the socket version, the port derived from `8000 + i` can exceed the valid `uint16_t` port range when the list of agent names is large enough.

Apply this diff to add bounds checking:

```diff
 for (size_t i = 0; i < agentNames.size(); ++i)
 {
+    if (8000 + i > std::numeric_limits<uint16_t>::max())
+    {
+        throw std::runtime_error("Port number exceeds uint16_t range");
+    }
     agentStates.emplace_back(agentNames[i], "127.0.0.1:" + std::to_string(8000 + i));
 }
```

📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| kv_cache::CommState commState(std::move(agentStates), rank); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Create DataTransceiverState | ||||||||||||||||||||||||||||||||
| DataTransceiverState state(std::move(cacheState), std::move(commState)); | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| // Serialize and return the serialized data | ||||||||||||||||||||||||||||||||
| return Serialization::serialize(state); | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| } // namespace tensorrt_llm::executor | ||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| /* | ||
| * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| // THIS FILE IS AUTO GENERATED FROM cmake/templates/version.h. DO NOT EDIT. | ||
|
|
||
| namespace tensorrt_llm::executor | ||
| { | ||
| static auto constexpr kTensorRtLlmVersion = "1.1.0rc0"; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,4 +106,25 @@ std::string TllmException::demangle(char const* name) | |
| #endif | ||
| } | ||
|
|
||
| RequestSpecificException::RequestSpecificException( | ||
| char const* file, std::size_t line, char const* msg, uint64_t requestID, RequestErrorCode errorCode) | ||
| : std::runtime_error{fmtstr( | ||
| "%s (Request ID: %lu, Error Code: %u) (%s:%zu)", msg, requestID, static_cast<uint32_t>(errorCode), file, line)} | ||
| , mRequestID{requestID} | ||
| , mErrorCode{errorCode} | ||
| { | ||
| } | ||
|
Comment on lines
+109
to
+116
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Use portable format specifiers for cross-platform compatibility. The format specifier `%lu` is not portable for `uint64_t`, because `unsigned long` is not 64 bits wide on every platform. Add the include at the top of the file:

```cpp
#include <cinttypes>
```

Then update the format string:

```diff
- "%s (Request ID: %lu, Error Code: %u) (%s:%zu)", msg, requestID, static_cast<uint32_t>(errorCode), file, line)}
+ "%s (Request ID: %" PRIu64 ", Error Code: %u) (%s:%zu)", msg, requestID, static_cast<uint32_t>(errorCode), file, line)}
```

🤖 Prompt for AI Agents |
||
|
|
||
| RequestSpecificException::~RequestSpecificException() noexcept = default; | ||
|
|
||
| /// @brief Return the request ID stored when the exception was constructed. | ||
| uint64_t RequestSpecificException::getRequestId() const noexcept | ||
| { | ||
| return this->mRequestID; | ||
| } | ||
|
|
||
| RequestErrorCode RequestSpecificException::getErrorCode() const noexcept | ||
| { | ||
| return mErrorCode; | ||
| } | ||
|
|
||
| } // namespace tensorrt_llm::common | ||
Uh oh!
There was an error while loading. Please reload this page.