diff --git a/docs/source/performance/perf-overview.md b/docs/source/performance/perf-overview.md index 2cf4204d2f6..2b0df0f9066 100644 --- a/docs/source/performance/perf-overview.md +++ b/docs/source/performance/perf-overview.md @@ -28,101 +28,119 @@ nvidia/Llama-3.1-405B-Instruct-FP4 ``` #### Llama 3.3 70B FP4 + | | GPU | B200 | | | | -|:-----------------------------|:---|:----------|:----------|:----------|:----------| -| | TP Size | 1 | 2 | 4 | 8 | -| ISL, OSL| | | | | | -| | | | | | | -| 128, 128 | | 11,253.28 | 17,867.66 | 24,944.50 | 27,471.49 | -| 128, 2048 | | 9,925.00 | 15,459.71 | 23,608.58 | 30,742.86 | -| 128, 4096 | | 6,318.92 | 8,711.88 | 17,659.74 | 24,947.05 | -| 500, 2000 | | 7,559.88 | 10,602.27 | 20,910.23 | 28,182.34 | -| 1000, 1000 | | 6,866.96 | 10,838.01 | 16,567.86 | 19,991.64 | -| 1000, 2000 | | 6,736.88 | 9,132.08 | 15,737.02 | 20,518.04 | -| 1024, 2048 | | 6,580.56 | 8,767.45 | 15,722.55 | 20,437.96 | -| 2048, 128 | | 1,375.49 | 1,610.69 | 2,707.58 | 3,717.82 | -| 2048, 2048 | | 4,544.73 | 6,956.14 | 12,292.23 | 15,661.22 | -| 5000, 500 | | 1,488.19 | 2,379.73 | 3,588.45 | 4,810.21 | -| 20000, 2000 | | 580.96 | 1,043.58 | 1,957.84 | 3,167.30 | +|:------------------------|:--------|:----------|:----------|:----------|:----------| +| | TP Size | 1 | 2 | 4 | 8 | +| ISL, OSL | | | | | | +| | | | | | | +| 128, 128 | | 10,994.48 | 17,542.11 | 24,667.31 | 27,272.27 | +| 128, 2048 | | 9,580.46 | 15,432.35 | 23,568.12 | 31,174.31 | +| 128, 4096 | | 6,418.39 | 9,841.53 | 17,808.76 | 25,229.25 | +| 500, 2000 | | 7,343.32 | 11,850.57 | 20,709.67 | 28,038.78 | +| 1000, 1000 | | 6,752.53 | 10,815.88 | 16,413.04 | 20,060.66 | +| 1000, 2000 | | 6,670.07 | 9,830.73 | 15,597.49 | 20,672.37 | +| 1024, 2048 | | 6,636.75 | 9,807.13 | 15,519.23 | 20,617.28 | +| 2048, 128 | | 1,342.17 | 1,989.41 | 3,033.14 | 4,035.64 | +| 5000, 500 | | 1,429.67 | 2,419.67 | 3,686.84 | 5,182.96 | +| 20000, 2000 | | 629.77 | 1,177.01 | 2,120.66 | 3,429.03 | #### Llama 3.1 405B FP4 -| | GPU | B200 | -|:-----------------------------|:---|:----------| -| | TP Size | 8 | -| ISL, OSL| | | -| | | | -| 128, 128 | | 9,184.83 | -| 128, 2048 | | 10,387.23 | -| 128, 4096 | | 8,741.80 | -| 500, 2000 | | 9,242.34 | -| 1000, 1000 | | 7,565.50 | -| 1000, 2000 | | 7,696.76 | -| 1024, 2048 | | 7,568.93 | -| 2048, 128 | | 953.57 | -| 2048, 2048 | | 6,092.32 | -| 5000, 500 | | 1,332.22 | -| 20000, 2000 | | 961.58 | + +| | GPU | B200 | | +|:------------------------|:------- |:---------|:----------| +| | TP Size | 4 | 8 | +| ISL, OSL | | | | +| | | | | +| 128, 128 | | 6,163.81 | 9,002.90 | +| 128, 2048 | | 7,081.21 | 10,288.28 | +| 128, 4096 | | 6,028.37 | 8,713.77 | +| 500, 2000 | | 5,858.75 | 9,125.86 | +| 1000, 1000 | | 4,848.00 | 7,582.97 | +| 1000, 2000 | | 5,375.25 | 7,626.28 | +| 1024, 2048 | | 5,345.70 | 7,464.03 | +| 2048, 128 | | 693.55 | 1,086.56 | +| 5000, 500 | | 947.49 | 1,532.45 | +| 20000, 2000 | | 641.11 | 1,097.84 | ### FP8 Models: ``` nvidia/Llama-3.1-8B-Instruct-FP8 -nvidia/Llama-3.1-70B-Instruct-FP8 +nvidia/Llama-3.3-70B-Instruct-FP8 nvidia/Llama-3.1-405B-Instruct-FP8 +nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8 ``` #### Llama 3.1 8B FP8 -| | GPU | H200 141GB HBM3 | H100 80GB HBM3 | + +| | GPU | H200 141GB HBM3 | H100 80GB HBM3 | |:-----------------------------|:---|:------------------|:-----------------| -| | TP Size | 1 | 1 | +| | TP Size | 1 | 1 | | ISL, OSL | | | | | | | | | -| 128, 128 | | 28,447.38 | 27,568.68 | -| 128, 2048 | | 23,294.74 | 22,003.62 | -| 128, 4096 | | 17,481.48 | 13,640.35 | -| 
500, 2000 | | 21,462.57 | 17,794.39 | -| 1000, 1000 | | 17,590.60 | 15,270.02 | -| 1000, 2000 | | 17,139.51 | 13,850.22 | -| 1024, 2048 | | 16,970.63 | 13,374.15 | -| 2048, 128 | | 3,531.33 | 3,495.05 | -| 2048, 2048 | | 12,022.38 | 9,653.67 | -| 5000, 500 | | 3,851.65 | 3,371.16 | -| 20000, 2000 | | 1,706.06 | 1,340.92 | - -#### Llama 3.1 70B FP8 -| | GPU | H200 141GB HBM3 | | | | H100 80GB HBM3 | | | | +| 128, 128 | | 27,970.14 | 27,688.36 | +| 128, 2048 | | 23,326.38 | 21,841.15 | +| 128, 4096 | | 17,508.51 | 13,730.89 | +| 500, 2000 | | 21,390.41 | 17,833.34 | +| 1000, 1000 | | 17,366.89 | 15,270.62 | +| 1000, 2000 | | 16,831.31 | 13,798.08 | +| 1024, 2048 | | 16,737.03 | 13,385.50 | +| 2048, 128 | | 3,488.03 | 3,414.67 | +| 5000, 500 | | 3,813.69 | 3,394.54 | +| 20000, 2000 | | 1,696.66 | 1,345.42 | + +#### Llama 3.3 70B FP8 + +| | GPU | H200 141GB HBM3 | | | | H100 80GB HBM3 | | | | |:-----------------------------|:---|:------------------|:---------|:----------|:----------|:-----------------|:---------|:----------|:----------| -| | TP Size | 1 | 2 | 4 | 8 | 1 | 2 | 4 | 8 | -| ISL, OSL| | | | | | | | | | +| | TP Size | 1 | 2 | 4 | 8 | 1 | 2 | 4 | 8 | +| ISL, OSL | | | | | | | | | | | | | | | | | | | | | -| 128, 128 | | 3,657.58 | 6,477.50 | 10,466.04 | 15,554.57 | 3,191.27 | 6,183.41 | 10,260.68 | 14,686.01 | -| 128, 2048 | | 4,351.07 | 8,450.31 | 13,438.71 | 20,750.58 | 745.19 | 5,822.02 | 11,442.01 | 17,463.99 | -| 128, 4096 | | 2,696.61 | 5,598.92 | 11,524.93 | 16,634.90 | | 3,714.87 | 8,209.91 | 12,598.55 | -| 500, 2000 | | 3,475.58 | 6,712.35 | 12,332.32 | 17,311.28 | | 4,704.31 | 10,278.02 | 14,630.41 | -| 1000, 1000 | | 2,727.42 | 5,097.36 | 8,698.15 | 12,794.92 | 734.67 | 4,191.26 | 7,427.35 | 11,082.48 | -| 1000, 2000 | | 2,913.54 | 5,841.15 | 9,016.49 | 13,174.68 | 526.31 | 3,920.44 | 7,590.35 | 11,108.11 | -| 1024, 2048 | | 2,893.02 | 5,565.28 | 9,017.72 | 13,117.34 | 525.43 | 3,896.14 | 7,557.32 | 11,028.32 | -| 2048, 128 | | 433.30 | 772.97 | 1,278.26 | 1,947.33 | 315.90 | 747.51 | 1,240.12 | 1,840.12 | -| 2048, 2048 | | 1,990.25 | 3,822.83 | 7,068.68 | 10,529.06 | 357.98 | 2,732.86 | 5,640.31 | 8,772.88 | -| 5000, 500 | | 543.88 | 1,005.81 | 1,714.77 | 2,683.22 | 203.27 | 866.77 | 1,571.92 | 2,399.78 | -| 20000, 2000 | | 276.99 | 618.01 | 1,175.35 | 2,021.08 | | 408.43 | 910.77 | 1,568.84 | +| 128, 128 | | 3,605.47 | 6,427.69 | 10,407.42 | 15,434.37 | 3,128.33 | 6,216.91 | | | +| 128, 2048 | | 4,315.80 | 8,464.03 | 13,508.59 | 20,759.72 | 756.42 | 5,782.57 | 11,464.94 | 17,424.32 | +| 128, 4096 | | 2,701.17 | 5,573.55 | 11,458.56 | 16,668.75 | | 3,868.37 | 8,206.39 | 12,624.61 | +| 500, 2000 | | 3,478.76 | 6,740.06 | 12,200.18 | | | 4,684.06 | 9,903.53 | 14,553.93 | +| 1000, 1000 | | 2,744.32 | 5,119.72 | 8,685.44 | 12,744.51 | 742.14 | 4,247.19 | 7,435.65 | 11,018.81 | +| 1000, 2000 | | 2,896.44 | 5,847.26 | 9,031.21 | 13,141.17 | 533.74 | 3,866.53 | 7,611.12 | 11,139.22 | +| 1024, 2048 | | 2,874.18 | 5,568.61 | 8,946.71 | 13,082.62 | 530.16 | 3,796.68 | 7,575.24 | 11,004.31 | +| 2048, 128 | | 435.90 | 772.67 | 1,264.76 | | | 736.89 | 1,213.33 | 1,839.22 | +| 2048, 2048 | | | | | 10,412.85 | | | | | +| 5000, 500 | | 545.96 | 997.15 | 1,698.22 | 2,655.28 | 204.94 | 862.91 | 1,552.68 | 2,369.84 | +| 20000, 2000 | | 276.66 | 620.33 | 1,161.29 | 1,985.85 | | 416.13 | 903.66 | 1,554.10 | #### Llama 3.1 405B FP8 -| | GPU | H200 141GB HBM3 | H100 80GB HBM3 | + +| | GPU | H200 141GB HBM3 | H100 80GB HBM3 | 
|:-----------------------------|:---|:------------------|:-----------------| -| | TP Size | 8 | 8 | +| | TP Size | 8 | 8 | | ISL, OSL | | | | | | | | | -| 128, 128 | | 3,800.11 | 3,732.40 | -| 128, 2048 | | 5,661.13 | 4,572.23 | -| 128, 4096 | | 5,167.18 | 2,911.42 | -| 500, 2000 | | 4,854.29 | 3,661.85 | -| 1000, 1000 | | 3,332.15 | 2,963.36 | -| 1000, 2000 | | 3,682.15 | 3,253.17 | -| 1024, 2048 | | 3,685.56 | 3,089.16 | -| 2048, 128 | | 453.42 | 448.89 | -| 2048, 2048 | | 3,055.73 | 2,139.94 | -| 5000, 500 | | 656.11 | 579.14 | -| 20000, 2000 | | 514.02 | 370.26 | +| 128, 2048 | | 5,567.87 | | +| 128, 4096 | | 5,136.85 | | +| 500, 2000 | | 4,787.61 | 3,673.91 | +| 1000, 1000 | | 3,286.30 | 3,012.22 | +| 1000, 2000 | | 3,636.76 | 3,262.20 | +| 1024, 2048 | | 3,618.66 | 3,109.70 | +| 2048, 128 | | 443.10 | 449.02 | +| 5000, 500 | | 645.46 | | +| 20000, 2000 | | | 372.12 | + +#### Llama 4 Maverick FP8 + +| | GPU | H200 141GB HBM3 | H100 80GB HBM3 | +|:-----------------------------|:---|:------------------|:-----------------| +| | TP Size | 8 | 8 | +| ISL, OSL | | | | +| | | | | +| 128, 2048 | | 27,543.87 | | +| 128, 4096 | | 18,541.01 | 11,163.12 | +| 500, 2000 | | 21,117.34 | | +| 1000, 2000 | | | 10,556.00 | +| 1024, 2048 | | 16,859.45 | 11,584.33 | +| 2048, 128 | | 4,364.06 | 3,832.38 | +| 2048, 2048 | | 12,800.89 | | +| 5000, 500 | | 5,128.60 | | +| 20000, 2000 | | 1,764.27 | 1,400.79 | ## Reproducing Benchmarked Results @@ -198,6 +216,8 @@ a model name (HuggingFace reference or path to a local model), a [generated data trtllm-bench --model $model_name throughput --dataset $dataset_file --backend pytorch --extra_llm_api_options $llm_options ``` +The data collected for the v0.20 benchmarks was run with the following file: + `llm_options.yml` ```yaml use_cuda_graph: true @@ -220,7 +240,7 @@ cuda_graph_batch_sizes: - 8192 ``` -In majority of cases, we also use a higher KV cache percentage by setting `--kv_cache_free_gpu_mem_fraction 0.95` in the benchmark command. This allows us to obtain better performance than the default setting of `0.90`. We fall back to `0.90` if we hit an out of memory issue. +In a majority of cases, we also use a higher KV cache percentage by setting `--kv_cache_free_gpu_mem_fraction 0.95` in the benchmark command. This allows us to obtain better performance than the default setting of `0.90`. We fall back to `0.90` if we hit an out of memory issue. The results will be printed to the terminal upon benchmark completion. For example, diff --git a/docs/source/quick-start-guide.md b/docs/source/quick-start-guide.md index d7acb4a0be6..53519e61047 100644 --- a/docs/source/quick-start-guide.md +++ b/docs/source/quick-start-guide.md @@ -14,6 +14,9 @@ There are multiple ways to install and run TensorRT-LLM. For most users, the opt 1. [Building from source](installation/build-from-source-linux) +The following examples can most easily be executed using the prebuilt [Docker release container available on NGC](https://registry.ngc.nvidia.com/orgs/nvstaging/teams/tensorrt-llm/containers/release) (see also [release.md](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/release.md) on GitHub). Ensure to run these commands as a user with appropriate permissions, preferably `root`, to streamline the setup process. + + ## LLM API The LLM API is a Python API designed to facilitate setup and inference with TensorRT-LLM directly within Python. It enables model optimization by simply specifying a HuggingFace repository name or a model checkpoint. 
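As a rough illustration of that flow, here is a minimal sketch in the spirit of the LLM API quick-start example; the model name and sampling settings below are placeholders chosen for illustration, not a required configuration:

```python
from tensorrt_llm import LLM, SamplingParams

# Any HuggingFace model ID or local checkpoint path can be used here;
# TinyLlama is only a small, illustrative example model.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Illustrative sampling settings; tune them for your workload.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

outputs = llm.generate(["What is the capital of France?"], sampling_params)
for output in outputs:
    # Each result carries the generated completions; print the first one.
    print(output.outputs[0].text)
```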
Under the hood, the LLM API streamlines the process by managing checkpoint conversion, engine building, engine loading, and model inference, all through a single Python object.
@@ -89,7 +92,7 @@ For detailed examples and command syntax, refer to the [trtllm-serve](commands/t

 2. Open a new terminal and use the following command to directly attach to the running container:

-```bash
+```bash
 docker exec -it  bash
 ```
diff --git a/docs/source/quick-start-guide.md b/docs/source/quick-start-guide.md
diff --git a/docs/source/release-notes.md b/docs/source/release-notes.md
index bb663aba7d2..d5c239b82e4 100644
--- a/docs/source/release-notes.md
+++ b/docs/source/release-notes.md
@@ -4,6 +4,82 @@

 All published functionality in the Release Notes has been fully tested and verified with known limitations documented. To share feedback about this release, access our [NVIDIA Developer Forum](https://forums.developer.nvidia.com/).

+## TensorRT-LLM Release 0.20.0
+
+### Key Features and Enhancements
+- **Model Support**
+  - Added Qwen3 support. Refer to the “Qwen3” section in `examples/models/core/qwen/README.md`.
+  - Added HyperCLOVAX-SEED-Vision support in the PyTorch flow. Refer to `examples/models/contrib/hyperclovax/README.md`.
+  - Added Dynasor-CoT in scaffolding examples. Refer to `examples/scaffolding/contrib/Dynasor/README.md`.
+  - Added Mistral Small 3.1 24B VLM support in the TRT workflow
+  - Added Gemma3-1b-it support in the PyTorch workflow
+  - Added Nemotron-H model support
+  - Added Eagle-3 support for LLAMA4
+- **PyTorch workflow**
+  - Added LoRA support
+  - Added return logits support
+  - Adopted the new logprob definition in the PyTorch flow
+  - Enabled per-request stats with the PyTorch backend
+  - Enabled LogitsProcessor in the PyTorch backend
+- **Benchmark**
+  - Added beam width to the low-latency benchmark
+  - Fixed trtllm-bench `iter_stats` and `cuda_graph_batch_sizes` errors
+  - Removed the deprecated Python runtime benchmark
+  - Added benchmark support for scaffolding
+- **Multimodal models**
+  - Added support in trtllm-serve
+  - Added support in trtllm-bench; currently limited to image inputs only
+- Supported DeepSeek-R1 W4A8 on Hopper
+- Added RTX Pro 6000 support on a single GPU
+- Integrated the Llama4 input processor
+- Added CGA reduction FMHA kernels on Blackwell
+- Enabled chunked context for FlashInfer
+- Supported KV cache reuse for MLA
+- Added Piecewise CUDA Graph support
+- Supported multiple LoRA adapters and TP
+- Added a KV cache-aware router for disaggregated serving
+- Added unfused attention for native support
+- Added a group_rms_norm kernel to normalize multiple inputs in a single operator
+- Added a smart router for the MoE module
+- Added head size 72 support for the QKV preprocessing kernel
+- Added MNNVL MoE A2A support
+- Optimized large embedding tables in multimodal models
+- Supported top-K logprobs and prompt_logprobs in the LLM API
+- Enabled the overlap scheduler in the TRT workflow via the executor API
+
+### Infrastructure Changes
+- **The TRT-LLM team now formally releases a Docker image on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags)**.
+- The pre-built TensorRT-LLM wheel on PyPI is now linked against PyTorch 2.7.0, which uses the CXX11 ABI
+- The dependent TensorRT version is updated to 10.10.0
+- The dependent CUDA version is updated to 12.9.0
+- The dependent public PyTorch version is updated to 2.7.0
+- The dependent NVIDIA ModelOpt version is updated to 0.29.0
+- The dependent NCCL version is maintained at 2.25.1
+- Open-sourced XQA kernels
+- The dependent `datasets` version is upgraded to 3.1.0
+- Migrated the Triton backend into the TensorRT-LLM repo as a TensorRT-LLM submodule
+- Downgraded the GCC toolset version from 13 to 11
+
+### API Changes
+- [Breaking Change]: Enabled scheduling overlap by default
+- Removed the deprecated GptSession/V1 from the TRT workflow
+- Set `_AutoDeployLlmArgs` as the primary config object
+- Allowed overriding CLI arguments with a YAML file in trtllm-serve
+- Introduced a multimodal embedding field in `LlmRequest`
+
+### Fixed Issues
+- Fix hang bug when context server doesn't have enough capacity for KV Cache (#3095)
+- Fix C++ decoder synchronization in PyTorch (#3106)
+- Fix bug related to creating CUDA stream as default parameter, which will be initialized during importing (#3764)
+- Fix attention DP bug on Qwen3 MoE model (#4141)
+- Fix illegal memory access when running LLaMA 4 with CUDA Graph enabled (#4101)
+- Reset planned states to avoid memory leak in TrtllmAttentionWrapper (#4227)
+
+### Known Issues
+- Multi-GPU model support on RTX Pro 6000
+
 ## TensorRT-LLM Release 0.19.0


diff --git a/triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py b/triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py
index db3093a5b47..3523dff6819 100644
--- a/triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py
+++ b/triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py
@@ -82,7 +82,7 @@ def _parse_log_file(self, filename):
 
         return json.loads(json_string)
 
-    def _parse_triton_metrics(self, filename, is_v1):
+    def _parse_triton_metrics(self, filename):
         curl_counts = {}
         with open(filename) as metrics_file:
             for line in metrics_file:
@@ -91,12 +91,11 @@ def _parse_triton_metrics(self, filename, is_v1):
                 metric_output = re.sub(r"^.*?{", "{", line).split()
                 metric_key = metric_output[0]
                 metric_value = metric_output[1]
-                key = self._convert_metric_key_to_stats_key(
-                    metric_key, is_v1)
+                key = self._convert_metric_key_to_stats_key(metric_key)
                 curl_counts[key] = metric_value
         return curl_counts
 
-    def _convert_metric_key_to_stats_key(self, metric_output, is_v1):
+    def _convert_metric_key_to_stats_key(self, metric_output):
         # Converts:
         # '{model="tensorrt_llm",request_type="context",version="1"}'
         # to:
@@ -107,15 +106,12 @@ def _convert_metric_key_to_stats_key(self, metric_output, is_v1):
             if not i.startswith('model') and not i.startswith('version')
         ][0]
         self.assertIn(key, metric_to_stat_dict)
-        if (is_v1):
-            self.assertNotIn("inflight_batcher_specific_metric", key)
-        else:
-            self.assertNotIn("v1_specific_metric", key)
+        self.assertNotIn("v1_specific_metric", key)
         return metric_to_stat_dict[key]
 
-    def _base_test(self, stats_file, metrics_file, is_v1):
+    def _base_test(self, stats_file, metrics_file):
         stats = self._parse_log_file(stats_file)
-        metrics = self._parse_triton_metrics(metrics_file, is_v1)
+        metrics = self._parse_triton_metrics(metrics_file)
         self.assertEqual(len(stats.keys()), len(metrics.keys()))
         self.assertEqual(list(stats.keys()).sort(),
list(metrics.keys()).sort()) for metric_key in stats.keys(): @@ -140,45 +136,33 @@ def _base_test(self, stats_file, metrics_file, is_v1): timedelta(seconds=-1) <= difference, difference <= timedelta(seconds=1)) - def test_1_gpu_v1(self): - self._base_test("1gpu_v1_no_streaming_server.log", - "1gpu_v1_no_stream_metrics.out", True) - def test_1_gpu_IFB_no_stream(self): self._base_test("1gpu_IFB_no_streaming_server.log", - "1gpu_IFB_no_stream_metrics.out", False) + "1gpu_IFB_no_stream_metrics.out") def test_1_gpu_IFB_stream(self): self._base_test("1gpu_IFB_streaming_server.log", - "1gpu_IFB_stream_metrics.out", False) + "1gpu_IFB_stream_metrics.out") if AVAILABLE_GPUS >= 2: - def test_2_gpu_v1(self): - self._base_test("2gpu_v1_no_streaming_server.log", - "2gpu_v1_no_stream_metrics.out", True) - def test_2_gpu_IFB_no_stream(self): self._base_test("2gpu_IFB_no_streaming_server.log", - "2gpu_IFB_no_stream_metrics.out", False) + "2gpu_IFB_no_stream_metrics.out") def test_2_gpu_IFB_stream(self): self._base_test("2gpu_IFB_streaming_server.log", - "2gpu_IFB_stream_metrics.out", False) + "2gpu_IFB_stream_metrics.out") if AVAILABLE_GPUS >= 4: - def test_4_gpu_v1(self): - self._base_test("4gpu_v1_no_streaming_server.log", - "4gpu_v1_no_stream_metrics.out", True) - def test_4_gpu_IFB_no_stream(self): self._base_test("4gpu_IFB_no_streaming_server.log", - "4gpu_IFB_no_stream_metrics.out", False) + "4gpu_IFB_no_stream_metrics.out") def test_4_gpu_IFB_stream(self): self._base_test("4gpu_IFB_streaming_server.log", - "4gpu_IFB_stream_metrics.out", False) + "4gpu_IFB_stream_metrics.out") if __name__ == "__main__": diff --git a/triton_backend/ci/L0_backend_trtllm/test.sh b/triton_backend/ci/L0_backend_trtllm/test.sh index c09e985a266..83967d1c58c 100644 --- a/triton_backend/ci/L0_backend_trtllm/test.sh +++ b/triton_backend/ci/L0_backend_trtllm/test.sh @@ -228,49 +228,13 @@ for NUM_GPU in "${NUM_GPUS_TO_TEST[@]}"; do run_server "${SERVER_ARGS}" wait_for_server_ready ${SERVER_TIMEOUT} ${SERVER_PID[@]} - if [ "$WAIT_RET" != "0" ]; then - # Cleanup - kill $SERVER_PID > /dev/null 2>&1 || true - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 - fi - - set -e - python3 ${TOOLS_DIR}/inflight_batcher_llm/benchmark_core_model.py \ - --max-input-len=500 \ - dataset --dataset=${DATASET} \ - --tokenizer-dir=${TOKENIZER_DIR} - - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Error executing v1 benchmark_core_model test with ${NUM_GPU}GPU(s): line ${LINENO}\n***" - kill_server - wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]} - RET=1 - fi - set +e - - set -e - python3 ${TOOLS_DIR}/inflight_batcher_llm/end_to_end_test.py \ - --max-input-len=500 \ - --dataset=${DATASET} - if [ $? -ne 0 ]; then + # Expect invalid GPT model type error to be gracefully handled + if [ `grep -c "Static batching type is deprecated" $SERVER_LOG` == "0" ]; then + echo -e "\n***\n*** GPT model type error not handled gracefully: line ${LINENO}\n***" cat $SERVER_LOG - echo -e "\n***\n*** Error executing v1 end-to-end test with ${NUM_GPU}GPU(s): line ${LINENO}\n***" - kill_server - wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]} - RET=1 + exit 1 fi - set +e - - # Make sure the metrics is retrieved after the server has updated the metrics internally - sleep ${SLEEP_DURATION} - curl localhost:8002/metrics -o ${NUM_GPU}gpu_v1_no_stream_metrics.out - - kill_server - wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]} # inflight batching ON # streaming OFF