Skip to content

Commit f80395e

Browse files
authored
TensorRT-LLM backend v0.13 Update (triton-inference-server#607)
1 parent 9a78477 commit f80395e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+2894
-1184
lines changed

README.md

Lines changed: 510 additions & 318 deletions
Large diffs are not rendered by default.

all_models/gpt/postprocessing/config.pbtxt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
name: "postprocessing"
22
backend: "python"
33
max_batch_size: 1024
4+
dynamic_batching {}
45
input [
56
{
67
name: "TOKENS_BATCH"

all_models/gpt/tensorrt_llm/1/model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ def execute(self, requests):
160160
sampling_config.output_log_probs = inputs['output_log_probs']
161161
sampling_config.return_dict = True
162162

163-
outputs = self.runner.generate(input_ids, sampling_config)
163+
outputs = self.runner.generate(input_ids, None, sampling_config)
164164
output_ids = outputs["output_ids"]
165165

166166
if self.rank == 0:

all_models/inflight_batcher_llm/ensemble/config.pbtxt

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -39,12 +39,6 @@ input [
3939
dims: [ 1 ]
4040
optional: true
4141
},
42-
{
43-
name: "image_input"
44-
data_type: TYPE_FP16
45-
dims: [ 3, 224, 224 ]
46-
optional: true
47-
},
4842
{
4943
name: "max_tokens"
5044
data_type: TYPE_INT32
@@ -164,6 +158,12 @@ input [
164158
dims: [ -1, -1 ]
165159
optional: true
166160
},
161+
{
162+
name: "prompt_table_extra_id"
163+
data_type: TYPE_UINT64
164+
dims: [ 1 ]
165+
optional: true
166+
},
167167
{
168168
name: "prompt_vocab_size"
169169
data_type: TYPE_INT32
@@ -228,10 +228,6 @@ ensemble_scheduling {
228228
key: "DECODER_QUERY"
229229
value: "decoder_text_input"
230230
}
231-
input_map {
232-
key: "IMAGE"
233-
value: "image_input"
234-
}
235231
input_map {
236232
key: "REQUEST_OUTPUT_LEN"
237233
value: "max_tokens"
@@ -261,8 +257,8 @@ ensemble_scheduling {
261257
value: "pad_id"
262258
}
263259
input_map {
264-
key: "PROMPT_EMBEDDING_TABLE"
265-
value: "prompt_embedding_table"
260+
key: "PROMPT_TABLE_EXTRA_ID"
261+
value: "prompt_table_extra_id"
266262
}
267263
output_map {
268264
key: "REQUEST_INPUT_LEN"
@@ -305,8 +301,8 @@ ensemble_scheduling {
305301
value: "_PREPROCESSOR_PAD_ID"
306302
}
307303
output_map {
308-
key: "OUT_PROMPT_EMBEDDING_TABLE"
309-
value: "out_prompt_embedding_table"
304+
key: "OUT_PROMPT_TABLE_EXTRA_IDS"
305+
value: "_OUT_PROMPT_TABLE_EXTRA_IDS"
310306
}
311307
},
312308
{
@@ -402,7 +398,7 @@ ensemble_scheduling {
402398
}
403399
input_map {
404400
key: "prompt_embedding_table"
405-
value: "out_prompt_embedding_table"
401+
value: "prompt_embedding_table"
406402
}
407403
input_map {
408404
key: "prompt_vocab_size"
@@ -416,6 +412,10 @@ ensemble_scheduling {
416412
key: "bad_words_list"
417413
value: "_BAD_WORDS_IDS"
418414
}
415+
input_map {
416+
key: "prompt_table_extra_ids"
417+
value: "_OUT_PROMPT_TABLE_EXTRA_IDS"
418+
}
419419
output_map {
420420
key: "output_ids"
421421
value: "_TOKENS_BATCH"

all_models/inflight_batcher_llm/postprocessing/1/model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ def _postprocessing(self, tokens_batch, sequence_lengths):
240240
# Exclude fake ids in multimodal models
241241
fake_id_len = 0
242242
for i in range(seq_len):
243-
if tokens[i] < self.tokenizer.vocab_size:
243+
if tokens[i] < len(self.tokenizer.vocab):
244244
fake_id_len = i
245245
break
246246
output = self.tokenizer.decode(

all_models/inflight_batcher_llm/postprocessing/config.pbtxt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
name: "postprocessing"
2828
backend: "python"
2929
max_batch_size: ${triton_max_batch_size}
30+
dynamic_batching {}
3031
input [
3132
{
3233
name: "TOKENS_BATCH"

0 commit comments

Comments (0)