828 changes: 510 additions & 318 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions all_models/gpt/postprocessing/config.pbtxt
@@ -1,6 +1,7 @@
 name: "postprocessing"
 backend: "python"
 max_batch_size: 1024
+dynamic_batching {}
 input [
   {
     name: "TOKENS_BATCH"
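
The new dynamic_batching {} block, with every setting left at its default, lets Triton fuse requests that arrive close together into a single batch of up to max_batch_size before invoking the Python backend. A client-side sketch of the pattern that benefits: TOKENS_BATCH is taken from this config, while SEQUENCE_LENGTH, the OUTPUT tensor name, the dummy token IDs, and the server URL are assumptions about the surrounding repo.

import numpy as np
import tritonclient.http as httpclient

# Sketch only: several outstanding async requests give the newly enabled
# dynamic batcher a window in which to group them into one execution.
client = httpclient.InferenceServerClient(url="localhost:8000")

pending = []
for _ in range(8):
    tokens = np.array([[[10, 20, 30]]], dtype=np.int32)  # [batch, beam, seq]; dummy IDs
    seq_len = np.array([[3]], dtype=np.int32)            # [batch, beam]
    t = httpclient.InferInput("TOKENS_BATCH", list(tokens.shape), "INT32")
    t.set_data_from_numpy(tokens)
    s = httpclient.InferInput("SEQUENCE_LENGTH", list(seq_len.shape), "INT32")
    s.set_data_from_numpy(seq_len)
    pending.append(client.async_infer("postprocessing", [t, s]))

texts = [p.get_result().as_numpy("OUTPUT") for p in pending]
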
2 changes: 1 addition & 1 deletion all_models/gpt/tensorrt_llm/1/model.py
@@ -160,7 +160,7 @@ def execute(self, requests):
         sampling_config.output_log_probs = inputs['output_log_probs']
         sampling_config.return_dict = True

-        outputs = self.runner.generate(input_ids, sampling_config)
+        outputs = self.runner.generate(input_ids, None, sampling_config)
         output_ids = outputs["output_ids"]

         if self.rank == 0:
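
The extra None tracks a change in TensorRT-LLM's ModelRunner.generate() signature: a new positional parameter now sits between the input IDs and the sampling config (position_ids in releases from around this time, though the diff itself does not name it). A sketch of a keyword-argument variant that would survive further signature growth, assuming both parameters are still accepted by name in this TRT-LLM release:

# Sketch only: assumes ModelRunner.generate() accepts batch_input_ids and
# sampling_config as keyword arguments, so inserted positional parameters
# no longer break the call site.
outputs = self.runner.generate(
    batch_input_ids=input_ids,
    sampling_config=sampling_config,
)
output_ids = outputs["output_ids"]
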
30 changes: 15 additions & 15 deletions all_models/inflight_batcher_llm/ensemble/config.pbtxt
@@ -39,12 +39,6 @@ input [
     dims: [ 1 ]
     optional: true
   },
-  {
-    name: "image_input"
-    data_type: TYPE_FP16
-    dims: [ 3, 224, 224 ]
-    optional: true
-  },
   {
     name: "max_tokens"
     data_type: TYPE_INT32
@@ -164,6 +158,12 @@ input [
     dims: [ -1, -1 ]
     optional: true
   },
+  {
+    name: "prompt_table_extra_id"
+    data_type: TYPE_UINT64
+    dims: [ 1 ]
+    optional: true
+  },
   {
     name: "prompt_vocab_size"
     data_type: TYPE_INT32
@@ -228,10 +228,6 @@ ensemble_scheduling {
         key: "DECODER_QUERY"
         value: "decoder_text_input"
       }
-      input_map {
-        key: "IMAGE"
-        value: "image_input"
-      }
       input_map {
         key: "REQUEST_OUTPUT_LEN"
         value: "max_tokens"
@@ -261,8 +257,8 @@
         value: "pad_id"
       }
       input_map {
-        key: "PROMPT_EMBEDDING_TABLE"
-        value: "prompt_embedding_table"
+        key: "PROMPT_TABLE_EXTRA_ID"
+        value: "prompt_table_extra_id"
       }
       output_map {
         key: "REQUEST_INPUT_LEN"
@@ -305,8 +301,8 @@
         value: "_PREPROCESSOR_PAD_ID"
       }
       output_map {
-        key: "OUT_PROMPT_EMBEDDING_TABLE"
-        value: "out_prompt_embedding_table"
+        key: "OUT_PROMPT_TABLE_EXTRA_IDS"
+        value: "_OUT_PROMPT_TABLE_EXTRA_IDS"
       }
     },
     {
@@ -402,7 +398,7 @@
       }
       input_map {
         key: "prompt_embedding_table"
-        value: "out_prompt_embedding_table"
+        value: "prompt_embedding_table"
       }
       input_map {
         key: "prompt_vocab_size"
@@ -416,6 +412,10 @@
         key: "bad_words_list"
         value: "_BAD_WORDS_IDS"
       }
+      input_map {
+        key: "prompt_table_extra_ids"
+        value: "_OUT_PROMPT_TABLE_EXTRA_IDS"
+      }
       output_map {
         key: "output_ids"
         value: "_TOKENS_BATCH"
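
Taken together, these ensemble changes remove the image_input/IMAGE path and instead carry a per-request prompt_table_extra_id from the client through preprocessing (OUT_PROMPT_TABLE_EXTRA_IDS) into the tensorrt_llm step, while prompt_embedding_table now flows to the runtime directly rather than through the preprocessor. A client sketch exercising the new input: the tensor names come from this config, but the model name "ensemble", the text_input/text_output tensors, the URL, and the sample values are assumptions, and reading the extra ID as a tag that identifies a prompt table's contents (e.g. for KV-cache reuse) is inferred rather than stated in the diff.

import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

text = np.array([["What is the capital of France?"]], dtype=object)
max_tokens = np.array([[64]], dtype=np.int32)
# Hypothetical ID tying this request to one particular prompt table.
extra_id = np.array([[42]], dtype=np.uint64)

inputs = []
for name, arr, dtype in [("text_input", text, "BYTES"),
                         ("max_tokens", max_tokens, "INT32"),
                         ("prompt_table_extra_id", extra_id, "UINT64")]:
    t = httpclient.InferInput(name, list(arr.shape), dtype)
    t.set_data_from_numpy(arr)
    inputs.append(t)

result = client.infer("ensemble", inputs)
print(result.as_numpy("text_output"))
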
2 changes: 1 addition & 1 deletion all_models/inflight_batcher_llm/postprocessing/1/model.py
@@ -240,7 +240,7 @@ def _postprocessing(self, tokens_batch, sequence_lengths):
                 # Exclude fake ids in multimodal models
                 fake_id_len = 0
                 for i in range(seq_len):
-                    if tokens[i] < self.tokenizer.vocab_size:
+                    if tokens[i] < len(self.tokenizer.vocab):
                         fake_id_len = i
                         break
                 output = self.tokenizer.decode(
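
The bound changes because, for Hugging Face tokenizers, vocab_size counts only the base vocabulary while the vocab mapping also contains tokens added afterwards; with the old bound, an added special token (whose ID is at or above vocab_size) would be misclassified as a multimodal fake ID. A quick demonstration, where gpt2 is just a stand-in model and the .vocab property on fast tokenizers wraps get_vocab():

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
tok.add_tokens(["<extra_0>", "<extra_1>"])

print(tok.vocab_size)        # 50257: base vocabulary only
print(len(tok.get_vocab()))  # 50259: base vocabulary plus added tokens
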
1 change: 1 addition & 0 deletions all_models/inflight_batcher_llm/postprocessing/config.pbtxt
@@ -27,6 +27,7 @@
 name: "postprocessing"
 backend: "python"
 max_batch_size: ${triton_max_batch_size}
+dynamic_batching {}
 input [
   {
     name: "TOKENS_BATCH"
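
Unlike the gpt variant above, max_batch_size here is a ${...} placeholder rather than a literal: the inflight-batcher configs are templates that get instantiated before deployment (the repo provides tools/fill_template.py for this step). A minimal Python stand-in for that substitution, with the path and batch size as example values:

from pathlib import Path
from string import Template

# string.Template understands the same ${name} placeholder syntax as the
# templated config; safe_substitute leaves any other placeholders intact.
cfg = Path("all_models/inflight_batcher_llm/postprocessing/config.pbtxt")
text = Template(cfg.read_text()).safe_substitute(triton_max_batch_size="1024")
cfg.write_text(text)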