
Commit de61541

ixlmar authored and faradawn committed
[TRTLLM-8269][test] do not explicitly pass temperature=0 to select greedy sampling (NVIDIA#8110)
Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
Signed-off-by: Faradawn Yang <faradawny@gmail.com>
1 parent ed85512 commit de61541

File tree

jenkins/L0_MergeRequest.groovy
tensorrt_llm/evaluate/json_mode_eval.py
tensorrt_llm/evaluate/mmlu.py
tests/integration/defs/accuracy/test_disaggregated_serving.py
tests/unittest/llmapi/apps/_test_openai_misc.py

5 files changed: +15 -11 lines changed
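For context on the commit message above: the evaluators previously forced greedy decoding by passing temperature=0 on every request; they now omit it and rely on the LLM API's defaults to select greedy sampling. Below is a minimal before/after sketch through the LLM API (not code from this commit; the model path and prompt are placeholders):

    from tensorrt_llm import LLM, SamplingParams

    llm = LLM(model="/path/to/model")  # placeholder model path

    # Before: greedy decoding requested explicitly on every call.
    out_before = llm.generate("2 + 2 =",
                              SamplingParams(max_tokens=8, temperature=0))

    # After: temperature is left unset; per the commit message, the default
    # sampling configuration already selects greedy decoding.
    out_after = llm.generate("2 + 2 =", SamplingParams(max_tokens=8))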

jenkins/L0_MergeRequest.groovy

Lines changed: 2 additions & 0 deletions
@@ -705,6 +705,8 @@ def getMultiGpuFileChanged(pipeline, testFilter, globalVars)
         "tensorrt_llm/_torch/pyexecutor/_util.py",
         "tensorrt_llm/_torch/pyexecutor/model_engine.py",
         "tensorrt_llm/_torch/pyexecutor/py_executor.py",
+        "tensorrt_llm/evaluate/json_mode_eval.py",
+        "tensorrt_llm/evaluate/mmlu.py",
         "tensorrt_llm/executor/",
         "tensorrt_llm/functional.py",
         "tensorrt_llm/llmapi/",

tensorrt_llm/evaluate/json_mode_eval.py

Lines changed: 1 addition & 2 deletions
@@ -64,8 +64,7 @@ def generate_samples(self) -> Iterable[tuple]:
             schema["x-guidance"] = {"lenient": True}
             schema = json.dumps(schema)
             sampling_args = {
-                "guided_decoding": GuidedDecodingParams(json=schema),
-                "temperature": 0,
+                "guided_decoding": GuidedDecodingParams(json=schema)
             }
             yield sample["prompt"], sampling_args, sample["completion"], sample[
                 "schema"]

tensorrt_llm/evaluate/mmlu.py

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ def generate_samples(self) -> Iterable[tuple]:
                                            include_answer=False)
                 prompt = train_prompt + prompt_end
                 label = test_df.iloc[i, test_df.shape[1] - 1]
-                yield prompt, {"temperature": 0}, label, subject
+                yield prompt, None, label, subject

     def compute_score(self, outputs: List[RequestOutput], references: List[str],
                       subjects: List[str]) -> float:

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 11 additions & 5 deletions
@@ -230,11 +230,17 @@ def send_request(prompt: str, sampling_params: SamplingParams,
                          streaming: bool):
             kwargs = {}
             if sampling_params is not None:
-                kwargs.update(max_tokens=sampling_params.max_tokens,
-                              temperature=sampling_params.temperature,
-                              top_p=sampling_params.top_p,
-                              stop=sampling_params.stop,
-                              seed=sampling_params.seed)
+                kwargs.update(
+                    max_tokens=sampling_params.max_tokens,
+                    # NB: 'LLM' (cf. SamplingParams) and OpenAI API
+                    # defaults differ (top_p=0 vs. top_p=1).
+                    # FIXME: Because 'LLM' does not permit expressly setting
+                    # top_p=0, diverting to temperature=0.
+                    temperature=(sampling_params.temperature
+                                 if sampling_params.top_p is not None else 0),
+                    top_p=sampling_params.top_p,
+                    stop=sampling_params.stop,
+                    seed=sampling_params.seed)
             if (guided_decoding_params :=
                     sampling_params.guided_decoding) is not None:
                 extra_body = {}
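The NB/FIXME comments in this hunk describe a subtle default mismatch. Below is a standalone re-expression of that mapping, assuming (as the check above implies) that an unset top_p is stored as None on the TRT-LLM side; the helper name is made up:

    from tensorrt_llm import SamplingParams

    def openai_sampling_kwargs(sp: SamplingParams) -> dict:
        # Illustrative only: mirrors the kwargs built in send_request above.
        return dict(
            max_tokens=sp.max_tokens,
            # OpenAI chat completions default to top_p=1, whereas the LLM API
            # does not permit expressly setting top_p=0; when top_p was never
            # set, fall back to temperature=0 so the served endpoint still
            # decodes greedily.
            temperature=sp.temperature if sp.top_p is not None else 0,
            top_p=sp.top_p,
            stop=sp.stop,
            seed=sp.seed,
        )

    # A request with no top_p set collapses to temperature=0 (greedy).
    print(openai_sampling_kwargs(SamplingParams(max_tokens=16)))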

tests/unittest/llmapi/apps/_test_openai_misc.py

Lines changed: 0 additions & 3 deletions
@@ -94,12 +94,9 @@ async def test_request_cancellation(server: RemoteOpenAIServer,
     # Request about 2 million tokens
     for _ in range(200):
         task = asyncio.create_task(
-            # FIXME: Some requests complete quickly without temperature=0,
-            # despite min_tokens being specified, cf. https://nvbugs/5513423
             client.chat.completions.create(messages=chat_input,
                                            model=model_name,
                                            max_tokens=10000,
-                                           temperature=0,
                                            extra_body={"min_tokens": 10000}))
         tasks.append(task)
