
Commit a3ae3ce

Author: binghanc (generated by with_the_same_user script)
Commit message: code formatting
Signed-off-by: binghanc <176802681+binghanc@users.noreply.github.com>
Parent: abfdfbf

File tree: 1 file changed (+32, -19 lines)

tensorrt_llm/serve/scripts/benchmark_serving.py

Lines changed: 32 additions & 19 deletions
@@ -253,13 +253,20 @@ def calculate_metrics(
         percentiles_e2el_ms=[(p, np.percentile(e2els or 0, p) * 1000)
                              for p in selected_percentiles],
         tput_user=np.mean(tput_user or 0),
-        mean_avg_decoded_tokens_per_iter=np.mean(avg_decoded_tokens_per_iter_list or 0),
-        min_avg_decoded_tokens_per_iter=np.min(avg_decoded_tokens_per_iter_list) if avg_decoded_tokens_per_iter_list else 0.0,
-        max_avg_decoded_tokens_per_iter=np.max(avg_decoded_tokens_per_iter_list) if avg_decoded_tokens_per_iter_list else 0.0,
-        median_avg_decoded_tokens_per_iter=np.median(avg_decoded_tokens_per_iter_list or 0),
-        std_avg_decoded_tokens_per_iter=np.std(avg_decoded_tokens_per_iter_list or 0),
-        percentiles_avg_decoded_tokens_per_iter=[(p, np.percentile(avg_decoded_tokens_per_iter_list or 0, p))
-                             for p in selected_percentiles],
+        mean_avg_decoded_tokens_per_iter=np.mean(
+            avg_decoded_tokens_per_iter_list or 0),
+        min_avg_decoded_tokens_per_iter=np.min(avg_decoded_tokens_per_iter_list)
+        if avg_decoded_tokens_per_iter_list else 0.0,
+        max_avg_decoded_tokens_per_iter=np.max(avg_decoded_tokens_per_iter_list)
+        if avg_decoded_tokens_per_iter_list else 0.0,
+        median_avg_decoded_tokens_per_iter=np.median(
+            avg_decoded_tokens_per_iter_list or 0),
+        std_avg_decoded_tokens_per_iter=np.std(avg_decoded_tokens_per_iter_list
+                                               or 0),
+        percentiles_avg_decoded_tokens_per_iter=[
+            (p, np.percentile(avg_decoded_tokens_per_iter_list or 0, p))
+            for p in selected_percentiles
+        ],
     )
     return metrics, actual_output_lens
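Not part of the commit itself, but useful context for the empty-list handling that was reflowed above: a minimal standalone sketch of why np.min and np.max need the explicit "if ... else 0.0" guard while the other reductions can rely on "avg_decoded_tokens_per_iter_list or 0". The empty list here is hypothetical.

import numpy as np

# Hypothetical stand-in for avg_decoded_tokens_per_iter_list when no request
# reported the field.
vals = []

# "vals or 0" evaluates to the scalar 0 for an empty list, so the mean-style
# reductions return 0.0 instead of NaN plus a RuntimeWarning.
print(np.mean(vals or 0))    # 0.0
print(np.median(vals or 0))  # 0.0
print(np.std(vals or 0))     # 0.0

# np.min and np.max raise ValueError on an empty sequence, so the code above
# guards them with an explicit conditional instead of the "or 0" fallback.
print(np.min(vals) if vals else 0.0)  # 0.0

vals = [1.0, 2.0, 4.0]
print(np.mean(vals or 0))             # ~2.33
print(np.min(vals) if vals else 0.0)  # 1.0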

@@ -502,7 +509,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
             "max": metrics.max_avg_decoded_tokens_per_iter,
             "median": metrics.median_avg_decoded_tokens_per_iter,
             "std": metrics.std_avg_decoded_tokens_per_iter,
-            "percentiles": {f"p{p}": v for p, v in metrics.percentiles_avg_decoded_tokens_per_iter}
+            "percentiles": {
+                f"p{p}": v
+                for p, v in metrics.percentiles_avg_decoded_tokens_per_iter
+            }
         },
         "input_lens": [output.prompt_len for output in outputs],
         "output_lens": actual_output_lens,
@@ -524,14 +534,15 @@ def process_one_metric(
     ):
         # This function prints and adds statistics of the specified metric.
         # Skip if not in selected metrics (except avg_decoded_tokens_per_iter which has its own condition)
-        if (metric_attribute_name not in selected_percentile_metrics and metric_attribute_name != "avg_decoded_tokens_per_iter"):
+        if (metric_attribute_name not in selected_percentile_metrics
+                and metric_attribute_name != "avg_decoded_tokens_per_iter"):
             return
-
+
         # Build attribute suffix (e.g., "_ms" or "")
         attr_suffix = f"_{unit_suffix}" if unit_suffix else ""
         # Build display unit (e.g., " (ms)" or "")
         display_unit = f" ({unit_suffix})" if unit_suffix else ""
-
+
         print("{s:{c}^{n}}".format(s=metric_header, n=50, c='-'))
         print("{:<40} {:<10.2f}".format(
             f"Mean {metric_name}{display_unit}:",
@@ -557,23 +568,25 @@ def process_one_metric(
             getattr(metrics, f"max_{metric_attribute_name}{attr_suffix}")))
         result[f"max_{metric_attribute_name}{attr_suffix}"] = getattr(
             metrics, f"max_{metric_attribute_name}{attr_suffix}")
-
+
         result[f"mean_{metric_attribute_name}{attr_suffix}"] = getattr(
             metrics, f"mean_{metric_attribute_name}{attr_suffix}")
         result[f"median_{metric_attribute_name}{attr_suffix}"] = getattr(
             metrics, f"median_{metric_attribute_name}{attr_suffix}")
-
-        for p, value in getattr(metrics,
-                                f"percentiles_{metric_attribute_name}{attr_suffix}"):
+
+        for p, value in getattr(
+                metrics, f"percentiles_{metric_attribute_name}{attr_suffix}"):
             p_word = str(int(p)) if int(p) == p else str(p)
-            print("{:<40} {:<10.2f}".format(f"P{p_word} {metric_name}{display_unit}:",
-                                            value))
+            print("{:<40} {:<10.2f}".format(
+                f"P{p_word} {metric_name}{display_unit}:", value))
             result[f"p{p_word}_{metric_attribute_name}{attr_suffix}"] = value

     # Print avg_decoded_tokens_per_iter statistics if available
     if metrics.mean_avg_decoded_tokens_per_iter > 0.0:
-        process_one_metric("avg_decoded_tokens_per_iter", "Avg Decoded Tokens per Iter",
-                           "Avg Decoded Tokens per Iter", unit_suffix="")
+        process_one_metric("avg_decoded_tokens_per_iter",
+                           "Avg Decoded Tokens per Iter",
+                           "Avg Decoded Tokens per Iter",
+                           unit_suffix="")

     process_one_metric("ttft", "TTFT", "Time to First Token")
     process_one_metric("tpot", "TPOT",
