@@ -80,7 +80,13 @@ class BenchmarkMetrics:
8080 std_e2el_ms : float
8181 percentiles_e2el_ms : list [tuple [float , float ]]
8282 tput_user : list [float ]
83- avg_decoded_tokens_per_iter : float
83+ # Statistics for avg_decoded_tokens_per_iter across all requests
84+ mean_avg_decoded_tokens_per_iter : float
85+ min_avg_decoded_tokens_per_iter : float
86+ max_avg_decoded_tokens_per_iter : float
87+ median_avg_decoded_tokens_per_iter : float
88+ std_avg_decoded_tokens_per_iter : float
89+ percentiles_avg_decoded_tokens_per_iter : list [tuple [float , float ]]
8490
8591
8692async def get_request (
@@ -144,7 +150,7 @@ def calculate_metrics(
144150 ttfts : list [float ] = []
145151 e2els : list [float ] = []
146152 tput_user : list [float ] = []
147- latest_avg_decoded_tokens_per_iter : float = 0.0
153+ avg_decoded_tokens_per_iter_list : list [ float ] = []
148154 error_counts : dict [str , int ] = {}
149155 for i in range (len (outputs )):
150156 if outputs [i ].exception_type :
@@ -177,11 +183,11 @@ def calculate_metrics(
177183 tput_user .append (output_len / (outputs [i ].latency ))
178184 completed += 1
179185
180- # Track the latest avg_decoded_tokens_per_iter if available
186+ # Collect avg_decoded_tokens_per_iter for all requests
181187 if hasattr (outputs [i ], 'avg_decoded_tokens_per_iter'
182188 ) and outputs [i ].avg_decoded_tokens_per_iter is not None :
183- latest_avg_decoded_tokens_per_iter = outputs [
184- i ].avg_decoded_tokens_per_iter
189+ avg_decoded_tokens_per_iter_list . append (
190+ outputs [ i ].avg_decoded_tokens_per_iter )
185191 else :
186192 actual_output_lens .append (0 )
187193
@@ -247,7 +253,13 @@ def calculate_metrics(
247253 percentiles_e2el_ms = [(p , np .percentile (e2els or 0 , p ) * 1000 )
248254 for p in selected_percentiles ],
249255 tput_user = np .mean (tput_user or 0 ),
250- avg_decoded_tokens_per_iter = latest_avg_decoded_tokens_per_iter ,
256+ mean_avg_decoded_tokens_per_iter = np .mean (avg_decoded_tokens_per_iter_list or 0 ),
257+ min_avg_decoded_tokens_per_iter = np .min (avg_decoded_tokens_per_iter_list ) if avg_decoded_tokens_per_iter_list else 0.0 ,
258+ max_avg_decoded_tokens_per_iter = np .max (avg_decoded_tokens_per_iter_list ) if avg_decoded_tokens_per_iter_list else 0.0 ,
259+ median_avg_decoded_tokens_per_iter = np .median (avg_decoded_tokens_per_iter_list or 0 ),
260+ std_avg_decoded_tokens_per_iter = np .std (avg_decoded_tokens_per_iter_list or 0 ),
261+ percentiles_avg_decoded_tokens_per_iter = [(p , np .percentile (avg_decoded_tokens_per_iter_list or 0 , p ))
262+ for p in selected_percentiles ],
251263 )
252264 return metrics , actual_output_lens
253265
@@ -466,10 +478,6 @@ async def limited_request_func(request_func_input, streaming, pbar,
466478 print ("{:<40} {:<10.2f}" .format ("User throughput (tok/s):" ,
467479 metrics .tput_user ))
468480
469- # Print last avg_decoded_tokens_per_iter value if available
470- if metrics .avg_decoded_tokens_per_iter > 0.0 :
471- print ("{:<40} {:<10.2f}" .format ("Avg Decoded Tokens per Iter:" ,
472- metrics .avg_decoded_tokens_per_iter ))
473481 if len (outputs ) - metrics .completed > 0 :
474482 print (
475483 f"=======================!FAILED REQUESTS!=======================" )
@@ -488,7 +496,14 @@ async def limited_request_func(request_func_input, streaming, pbar,
488496 "output_throughput" : metrics .output_throughput ,
489497 "total_token_throughput" : metrics .total_token_throughput ,
490498 "user_throughput" : metrics .tput_user ,
491- "avg_decoded_tokens_per_iter" : metrics .avg_decoded_tokens_per_iter ,
499+ "avg_decoded_tokens_per_iter" : {
500+ "mean" : metrics .mean_avg_decoded_tokens_per_iter ,
501+ "min" : metrics .min_avg_decoded_tokens_per_iter ,
502+ "max" : metrics .max_avg_decoded_tokens_per_iter ,
503+ "median" : metrics .median_avg_decoded_tokens_per_iter ,
504+ "std" : metrics .std_avg_decoded_tokens_per_iter ,
505+ "percentiles" : {f"p{ p } " : v for p , v in metrics .percentiles_avg_decoded_tokens_per_iter }
506+ },
492507 "input_lens" : [output .prompt_len for output in outputs ],
493508 "output_lens" : actual_output_lens ,
494509 "ttfts" : [output .ttft for output in outputs ],
@@ -504,30 +519,61 @@ def process_one_metric(
504519 metric_name : str ,
505520 # E.g., "Time to First Token"
506521 metric_header : str ,
522+ # E.g., "ms" or "" for no unit
523+ unit_suffix : str = "ms" ,
507524 ):
508- # This function prints and adds statistics of the specified
509- # metric.
510- if metric_attribute_name not in selected_percentile_metrics :
525+ # This function prints and adds statistics of the specified metric.
526+ # Skip metrics that were not selected; avg_decoded_tokens_per_iter is exempt because its call site gates it on the mean being > 0
527+ if ( metric_attribute_name not in selected_percentile_metrics and metric_attribute_name != "avg_decoded_tokens_per_iter" ) :
511528 return
529+
530+ # Build attribute suffix (e.g., "_ms" or "")
531+ attr_suffix = f"_{ unit_suffix } " if unit_suffix else ""
532+ # Build display unit (e.g., " (ms)" or "")
533+ display_unit = f" ({ unit_suffix } )" if unit_suffix else ""
534+
512535 print ("{s:{c}^{n}}" .format (s = metric_header , n = 50 , c = '-' ))
513536 print ("{:<40} {:<10.2f}" .format (
514- f"Mean { metric_name } (ms) :" ,
515- getattr (metrics , f"mean_{ metric_attribute_name } _ms " )))
537+ f"Mean { metric_name } { display_unit } :" ,
538+ getattr (metrics , f"mean_{ metric_attribute_name } { attr_suffix } " )))
516539 print ("{:<40} {:<10.2f}" .format (
517- f"Median { metric_name } (ms):" ,
518- getattr (metrics , f"median_{ metric_attribute_name } _ms" )))
519- result [f"mean_{ metric_attribute_name } _ms" ] = getattr (
520- metrics , f"mean_{ metric_attribute_name } _ms" )
521- result [f"median_{ metric_attribute_name } _ms" ] = getattr (
522- metrics , f"median_{ metric_attribute_name } _ms" )
523- result [f"std_{ metric_attribute_name } _ms" ] = getattr (
524- metrics , f"std_{ metric_attribute_name } _ms" )
540+ f"Median { metric_name } { display_unit } :" ,
541+ getattr (metrics , f"median_{ metric_attribute_name } { attr_suffix } " )))
542+ if hasattr (metrics , f"std_{ metric_attribute_name } { attr_suffix } " ):
543+ print ("{:<40} {:<10.2f}" .format (
544+ f"Std Dev { metric_name } { display_unit } :" ,
545+ getattr (metrics , f"std_{ metric_attribute_name } { attr_suffix } " )))
546+ result [f"std_{ metric_attribute_name } { attr_suffix } " ] = getattr (
547+ metrics , f"std_{ metric_attribute_name } { attr_suffix } " )
548+ if hasattr (metrics , f"min_{ metric_attribute_name } { attr_suffix } " ):
549+ print ("{:<40} {:<10.2f}" .format (
550+ f"Min { metric_name } { display_unit } :" ,
551+ getattr (metrics , f"min_{ metric_attribute_name } { attr_suffix } " )))
552+ result [f"min_{ metric_attribute_name } { attr_suffix } " ] = getattr (
553+ metrics , f"min_{ metric_attribute_name } { attr_suffix } " )
554+ if hasattr (metrics , f"max_{ metric_attribute_name } { attr_suffix } " ):
555+ print ("{:<40} {:<10.2f}" .format (
556+ f"Max { metric_name } { display_unit } :" ,
557+ getattr (metrics , f"max_{ metric_attribute_name } { attr_suffix } " )))
558+ result [f"max_{ metric_attribute_name } { attr_suffix } " ] = getattr (
559+ metrics , f"max_{ metric_attribute_name } { attr_suffix } " )
560+
561+ result [f"mean_{ metric_attribute_name } { attr_suffix } " ] = getattr (
562+ metrics , f"mean_{ metric_attribute_name } { attr_suffix } " )
563+ result [f"median_{ metric_attribute_name } { attr_suffix } " ] = getattr (
564+ metrics , f"median_{ metric_attribute_name } { attr_suffix } " )
565+
525566 for p , value in getattr (metrics ,
526- f"percentiles_{ metric_attribute_name } _ms " ):
567+ f"percentiles_{ metric_attribute_name } { attr_suffix } " ):
527568 p_word = str (int (p )) if int (p ) == p else str (p )
528- print ("{:<40} {:<10.2f}" .format (f"P{ p_word } { metric_name } (ms) :" ,
569+ print ("{:<40} {:<10.2f}" .format (f"P{ p_word } { metric_name } { display_unit } :" ,
529570 value ))
530- result [f"p{ p_word } _{ metric_attribute_name } _ms" ] = value
571+ result [f"p{ p_word } _{ metric_attribute_name } { attr_suffix } " ] = value
572+
573+ # Print avg_decoded_tokens_per_iter statistics only when the mean is positive (it is 0 when no request reported a value)
574+ if metrics .mean_avg_decoded_tokens_per_iter > 0.0 :
575+ process_one_metric ("avg_decoded_tokens_per_iter" , "Avg Decoded Tokens per Iter" ,
576+ "Avg Decoded Tokens per Iter" , unit_suffix = "" )
531577
532578 process_one_metric ("ttft" , "TTFT" , "Time to First Token" )
533579 process_one_metric ("tpot" , "TPOT" ,
0 commit comments