Update InferenceJobOutput schema type and tweak run_inference metrics calc code
peteski22 committed Mar 27, 2025
commit acd8bbbf46d2d1346be1f6ed5aaa3b608b963701
16 changes: 11 additions & 5 deletions lumigator/jobs/inference/inference.py
@@ -119,12 +119,18 @@ def run_inference(config: InferenceJobConfig, api_key: str | None = None) -> Pat
     output["inference_time"] = inference_time

     artifacts = InferenceJobOutput.model_validate(output)
+
+    # Only attempt to calculate metric averages if we have metrics for EVERY prediction result.
     if all(p.metrics is not None for p in prediction_results):
-        avg_prompt_tokens = sum([p.metrics.prompt_tokens for p in prediction_results]) / len(prediction_results)
-        avg_total_tokens = sum([p.metrics.total_tokens for p in prediction_results]) / len(prediction_results)
-        avg_completion_tokens = sum([p.metrics.completion_tokens for p in prediction_results]) / len(prediction_results)
-        avg_reasoning_tokens = sum([p.metrics.reasoning_tokens for p in prediction_results]) / len(prediction_results)
-        avg_answer_tokens = sum([p.metrics.answer_tokens for p in prediction_results]) / len(prediction_results)
+        total_results = len(prediction_results)
+
+        avg_prompt_tokens = sum(p.metrics.prompt_tokens for p in prediction_results) / total_results
+        avg_total_tokens = sum(p.metrics.total_tokens for p in prediction_results) / total_results
+        avg_completion_tokens = sum(p.metrics.completion_tokens for p in prediction_results) / total_results
+        # Optional fields may be None; treat a missing value as 0 so the sum still works.
+        avg_reasoning_tokens = sum((p.metrics.reasoning_tokens or 0) for p in prediction_results) / total_results
+        avg_answer_tokens = sum((p.metrics.answer_tokens or 0) for p in prediction_results) / total_results
+
         metrics = AverageInferenceMetrics(
             avg_prompt_tokens=avg_prompt_tokens,
             avg_total_tokens=avg_total_tokens,
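For context, the `(value or 0)` pattern in this hunk is what lets the averages tolerate optional per-prediction token counts. A minimal, self-contained sketch of that behaviour (the `Metrics` shape here is assumed to mirror `InferenceMetrics`; it is illustrative, not the job's actual code):

from dataclasses import dataclass

@dataclass
class Metrics:
    prompt_tokens: int
    total_tokens: int
    completion_tokens: int
    reasoning_tokens: int | None = None  # optional: not every backend reports these
    answer_tokens: int | None = None

results = [
    Metrics(prompt_tokens=10, total_tokens=15, completion_tokens=5, reasoning_tokens=2),
    Metrics(prompt_tokens=12, total_tokens=20, completion_tokens=8),  # reasoning_tokens is None
]
total_results = len(results)

# `or 0` coerces a missing optional value to zero so the sum doesn't raise
# TypeError, while the divisor stays the full number of results.
avg_reasoning_tokens = sum((m.reasoning_tokens or 0) for m in results) / total_results
print(avg_reasoning_tokens)  # 1.0

The same change also swaps sum([...]) for a generator expression, avoiding an intermediate list.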
2 changes: 1 addition & 1 deletion lumigator/jobs/inference/schemas.py
@@ -91,7 +91,7 @@ class InferenceJobOutput(BaseModel):
     ground_truth: list | None = None
     model: str
     inference_time: float
-    inference_metrics: list[InferenceMetrics] | list[None] = None
+    inference_metrics: list[InferenceMetrics | None] = []


 class PredictionResult(BaseModel):
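The annotation change is worth spelling out: the old type `list[InferenceMetrics] | list[None]` only admits a list that is entirely metrics or entirely Nones, and its `None` default matches neither branch; the new type `list[InferenceMetrics | None]` admits a per-item `None` and defaults to an empty list. A minimal sketch, assuming Pydantic v2 and a cut-down `InferenceMetrics` (illustrative field names, not the real schema):

from pydantic import BaseModel

class InferenceMetrics(BaseModel):
    prompt_tokens: int
    total_tokens: int

class InferenceJobOutput(BaseModel):
    # Pydantic copies the mutable [] default per instance, so sharing is not a concern.
    inference_metrics: list[InferenceMetrics | None] = []

# A mixed list (some predictions produced metrics, some did not) now validates:
out = InferenceJobOutput(
    inference_metrics=[InferenceMetrics(prompt_tokens=1, total_tokens=2), None]
)
print(out.inference_metrics)

Under the old annotation, that mixed list would fail validation, since it is neither a pure list[InferenceMetrics] nor a pure list[None].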