fix: add GEPA feedback type hooks to AxCompileOptions (#376)

monotykamary · web-flow · commit 4700c7e8e92e · 2025-10-04T00:50:04.000-07:00
* feat: type GEPA compile options feedback hooks

* chore: biome format
diff --git a/docs/OPTIMIZE.md b/docs/OPTIMIZE.md
@@ -841,13 +841,21 @@ const result = await optimizer.compile(
   multiMetric as any,
   {
     validationExamples: val,
+    feedbackExamples: val,
+    feedbackFn: ({ prediction, example }) =>
+      prediction?.isSafe === example?.isSafe
+        ? '✅ Matched label'
+        : [
+            `Expected: ${example?.isSafe ?? 'unknown'}`,
+            `Received: ${prediction?.isSafe ?? 'unknown'}`,
+          ],
     // Required to bound evaluation cost
     maxMetricCalls: 200,
     // Optional: provide a tie-break scalarizer for selection logic
     // paretoMetricKey: 'accuracy',
     // or
     // paretoScalarize: (s) => 0.7*s.accuracy + 0.3*s.brevity,
-  } as any
+  }
 );
 
 console.log(`✅ Found ${result.paretoFrontSize} Pareto points`);
@@ -904,6 +912,10 @@ if (result.optimizedProgram) {
 }
 ```
 
+> 💡 **Feedback hook**: `feedbackFn` lets you surface rich guidance for each evaluation, returned as either a short string or an array of
+> strings for multiple bullet points. The hook receives the raw `prediction` (typed as `unknown`, so narrow or cast it before reading fields) and the
+> original `example`, making it easy to emit reviewer-style comments alongside scores. Pair it with `feedbackExamples` to keep a cost-efficient review set separate from `validationExamples`.
+
 #### GEPA-Flow (Multi-Module)
 
 ```typescript
@@ -917,7 +929,7 @@ const pipeline = flow<{ emailText: string }>()
   .m((s) => ({ priority: s.classifierResult.priority, rationale: s.rationaleResult.rationale }));
 
 const optimizer = new AxGEPAFlow({ studentAI: ai({ name: 'openai', apiKey: process.env.OPENAI_APIKEY!, config: { model: 'gpt-4o-mini' } }), numTrials: 16 });
-const result = await optimizer.compile(pipeline as any, train, multiMetric as any, { validationExamples: val, maxMetricCalls: 240 } as any);
+const result = await optimizer.compile(pipeline as any, train, multiMetric as any, { validationExamples: val, maxMetricCalls: 240 });
 console.log(`Front size: ${result.paretoFrontSize}, Hypervolume: ${result.hypervolume}`);
 ```
 
@@ -976,7 +988,7 @@ const multiMetric = ({ prediction, example }) => ({
 #### Understanding the Results
 
 ```typescript
-const result = await optimizer.compile(program, examples, multiMetric, { maxMetricCalls: 200 } as any);
+const result = await optimizer.compile(program, examples, multiMetric, { maxMetricCalls: 200 });
 
 // Key properties of AxParetoResult:
 console.log(`Pareto frontier size: ${result.paretoFrontSize}`);
diff --git a/src/ax/dsp/common_types.ts b/src/ax/dsp/common_types.ts
@@ -176,6 +176,11 @@ export interface AxCompileOptions {
   saveCheckpointOnComplete?: boolean;
   // GEPA core options (adapter-based)
   gepaAdapter?: AxGEPAAdapter<any, any, any>;
+  validationExamples?: readonly AxTypedExample<any>[];
+  feedbackExamples?: readonly AxTypedExample<any>[];
+  feedbackFn?: (
+    args: Readonly<{ prediction: unknown; example: AxExample }>
+  ) => string | string[] | undefined;
   skipPerfectScore?: boolean;
   perfectScore?: number;
   maxMetricCalls?: number;
diff --git a/src/docs/src/content/docs/optimize.md b/src/docs/src/content/docs/optimize.md
@@ -846,13 +846,21 @@ const result = await optimizer.compile(
   multiMetric as any,
   {
     validationExamples: val,
+    feedbackExamples: val,
+    feedbackFn: ({ prediction, example }) =>
+      prediction?.isSafe === example?.isSafe
+        ? '✅ Matched label'
+        : [
+            `Expected: ${example?.isSafe ?? 'unknown'}`,
+            `Received: ${prediction?.isSafe ?? 'unknown'}`,
+          ],
     // Required to bound evaluation cost
     maxMetricCalls: 200,
     // Optional: provide a tie-break scalarizer for selection logic
     // paretoMetricKey: 'accuracy',
     // or
     // paretoScalarize: (s) => 0.7*s.accuracy + 0.3*s.brevity,
-  } as any
+  }
 );
 
 console.log(`✅ Found ${result.paretoFrontSize} Pareto points`);
@@ -909,6 +917,10 @@ if (result.optimizedProgram) {
 }
 ```
 
+> 💡 **Feedback hook**: `feedbackFn` lets you surface rich guidance for each evaluation, returned as either a short string or an array of
+> strings for multiple bullet points. The hook receives the raw `prediction` (typed as `unknown`, so narrow or cast it before reading fields) and the
+> original `example`, making it easy to emit reviewer-style comments alongside scores. Pair it with `feedbackExamples` to keep a cost-efficient review set separate from `validationExamples`.
+
 #### GEPA-Flow (Multi-Module)
 
 ```typescript
@@ -922,7 +934,7 @@ const pipeline = flow<{ emailText: string }>()
   .m((s) => ({ priority: s.classifierResult.priority, rationale: s.rationaleResult.rationale }));
 
 const optimizer = new AxGEPAFlow({ studentAI: ai({ name: 'openai', apiKey: process.env.OPENAI_APIKEY!, config: { model: 'gpt-4o-mini' } }), numTrials: 16 });
-const result = await optimizer.compile(pipeline as any, train, multiMetric as any, { validationExamples: val, maxMetricCalls: 240 } as any);
+const result = await optimizer.compile(pipeline as any, train, multiMetric as any, { validationExamples: val, maxMetricCalls: 240 });
 console.log(`Front size: ${result.paretoFrontSize}, Hypervolume: ${result.hypervolume}`);
 ```
 
@@ -981,7 +993,7 @@ const multiMetric = ({ prediction, example }) => ({
 #### Understanding the Results
 
 ```typescript
-const result = await optimizer.compile(program, examples, multiMetric, { maxMetricCalls: 200 } as any);
+const result = await optimizer.compile(program, examples, multiMetric, { maxMetricCalls: 200 });
 
 // Key properties of AxParetoResult:
 console.log(`Pareto frontier size: ${result.paretoFrontSize}`);
diff --git a/src/examples/gepa-flow.ts b/src/examples/gepa-flow.ts
@@ -101,7 +101,7 @@ async function main() {
       verbose: true,
       validationExamples: val,
       maxMetricCalls: 240,
-    } as any
+    }
   );
 
   console.log('\n✅ Pareto optimization complete');
diff --git a/src/examples/gepa-quality-vs-speed-optimization.ts b/src/examples/gepa-quality-vs-speed-optimization.ts
@@ -324,7 +324,7 @@ async function demonstrateGEPAOptimization() {
       auto: 'medium',
       verbose: true,
       maxMetricCalls: 150,
-    } as any
+    }
   );
 
   console.log('\n✅ GEPA optimization completed!');
diff --git a/src/examples/gepa-train-inference.ts b/src/examples/gepa-train-inference.ts
@@ -92,19 +92,14 @@ async function main() {
   });
 
   console.log('🔧 Running GEPA Pareto optimization (accuracy + brevity)...');
-  const result = await optimizer.compile(
-    program as any,
-    train,
-    metric as any,
-    {
-      auto: 'medium',
-      verbose: true,
-      validationExamples: val,
-      maxMetricCalls: 200, // required to bound evaluation cost
-      // Optionally guide scalarization with a specific metric key
-      // paretoMetricKey: 'accuracy',
-    } as any
-  );
+  const result = await optimizer.compile(program as any, train, metric as any, {
+    auto: 'medium',
+    verbose: true,
+    validationExamples: val,
+    maxMetricCalls: 200, // required to bound evaluation cost
+    // Optionally guide scalarization with a specific metric key
+    // paretoMetricKey: 'accuracy',
+  });
 
   console.log(`\n✅ Pareto optimization complete`);
   console.log(`Front size: ${result.paretoFrontSize}`);
diff --git a/src/examples/gepa.ts b/src/examples/gepa.ts
@@ -88,7 +88,7 @@ async function main() {
       verbose: true,
       validationExamples: val,
       maxMetricCalls: 200,
-    } as any
+    }
   );
 
   console.log('\n✅ Pareto optimization complete');