Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add interactive prompts and prettier output to eval CLI
Co-authored-by: JReinhold <[email protected]>
  • Loading branch information
Copilot and JReinhold committed Nov 4, 2025
commit ab9663b2ffe8caeb3ee48d3ea75ed5c23b957da0
129 changes: 116 additions & 13 deletions eval/eval.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { parseArgs } from 'node:util';
import * as v from 'valibot';
import * as p from '@clack/prompts';
import { claudeCodeCli } from './lib/agents/claude-code-cli.ts';
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
Expand All @@ -18,7 +19,7 @@ import { x } from 'tinyexec';
const Args = v.pipe(
v.object({
values: v.object({
agent: v.union([v.literal('claude-code'), v.literal('copilot')]),
agent: v.optional(v.union([v.literal('claude-code'), v.literal('copilot')])),
verbose: v.boolean(),
}),
positionals: v.array(v.string()),
Expand All @@ -29,11 +30,11 @@ const Args = v.pipe(
})),
);

const args = v.parse(
const parsedArgs = v.parse(
Args,
parseArgs({
options: {
agent: { type: 'string', default: 'claude-code', short: 'a' },
agent: { type: 'string', short: 'a' },
verbose: { type: 'boolean', default: false, short: 'v' },
},
strict: false,
Expand All @@ -42,6 +43,76 @@ const args = v.parse(
}),
);

// Display intro
p.intro('🧪 Storybook MCP Evaluations');

// Discover the selectable evals: every sub-directory of ./evals (relative to
// the current working directory) becomes one prompt option.
const evalsDir = path.join(process.cwd(), 'evals');
const availableEvals = await fs.readdir(evalsDir, { withFileTypes: true });
// Plain files are dropped; the directory name is used as both value and label.
const evalOptions = availableEvals.flatMap((entry) =>
  entry.isDirectory() ? [{ value: entry.name, label: entry.name }] : [],
);

/**
 * Prints the shared cancellation notice and terminates the process with exit
 * code 0. Used by every prompt below and by the group-level cancel hook so
 * that aborting always looks the same.
 */
function cancelAndExit(): never {
  p.cancel('Operation cancelled.');
  process.exit(0);
}

// Resolve the run options. A value already present on the parsed arguments is
// used as-is; only the options that are still missing are asked for.
const promptResults = await p.group(
  {
    // Evals to run: taken from the parsed arguments when any were given,
    // otherwise chosen from `evalOptions` via a required multi-select.
    evals: async () => {
      if (parsedArgs.evals.length > 0) {
        return parsedArgs.evals;
      }

      const selectedEvals = await p.multiselect({
        message: 'Select evaluations to run:',
        options: evalOptions,
        required: true,
      });
      if (p.isCancel(selectedEvals)) {
        cancelAndExit();
      }
      return selectedEvals as string[];
    },

    // Agent: taken from the parsed arguments when set, otherwise selected
    // from a fixed list in which the Copilot entry is marked as disabled.
    agent: async () => {
      if (parsedArgs.agent) {
        return parsedArgs.agent;
      }

      const selectedAgent = await p.select({
        message: 'Select agent to use:',
        options: [
          { value: 'claude-code', label: 'Claude Code CLI' },
          { value: 'copilot', label: 'GitHub Copilot', disabled: true },
        ],
      });
      if (p.isCancel(selectedAgent)) {
        cancelAndExit();
      }
      return selectedAgent as 'claude-code' | 'copilot';
    },

    // Verbose is never prompted for; it is passed through unchanged.
    verbose: async () => parsedArgs.verbose,
  },
  { onCancel: cancelAndExit },
);

// Final run options, copied field by field from the resolved prompt results.
// The rest of the script reads `args.agent`, `args.verbose` and `args.evals`.
const args = {
  agent: promptResults.agent,
  verbose: promptResults.verbose,
  evals: promptResults.evals,
};

const evalDirsToPaths = Object.fromEntries(
args.evals.map((evalDir) => [
evalDir,
Expand All @@ -50,15 +121,20 @@ const evalDirsToPaths = Object.fromEntries(
);

// Validate that all eval directories exist
const s = p.spinner();
s.start('Validating eval directories');
for (const evalPath of Object.values(evalDirsToPaths)) {
const dirExists = await fs
.access(evalPath)
.then(() => true)
.catch(() => false);
if (!dirExists) {
throw new TypeError(`Eval directory does not exist: ${evalPath}`);
s.stop('Validation failed');
p.log.error(`Eval directory does not exist: ${evalPath}`);
process.exit(1);
}
}
s.stop('All eval directories validated');

let agent;

Expand All @@ -69,6 +145,8 @@ switch (args.agent) {
}
}

p.log.info(`Running ${args.evals.length} evaluation(s) with ${args.agent}`);

await Promise.all(
Object.entries(evalDirsToPaths).map(async ([evalDir, evalPath]) => {
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
Expand All @@ -88,13 +166,15 @@ await Promise.all(
verbose: args.verbose,
};

console.group(`Running ${evalDir} with ${args.agent}...`);
p.log.step(`\n${evalDir}: Starting evaluation`);

console.log('Setting up experiment...');
await setupExperiment(experimentArgs)
const setupSpinner = p.spinner();
setupSpinner.start('Setting up experiment');
await setupExperiment(experimentArgs);
setupSpinner.stop('Experiment set up');


console.log(`Executing prompt with ${args.agent}...`);
const agentSpinner = p.spinner();
agentSpinner.start(`Executing prompt with ${args.agent}`);
const prompt = await fs.readFile(path.join(evalPath, 'prompt.md'), 'utf8');
const enhancedPrompt = dedent`${prompt}
<constraints>
Expand All @@ -105,11 +185,16 @@ await Promise.all(
env: process.env,
...experimentArgs,
});
agentSpinner.stop(
`Agent completed (${promptResult.turns} turns, ${promptResult.duration}s, $${promptResult.cost})`,
);

console.log('Setting up evaluations...');
const evalSetupSpinner = p.spinner();
evalSetupSpinner.start('Setting up evaluations');
await setupEvaluations(experimentArgs);
evalSetupSpinner.stop('Evaluations set up');

console.log('Starting evaluation...');
p.log.info('Running evaluations...');
const [buildSuccess, typeCheckSuccess, lintSuccess, { tests, a11y }] =
await Promise.all([
build(experimentArgs),
Expand All @@ -118,7 +203,11 @@ await Promise.all(
testStories(experimentArgs),
saveEnvironment(experimentArgs, args.agent),
]);

const prettierSpinner = p.spinner();
prettierSpinner.start('Formatting results');
await x('pnpm', ['exec', 'prettier', '--write', resultsPath]);
prettierSpinner.stop('Results formatted');

const summary = {
...promptResult,
Expand All @@ -132,7 +221,21 @@ await Promise.all(
path.join(resultsPath, 'summary.json'),
JSON.stringify(summary, null, 2),
);
console.log('Evaluation complete. Summary:');
console.log(JSON.stringify(summary, null, 2));

// Log summary with styled output
p.log.success(`${evalDir}: Evaluation complete`);
p.log.info('Summary:');
p.log.message(` Build: ${buildSuccess ? '✅' : '❌'}`);
p.log.message(` Type Check: ${typeCheckSuccess ? '✅' : '❌'}`);
p.log.message(` Lint: ${lintSuccess ? '✅' : '❌'}`);
p.log.message(` Tests: ${tests ? '✅' : '❌'}`);
p.log.message(` A11y: ${a11y ? '✅' : '❌'}`);
p.log.message(
` Duration: ${promptResult.duration}s (API: ${promptResult.durationApi}s, Wall: ${promptResult.durationWall}s)`,
);
p.log.message(` Cost: $${promptResult.cost}`);
p.log.message(` Turns: ${promptResult.turns}`);
}),
);

p.outro('✨ All evaluations complete!');
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>mcp-eval-project-template</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"name": "mcp-eval-project-template",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"build": "vite build",
"dev": "vite",
"lint": "eslint .",
"preview": "vite preview",
"test": "vitest",
"storybook": "storybook dev --port 6006",
"typecheck": "tsc --noEmit --project ./tsconfig.app.json"
},
"dependencies": {
"react": "^19.1.1",
"react-dom": "^19.1.1"
},
"devDependencies": {
"@eslint/js": "^9.36.0",
"@types/node": "^24.6.0",
"@types/react": "^19.1.16",
"@types/react-dom": "^19.1.9",
"@vitejs/plugin-react-swc": "^4.1.0",
"eslint": "^9.36.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.22",
"globals": "^16.4.0",
"typescript": "~5.9.3",
"typescript-eslint": "^8.45.0",
"vite": "^7.1.12"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'

// Application entry point: mounts the React tree into the `#root` element
// declared in index.html. The non-null assertion relies on that element
// being present in the page that loads this module.
createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <div>Hello World</div>
  </StrictMode>,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"target": "ES2022",
"useDefineForClassFields": true,
"lib": ["ES2022", "DOM", "DOM.Iterable"],
"module": "ESNext",
"types": ["vite/client"],
"typeRoots": ["./node_modules/@types/", "./node_modules"],
"skipLibCheck": true,

/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": false,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",

/* Linting */
"strict": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"target": "ES2023",
"lib": ["ES2023"],
"module": "ESNext",
"types": ["node"],
"skipLibCheck": true,

/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"moduleDetection": "force",
"noEmit": true,

/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["vite.config.ts"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react-swc'

// Vite configuration for the template project.
// Reference: https://vite.dev/config/
const reactPlugin = react()

export default defineConfig({
  plugins: [reactPlugin],
  optimizeDeps: {
    // Explicitly listed for dependency pre-bundling.
    include: ['react/jsx-dev-runtime']
  }
})