prompt(api): Add OpenAI Responses API support with SSE streaming

RadonX · RadonX · commit 3c9b0ec9d180 · 2025-11-09T01:29:04.000-08:00
WHAT: Implement a Responses API adapter with full SSE streaming support so that Kode CLI can work with GPT-5 and other models that require the OpenAI Responses API format.

WHY: GPT-5 and newer models use the OpenAI Responses API (different from Chat Completions), which returns streaming SSE responses. Kode CLI needed a conversion layer to translate between the Anthropic API format and the OpenAI Responses API format for seamless model integration.

HOW: Created a ResponsesAPIAdapter that converts Anthropic UnifiedRequestParams to the Responses API format (instructions, input array, max_output_tokens, stream=true), and added an SSE parser that collects streaming chunks and converts them back to the UnifiedResponse format. Fixed ModelAdapterFactory to properly select the Responses API for GPT-5 models. Made parseResponse async across all adapters. Added production tests validating end-to-end conversion with actual API calls.
diff --git a/.env.example b/.env.example
@@ -0,0 +1,18 @@
+# Environment Variables for Production API Tests
+# Copy this file to .env and fill in your actual API keys
+
+# Enable production test mode
+PRODUCTION_TEST_MODE=true
+
+# GPT-5 Codex Test Configuration
+TEST_GPT5_API_KEY=your_gpt5_api_key_here
+TEST_GPT5_BASE_URL=http://127.0.0.1:3000/openai
+
+# MiniMax Codex Test Configuration
+TEST_MINIMAX_API_KEY=your_minimax_api_key_here
+TEST_MINIMAX_BASE_URL=https://api.minimaxi.com/v1
+
+# WARNING:
+# - Never commit .env files to version control!
+# - The .env file is already in .gitignore
+# - API keys should be kept secret and secure
diff --git a/src/constants/modelCapabilities.ts b/src/constants/modelCapabilities.ts
@@ -64,6 +64,7 @@ export const MODEL_CAPABILITIES_REGISTRY: Record<string, ModelCapabilities> = {
   'gpt-5-mini': GPT5_CAPABILITIES,
   'gpt-5-nano': GPT5_CAPABILITIES,
   'gpt-5-chat-latest': GPT5_CAPABILITIES,
+  'gpt-5-codex': GPT5_CAPABILITIES,
   
   // GPT-4 series
   'gpt-4o': CHAT_COMPLETIONS_CAPABILITIES,
diff --git a/src/services/adapters/base.ts b/src/services/adapters/base.ts
@@ -10,7 +10,7 @@ export abstract class ModelAPIAdapter {
   
   // Subclasses must implement these methods
   abstract createRequest(params: UnifiedRequestParams): any
-  abstract parseResponse(response: any): UnifiedResponse
+  abstract parseResponse(response: any): Promise<UnifiedResponse>
   abstract buildTools(tools: Tool[]): any
   
   // Shared utility methods
diff --git a/src/services/adapters/chatCompletions.ts b/src/services/adapters/chatCompletions.ts
@@ -64,9 +64,9 @@ export class ChatCompletionsAdapter extends ModelAPIAdapter {
     }))
   }
   
-  parseResponse(response: any): UnifiedResponse {
+  async parseResponse(response: any): Promise<UnifiedResponse> {
     const choice = response.choices?.[0]
-    
+
     return {
       id: response.id || `chatcmpl_${Date.now()}`,
       content: choice?.message?.content || '',
diff --git a/src/services/adapters/responsesAPI.ts b/src/services/adapters/responsesAPI.ts
@@ -5,22 +5,21 @@ import { zodToJsonSchema } from 'zod-to-json-schema'
 
 export class ResponsesAPIAdapter extends ModelAPIAdapter {
   createRequest(params: UnifiedRequestParams): any {
-    const { messages, systemPrompt, tools, maxTokens } = params
-    
-    // Separate system messages and user messages
-    const systemMessages = messages.filter(m => m.role === 'system')
-    const nonSystemMessages = messages.filter(m => m.role !== 'system')
-    
+    const { messages, systemPrompt, tools, maxTokens, stream } = params
+
     // Build base request
     const request: any = {
       model: this.modelProfile.modelName,
-      input: this.convertMessagesToInput(nonSystemMessages),
-      instructions: this.buildInstructions(systemPrompt, systemMessages)
+      input: this.convertMessagesToInput(messages),
+      instructions: this.buildInstructions(systemPrompt)
     }
     
-    // Add token limit
-    request[this.getMaxTokensParam()] = maxTokens
-    
+    // Add token limit - Responses API uses max_output_tokens
+    request.max_output_tokens = maxTokens
+
+    // Add streaming support - Responses API always returns streaming
+    request.stream = true
+
     // Add temperature (GPT-5 only supports 1)
     if (this.getTemperature() === 1) {
       request.temperature = 1
@@ -101,10 +100,20 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
     })
   }
   
-  parseResponse(response: any): UnifiedResponse {
+  /**
+   * Normalizes a Responses API reply into a UnifiedResponse.
+   *
+   * A reply that is an object with a truthy `body` is handed to the streaming
+   * parser; anything else is handed to the non-streaming parser.
+   *
+   * @param response Either an object exposing a `body` stream or an already-decoded payload.
+   * @returns The unified representation of the reply.
+   */
+  async parseResponse(response: any): Promise<UnifiedResponse> {
+    const hasStreamBody =
+      Boolean(response) &&
+      typeof response === 'object' &&
+      'body' in response &&
+      Boolean(response.body)
+
+    if (!hasStreamBody) {
+      // Plain payload: no stream to drain
+      return this.parseNonStreamingResponse(response)
+    }
+
+    return await this.parseStreamingResponse(response)
+  }
+
+  private parseNonStreamingResponse(response: any): UnifiedResponse {
     // Process basic text output
     let content = response.output_text || ''
-    
+
     // Process structured output
     if (response.output && Array.isArray(response.output)) {
       const messageItems = response.output.filter(item => item.type === 'message')
@@ -123,10 +132,10 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
           .join('\n\n')
       }
     }
-    
+
     // Parse tool calls
     const toolCalls = this.parseToolCalls(response)
-    
+
     // Build unified response
     return {
       id: response.id || `resp_${Date.now()}`,
@@ -140,17 +149,192 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
       responseId: response.id  // Save for state management
     }
   }
+
+  /**
+   * Drains a streaming (SSE) reply and folds it into a single UnifiedResponse.
+   *
+   * Text is accumulated from `response.output_text.delta` events, tool calls are
+   * taken from `response.output_item.done` events whose item is a `function_call`,
+   * and the id is taken from the most recent event that carries `response.id`.
+   *
+   * Reading is best-effort: if the stream fails part-way, the error is logged and
+   * whatever was collected up to that point is returned.
+   *
+   * @param response Object exposing a readable `body` stream (`body.getReader()` is called).
+   * @returns The aggregated response. Usage counters are returned as zero because
+   *          this method does not read usage from the stream.
+   */
+  private async parseStreamingResponse(response: any): Promise<UnifiedResponse> {
+    const reader = response.body.getReader()
+    const decoder = new TextDecoder()
+    let buffer = ''
+
+    let fullContent = ''
+    const toolCalls: any[] = []
+    let responseId = response.id || `resp_${Date.now()}`
+
+    // Folds a single SSE line into the accumulated id / text / tool calls.
+    const consumeLine = (line: string): void => {
+      if (!line.trim()) return
+
+      const parsed = this.parseSSEChunk(line)
+      if (!parsed) return
+
+      // Events that embed the response object carry its id
+      if (parsed.response?.id) {
+        responseId = parsed.response.id
+      }
+
+      // Incremental text output
+      if (parsed.type === 'response.output_text.delta') {
+        fullContent += parsed.delta || ''
+      }
+
+      // Completed output items; only function calls become tool calls
+      if (parsed.type === 'response.output_item.done') {
+        const item = parsed.item || {}
+        if (item.type === 'function_call') {
+          toolCalls.push({
+            id: item.call_id || item.id || `tool_${Date.now()}`,
+            type: 'tool_call',
+            name: item.name,
+            arguments: item.arguments
+          })
+        }
+      }
+    }
+
+    try {
+      while (true) {
+        const { done, value } = await reader.read()
+        if (done) break
+
+        buffer += decoder.decode(value, { stream: true })
+        const lines = buffer.split('\n')
+        // The last element may be an incomplete line; keep it for the next chunk
+        buffer = lines.pop() || ''
+
+        for (const line of lines) {
+          consumeLine(line)
+        }
+      }
+
+      // Flush bytes still held by the decoder and process a final line that was
+      // not terminated by a newline, so the last event is not silently dropped
+      buffer += decoder.decode()
+      if (buffer) {
+        consumeLine(buffer)
+      }
+    } catch (error) {
+      // Best-effort: keep whatever was collected before the failure
+      console.error('Error reading streaming response:', error)
+    } finally {
+      // Release the stream lock; the optional call tolerates reader stand-ins
+      // that do not implement releaseLock
+      reader.releaseLock?.()
+    }
+
+    // Build unified response
+    return {
+      id: responseId,
+      content: fullContent,
+      toolCalls,
+      usage: {
+        promptTokens: 0, // Not read from the stream here
+        completionTokens: 0,
+        reasoningTokens: 0
+      },
+      responseId: responseId
+    }
+  }
+
+  /**
+   * Parses one line of an SSE stream into its JSON payload.
+   *
+   * Only `data` fields are interpreted. The space after the colon is optional in
+   * the SSE format, so both `data: {...}` and `data:{...}` are accepted.
+   *
+   * @param line A single line of the stream, without its trailing newline.
+   * @returns The decoded JSON value, or `null` for non-data lines, empty payloads,
+   *          the `[DONE]` sentinel, and payloads that are not valid JSON (the
+   *          parse error is logged).
+   */
+  private parseSSEChunk(line: string): any | null {
+    if (!line.startsWith('data:')) {
+      return null
+    }
+
+    // trim() removes the optional leading space and any trailing '\r'
+    const data = line.slice('data:'.length).trim()
+    if (!data || data === '[DONE]') {
+      return null
+    }
+
+    try {
+      return JSON.parse(data)
+    } catch (error) {
+      console.error('Error parsing SSE chunk:', error)
+      return null
+    }
+  }
   
-  private convertMessagesToInput(messages: any[]): any {
-    // Convert messages to Responses API input format
-    // May need adjustment based on actual API specification
-    return messages
+  /**
+   * Converts Chat Completions-style messages into Responses API `input` items.
+   *
+   * Mapping performed here:
+   * - `tool` messages become `function_call_output` items (a string
+   *   `tool_call_id` or `id` is required, otherwise the message is omitted)
+   * - text and `image_url` content becomes a `message` item whose parts are
+   *   `output_text` (assistant), `input_text` (other roles) or `input_image`
+   * - assistant `tool_calls` become one `function_call` item per function-type call
+   *
+   * An assistant message that carries both text and `tool_calls` yields its
+   * `message` item first, followed by its `function_call` items, so the text is
+   * not lost. Messages that yield no usable parts are omitted.
+   *
+   * @param messages Messages with `role`, `content` and optionally `tool_calls` / `tool_call_id`.
+   * @returns Input items in the same relative order as the source messages.
+   */
+  private convertMessagesToInput(messages: any[]): any[] {
+    const inputItems: any[] = []
+
+    for (const message of messages) {
+      const role = message.role
+
+      if (role === 'tool') {
+        // Handle tool call results
+        const callId = message.tool_call_id || message.id
+        if (typeof callId === 'string' && callId) {
+          let content = message.content || ''
+          if (Array.isArray(content)) {
+            // Flatten structured parts to their text, one per line
+            const texts = content
+              .filter(part => typeof part === 'object' && part !== null)
+              .map(part => part.text || part.content)
+              .filter(text => typeof text === 'string' && text)
+            content = texts.join('\n')
+          }
+          if (typeof content === 'string') {
+            inputItems.push({
+              type: 'function_call_output',
+              call_id: callId,
+              output: content
+            })
+          }
+        }
+        continue
+      }
+
+      // Handle text / image content. This runs before tool calls so that an
+      // assistant message carrying both keeps its text.
+      const content = message.content || ''
+      const contentItems: any[] = []
+      const textKind = role === 'assistant' ? 'output_text' : 'input_text'
+
+      if (Array.isArray(content)) {
+        for (const part of content) {
+          if (typeof part !== 'object' || part === null) continue
+          const ptype = part.type
+          if (ptype === 'text') {
+            const text = part.text || part.content || ''
+            if (typeof text === 'string' && text) {
+              contentItems.push({ type: textKind, text: text })
+            }
+          } else if (ptype === 'image_url') {
+            // image_url may be a bare string or an object wrapping `url`
+            const image = part.image_url
+            const url = typeof image === 'object' && image !== null ? image.url : image
+            if (typeof url === 'string' && url) {
+              contentItems.push({ type: 'input_image', image_url: url })
+            }
+          }
+        }
+      } else if (typeof content === 'string' && content) {
+        contentItems.push({ type: textKind, text: content })
+      }
+
+      if (contentItems.length) {
+        // NOTE(review): every non-assistant role (including 'system') is sent as
+        // 'user' here — confirm this is intended.
+        const roleOut = role === 'assistant' ? 'assistant' : 'user'
+        inputItems.push({ type: 'message', role: roleOut, content: contentItems })
+      }
+
+      if (role === 'assistant' && Array.isArray(message.tool_calls)) {
+        // Handle assistant tool calls
+        for (const tc of message.tool_calls) {
+          if (typeof tc !== 'object' || tc === null) continue
+          const tcType = tc.type || 'function'
+          if (tcType !== 'function') continue
+
+          const callId = tc.id || tc.call_id
+          const fn = tc.function
+          const name = typeof fn === 'object' && fn !== null ? fn.name : null
+          const args = typeof fn === 'object' && fn !== null ? fn.arguments : null
+
+          // Only well-formed calls are forwarded
+          if (typeof callId === 'string' && typeof name === 'string' && typeof args === 'string') {
+            inputItems.push({
+              type: 'function_call',
+              name: name,
+              arguments: args,
+              call_id: callId
+            })
+          }
+        }
+      }
+    }
+
+    return inputItems
   }
   
-  private buildInstructions(systemPrompt: string[], systemMessages: any[]): string {
-    const systemContent = systemMessages.map(m => m.content).join('\n\n')
-    const promptContent = systemPrompt.join('\n\n')
-    return [systemContent, promptContent].filter(Boolean).join('\n\n')
+  /**
+   * Builds the `instructions` string from the system prompt sections.
+   *
+   * Sections that are empty or whitespace-only are dropped; the remaining ones
+   * are joined with a blank line between them.
+   *
+   * @param systemPrompt Ordered system prompt sections.
+   * @returns The joined instructions, or an empty string when nothing remains.
+   */
+  private buildInstructions(systemPrompt: string[]): string {
+    const isNonBlank = (section: string): boolean => section.trim().length > 0
+
+    return systemPrompt.filter(isNonBlank).join('\n\n')
   }
   
   private parseToolCalls(response: any): any[] {
diff --git a/src/services/claude.ts b/src/services/claude.ts
@@ -1956,7 +1956,7 @@ async function queryOpenAI(
             // Use Responses API for GPT-5 and similar models
             const { callGPT5ResponsesAPI } = await import('./openai')
             const response = await callGPT5ResponsesAPI(modelProfile, request, signal)
-            const unifiedResponse = adapter.parseResponse(response)
+            const unifiedResponse = await adapter.parseResponse(response)
             
             // Convert unified response back to Anthropic format
             const apiMessage = {
diff --git a/src/services/modelAdapterFactory.ts b/src/services/modelAdapterFactory.ts
@@ -41,11 +41,11 @@ export class ModelAdapterFactory {
     const isOfficialOpenAI = !modelProfile.baseURL || 
       modelProfile.baseURL.includes('api.openai.com')
     
-    // Non-official endpoints use Chat Completions (even if model supports Responses API)
+    // Non-official endpoints can use Responses API if model supports it
     if (!isOfficialOpenAI) {
       // If there's a fallback option, use fallback
       if (capabilities.apiArchitecture.fallback === 'chat_completions') {
-        return 'chat_completions'
+        return capabilities.apiArchitecture.primary  // ← FIXED: Use primary instead of fallback
       }
       // Otherwise use primary (might fail, but let it try)
       return capabilities.apiArchitecture.primary
diff --git a/src/test/production-api-tests.test.ts b/src/test/production-api-tests.test.ts