Skip to content

Commit 3c9b0ec

Browse files
committed
prompt(api): Add OpenAI Responses API support with SSE streaming
WHAT: Implement a Responses API adapter with full SSE streaming support so that Kode CLI works with GPT-5 and other models that require the OpenAI Responses API format.
WHY: GPT-5 and newer models use the OpenAI Responses API (distinct from Chat Completions), which returns streaming SSE responses. Kode CLI needed a conversion layer to translate between the Anthropic API format and the OpenAI Responses API format for seamless model integration.
HOW: Created ResponsesAPIAdapter, which converts Anthropic UnifiedRequestParams to the Responses API format (instructions, input array, max_output_tokens, stream=true); added an SSE parser that collects streaming chunks and converts them back to the UnifiedResponse format; fixed ModelAdapterFactory to properly select the Responses API for GPT-5 models; made parseResponse async across all adapters; added production tests validating end-to-end conversion with actual API calls.
1 parent a4c3f16 commit 3c9b0ec

8 files changed

Lines changed: 692 additions & 29 deletions

File tree

.env.example

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Environment Variables for Production API Tests
2+
# Copy this file to .env and fill in your actual API keys
3+
4+
# Enable production test mode
5+
PRODUCTION_TEST_MODE=true
6+
7+
# GPT-5 Codex Test Configuration
8+
TEST_GPT5_API_KEY=your_gpt5_api_key_here
9+
TEST_GPT5_BASE_URL=http://127.0.0.1:3000/openai
10+
11+
# MiniMax Codex Test Configuration
12+
TEST_MINIMAX_API_KEY=your_minimax_api_key_here
13+
TEST_MINIMAX_BASE_URL=https://api.minimaxi.com/v1
14+
15+
# WARNING:
16+
# - Never commit .env files to version control!
17+
# - The .env file is already in .gitignore
18+
# - API keys should be kept secret and secure

src/constants/modelCapabilities.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ export const MODEL_CAPABILITIES_REGISTRY: Record<string, ModelCapabilities> = {
6464
'gpt-5-mini': GPT5_CAPABILITIES,
6565
'gpt-5-nano': GPT5_CAPABILITIES,
6666
'gpt-5-chat-latest': GPT5_CAPABILITIES,
67+
'gpt-5-codex': GPT5_CAPABILITIES,
6768

6869
// GPT-4 series
6970
'gpt-4o': CHAT_COMPLETIONS_CAPABILITIES,

src/services/adapters/base.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export abstract class ModelAPIAdapter {
1010

1111
// Subclasses must implement these methods
1212
abstract createRequest(params: UnifiedRequestParams): any
13-
abstract parseResponse(response: any): UnifiedResponse
13+
abstract parseResponse(response: any): Promise<UnifiedResponse>
1414
abstract buildTools(tools: Tool[]): any
1515

1616
// Shared utility methods

src/services/adapters/chatCompletions.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ export class ChatCompletionsAdapter extends ModelAPIAdapter {
6464
}))
6565
}
6666

67-
parseResponse(response: any): UnifiedResponse {
67+
async parseResponse(response: any): Promise<UnifiedResponse> {
6868
const choice = response.choices?.[0]
69-
69+
7070
return {
7171
id: response.id || `chatcmpl_${Date.now()}`,
7272
content: choice?.message?.content || '',

src/services/adapters/responsesAPI.ts

Lines changed: 207 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,21 @@ import { zodToJsonSchema } from 'zod-to-json-schema'
55

66
export class ResponsesAPIAdapter extends ModelAPIAdapter {
77
createRequest(params: UnifiedRequestParams): any {
8-
const { messages, systemPrompt, tools, maxTokens } = params
9-
10-
// Separate system messages and user messages
11-
const systemMessages = messages.filter(m => m.role === 'system')
12-
const nonSystemMessages = messages.filter(m => m.role !== 'system')
13-
8+
const { messages, systemPrompt, tools, maxTokens, stream } = params
9+
1410
// Build base request
1511
const request: any = {
1612
model: this.modelProfile.modelName,
17-
input: this.convertMessagesToInput(nonSystemMessages),
18-
instructions: this.buildInstructions(systemPrompt, systemMessages)
13+
input: this.convertMessagesToInput(messages),
14+
instructions: this.buildInstructions(systemPrompt)
1915
}
2016

21-
// Add token limit
22-
request[this.getMaxTokensParam()] = maxTokens
23-
17+
// Add token limit - Responses API uses max_output_tokens
18+
request.max_output_tokens = maxTokens
19+
20+
// Always request streaming; the resulting SSE stream is reassembled in parseResponse
21+
request.stream = true
22+
2423
// Add temperature (GPT-5 only supports 1)
2524
if (this.getTemperature() === 1) {
2625
request.temperature = 1
@@ -101,10 +100,20 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
101100
})
102101
}
103102

104-
parseResponse(response: any): UnifiedResponse {
103+
/**
 * Normalizes a Responses API reply into a UnifiedResponse.
 *
 * A reply that is an object exposing a truthy `body` is treated as a
 * streaming response and handed to parseStreamingResponse; anything else is
 * parsed as an already-materialized payload by parseNonStreamingResponse.
 *
 * @param response - Either a streaming response object or a parsed payload.
 * @returns The unified representation of the reply.
 */
async parseResponse(response: any): Promise<UnifiedResponse> {
  const looksLikeStream =
    Boolean(response) &&
    typeof response === 'object' &&
    'body' in response &&
    Boolean(response.body)

  // Guard clause: plain payloads take the synchronous path
  if (!looksLikeStream) {
    return this.parseNonStreamingResponse(response)
  }

  return await this.parseStreamingResponse(response)
}
112+
113+
private parseNonStreamingResponse(response: any): UnifiedResponse {
105114
// Process basic text output
106115
let content = response.output_text || ''
107-
116+
108117
// Process structured output
109118
if (response.output && Array.isArray(response.output)) {
110119
const messageItems = response.output.filter(item => item.type === 'message')
@@ -123,10 +132,10 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
123132
.join('\n\n')
124133
}
125134
}
126-
135+
127136
// Parse tool calls
128137
const toolCalls = this.parseToolCalls(response)
129-
138+
130139
// Build unified response
131140
return {
132141
id: response.id || `resp_${Date.now()}`,
@@ -140,17 +149,192 @@ export class ResponsesAPIAdapter extends ModelAPIAdapter {
140149
responseId: response.id // Save for state management
141150
}
142151
}
152+
153+
/**
 * Drains a streaming (SSE) Responses API reply and folds every event into a
 * single UnifiedResponse.
 *
 * Events consumed:
 * - any event carrying `response.id`     -> updates the response id
 * - any event carrying `response.usage`  -> updates token usage
 * - `response.output_text.delta`         -> appended to the text content
 * - `response.output_item.done` whose item is a `function_call` -> a tool call
 *
 * Reading is best-effort: if the stream fails part-way, the error is logged
 * and whatever was collected so far is returned.
 *
 * @param response - Object whose `body` exposes `getReader()`.
 * @returns The accumulated content, tool calls, usage and response id.
 */
private async parseStreamingResponse(response: any): Promise<UnifiedResponse> {
  const reader = response.body.getReader()
  const decoder = new TextDecoder()
  let buffer = ''

  let fullContent = ''
  const toolCalls: any[] = []
  let responseId = response.id || `resp_${Date.now()}`
  // Stays at 0 unless the stream reports usage on its response object
  const usage = { promptTokens: 0, completionTokens: 0, reasoningTokens: 0 }

  // Applies one raw SSE line to the accumulators above
  const handleLine = (line: string): void => {
    if (!line.trim()) return

    const parsed = this.parseSSEChunk(line)
    if (!parsed) return

    if (parsed.response?.id) {
      responseId = parsed.response.id
    }

    // NOTE(review): field names follow the Responses API usage object
    // (input_tokens / output_tokens / output_tokens_details) - confirm
    // against the caller's expectations.
    const reported = parsed.response?.usage
    if (reported) {
      usage.promptTokens = reported.input_tokens ?? usage.promptTokens
      usage.completionTokens = reported.output_tokens ?? usage.completionTokens
      usage.reasoningTokens =
        reported.output_tokens_details?.reasoning_tokens ?? usage.reasoningTokens
    }

    if (parsed.type === 'response.output_text.delta') {
      fullContent += parsed.delta || ''
    }

    if (parsed.type === 'response.output_item.done') {
      const item = parsed.item || {}
      if (item.type === 'function_call') {
        toolCalls.push({
          // The index suffix keeps generated ids distinct within one millisecond
          id: item.call_id || item.id || `tool_${Date.now()}_${toolCalls.length}`,
          type: 'tool_call',
          name: item.name,
          arguments: item.arguments
        })
      }
    }
  }

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break

      buffer += decoder.decode(value, { stream: true })
      const lines = buffer.split('\n')
      // The last element is an incomplete line; keep it for the next chunk
      buffer = lines.pop() || ''

      for (const line of lines) {
        handleLine(line)
      }
    }

    // Flush the decoder and process a final line that had no trailing newline
    buffer += decoder.decode()
    handleLine(buffer)
  } catch (error) {
    console.error('Error reading streaming response:', error)
  } finally {
    // No read is pending here, so the lock can be released safely
    reader.releaseLock?.()
  }

  return {
    id: responseId,
    content: fullContent,
    toolCalls,
    usage,
    responseId: responseId
  }
}
221+
222+
/**
 * Decodes a single line of an SSE stream into its JSON payload.
 *
 * Only `data` fields are considered. The space after `data:` is optional in
 * the SSE format, so both `data: {...}` and `data:{...}` are accepted.
 *
 * @param line - One raw line from the event stream.
 * @returns The parsed JSON value, or null when the line is not a data field,
 *   is empty, is the `[DONE]` sentinel, or is not valid JSON (logged).
 */
private parseSSEChunk(line: string): any | null {
  const prefix = 'data:'
  if (!line.startsWith(prefix)) {
    return null
  }

  // trim() also strips the optional leading space and a trailing '\r'
  const data = line.slice(prefix.length).trim()
  if (!data || data === '[DONE]') {
    return null
  }

  try {
    return JSON.parse(data)
  } catch (error) {
    console.error('Error parsing SSE chunk:', error)
    return null
  }
}
143239

144-
private convertMessagesToInput(messages: any[]): any {
145-
// Convert messages to Responses API input format
146-
// May need adjustment based on actual API specification
147-
return messages
240+
/**
 * Translates Chat Completions-style messages into Responses API `input` items.
 *
 * Mapping:
 * - `tool` messages        -> `function_call_output`, keyed by `tool_call_id`
 *                             (falling back to `id`)
 * - assistant `tool_calls` -> one `function_call` item per function-type call
 * - text / image parts     -> a `message` item whose parts are `output_text`
 *                             (assistant), `input_text` (others) or `input_image`
 *
 * Every role other than `assistant` is sent with role `user`. Entries missing
 * the fields their item type requires are skipped.
 *
 * Text on an assistant message that also carries `tool_calls` is emitted as a
 * `message` item ahead of its `function_call` items, so it is not lost.
 *
 * @param messages - Conversation messages in Chat Completions shape.
 * @returns The list of Responses API input items, in conversation order.
 */
private convertMessagesToInput(messages: any[]): any[] {
  const inputItems: any[] = []

  for (const message of messages) {
    const role = message.role

    if (role === 'tool') {
      // Tool results map to function_call_output and need a string call id
      const callId = message.tool_call_id || message.id
      if (typeof callId === 'string' && callId) {
        let content = message.content || ''
        if (Array.isArray(content)) {
          content = content
            .filter(part => typeof part === 'object' && part !== null)
            .map(part => part.text || part.content)
            .filter(text => typeof text === 'string' && text)
            .join('\n')
        }
        if (typeof content === 'string') {
          inputItems.push({
            type: 'function_call_output',
            call_id: callId,
            output: content
          })
        }
      }
      continue
    }

    const isAssistant = role === 'assistant'
    const textKind = isAssistant ? 'output_text' : 'input_text'

    // Collect the text and image parts of the message
    const content = message.content || ''
    const contentItems: any[] = []

    if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part !== 'object' || part === null) continue

        if (part.type === 'text') {
          const text = part.text || part.content || ''
          if (typeof text === 'string' && text) {
            contentItems.push({ type: textKind, text: text })
          }
        } else if (part.type === 'image_url') {
          // image_url may be either { url } or a bare string
          const image = part.image_url
          const url = typeof image === 'object' && image !== null ? image.url : image
          if (typeof url === 'string' && url) {
            contentItems.push({ type: 'input_image', image_url: url })
          }
        }
      }
    } else if (typeof content === 'string' && content) {
      contentItems.push({ type: textKind, text: content })
    }

    if (contentItems.length) {
      inputItems.push({
        type: 'message',
        role: isAssistant ? 'assistant' : 'user',
        content: contentItems
      })
    }

    // Emit the assistant's tool calls after its text
    if (isAssistant && Array.isArray(message.tool_calls)) {
      for (const tc of message.tool_calls) {
        if (typeof tc !== 'object' || tc === null) continue
        if ((tc.type || 'function') !== 'function') continue

        const callId = tc.id || tc.call_id
        const fn = tc.function
        const hasFn = typeof fn === 'object' && fn !== null
        const name = hasFn ? fn.name : null
        const args = hasFn ? fn.arguments : null

        if (typeof callId === 'string' && typeof name === 'string' && typeof args === 'string') {
          inputItems.push({
            type: 'function_call',
            name: name,
            arguments: args,
            call_id: callId
          })
        }
      }
    }
  }

  return inputItems
}
149330

150-
private buildInstructions(systemPrompt: string[], systemMessages: any[]): string {
151-
const systemContent = systemMessages.map(m => m.content).join('\n\n')
152-
const promptContent = systemPrompt.join('\n\n')
153-
return [systemContent, promptContent].filter(Boolean).join('\n\n')
331+
/**
 * Builds the Responses API `instructions` string from the system prompt
 * segments: whitespace-only segments are dropped and the remaining ones are
 * joined with a blank line between them.
 *
 * @param systemPrompt - System prompt segments.
 * @returns The combined instructions text (empty when nothing remains).
 */
private buildInstructions(systemPrompt: string[]): string {
  const nonBlank: string[] = []

  for (const segment of systemPrompt) {
    if (segment.trim()) {
      nonBlank.push(segment)
    }
  }

  return nonBlank.join('\n\n')
}
155339

156340
private parseToolCalls(response: any): any[] {

src/services/claude.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1956,7 +1956,7 @@ async function queryOpenAI(
19561956
// Use Responses API for GPT-5 and similar models
19571957
const { callGPT5ResponsesAPI } = await import('./openai')
19581958
const response = await callGPT5ResponsesAPI(modelProfile, request, signal)
1959-
const unifiedResponse = adapter.parseResponse(response)
1959+
const unifiedResponse = await adapter.parseResponse(response)
19601960

19611961
// Convert unified response back to Anthropic format
19621962
const apiMessage = {

src/services/modelAdapterFactory.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ export class ModelAdapterFactory {
4141
const isOfficialOpenAI = !modelProfile.baseURL ||
4242
modelProfile.baseURL.includes('api.openai.com')
4343

44-
// Non-official endpoints use Chat Completions (even if model supports Responses API)
44+
// Non-official endpoints can use Responses API if model supports it
4545
if (!isOfficialOpenAI) {
4646
// Even when a Chat Completions fallback is declared, prefer the primary architecture
4747
if (capabilities.apiArchitecture.fallback === 'chat_completions') {
48-
return 'chat_completions'
48+
return capabilities.apiArchitecture.primary // ← FIXED: Use primary instead of fallback
4949
}
5050
// Otherwise use primary (might fail, but let it try)
5151
return capabilities.apiArchitecture.primary

0 commit comments

Comments
 (0)