prompt(queryOpenAI): Separate adapter context from API execution

WHAT: Refactored queryOpenAI to prepare adapter context outside withRetry and execute API calls inside withRetry

WHY: The previous implementation mixed adapter preparation and execution, causing type confusion and state management issues

HOW:
- Created AdapterExecutionContext and QueryResult types
- Moved adapter context creation before the withRetry block
- Wrapped all API calls (Responses API, Chat Completions, and legacy) inside withRetry with a unified return structure
- Added a normalizeUsage() helper to handle token field variations
- Ensured responseId and content are preserved through the unified return path
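
A minimal sketch of the resulting control flow (illustrative only; prepareContext and the execute* helpers are hypothetical stand-ins for the inline code in the diff below):

// 1) Prepare: build the adapter and request once, outside the retry loop
let adapterContext: AdapterExecutionContext | null = null
if (useNewAdapterSystem) {
  adapterContext = prepareContext(modelProfile, unifiedParams)
}
// 2) Execute: every API call runs inside withRetry and returns a QueryResult
const queryResult = await withRetry(async () => {
  if (adapterContext?.shouldUseResponses) return executeResponsesAPI(adapterContext)
  if (adapterContext) return executeChatCompletions(adapterContext)
  return executeLegacyPath()
})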
Radon Co 2025-11-11 00:49:01 -08:00
parent 8288378dbd
commit 25adc80161


@ -1882,7 +1882,20 @@ async function queryOpenAI(
let start = Date.now()
// Extract adapter path BEFORE withRetry for cleaner flow
type AdapterExecutionContext = {
adapter: ReturnType<typeof ModelAdapterFactory.createAdapter>
request: any
shouldUseResponses: boolean
}
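// QueryResult pairs the parsed assistant message with the raw provider
// response, so logging after withRetry can record the exact payload
// regardless of which API format produced it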
type QueryResult = {
assistantMessage: AssistantMessage
rawResponse?: any
apiFormat: 'openai' | 'openai_responses'
}
let adapterContext: AdapterExecutionContext | null = null
if (modelProfile && modelProfile.modelName) {
debugLogger.api('CHECKING_ADAPTER_SYSTEM', {
modelProfileName: modelProfile.modelName,
@ -1894,101 +1907,121 @@ async function queryOpenAI(
const USE_NEW_ADAPTER_SYSTEM = process.env.USE_NEW_ADAPTERS !== 'false'
if (USE_NEW_ADAPTER_SYSTEM) {
// New adapter system - extract before withRetry
const adapter = ModelAdapterFactory.createAdapter(modelProfile)
// Build unified request parameters
const reasoningEffort = await getReasoningEffort(modelProfile, messages)
const unifiedParams: UnifiedRequestParams = {
messages: openaiMessages,
systemPrompt: openaiSystem.map(s => s.content as string),
tools,
maxTokens: getMaxTokensFromProfile(modelProfile),
stream: config.stream,
reasoningEffort: reasoningEffort as any,
temperature: isGPT5Model(model) ? 1 : MAIN_QUERY_TEMPERATURE,
previousResponseId: toolUseContext?.responseState?.previousResponseId,
verbosity: 'high', // High verbosity for coding tasks
}
adapterContext = {
adapter,
request: adapter.createRequest(unifiedParams),
shouldUseResponses: ModelAdapterFactory.shouldUseResponsesAPI(
modelProfile,
),
}
}
}
// Legacy ChatCompletion path uses withRetry
let queryResult: QueryResult
let startIncludingRetries = Date.now()
try {
queryResult = await withRetry(async () => {
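// Unified execution: the Responses API, Chat Completions, and legacy paths
// all run inside this withRetry callback and return a QueryResult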
start = Date.now()
// 🔥 GPT-5 Enhanced Parameter Construction
if (adapterContext) {
if (adapterContext.shouldUseResponses) {
const { callGPT5ResponsesAPI } = await import('./openai')
const response = await callGPT5ResponsesAPI(
modelProfile,
adapterContext.request,
signal,
)
const unifiedResponse = await adapterContext.adapter.parseResponse(
response,
)
const assistantMsg: AssistantMessage = {
type: 'assistant',
message: {
role: 'assistant',
content: unifiedResponse.content,
tool_calls: unifiedResponse.toolCalls,
usage: {
input_tokens: unifiedResponse.usage.promptTokens ?? 0,
output_tokens: unifiedResponse.usage.completionTokens ?? 0,
prompt_tokens: unifiedResponse.usage.promptTokens ?? 0,
completion_tokens: unifiedResponse.usage.completionTokens ?? 0,
},
},
costUSD: 0,
durationMs: Date.now() - start,
uuid: `${Date.now()}-${Math.random()
.toString(36)
.substr(2, 9)}` as any,
responseId: unifiedResponse.responseId,
}
return {
assistantMessage: assistantMsg,
rawResponse: unifiedResponse,
apiFormat: 'openai_responses',
}
}
const s = await getCompletionWithProfile(
modelProfile,
adapterContext.request,
0,
10,
signal,
)
let finalResponse
if (config.stream) {
finalResponse = await handleMessageStream(
s as ChatCompletionStream,
signal,
)
} else {
finalResponse = s
}
const message = convertOpenAIResponseToAnthropic(finalResponse, tools)
const assistantMsg: AssistantMessage = {
type: 'assistant',
message: message as any,
costUSD: 0,
durationMs: Date.now() - start,
uuid: `${Date.now()}-${Math.random()
.toString(36)
.substr(2, 9)}` as any,
}
return {
assistantMessage: assistantMsg,
rawResponse: finalResponse,
apiFormat: 'openai',
}
}
const maxTokens = getMaxTokensFromProfile(modelProfile)
const isGPT5 = isGPT5Model(model)
const opts: OpenAI.ChatCompletionCreateParams = {
model,
...(isGPT5
? { max_completion_tokens: maxTokens }
: { max_tokens: maxTokens }),
messages: [...openaiSystem, ...openaiMessages],
temperature: isGPT5 ? 1 : MAIN_QUERY_TEMPERATURE,
}
if (config.stream) {
@ -2007,19 +2040,34 @@ async function queryOpenAI(
opts.reasoning_effort = reasoningEffort
}
// Legacy system (preserved for fallback)
const completionFunction = isGPT5Model(modelProfile?.modelName || '')
? getGPT5CompletionWithProfile
: getCompletionWithProfile
const s = await completionFunction(modelProfile, opts, 0, 10, signal)
let finalResponse
if (opts.stream) {
finalResponse = await handleMessageStream(
s as ChatCompletionStream,
signal,
)
} else {
finalResponse = s
}
const message = convertOpenAIResponseToAnthropic(finalResponse, tools)
const assistantMsg: AssistantMessage = {
type: 'assistant',
message: message as any,
costUSD: 0,
durationMs: Date.now() - start,
uuid: `${Date.now()}-${Math.random()
.toString(36)
.substr(2, 9)}` as any,
}
return {
assistantMessage: assistantMsg,
rawResponse: finalResponse,
apiFormat: 'openai',
}
}, { signal })
} catch (error) {
logError(error)
@ -2029,12 +2077,20 @@ async function queryOpenAI(
const durationMs = Date.now() - start
const durationMsIncludingRetries = Date.now() - startIncludingRetries
const assistantMessage = queryResult.assistantMessage
assistantMessage.message.content = normalizeContentFromAPI(
assistantMessage.message.content || [],
)
const normalizedUsage = normalizeUsage(assistantMessage.message.usage)
assistantMessage.message.usage = normalizedUsage
const inputTokens = normalizedUsage.input_tokens ?? 0
const outputTokens = normalizedUsage.output_tokens ?? 0
const cacheReadInputTokens = normalizedUsage.cache_read_input_tokens ?? 0
const cacheCreationInputTokens =
normalizedUsage.cache_creation_input_tokens ?? 0
const costUSD =
(inputTokens / 1_000_000) * SONNET_COST_PER_MILLION_INPUT_TOKENS +
(outputTokens / 1_000_000) * SONNET_COST_PER_MILLION_OUTPUT_TOKENS +
@ -2045,41 +2101,26 @@ async function queryOpenAI(
addToTotalCost(costUSD, durationMsIncludingRetries)
// Log the complete LLM interaction for debugging (OpenAI path)
logLLMInteraction({
systemPrompt: systemPrompt.join('\n'),
messages: [...openaiSystem, ...openaiMessages],
response: queryResult.rawResponse || assistantMessage.message,
usage: {
inputTokens,
outputTokens,
},
timing: {
start,
end: Date.now(),
},
apiFormat: queryResult.apiFormat,
})
assistantMessage.costUSD = costUSD
assistantMessage.durationMs = durationMs
assistantMessage.uuid = assistantMessage.uuid || (randomUUID() as UUID)
return assistantMessage
}
function getMaxTokensFromProfile(modelProfile: any): number {
@ -2087,6 +2128,45 @@ function getMaxTokensFromProfile(modelProfile: any): number {
return modelProfile?.maxTokens || 8000
}
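// Reconciles token-usage field variants across providers: Anthropic-style
// snake_case (input_tokens), OpenAI-style (prompt_tokens), and camelCase
// adapter fields (inputTokens), defaulting missing counts to zero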
function normalizeUsage(usage?: any) {
if (!usage) {
return {
input_tokens: 0,
output_tokens: 0,
cache_read_input_tokens: 0,
cache_creation_input_tokens: 0,
}
}
const inputTokens =
usage.input_tokens ??
usage.prompt_tokens ??
usage.inputTokens ??
0
const outputTokens =
usage.output_tokens ??
usage.completion_tokens ??
usage.outputTokens ??
0
const cacheReadInputTokens =
usage.cache_read_input_tokens ??
usage.prompt_token_details?.cached_tokens ??
usage.cacheReadInputTokens ??
0
const cacheCreationInputTokens =
usage.cache_creation_input_tokens ??
usage.cacheCreatedInputTokens ??
0
return {
...usage,
input_tokens: inputTokens,
output_tokens: outputTokens,
cache_read_input_tokens: cacheReadInputTokens,
cache_creation_input_tokens: cacheCreationInputTokens,
}
}
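// Illustrative example (not part of this change): an OpenAI Chat Completions
// usage payload normalized for Anthropic-style cost accounting:
// normalizeUsage({ prompt_tokens: 120, completion_tokens: 45, prompt_token_details: { cached_tokens: 30 } })
// => { input_tokens: 120, output_tokens: 45, cache_read_input_tokens: 30, cache_creation_input_tokens: 0, ... }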
function getModelInputTokenCostUSD(model: string): number {
// Find the model in the models object
for (const providerModels of Object.values(models)) {