prompt(queryOpenAI): Separate adapter context from API execution
WHAT: Refactored queryOpenAI to prepare the adapter context outside withRetry and execute API calls inside withRetry.

WHY: The previous implementation mixed adapter preparation with execution, causing type confusion and state-management issues.

HOW: Created the AdapterExecutionContext and QueryResult types, moved adapter context creation before the withRetry block, wrapped all API calls (Responses API, Chat Completions, and legacy) inside withRetry behind a unified return structure, added a normalizeUsage() helper to handle token field variations, and ensured responseId and content are preserved through the unified return path.
This commit is contained in:
parent 8288378dbd
commit 25adc80161
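Before the diff, here is the shape of the change as a minimal sketch. Everything named here (queryShape, prepareContext, callResponsesAPI, callChatCompletions, and this toy withRetry) is an illustrative stand-in, not the project's actual signatures:

// Toy retry helper, assumed behavior only: re-invoke fn with exponential
// backoff until it succeeds or attempts run out.
async function withRetry<T>(fn: () => Promise<T>, maxAttempts = 3): Promise<T> {
  let lastError: unknown
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      lastError = err
      await new Promise(resolve => setTimeout(resolve, 2 ** attempt * 100))
    }
  }
  throw lastError
}

type AdapterExecutionContext = {
  request: unknown
  shouldUseResponses: boolean
}

type QueryResult = {
  assistantMessage: unknown
  rawResponse?: unknown
  apiFormat: 'openai' | 'openai_responses'
}

async function queryShape(
  prepareContext: () => AdapterExecutionContext,
  callResponsesAPI: (request: unknown) => Promise<QueryResult>,
  callChatCompletions: (request: unknown) => Promise<QueryResult>,
): Promise<QueryResult> {
  // Preparation runs exactly once, outside the retry loop: building the
  // request is deterministic and should not be repeated on a network failure.
  const ctx = prepareContext()

  // Only the I/O runs inside withRetry, so a retry re-executes the API call
  // with the same prepared request, and both branches return one unified shape.
  return withRetry(() =>
    ctx.shouldUseResponses
      ? callResponsesAPI(ctx.request)
      : callChatCompletions(ctx.request),
  )
}

The unified QueryResult is what lets the post-retry bookkeeping (usage, cost, logging) stop caring which API produced the response.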
@@ -1882,7 +1882,20 @@ async function queryOpenAI(
   let start = Date.now()
 
+  // Extract adapter path BEFORE withRetry for cleaner flow
+  type AdapterExecutionContext = {
+    adapter: ReturnType<typeof ModelAdapterFactory.createAdapter>
+    request: any
+    shouldUseResponses: boolean
+  }
+
+  type QueryResult = {
+    assistantMessage: AssistantMessage
+    rawResponse?: any
+    apiFormat: 'openai' | 'openai_responses'
+  }
+
+  let adapterContext: AdapterExecutionContext | null = null
+
   if (modelProfile && modelProfile.modelName) {
     debugLogger.api('CHECKING_ADAPTER_SYSTEM', {
       modelProfileName: modelProfile.modelName,
@@ -1894,101 +1907,121 @@ async function queryOpenAI(
     const USE_NEW_ADAPTER_SYSTEM = process.env.USE_NEW_ADAPTERS !== 'false'
 
     if (USE_NEW_ADAPTER_SYSTEM) {
       // New adapter system - extract before withRetry
       const adapter = ModelAdapterFactory.createAdapter(modelProfile)
 
       // Build unified request parameters
       const reasoningEffort = await getReasoningEffort(modelProfile, messages)
       const unifiedParams: UnifiedRequestParams = {
         messages: openaiMessages,
         systemPrompt: openaiSystem.map(s => s.content as string),
-        tools: tools,
+        tools,
         maxTokens: getMaxTokensFromProfile(modelProfile),
         stream: config.stream,
         reasoningEffort: reasoningEffort as any,
         temperature: isGPT5Model(model) ? 1 : MAIN_QUERY_TEMPERATURE,
         previousResponseId: toolUseContext?.responseState?.previousResponseId,
-        verbosity: 'high' // High verbosity for coding tasks
+        verbosity: 'high',
       }
 
-      // Create request using adapter
-      const request = adapter.createRequest(unifiedParams)
-
-      // Determine which API to use
-      const shouldUseResponses = ModelAdapterFactory.shouldUseResponsesAPI(modelProfile)
-
-      if (shouldUseResponses) {
-        // Use Responses API for GPT-5 and similar models
-        // NOTE: Direct call without withRetry for separation of concerns
-        // If retry logic is needed later, wrap in withRetry:
-        // const response = await withRetry(() => callGPT5ResponsesAPI(modelProfile, request, signal))
-        const { callGPT5ResponsesAPI } = await import('./openai')
-        const response = await callGPT5ResponsesAPI(modelProfile, request, signal)
-        const unifiedResponse = await adapter.parseResponse(response)
-
-        // Convert unified response back to Anthropic format
-        const apiMessage = {
-          role: 'assistant' as const,
-          content: unifiedResponse.content,
-          tool_calls: unifiedResponse.toolCalls,
-          usage: {
-            prompt_tokens: unifiedResponse.usage.promptTokens,
-            completion_tokens: unifiedResponse.usage.completionTokens,
-          }
-        }
-        const assistantMsg: AssistantMessage = {
-          type: 'assistant',
-          message: apiMessage as any,
-          costUSD: 0, // Will be calculated later
-          durationMs: Date.now() - start,
-          uuid: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}` as any,
-          responseId: unifiedResponse.responseId
-        }
-
-        return assistantMsg
-      } else {
-        // Use Chat Completions adapter (not withRetry)
-        // NOTE: The ChatCompletionsAdapter is created above and used to build the request,
-        // but parseResponse() is not called here. Instead, we use legacy functions for backward compatibility.
-        // Future improvement: Call adapter.parseResponse() to fully utilize the adapter pattern.
-        const s = await getCompletionWithProfile(modelProfile, request, 0, 10, signal)
-        let finalResponse
-        if (config.stream) {
-          finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
-        } else {
-          finalResponse = s
-        }
-        const message = convertOpenAIResponseToAnthropic(finalResponse, tools)
-        const assistantMsg: AssistantMessage = {
-          type: 'assistant',
-          message: message as any,
-          costUSD: 0, // Will be calculated later
-          durationMs: Date.now() - start,
-          uuid: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}` as any
-        }
-        return assistantMsg
-      }
+      adapterContext = {
+        adapter,
+        request: adapter.createRequest(unifiedParams),
+        shouldUseResponses: ModelAdapterFactory.shouldUseResponsesAPI(
+          modelProfile,
+        ),
+      }
     }
   }
 
   // Legacy ChatCompletion path uses withRetry
-  let response
+  let queryResult: QueryResult
   let startIncludingRetries = Date.now()
 
   try {
-    response = await withRetry(async () => {
+    queryResult = await withRetry(async () => {
       start = Date.now()
 
       // 🔥 GPT-5 Enhanced Parameter Construction
+      if (adapterContext) {
+        if (adapterContext.shouldUseResponses) {
+          const { callGPT5ResponsesAPI } = await import('./openai')
+          const response = await callGPT5ResponsesAPI(
+            modelProfile,
+            adapterContext.request,
+            signal,
+          )
+          const unifiedResponse = await adapterContext.adapter.parseResponse(
+            response,
+          )
+
+          const assistantMsg: AssistantMessage = {
+            type: 'assistant',
+            message: {
+              role: 'assistant',
+              content: unifiedResponse.content,
+              tool_calls: unifiedResponse.toolCalls,
+              usage: {
+                input_tokens: unifiedResponse.usage.promptTokens ?? 0,
+                output_tokens: unifiedResponse.usage.completionTokens ?? 0,
+                prompt_tokens: unifiedResponse.usage.promptTokens ?? 0,
+                completion_tokens: unifiedResponse.usage.completionTokens ?? 0,
+              },
+            },
+            costUSD: 0,
+            durationMs: Date.now() - start,
+            uuid: `${Date.now()}-${Math.random()
+              .toString(36)
+              .substr(2, 9)}` as any,
+            responseId: unifiedResponse.responseId,
+          }
+
+          return {
+            assistantMessage: assistantMsg,
+            rawResponse: unifiedResponse,
+            apiFormat: 'openai_responses',
+          }
+        }
+
+        const s = await getCompletionWithProfile(
+          modelProfile,
+          adapterContext.request,
+          0,
+          10,
+          signal,
+        )
+        let finalResponse
+        if (config.stream) {
+          finalResponse = await handleMessageStream(
+            s as ChatCompletionStream,
+            signal,
+          )
+        } else {
+          finalResponse = s
+        }
+
+        const message = convertOpenAIResponseToAnthropic(finalResponse, tools)
+        const assistantMsg: AssistantMessage = {
+          type: 'assistant',
+          message: message as any,
+          costUSD: 0,
+          durationMs: Date.now() - start,
+          uuid: `${Date.now()}-${Math.random()
+            .toString(36)
+            .substr(2, 9)}` as any,
+        }
+
+        return {
+          assistantMessage: assistantMsg,
+          rawResponse: finalResponse,
+          apiFormat: 'openai',
+        }
+      }
 
       const maxTokens = getMaxTokensFromProfile(modelProfile)
       const isGPT5 = isGPT5Model(model)
 
       const opts: OpenAI.ChatCompletionCreateParams = {
         model,
-        ...(isGPT5 ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens }),
+        ...(isGPT5
+          ? { max_completion_tokens: maxTokens }
+          : { max_tokens: maxTokens }),
         messages: [...openaiSystem, ...openaiMessages],
         temperature: isGPT5 ? 1 : MAIN_QUERY_TEMPERATURE,
       }
       if (config.stream) {
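One detail in the hunk above worth calling out: start is reassigned inside the retry closure, while startIncludingRetries is captured before the try, so the two durations diverge whenever a retry happens. A sketch of that timing behavior, reusing the toy withRetry from the sketch above (doApiCall is hypothetical):

declare function doApiCall(): Promise<string> // hypothetical call, illustration only

async function timedQuery() {
  let start = Date.now()                   // will be reset per attempt
  const startIncludingRetries = Date.now() // fixed across all attempts

  const result = await withRetry(async () => {
    start = Date.now() // per-attempt timer: excludes earlier failed attempts
    return doApiCall()
  })

  const durationMs = Date.now() - start // last (successful) attempt only
  const durationMsIncludingRetries = Date.now() - startIncludingRetries // total wall clock
  return { result, durationMs, durationMsIncludingRetries }
}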
@@ -2007,19 +2040,34 @@ async function queryOpenAI(
         opts.reasoning_effort = reasoningEffort
       }
 
       // Legacy system (preserved for fallback)
       const completionFunction = isGPT5Model(modelProfile?.modelName || '')
         ? getGPT5CompletionWithProfile
         : getCompletionWithProfile
       const s = await completionFunction(modelProfile, opts, 0, 10, signal)
       let finalResponse
       if (opts.stream) {
-        finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
+        finalResponse = await handleMessageStream(
+          s as ChatCompletionStream,
+          signal,
+        )
       } else {
         finalResponse = s
       }
-      const r = convertOpenAIResponseToAnthropic(finalResponse, tools)
-      return r
+      const message = convertOpenAIResponseToAnthropic(finalResponse, tools)
+      const assistantMsg: AssistantMessage = {
+        type: 'assistant',
+        message: message as any,
+        costUSD: 0,
+        durationMs: Date.now() - start,
+        uuid: `${Date.now()}-${Math.random()
+          .toString(36)
+          .substr(2, 9)}` as any,
+      }
+      return {
+        assistantMessage: assistantMsg,
+        rawResponse: finalResponse,
+        apiFormat: 'openai',
+      }
     }, { signal })
   } catch (error) {
     logError(error)
@@ -2029,12 +2077,20 @@ async function queryOpenAI(
   const durationMs = Date.now() - start
   const durationMsIncludingRetries = Date.now() - startIncludingRetries
 
-  const inputTokens = response.usage?.prompt_tokens ?? 0
-  const outputTokens = response.usage?.completion_tokens ?? 0
-  const cacheReadInputTokens =
-    response.usage?.prompt_token_details?.cached_tokens ?? 0
+  const assistantMessage = queryResult.assistantMessage
+  assistantMessage.message.content = normalizeContentFromAPI(
+    assistantMessage.message.content || [],
+  )
+
+  const normalizedUsage = normalizeUsage(assistantMessage.message.usage)
+  assistantMessage.message.usage = normalizedUsage
+
+  const inputTokens = normalizedUsage.input_tokens ?? 0
+  const outputTokens = normalizedUsage.output_tokens ?? 0
+  const cacheReadInputTokens = normalizedUsage.cache_read_input_tokens ?? 0
   const cacheCreationInputTokens =
-    response.usage?.prompt_token_details?.cached_tokens ?? 0
+    normalizedUsage.cache_creation_input_tokens ?? 0
 
   const costUSD =
     (inputTokens / 1_000_000) * SONNET_COST_PER_MILLION_INPUT_TOKENS +
     (outputTokens / 1_000_000) * SONNET_COST_PER_MILLION_OUTPUT_TOKENS +
@@ -2045,41 +2101,26 @@ async function queryOpenAI(
 
   addToTotalCost(costUSD, durationMsIncludingRetries)
 
   // Log the complete LLM interaction for debugging (OpenAI path)
   logLLMInteraction({
     systemPrompt: systemPrompt.join('\n'),
     messages: [...openaiSystem, ...openaiMessages],
-    response: response,
+    response: queryResult.rawResponse || assistantMessage.message,
     usage: {
-      inputTokens: inputTokens,
-      outputTokens: outputTokens,
+      inputTokens,
+      outputTokens,
     },
     timing: {
-      start: start,
+      start,
       end: Date.now(),
     },
-    apiFormat: 'openai',
+    apiFormat: queryResult.apiFormat,
   })
 
-  // Extract content from OpenAI response structure
-  const messageContent = response.choices?.[0]?.message?.content || []
+  assistantMessage.costUSD = costUSD
+  assistantMessage.durationMs = durationMs
+  assistantMessage.uuid = assistantMessage.uuid || (randomUUID() as UUID)
 
-  return {
-    message: {
-      role: 'assistant',
-      content: normalizeContentFromAPI(Array.isArray(messageContent) ? messageContent : [{ type: 'text', text: String(messageContent) }]),
-      usage: {
-        input_tokens: inputTokens,
-        output_tokens: outputTokens,
-        cache_read_input_tokens: cacheReadInputTokens,
-        cache_creation_input_tokens: 0,
-      },
-    },
-    costUSD,
-    durationMs,
-    type: 'assistant',
-    uuid: randomUUID(),
-  }
+  return assistantMessage
 }
@@ -2087,6 +2128,45 @@ function getMaxTokensFromProfile(modelProfile: any): number {
   return modelProfile?.maxTokens || 8000
 }
 
+function normalizeUsage(usage?: any) {
+  if (!usage) {
+    return {
+      input_tokens: 0,
+      output_tokens: 0,
+      cache_read_input_tokens: 0,
+      cache_creation_input_tokens: 0,
+    }
+  }
+
+  const inputTokens =
+    usage.input_tokens ??
+    usage.prompt_tokens ??
+    usage.inputTokens ??
+    0
+  const outputTokens =
+    usage.output_tokens ??
+    usage.completion_tokens ??
+    usage.outputTokens ??
+    0
+  const cacheReadInputTokens =
+    usage.cache_read_input_tokens ??
+    usage.prompt_token_details?.cached_tokens ??
+    usage.cacheReadInputTokens ??
+    0
+  const cacheCreationInputTokens =
+    usage.cache_creation_input_tokens ??
+    usage.cacheCreatedInputTokens ??
+    0
+
+  return {
+    ...usage,
+    input_tokens: inputTokens,
+    output_tokens: outputTokens,
+    cache_read_input_tokens: cacheReadInputTokens,
+    cache_creation_input_tokens: cacheCreationInputTokens,
+  }
+}
+
 function getModelInputTokenCostUSD(model: string): number {
   // Find the model in the models object
   for (const providerModels of Object.values(models)) {
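A quick usage sketch for the new helper: whatever mix of snake_case, OpenAI-style, or camelCase fields comes in, the same normalized shape comes out (token counts here are made up):

// OpenAI Chat Completions names
normalizeUsage({ prompt_tokens: 120, completion_tokens: 30 })
// -> input_tokens: 120, output_tokens: 30, cache fields default to 0

// camelCase names (e.g. a unified adapter response)
normalizeUsage({ inputTokens: 120, outputTokens: 30 })
// -> input_tokens: 120, output_tokens: 30

// cached prompt tokens reported under prompt_token_details
normalizeUsage({ prompt_tokens: 120, prompt_token_details: { cached_tokens: 80 } })
// -> input_tokens: 120, cache_read_input_tokens: 80

// missing usage falls back to all zeros
normalizeUsage(undefined)
// -> { input_tokens: 0, output_tokens: 0, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 }

Because the helper spreads ...usage into its return value, the original fields survive alongside the normalized ones, so downstream readers of either naming keep working.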