- Implement model adapter factory for unified API handling
- Add response state manager for conversation continuity
- Support GPT-5 Responses API with continuation tokens
- Add model capabilities type system
- Include deployment guide and test infrastructure
- Enhance error handling and debugging for model interactions
2080 lines · 64 KiB · TypeScript
import '@anthropic-ai/sdk/shims/node'
import Anthropic, { APIConnectionError, APIError } from '@anthropic-ai/sdk'
import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
import { AnthropicVertex } from '@anthropic-ai/vertex-sdk'
import type { BetaUsage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import chalk from 'chalk'
import { createHash, randomUUID, UUID } from 'crypto'
import 'dotenv/config'

import { addToTotalCost } from '../cost-tracker'
import models from '../constants/models'
import type { AssistantMessage, UserMessage } from '../query'
import { Tool } from '../Tool'
import {
  getAnthropicApiKey,
  getOrCreateUserID,
  getGlobalConfig,
  ModelProfile,
} from '../utils/config'
import { getProjectDocs } from '../context'
import { logError, SESSION_ID } from '../utils/log'
import { USER_AGENT } from '../utils/http'
import {
  createAssistantAPIErrorMessage,
  normalizeContentFromAPI,
} from '../utils/messages'
import { countTokens } from '../utils/tokens'
import { logEvent } from './statsig'
import { withVCR } from './vcr'
import {
  debug as debugLogger,
  markPhase,
  getCurrentRequest,
  logLLMInteraction,
  logSystemPromptConstruction,
  logErrorWithDiagnosis,
} from '../utils/debugLogger'
import {
  MessageContextManager,
  createRetentionStrategy,
} from '../utils/messageContextManager'
import { getModelManager } from '../utils/model'
import { zodToJsonSchema } from 'zod-to-json-schema'
import type { BetaMessageStream } from '@anthropic-ai/sdk/lib/BetaMessageStream.mjs'
import { ModelAdapterFactory } from './modelAdapterFactory'
import { UnifiedRequestParams } from '../types/modelCapabilities'
import { responseStateManager, getConversationId } from './responseStateManager'
import type { ToolUseContext } from '../Tool'
import type {
  Message as APIMessage,
  MessageParam,
  TextBlockParam,
} from '@anthropic-ai/sdk/resources/index.mjs'
import { USE_BEDROCK, USE_VERTEX } from '../utils/model'
import { getCLISyspromptPrefix } from '../constants/prompts'
import { getVertexRegionForModel } from '../utils/model'
import OpenAI from 'openai'
import type { ChatCompletionStream } from 'openai/lib/ChatCompletionStream'
import { ContentBlock } from '@anthropic-ai/sdk/resources/messages/messages'
import { nanoid } from 'nanoid'
import { getCompletionWithProfile, getGPT5CompletionWithProfile } from './openai'
import { getReasoningEffort } from '../utils/thinking'
import { generateSystemReminders } from './systemReminder'

// Helper function to check if a model is GPT-5
|
||
function isGPT5Model(modelName: string): boolean {
|
||
return modelName.startsWith('gpt-5')
|
||
}
|
||
|
||
// Helper function to extract model configuration for debug logging
|
||
function getModelConfigForDebug(model: string): {
|
||
modelName: string
|
||
provider: string
|
||
apiKeyStatus: 'configured' | 'missing' | 'invalid'
|
||
baseURL?: string
|
||
maxTokens?: number
|
||
reasoningEffort?: string
|
||
isStream?: boolean
|
||
temperature?: number
|
||
} {
|
||
const config = getGlobalConfig()
|
||
const modelManager = getModelManager()
|
||
|
||
// 🔧 Fix: Use ModelManager to get the actual current model profile
|
||
const modelProfile = modelManager.getModel('main')
|
||
|
||
let apiKeyStatus: 'configured' | 'missing' | 'invalid' = 'missing'
|
||
let baseURL: string | undefined
|
||
let maxTokens: number | undefined
|
||
let reasoningEffort: string | undefined
|
||
|
||
// 🔧 Fix: Use ModelProfile configuration exclusively
|
||
if (modelProfile) {
|
||
apiKeyStatus = modelProfile.apiKey ? 'configured' : 'missing'
|
||
baseURL = modelProfile.baseURL
|
||
maxTokens = modelProfile.maxTokens
|
||
reasoningEffort = modelProfile.reasoningEffort
|
||
} else {
|
||
// 🚨 No ModelProfile available - this should not happen in the current system
|
||
apiKeyStatus = 'missing'
|
||
maxTokens = undefined
|
||
reasoningEffort = undefined
|
||
}
|
||
|
||
return {
|
||
modelName: model,
|
||
provider: modelProfile?.provider || config.primaryProvider || 'anthropic',
|
||
apiKeyStatus,
|
||
baseURL,
|
||
maxTokens,
|
||
reasoningEffort,
|
||
isStream: config.stream || false,
|
||
temperature: MAIN_QUERY_TEMPERATURE,
|
||
}
|
||
}
|
||
|
||
// KodeContext manager - synchronous cache and access layer for project documentation
|
||
class KodeContextManager {
|
||
private static instance: KodeContextManager
|
||
private projectDocsCache: string = ''
|
||
private cacheInitialized: boolean = false
|
||
private initPromise: Promise<void> | null = null
|
||
|
||
private constructor() {}
|
||
|
||
public static getInstance(): KodeContextManager {
|
||
if (!KodeContextManager.instance) {
|
||
KodeContextManager.instance = new KodeContextManager()
|
||
}
|
||
return KodeContextManager.instance
|
||
}
|
||
|
||
public async initialize(): Promise<void> {
|
||
if (this.cacheInitialized) return
|
||
|
||
if (this.initPromise) {
|
||
return this.initPromise
|
||
}
|
||
|
||
this.initPromise = this.loadProjectDocs()
|
||
await this.initPromise
|
||
}
|
||
|
||
private async loadProjectDocs(): Promise<void> {
|
||
try {
|
||
const projectDocs = await getProjectDocs()
|
||
this.projectDocsCache = projectDocs || ''
|
||
this.cacheInitialized = true
|
||
|
||
// Log the load result in debug mode
|
||
if (process.env.NODE_ENV === 'development') {
|
||
console.log(
|
||
`[KodeContext] Loaded ${this.projectDocsCache.length} characters from project docs`,
|
||
)
|
||
}
|
||
} catch (error) {
|
||
console.warn('[KodeContext] Failed to load project docs:', error)
|
||
this.projectDocsCache = ''
|
||
this.cacheInitialized = true
|
||
}
|
||
}
|
||
|
||
public getKodeContext(): string {
|
||
if (!this.cacheInitialized) {
|
||
// If not yet initialized, kick off async initialization but return an empty string immediately
|
||
this.initialize().catch(console.warn)
|
||
return ''
|
||
}
|
||
return this.projectDocsCache
|
||
}
|
||
|
||
public async refreshCache(): Promise<void> {
|
||
this.cacheInitialized = false
|
||
this.initPromise = null
|
||
await this.initialize()
|
||
}
|
||
}
|
||
|
||
// Exported functions preserve backward compatibility
|
||
const kodeContextManager = KodeContextManager.getInstance()
|
||
|
||
// Initialize asynchronously at module load time
|
||
kodeContextManager.initialize().catch(console.warn)
|
||
|
||
export const generateKodeContext = (): string => {
|
||
return kodeContextManager.getKodeContext()
|
||
}
|
||
|
||
export const refreshKodeContext = async (): Promise<void> => {
|
||
await kodeContextManager.refreshCache()
|
||
}
|
||
|
||
interface StreamResponse extends APIMessage {
|
||
ttftMs?: number
|
||
}
|
||
|
||
export const API_ERROR_MESSAGE_PREFIX = 'API Error'
|
||
export const PROMPT_TOO_LONG_ERROR_MESSAGE = 'Prompt is too long'
|
||
export const CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE = 'Credit balance is too low'
|
||
export const INVALID_API_KEY_ERROR_MESSAGE =
|
||
'Invalid API key · Please run /login'
|
||
export const NO_CONTENT_MESSAGE = '(no content)'
|
||
const PROMPT_CACHING_ENABLED = !process.env.DISABLE_PROMPT_CACHING
|
||
|
||
// @see https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
|
||
const HAIKU_COST_PER_MILLION_INPUT_TOKENS = 0.8
|
||
const HAIKU_COST_PER_MILLION_OUTPUT_TOKENS = 4
|
||
const HAIKU_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 1
|
||
const HAIKU_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.08
|
||
|
||
const SONNET_COST_PER_MILLION_INPUT_TOKENS = 3
|
||
const SONNET_COST_PER_MILLION_OUTPUT_TOKENS = 15
|
||
const SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 3.75
|
||
const SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.3
|
||
|
||
export const MAIN_QUERY_TEMPERATURE = 1 // to get more variation for binary feedback
|
||
|
||
function getMetadata() {
|
||
return {
|
||
user_id: `${getOrCreateUserID()}_${SESSION_ID}`,
|
||
}
|
||
}
|
||
|
||
const MAX_RETRIES = process.env.USER_TYPE === 'SWE_BENCH' ? 100 : 10
|
||
const BASE_DELAY_MS = 500
|
||
|
||
interface RetryOptions {
|
||
maxRetries?: number
|
||
signal?: AbortSignal
|
||
}
|
||
|
||
// Helper function to create an abortable delay
|
||
function abortableDelay(delayMs: number, signal?: AbortSignal): Promise<void> {
|
||
return new Promise((resolve, reject) => {
|
||
// Check if already aborted
|
||
if (signal?.aborted) {
|
||
reject(new Error('Request was aborted'))
|
||
return
|
||
}
|
||
|
||
const timeoutId = setTimeout(() => {
|
||
resolve()
|
||
}, delayMs)
|
||
|
||
// If signal is provided, listen for abort event
|
||
if (signal) {
|
||
const abortHandler = () => {
|
||
clearTimeout(timeoutId)
|
||
reject(new Error('Request was aborted'))
|
||
}
|
||
signal.addEventListener('abort', abortHandler, { once: true })
|
||
}
|
||
})
|
||
}
|
||
|
||
function getRetryDelay(
|
||
attempt: number,
|
||
retryAfterHeader?: string | null,
|
||
): number {
|
||
if (retryAfterHeader) {
|
||
const seconds = parseInt(retryAfterHeader, 10)
|
||
if (!isNaN(seconds)) {
|
||
return seconds * 1000
|
||
}
|
||
}
|
||
return Math.min(BASE_DELAY_MS * Math.pow(2, attempt - 1), 32000) // Max 32s delay
|
||
}
|
||
|
||
function shouldRetry(error: APIError): boolean {
|
||
// Check for overloaded errors first and only retry for SWE_BENCH
|
||
if (error.message?.includes('"type":"overloaded_error"')) {
|
||
return process.env.USER_TYPE === 'SWE_BENCH'
|
||
}
|
||
|
||
// Note this is not a standard header.
|
||
const shouldRetryHeader = error.headers?.['x-should-retry']
|
||
|
||
// If the server explicitly says whether or not to retry, obey.
|
||
if (shouldRetryHeader === 'true') return true
|
||
if (shouldRetryHeader === 'false') return false
|
||
|
||
if (error instanceof APIConnectionError) {
|
||
return true
|
||
}
|
||
|
||
if (!error.status) return false
|
||
|
||
// Retry on request timeouts.
|
||
if (error.status === 408) return true
|
||
|
||
// Retry on lock timeouts.
|
||
if (error.status === 409) return true
|
||
|
||
// Retry on rate limits.
|
||
if (error.status === 429) return true
|
||
|
||
// Retry internal errors.
|
||
if (error.status && error.status >= 500) return true
|
||
|
||
return false
|
||
}
|
||
|
||
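/**
 * Run an operation with exponential-backoff retries. Only APIError values
 * that shouldRetry() approves are retried, a Retry-After header overrides the
 * computed delay, and an aborted signal cancels both the retry loop and any
 * in-flight delay.
 */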
async function withRetry<T>(
|
||
operation: (attempt: number) => Promise<T>,
|
||
options: RetryOptions = {},
|
||
): Promise<T> {
|
||
const maxRetries = options.maxRetries ?? MAX_RETRIES
|
||
let lastError: unknown
|
||
|
||
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
|
||
try {
|
||
return await operation(attempt)
|
||
} catch (error) {
|
||
lastError = error
|
||
// Only retry if the error indicates we should
|
||
if (
|
||
attempt > maxRetries ||
|
||
!(error instanceof APIError) ||
|
||
!shouldRetry(error)
|
||
) {
|
||
throw error
|
||
}
|
||
// 🔧 CRITICAL FIX: Check abort signal BEFORE showing retry message
|
||
if (options.signal?.aborted) {
|
||
throw new Error('Request cancelled by user')
|
||
}
|
||
|
||
// Get retry-after header if available
|
||
const retryAfter = error.headers?.['retry-after'] ?? null
|
||
const delayMs = getRetryDelay(attempt, retryAfter)
|
||
|
||
console.log(
|
||
` ⎿ ${chalk.red(`API ${error.name} (${error.message}) · Retrying in ${Math.round(delayMs / 1000)} seconds… (attempt ${attempt}/${maxRetries})`)}`,
|
||
)
|
||
|
||
logEvent('tengu_api_retry', {
|
||
attempt: String(attempt),
|
||
delayMs: String(delayMs),
|
||
error: error.message,
|
||
status: String(error.status),
|
||
provider: USE_BEDROCK ? 'bedrock' : USE_VERTEX ? 'vertex' : '1p',
|
||
})
|
||
|
||
try {
|
||
await abortableDelay(delayMs, options.signal)
|
||
} catch (delayError) {
|
||
// If aborted during delay, throw the error to stop retrying
|
||
if (delayError.message === 'Request was aborted') {
|
||
throw new Error('Request cancelled by user')
|
||
}
|
||
throw delayError
|
||
}
|
||
}
|
||
}
|
||
|
||
throw lastError
|
||
}
|
||
|
||
/**
|
||
* Fetch available models from Anthropic API
|
||
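 *
 * @example
 * // Hypothetical usage; apiKey is a placeholder value:
 * const available = await fetchAnthropicModels('https://api.anthropic.com', apiKey)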
*/
|
||
export async function fetchAnthropicModels(
|
||
baseURL: string,
|
||
apiKey: string,
|
||
): Promise<any[]> {
|
||
try {
|
||
// Use provided baseURL or default to official Anthropic API
|
||
const modelsURL = baseURL
|
||
? `${baseURL.replace(/\/+$/, '')}/v1/models`
|
||
: 'https://api.anthropic.com/v1/models'
|
||
|
||
const response = await fetch(modelsURL, {
|
||
method: 'GET',
|
||
headers: {
|
||
'x-api-key': apiKey,
|
||
'anthropic-version': '2023-06-01',
|
||
'User-Agent': USER_AGENT,
|
||
},
|
||
})
|
||
|
||
if (!response.ok) {
|
||
// Provide user-friendly error messages based on status code
|
||
if (response.status === 401) {
|
||
throw new Error(
|
||
'Invalid API key. Please check your Anthropic API key and try again.',
|
||
)
|
||
} else if (response.status === 403) {
|
||
throw new Error(
|
||
'API key does not have permission to access models. Please check your API key permissions.',
|
||
)
|
||
} else if (response.status === 429) {
|
||
throw new Error(
|
||
'Too many requests. Please wait a moment and try again.',
|
||
)
|
||
} else if (response.status >= 500) {
|
||
throw new Error(
|
||
'Anthropic service is temporarily unavailable. Please try again later.',
|
||
)
|
||
} else {
|
||
throw new Error(
|
||
`Unable to connect to Anthropic API (${response.status}). Please check your internet connection and API key.`,
|
||
)
|
||
}
|
||
}
|
||
|
||
const data = await response.json()
|
||
return data.data || []
|
||
} catch (error) {
|
||
// If it's already our custom error, pass it through
|
||
if (
|
||
(error instanceof Error && error.message.includes('API key')) ||
|
||
(error instanceof Error && error.message.includes('Anthropic'))
|
||
) {
|
||
throw error
|
||
}
|
||
|
||
// For network errors or other issues
|
||
console.error('Failed to fetch Anthropic models:', error)
|
||
throw new Error(
|
||
'Unable to connect to Anthropic API. Please check your internet connection and try again.',
|
||
)
|
||
}
|
||
}
|
||
|
||
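/**
 * Verify an API key with a minimal request. Non-Anthropic providers are
 * checked against their OpenAI-compatible `${baseURL}/models` endpoint;
 * Anthropic and Anthropic-compatible providers are checked with a tiny
 * messages.create call. Returns false on authentication failures and
 * rethrows other errors.
 *
 * @example
 * // Hypothetical usage; the key value is a placeholder:
 * const ok = await verifyApiKey(process.env.ANTHROPIC_API_KEY ?? '')
 */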
export async function verifyApiKey(
|
||
apiKey: string,
|
||
baseURL?: string,
|
||
provider?: string,
|
||
): Promise<boolean> {
|
||
if (!apiKey) {
|
||
return false
|
||
}
|
||
|
||
// For non-Anthropic providers, use OpenAI-compatible verification
|
||
if (provider && provider !== 'anthropic') {
|
||
try {
|
||
const headers: Record<string, string> = {
|
||
Authorization: `Bearer ${apiKey}`,
|
||
'Content-Type': 'application/json',
|
||
}
|
||
|
||
// 🔧 Fix: Proper URL construction for verification
|
||
if (!baseURL) {
|
||
console.warn(
|
||
'No baseURL provided for non-Anthropic provider verification',
|
||
)
|
||
return false
|
||
}
|
||
|
||
const modelsURL = `${baseURL.replace(/\/+$/, '')}/models`
|
||
|
||
const response = await fetch(modelsURL, {
|
||
method: 'GET',
|
||
headers,
|
||
})
|
||
|
||
return response.ok
|
||
} catch (error) {
|
||
console.warn('API verification failed for non-Anthropic provider:', error)
|
||
return false
|
||
}
|
||
}
|
||
|
||
// For Anthropic and Anthropic-compatible APIs
|
||
const clientConfig: any = {
|
||
apiKey,
|
||
dangerouslyAllowBrowser: true,
|
||
maxRetries: 3,
|
||
defaultHeaders: {
|
||
'User-Agent': USER_AGENT,
|
||
},
|
||
}
|
||
|
||
// Only add baseURL for true Anthropic-compatible APIs
|
||
if (
|
||
baseURL &&
|
||
(provider === 'anthropic' ||
|
||
provider === 'bigdream' ||
|
||
provider === 'opendev')
|
||
) {
|
||
clientConfig.baseURL = baseURL
|
||
}
|
||
|
||
const anthropic = new Anthropic(clientConfig)
|
||
|
||
try {
|
||
await withRetry(
|
||
async () => {
|
||
const model = 'claude-sonnet-4-20250514'
|
||
const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
|
||
await anthropic.messages.create({
|
||
model,
|
||
max_tokens: 1000, // Simple test token limit for API verification
|
||
messages,
|
||
temperature: 0,
|
||
metadata: getMetadata(),
|
||
})
|
||
return true
|
||
},
|
||
{ maxRetries: 2 }, // Use fewer retries for API key verification
|
||
)
|
||
return true
|
||
} catch (error) {
|
||
logError(error)
|
||
// Check for authentication error
|
||
if (
|
||
error instanceof Error &&
|
||
error.message.includes(
|
||
'{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}',
|
||
)
|
||
) {
|
||
return false
|
||
}
|
||
throw error
|
||
}
|
||
}
|
||
|
||
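/**
 * Convert Anthropic-style user/assistant messages into OpenAI Chat Completions
 * messages. Text blocks become plain messages, tool_use blocks become
 * assistant tool_calls, and tool_result blocks are emitted as role:'tool'
 * messages placed immediately after the assistant message that issued the
 * corresponding call.
 */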
function convertAnthropicMessagesToOpenAIMessages(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
): (
|
||
| OpenAI.ChatCompletionMessageParam
|
||
| OpenAI.ChatCompletionToolMessageParam
|
||
)[] {
|
||
const openaiMessages: (
|
||
| OpenAI.ChatCompletionMessageParam
|
||
| OpenAI.ChatCompletionToolMessageParam
|
||
)[] = []
|
||
|
||
const toolResults: Record<string, OpenAI.ChatCompletionToolMessageParam> = {}
|
||
|
||
for (const message of messages) {
|
||
let contentBlocks = []
|
||
if (typeof message.message.content === 'string') {
|
||
contentBlocks = [
|
||
{
|
||
type: 'text',
|
||
text: message.message.content,
|
||
},
|
||
]
|
||
} else if (!Array.isArray(message.message.content)) {
|
||
contentBlocks = [message.message.content]
|
||
} else {
|
||
contentBlocks = message.message.content
|
||
}
|
||
|
||
for (const block of contentBlocks) {
|
||
if (block.type === 'text') {
|
||
openaiMessages.push({
|
||
role: message.message.role,
|
||
content: block.text,
|
||
})
|
||
} else if (block.type === 'tool_use') {
|
||
openaiMessages.push({
|
||
role: 'assistant',
|
||
content: undefined,
|
||
tool_calls: [
|
||
{
|
||
type: 'function',
|
||
function: {
|
||
name: block.name,
|
||
arguments: JSON.stringify(block.input),
|
||
},
|
||
id: block.id,
|
||
},
|
||
],
|
||
})
|
||
} else if (block.type === 'tool_result') {
|
||
// Ensure content is always a string for role:tool messages
|
||
let toolContent = block.content
|
||
if (typeof toolContent !== 'string') {
|
||
// Convert content to string if it's not already
|
||
toolContent = JSON.stringify(toolContent)
|
||
}
|
||
|
||
toolResults[block.tool_use_id] = {
|
||
role: 'tool',
|
||
content: toolContent,
|
||
tool_call_id: block.tool_use_id,
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
const finalMessages: (
|
||
| OpenAI.ChatCompletionMessageParam
|
||
| OpenAI.ChatCompletionToolMessageParam
|
||
)[] = []
|
||
|
||
for (const message of openaiMessages) {
|
||
finalMessages.push(message)
|
||
|
||
if ('tool_calls' in message && message.tool_calls) {
|
||
for (const toolCall of message.tool_calls) {
|
||
if (toolResults[toolCall.id]) {
|
||
finalMessages.push(toolResults[toolCall.id])
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return finalMessages
|
||
}
|
||
|
||
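/**
 * Fold one streaming ChatCompletionChunk into the message accumulated so far:
 * strings are concatenated, numbers overwritten, arrays (tool_calls) merged
 * by index, and nested objects reduced recursively. Chunks without choices
 * (usage-only chunks) leave the message unchanged.
 */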
function messageReducer(
|
||
previous: OpenAI.ChatCompletionMessage,
|
||
item: OpenAI.ChatCompletionChunk,
|
||
): OpenAI.ChatCompletionMessage {
|
||
const reduce = (acc: any, delta: OpenAI.ChatCompletionChunk.Choice.Delta) => {
|
||
acc = { ...acc }
|
||
for (const [key, value] of Object.entries(delta)) {
|
||
if (acc[key] === undefined || acc[key] === null) {
|
||
acc[key] = value
|
||
// OpenAI.Chat.Completions.ChatCompletionMessageToolCall does not have an .index key
|
||
if (Array.isArray(acc[key])) {
|
||
for (const arr of acc[key]) {
|
||
delete arr.index
|
||
}
|
||
}
|
||
} else if (typeof acc[key] === 'string' && typeof value === 'string') {
|
||
acc[key] += value
|
||
} else if (typeof acc[key] === 'number' && typeof value === 'number') {
|
||
acc[key] = value
|
||
} else if (Array.isArray(acc[key]) && Array.isArray(value)) {
|
||
const accArray = acc[key]
|
||
for (let i = 0; i < value.length; i++) {
|
||
const { index, ...chunkTool } = value[i]
|
||
if (index - accArray.length > 1) {
|
||
throw new Error(
|
||
`Error: An array has an empty value when tool_calls are constructed. tool_calls: ${accArray}; tool: ${value}`,
|
||
)
|
||
}
|
||
accArray[index] = reduce(accArray[index], chunkTool)
|
||
}
|
||
} else if (typeof acc[key] === 'object' && typeof value === 'object') {
|
||
acc[key] = reduce(acc[key], value)
|
||
}
|
||
}
|
||
return acc
|
||
}
|
||
|
||
const choice = item.choices?.[0]
|
||
if (!choice) {
|
||
// chunk contains information about usage and token counts
|
||
return previous
|
||
}
|
||
return reduce(previous, choice.delta) as OpenAI.ChatCompletionMessage
|
||
}
|
||
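/**
 * Drain an OpenAI ChatCompletionStream into a single ChatCompletion,
 * accumulating deltas with messageReducer, tracking time-to-first-token and
 * per-chunk errors, and aborting between chunks when the signal fires.
 */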
async function handleMessageStream(
|
||
stream: ChatCompletionStream,
|
||
signal?: AbortSignal, // 🔧 Add AbortSignal support to stream handler
|
||
): Promise<OpenAI.ChatCompletion> {
|
||
const streamStartTime = Date.now()
|
||
let ttftMs: number | undefined
|
||
let chunkCount = 0
|
||
let errorCount = 0
|
||
|
||
debugLogger.api('OPENAI_STREAM_START', {
|
||
streamStartTime: String(streamStartTime),
|
||
})
|
||
|
||
let message = {} as OpenAI.ChatCompletionMessage
|
||
|
||
let id, model, created, object, usage
|
||
try {
|
||
for await (const chunk of stream) {
|
||
// 🔧 CRITICAL FIX: Check abort signal in OpenAI streaming loop
|
||
if (signal?.aborted) {
|
||
debugLogger.flow('OPENAI_STREAM_ABORTED', {
|
||
chunkCount,
|
||
timestamp: Date.now()
|
||
})
|
||
throw new Error('Request was cancelled')
|
||
}
|
||
|
||
chunkCount++
|
||
|
||
try {
|
||
if (!id) {
|
||
id = chunk.id
|
||
debugLogger.api('OPENAI_STREAM_ID_RECEIVED', {
|
||
id,
|
||
chunkNumber: String(chunkCount),
|
||
})
|
||
}
|
||
if (!model) {
|
||
model = chunk.model
|
||
debugLogger.api('OPENAI_STREAM_MODEL_RECEIVED', {
|
||
model,
|
||
chunkNumber: String(chunkCount),
|
||
})
|
||
}
|
||
if (!created) {
|
||
created = chunk.created
|
||
}
|
||
if (!object) {
|
||
object = chunk.object
|
||
}
|
||
if (!usage) {
|
||
usage = chunk.usage
|
||
}
|
||
|
||
message = messageReducer(message, chunk)
|
||
|
||
if (chunk?.choices?.[0]?.delta?.content) {
|
||
if (!ttftMs) {
|
||
ttftMs = Date.now() - streamStartTime
|
||
debugLogger.api('OPENAI_STREAM_FIRST_TOKEN', {
|
||
ttftMs: String(ttftMs),
|
||
chunkNumber: String(chunkCount),
|
||
})
|
||
}
|
||
}
|
||
} catch (chunkError) {
|
||
errorCount++
|
||
debugLogger.error('OPENAI_STREAM_CHUNK_ERROR', {
|
||
chunkNumber: String(chunkCount),
|
||
errorMessage:
|
||
chunkError instanceof Error
|
||
? chunkError.message
|
||
: String(chunkError),
|
||
errorType:
|
||
chunkError instanceof Error
|
||
? chunkError.constructor.name
|
||
: typeof chunkError,
|
||
})
|
||
// Continue processing other chunks
|
||
}
|
||
}
|
||
|
||
debugLogger.api('OPENAI_STREAM_COMPLETE', {
|
||
totalChunks: String(chunkCount),
|
||
errorCount: String(errorCount),
|
||
totalDuration: String(Date.now() - streamStartTime),
|
||
ttftMs: String(ttftMs || 0),
|
||
finalMessageId: id || 'undefined',
|
||
})
|
||
} catch (streamError) {
|
||
debugLogger.error('OPENAI_STREAM_FATAL_ERROR', {
|
||
totalChunks: String(chunkCount),
|
||
errorCount: String(errorCount),
|
||
errorMessage:
|
||
streamError instanceof Error
|
||
? streamError.message
|
||
: String(streamError),
|
||
errorType:
|
||
streamError instanceof Error
|
||
? streamError.constructor.name
|
||
: typeof streamError,
|
||
})
|
||
throw streamError
|
||
}
|
||
return {
|
||
id,
|
||
created,
|
||
model,
|
||
object,
|
||
choices: [
|
||
{
|
||
index: 0,
|
||
message,
|
||
finish_reason: 'stop',
|
||
logprobs: undefined,
|
||
},
|
||
],
|
||
usage,
|
||
}
|
||
}
|
||
|
||
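/**
 * Map an OpenAI ChatCompletion back into Anthropic-style content blocks:
 * tool_calls become tool_use blocks, reasoning / reasoning_content (DeepSeek)
 * become thinking blocks, and plain message content becomes a text block.
 */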
function convertOpenAIResponseToAnthropic(response: OpenAI.ChatCompletion) {
|
||
let contentBlocks: ContentBlock[] = []
|
||
const message = response.choices?.[0]?.message
|
||
if (!message) {
|
||
logEvent('weird_response', {
|
||
response: JSON.stringify(response),
|
||
})
|
||
return {
|
||
role: 'assistant',
|
||
content: [],
|
||
stop_reason: response.choices?.[0]?.finish_reason,
|
||
type: 'message',
|
||
usage: response.usage,
|
||
}
|
||
}
|
||
|
||
if (message?.tool_calls) {
|
||
for (const toolCall of message.tool_calls) {
|
||
const tool = toolCall.function
|
||
const toolName = tool.name
|
||
let toolArgs = {}
|
||
try {
|
||
toolArgs = JSON.parse(tool.arguments)
|
||
} catch (e) {
|
||
// console.log(e)
|
||
}
|
||
|
||
contentBlocks.push({
|
||
type: 'tool_use',
|
||
input: toolArgs,
|
||
name: toolName,
|
||
id: toolCall.id?.length > 0 ? toolCall.id : nanoid(),
|
||
})
|
||
}
|
||
}
|
||
|
||
if ((message as any).reasoning) {
|
||
contentBlocks.push({
|
||
type: 'thinking',
|
||
thinking: (message as any).reasoning,
|
||
signature: '',
|
||
})
|
||
}
|
||
|
||
// NOTE: For deepseek api, the key for its returned reasoning process is reasoning_content
|
||
if ((message as any).reasoning_content) {
|
||
contentBlocks.push({
|
||
type: 'thinking',
|
||
thinking: (message as any).reasoning_content,
|
||
signature: '',
|
||
})
|
||
}
|
||
|
||
if (message.content) {
|
||
contentBlocks.push({
|
||
type: 'text',
|
||
text: message?.content,
|
||
citations: [],
|
||
})
|
||
}
|
||
|
||
const finalMessage = {
|
||
role: 'assistant',
|
||
content: contentBlocks,
|
||
stop_reason: response.choices?.[0]?.finish_reason,
|
||
type: 'message',
|
||
usage: response.usage,
|
||
}
|
||
|
||
return finalMessage
|
||
}
|
||
|
||
let anthropicClient: Anthropic | AnthropicBedrock | AnthropicVertex | null =
|
||
null
|
||
|
||
/**
|
||
* Get the Anthropic client, creating it if it doesn't exist
|
||
*/
|
||
export function getAnthropicClient(
|
||
model?: string,
|
||
): Anthropic | AnthropicBedrock | AnthropicVertex {
|
||
const config = getGlobalConfig()
|
||
const provider = config.primaryProvider
|
||
|
||
// Reset client if provider has changed to ensure correct configuration
|
||
if (anthropicClient && provider) {
|
||
// Always recreate client for provider-specific configurations
|
||
anthropicClient = null
|
||
}
|
||
|
||
if (anthropicClient) {
|
||
return anthropicClient
|
||
}
|
||
|
||
const region = getVertexRegionForModel(model)
|
||
|
||
const defaultHeaders: { [key: string]: string } = {
|
||
'x-app': 'cli',
|
||
'User-Agent': USER_AGENT,
|
||
}
|
||
if (process.env.ANTHROPIC_AUTH_TOKEN) {
|
||
defaultHeaders['Authorization'] =
|
||
`Bearer ${process.env.ANTHROPIC_AUTH_TOKEN}`
|
||
}
|
||
|
||
const ARGS = {
|
||
defaultHeaders,
|
||
maxRetries: 0, // Disabled auto-retry in favor of manual implementation
|
||
timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
|
||
}
|
||
if (USE_BEDROCK) {
|
||
const client = new AnthropicBedrock(ARGS)
|
||
anthropicClient = client
|
||
return client
|
||
}
|
||
if (USE_VERTEX) {
|
||
const vertexArgs = {
|
||
...ARGS,
|
||
region: region || process.env.CLOUD_ML_REGION || 'us-east5',
|
||
}
|
||
const client = new AnthropicVertex(vertexArgs)
|
||
anthropicClient = client
|
||
return client
|
||
}
|
||
|
||
// Get appropriate API key and baseURL from ModelProfile
|
||
const modelManager = getModelManager()
|
||
const modelProfile = modelManager.getModel('main')
|
||
|
||
let apiKey: string
|
||
let baseURL: string | undefined
|
||
|
||
if (modelProfile) {
|
||
apiKey = modelProfile.apiKey || ''
|
||
baseURL = modelProfile.baseURL
|
||
} else {
|
||
// Fallback to default anthropic if no ModelProfile
|
||
apiKey = getAnthropicApiKey()
|
||
baseURL = undefined
|
||
}
|
||
|
||
if (process.env.USER_TYPE === 'ant' && !apiKey && provider === 'anthropic') {
|
||
console.error(
|
||
chalk.red(
|
||
'[ANT-ONLY] Please set the ANTHROPIC_API_KEY environment variable to use the CLI. To create a new key, go to https://console.anthropic.com/settings/keys.',
|
||
),
|
||
)
|
||
}
|
||
|
||
// Create client with custom baseURL for BigDream/OpenDev
|
||
// Anthropic SDK will append the appropriate paths (like /v1/messages)
|
||
const clientConfig = {
|
||
apiKey,
|
||
dangerouslyAllowBrowser: true,
|
||
...ARGS,
|
||
...(baseURL && { baseURL }), // Use baseURL directly, SDK will handle API versioning
|
||
}
|
||
|
||
anthropicClient = new Anthropic(clientConfig)
|
||
return anthropicClient
|
||
}
|
||
|
||
/**
|
||
* Reset the Anthropic client to null, forcing a new client to be created on next use
|
||
*/
|
||
export function resetAnthropicClient(): void {
|
||
anthropicClient = null
|
||
}
|
||
|
||
/**
|
||
* Environment variables for different client types:
|
||
*
|
||
* Direct API:
|
||
* - ANTHROPIC_API_KEY: Required for direct API access
|
||
*
|
||
* AWS Bedrock:
|
||
* - AWS credentials configured via aws-sdk defaults
|
||
*
|
||
* Vertex AI:
|
||
* - Model-specific region variables (highest priority):
|
||
* - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
|
||
* - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
|
||
* - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
|
||
* - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
|
||
* If specific model region not specified above
|
||
* - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
|
||
* - Standard GCP credentials configured via google-auth-library
|
||
*
|
||
* Priority for determining region:
|
||
* 1. Hardcoded model-specific environment variables
|
||
* 2. Global CLOUD_ML_REGION variable
|
||
* 3. Default region from config
|
||
* 4. Fallback region (us-east5)
|
||
*/
|
||
|
||
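/**
 * Convert a UserMessage into an Anthropic MessageParam. When addCache is set
 * and prompt caching is enabled, the last content block is marked with
 * ephemeral cache_control.
 */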
export function userMessageToMessageParam(
|
||
message: UserMessage,
|
||
addCache = false,
|
||
): MessageParam {
|
||
if (addCache) {
|
||
if (typeof message.message.content === 'string') {
|
||
return {
|
||
role: 'user',
|
||
content: [
|
||
{
|
||
type: 'text',
|
||
text: message.message.content,
|
||
...(PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}),
|
||
},
|
||
],
|
||
}
|
||
} else {
|
||
return {
|
||
role: 'user',
|
||
content: message.message.content.map((_, i) => ({
|
||
..._,
|
||
...(i === message.message.content.length - 1
|
||
? PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}
|
||
: {}),
|
||
})),
|
||
}
|
||
}
|
||
}
|
||
return {
|
||
role: 'user',
|
||
content: message.message.content,
|
||
}
|
||
}
|
||
|
||
export function assistantMessageToMessageParam(
|
||
message: AssistantMessage,
|
||
addCache = false,
|
||
): MessageParam {
|
||
if (addCache) {
|
||
if (typeof message.message.content === 'string') {
|
||
return {
|
||
role: 'assistant',
|
||
content: [
|
||
{
|
||
type: 'text',
|
||
text: message.message.content,
|
||
...(PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}),
|
||
},
|
||
],
|
||
}
|
||
} else {
|
||
return {
|
||
role: 'assistant',
|
||
content: message.message.content.map((_, i) => ({
|
||
..._,
|
||
...(i === message.message.content.length - 1 &&
|
||
_.type !== 'thinking' &&
|
||
_.type !== 'redacted_thinking'
|
||
? PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}
|
||
: {}),
|
||
})),
|
||
}
|
||
}
|
||
}
|
||
return {
|
||
role: 'assistant',
|
||
content: message.message.content,
|
||
}
|
||
}
|
||
|
||
function splitSysPromptPrefix(systemPrompt: string[]): string[] {
|
||
// split out the first block of the system prompt as the "prefix" for API
|
||
// to match on in https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes
|
||
const systemPromptFirstBlock = systemPrompt[0] || ''
|
||
const systemPromptRest = systemPrompt.slice(1)
|
||
return [systemPromptFirstBlock, systemPromptRest.join('\n')].filter(Boolean)
|
||
}
|
||
|
||
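/**
 * Top-level LLM query entry point. Resolves the model argument (pointer,
 * model ID, or real model name) through the ModelManager, seeds GPT-5
 * Responses API continuation state on the ToolUseContext, runs the request
 * through withVCR and queryLLMWithPromptCaching, and records the returned
 * responseId for conversation continuity.
 */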
export async function queryLLM(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
systemPrompt: string[],
|
||
maxThinkingTokens: number,
|
||
tools: Tool[],
|
||
signal: AbortSignal,
|
||
options: {
|
||
safeMode: boolean
|
||
model: string | import('../utils/config').ModelPointerType
|
||
prependCLISysprompt: boolean
|
||
toolUseContext?: ToolUseContext
|
||
},
|
||
): Promise<AssistantMessage> {
|
||
// 🔧 Unified model resolution: supports pointers, model IDs, and real model names
|
||
const modelManager = getModelManager()
|
||
const modelResolution = modelManager.resolveModelWithInfo(options.model)
|
||
|
||
if (!modelResolution.success || !modelResolution.profile) {
|
||
throw new Error(
|
||
modelResolution.error || `Failed to resolve model: ${options.model}`,
|
||
)
|
||
}
|
||
|
||
const modelProfile = modelResolution.profile
|
||
const resolvedModel = modelProfile.modelName
|
||
|
||
// Initialize response state if toolUseContext is provided
|
||
const toolUseContext = options.toolUseContext
|
||
if (toolUseContext && !toolUseContext.responseState) {
|
||
const conversationId = getConversationId(toolUseContext.agentId, toolUseContext.messageId)
|
||
const previousResponseId = responseStateManager.getPreviousResponseId(conversationId)
|
||
|
||
toolUseContext.responseState = {
|
||
previousResponseId,
|
||
conversationId
|
||
}
|
||
}
|
||
|
||
debugLogger.api('MODEL_RESOLVED', {
|
||
inputParam: options.model,
|
||
resolvedModelName: resolvedModel,
|
||
provider: modelProfile.provider,
|
||
isPointer: ['main', 'task', 'reasoning', 'quick'].includes(options.model),
|
||
hasResponseState: !!toolUseContext?.responseState,
|
||
conversationId: toolUseContext?.responseState?.conversationId,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
|
||
const currentRequest = getCurrentRequest()
|
||
debugLogger.api('LLM_REQUEST_START', {
|
||
messageCount: messages.length,
|
||
systemPromptLength: systemPrompt.join(' ').length,
|
||
toolCount: tools.length,
|
||
model: resolvedModel,
|
||
originalModelParam: options.model,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
|
||
markPhase('LLM_CALL')
|
||
|
||
try {
|
||
const result = await withVCR(messages, () =>
|
||
queryLLMWithPromptCaching(
|
||
messages,
|
||
systemPrompt,
|
||
maxThinkingTokens,
|
||
tools,
|
||
signal,
|
||
{ ...options, model: resolvedModel, modelProfile, toolUseContext }, // Pass resolved ModelProfile and toolUseContext
|
||
),
|
||
)
|
||
|
||
debugLogger.api('LLM_REQUEST_SUCCESS', {
|
||
costUSD: result.costUSD,
|
||
durationMs: result.durationMs,
|
||
responseLength: result.message.content?.length || 0,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
|
||
// Update response state for GPT-5 Responses API continuation
|
||
if (toolUseContext?.responseState?.conversationId && result.responseId) {
|
||
responseStateManager.setPreviousResponseId(
|
||
toolUseContext.responseState.conversationId,
|
||
result.responseId
|
||
)
|
||
|
||
debugLogger.api('RESPONSE_STATE_UPDATED', {
|
||
conversationId: toolUseContext.responseState.conversationId,
|
||
responseId: result.responseId,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
}
|
||
|
||
return result
|
||
} catch (error) {
|
||
// Record LLM-related errors through the error-diagnosis system
|
||
logErrorWithDiagnosis(
|
||
error,
|
||
{
|
||
messageCount: messages.length,
|
||
systemPromptLength: systemPrompt.join(' ').length,
|
||
model: options.model,
|
||
toolCount: tools.length,
|
||
phase: 'LLM_CALL',
|
||
},
|
||
currentRequest?.id,
|
||
)
|
||
|
||
throw error
|
||
}
|
||
}
|
||
|
||
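/**
 * Build the enhanced system prompt and dynamic reminders. GPT-5 models get
 * additional agent-persistence instructions, Kode project docs are injected
 * directly into the prompt (unless skipContextReminders is set), and the
 * remaining context entries are appended as <context name="..."> tags.
 * Dynamic reminders are returned separately for the caller to place.
 */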
export function formatSystemPromptWithContext(
|
||
systemPrompt: string[],
|
||
context: { [k: string]: string },
|
||
agentId?: string,
|
||
skipContextReminders = false, // When true, skip direct injection of Kode context into the system prompt
|
||
): { systemPrompt: string[]; reminders: string } {
|
||
// Build the enhanced system prompt - aligned with the official Claude Code direct-injection approach
|
||
const enhancedPrompt = [...systemPrompt]
|
||
let reminders = ''
|
||
|
||
// Step 0: Add GPT-5 Agent persistence support for coding tasks
|
||
const modelManager = getModelManager()
|
||
const modelProfile = modelManager.getModel('main')
|
||
if (modelProfile && isGPT5Model(modelProfile.modelName)) {
|
||
// Add coding-specific persistence instructions based on GPT-5 documentation
|
||
const persistencePrompts = [
|
||
"\n# Agent Persistence for Long-Running Coding Tasks",
|
||
"You are working on a coding project that may involve multiple steps and iterations. Please maintain context and continuity throughout the session:",
|
||
"- Remember architectural decisions and design patterns established earlier",
|
||
"- Keep track of file modifications and their relationships",
|
||
"- Maintain awareness of the overall project structure and goals",
|
||
"- Reference previous implementations when making related changes",
|
||
"- Ensure consistency with existing code style and conventions",
|
||
"- Build incrementally on previous work rather than starting from scratch"
|
||
]
|
||
enhancedPrompt.push(...persistencePrompts)
|
||
}
|
||
|
||
// Only process when context is present
|
||
const hasContext = Object.entries(context).length > 0
|
||
|
||
if (hasContext) {
|
||
// Step 1: Inject the Kode context directly into the system prompt - aligned with the official design
|
||
if (!skipContextReminders) {
|
||
const kodeContext = generateKodeContext()
|
||
if (kodeContext) {
|
||
// Add separators and a heading so the project docs stand out in the system prompt
|
||
enhancedPrompt.push('\n---\n# Project Context\n')
|
||
enhancedPrompt.push(kodeContext)
|
||
enhancedPrompt.push('\n---\n')
|
||
}
|
||
}
|
||
|
||
// Step 2: Generate the other dynamic reminders to return to the caller - keeps the existing dynamic-reminder behavior
|
||
const reminderMessages = generateSystemReminders(hasContext, agentId)
|
||
if (reminderMessages.length > 0) {
|
||
reminders = reminderMessages.map(r => r.content).join('\n') + '\n'
|
||
}
|
||
|
||
// Step 3: Append the remaining context to the system prompt
|
||
enhancedPrompt.push(
|
||
`\nAs you answer the user's questions, you can use the following context:\n`,
|
||
)
|
||
|
||
// Filter out project docs already handled by the Kode context (avoid duplication)
|
||
const filteredContext = Object.fromEntries(
|
||
Object.entries(context).filter(
|
||
([key]) => key !== 'projectDocs' && key !== 'userDocs',
|
||
),
|
||
)
|
||
|
||
enhancedPrompt.push(
|
||
...Object.entries(filteredContext).map(
|
||
([key, value]) => `<context name="${key}">${value}</context>`,
|
||
),
|
||
)
|
||
}
|
||
|
||
return { systemPrompt: enhancedPrompt, reminders }
|
||
}
|
||
|
||
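/**
 * Route a query to the appropriate backend for the resolved ModelProfile:
 * the native Anthropic SDK for anthropic/bigdream/opendev providers, and the
 * OpenAI-compatible path for everything else.
 */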
async function queryLLMWithPromptCaching(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
systemPrompt: string[],
|
||
maxThinkingTokens: number,
|
||
tools: Tool[],
|
||
signal: AbortSignal,
|
||
options: {
|
||
safeMode: boolean
|
||
model: string
|
||
prependCLISysprompt: boolean
|
||
modelProfile?: ModelProfile | null
|
||
toolUseContext?: ToolUseContext
|
||
},
|
||
): Promise<AssistantMessage> {
|
||
const config = getGlobalConfig()
|
||
const modelManager = getModelManager()
|
||
const toolUseContext = options.toolUseContext
|
||
|
||
// 🔧 Fix: use the ModelProfile passed in, not the hard-coded 'main' pointer
|
||
const modelProfile = options.modelProfile || modelManager.getModel('main')
|
||
let provider: string
|
||
|
||
if (modelProfile) {
|
||
provider = modelProfile.provider || config.primaryProvider || 'anthropic'
|
||
} else {
|
||
provider = config.primaryProvider || 'anthropic'
|
||
}
|
||
|
||
// Use native Anthropic SDK for Anthropic and some Anthropic-compatible providers
|
||
if (
|
||
provider === 'anthropic' ||
|
||
provider === 'bigdream' ||
|
||
provider === 'opendev'
|
||
) {
|
||
return queryAnthropicNative(
|
||
messages,
|
||
systemPrompt,
|
||
maxThinkingTokens,
|
||
tools,
|
||
signal,
|
||
{ ...options, modelProfile, toolUseContext },
|
||
)
|
||
}
|
||
|
||
// Use OpenAI-compatible interface for all other providers
|
||
return queryOpenAI(messages, systemPrompt, maxThinkingTokens, tools, signal, {
|
||
...options,
|
||
modelProfile,
|
||
toolUseContext,
|
||
})
|
||
}
|
||
|
||
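/**
 * Query the Anthropic API (or an Anthropic-compatible endpoint) directly.
 * Builds the client from the resolved ModelProfile, applies prompt-cache
 * breakpoints, streams when config.stream is enabled, honors the caller's
 * AbortSignal, and computes cost from the native usage fields.
 */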
async function queryAnthropicNative(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
systemPrompt: string[],
|
||
maxThinkingTokens: number,
|
||
tools: Tool[],
|
||
signal: AbortSignal,
|
||
options?: {
|
||
safeMode: boolean
|
||
model: string
|
||
prependCLISysprompt: boolean
|
||
modelProfile?: ModelProfile | null
|
||
toolUseContext?: ToolUseContext
|
||
},
|
||
): Promise<AssistantMessage> {
|
||
const config = getGlobalConfig()
|
||
const modelManager = getModelManager()
|
||
const toolUseContext = options?.toolUseContext
|
||
|
||
// 🔧 Fix: use the ModelProfile passed in, not the hard-coded 'main' pointer
|
||
const modelProfile = options?.modelProfile || modelManager.getModel('main')
|
||
let anthropic: Anthropic | AnthropicBedrock | AnthropicVertex
|
||
let model: string
|
||
let provider: string
|
||
|
||
// 🔍 Debug: record model configuration details
|
||
debugLogger.api('MODEL_CONFIG_ANTHROPIC', {
|
||
modelProfileFound: !!modelProfile,
|
||
modelProfileId: modelProfile?.modelName,
|
||
modelProfileName: modelProfile?.name,
|
||
modelProfileModelName: modelProfile?.modelName,
|
||
modelProfileProvider: modelProfile?.provider,
|
||
modelProfileBaseURL: modelProfile?.baseURL,
|
||
modelProfileApiKeyExists: !!modelProfile?.apiKey,
|
||
optionsModel: options?.model,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
|
||
if (modelProfile) {
|
||
// Use the ModelProfile's full configuration
|
||
model = modelProfile.modelName
|
||
provider = modelProfile.provider || config.primaryProvider || 'anthropic'
|
||
|
||
// Create a dedicated API client from the ModelProfile
|
||
if (
|
||
modelProfile.provider === 'anthropic' ||
|
||
modelProfile.provider === 'bigdream' ||
|
||
modelProfile.provider === 'opendev'
|
||
) {
|
||
const clientConfig: any = {
|
||
apiKey: modelProfile.apiKey,
|
||
dangerouslyAllowBrowser: true,
|
||
maxRetries: 0,
|
||
timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
|
||
defaultHeaders: {
|
||
'x-app': 'cli',
|
||
'User-Agent': USER_AGENT,
|
||
},
|
||
}
|
||
|
||
// Use the ModelProfile's baseURL rather than the global config
|
||
if (modelProfile.baseURL) {
|
||
clientConfig.baseURL = modelProfile.baseURL
|
||
}
|
||
|
||
anthropic = new Anthropic(clientConfig)
|
||
} else {
|
||
// Handling for other providers
|
||
anthropic = getAnthropicClient(model)
|
||
}
|
||
} else {
|
||
// 🚨 Fallback: throw when no valid ModelProfile is available
|
||
const errorDetails = {
|
||
modelProfileExists: !!modelProfile,
|
||
modelProfileModelName: modelProfile?.modelName,
|
||
requestedModel: options?.model,
|
||
requestId: getCurrentRequest()?.id,
|
||
}
|
||
debugLogger.error('ANTHROPIC_FALLBACK_ERROR', errorDetails)
|
||
throw new Error(
|
||
`No valid ModelProfile available for Anthropic provider. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
|
||
)
|
||
}
|
||
|
||
// Prepend system prompt block for easy API identification
|
||
if (options?.prependCLISysprompt) {
|
||
// Log stats about first block for analyzing prefix matching config
|
||
const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
|
||
logEvent('tengu_sysprompt_block', {
|
||
snippet: firstSyspromptBlock?.slice(0, 20),
|
||
length: String(firstSyspromptBlock?.length ?? 0),
|
||
hash: firstSyspromptBlock
|
||
? createHash('sha256').update(firstSyspromptBlock).digest('hex')
|
||
: '',
|
||
})
|
||
|
||
systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
|
||
}
|
||
|
||
const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
|
||
_ => ({
|
||
...(PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}),
|
||
text: _,
|
||
type: 'text',
|
||
}),
|
||
)
|
||
|
||
const toolSchemas = tools.map(
|
||
tool =>
|
||
({
|
||
name: tool.name,
|
||
description: tool.description,
|
||
input_schema: zodToJsonSchema(tool.inputSchema),
|
||
}) as unknown as Anthropic.Beta.Messages.BetaTool,
|
||
)
|
||
|
||
const anthropicMessages = addCacheBreakpoints(messages)
|
||
const startIncludingRetries = Date.now()
|
||
|
||
// Log the system prompt construction process
|
||
logSystemPromptConstruction({
|
||
basePrompt: systemPrompt.join('\n'),
|
||
kodeContext: generateKodeContext() || '',
|
||
reminders: [], // Could be populated from generateSystemReminders
|
||
finalPrompt: systemPrompt.join('\n'),
|
||
})
|
||
|
||
let start = Date.now()
|
||
let attemptNumber = 0
|
||
let response
|
||
|
||
try {
|
||
response = await withRetry(async attempt => {
|
||
attemptNumber = attempt
|
||
start = Date.now()
|
||
|
||
const params: Anthropic.Beta.Messages.MessageCreateParams = {
|
||
model,
|
||
max_tokens: getMaxTokensFromProfile(modelProfile),
|
||
messages: anthropicMessages,
|
||
system,
|
||
tools: toolSchemas.length > 0 ? toolSchemas : undefined,
|
||
tool_choice: toolSchemas.length > 0 ? { type: 'auto' } : undefined,
|
||
}
|
||
|
||
if (maxThinkingTokens > 0) {
|
||
;(params as any).extra_headers = {
|
||
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
|
||
}
|
||
;(params as any).thinking = { max_tokens: maxThinkingTokens }
|
||
}
|
||
|
||
// 🔥 REAL-TIME API CALL DEBUG - uses the global logging system (Anthropic streaming)
|
||
debugLogger.api('ANTHROPIC_API_CALL_START_STREAMING', {
|
||
endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
|
||
model,
|
||
provider,
|
||
apiKeyConfigured: !!modelProfile?.apiKey,
|
||
apiKeyPrefix: modelProfile?.apiKey
|
||
? modelProfile.apiKey.substring(0, 8)
|
||
: null,
|
||
maxTokens: params.max_tokens,
|
||
temperature: MAIN_QUERY_TEMPERATURE,
|
||
messageCount: params.messages?.length || 0,
|
||
streamMode: true,
|
||
toolsCount: toolSchemas.length,
|
||
thinkingTokens: maxThinkingTokens,
|
||
timestamp: new Date().toISOString(),
|
||
modelProfileId: modelProfile?.modelName,
|
||
modelProfileName: modelProfile?.name,
|
||
})
|
||
|
||
if (config.stream) {
|
||
// 🔧 CRITICAL FIX: Connect AbortSignal to Anthropic API call
|
||
const stream = await anthropic.beta.messages.create({
|
||
...params,
|
||
stream: true,
|
||
}, {
|
||
signal: signal // ← CRITICAL: Connect the AbortSignal to API call
|
||
})
|
||
|
||
let finalResponse: any | null = null
|
||
let messageStartEvent: any = null
|
||
const contentBlocks: any[] = []
|
||
let usage: any = null
|
||
let stopReason: string | null = null
|
||
let stopSequence: string | null = null
|
||
|
||
for await (const event of stream) {
|
||
// 🔧 CRITICAL FIX: Check abort signal in streaming loop
|
||
if (signal.aborted) {
|
||
debugLogger.flow('STREAM_ABORTED', {
|
||
eventType: event.type,
|
||
timestamp: Date.now()
|
||
})
|
||
throw new Error('Request was cancelled')
|
||
}
|
||
if (event.type === 'message_start') {
|
||
messageStartEvent = event
|
||
finalResponse = {
|
||
...event.message,
|
||
content: [], // Will be populated from content blocks
|
||
}
|
||
} else if (event.type === 'content_block_start') {
|
||
contentBlocks[event.index] = { ...event.content_block }
|
||
} else if (event.type === 'content_block_delta') {
|
||
if (!contentBlocks[event.index]) {
|
||
contentBlocks[event.index] = {
|
||
type: event.delta.type === 'text_delta' ? 'text' : 'unknown',
|
||
text: '',
|
||
}
|
||
}
|
||
if (event.delta.type === 'text_delta') {
|
||
contentBlocks[event.index].text += event.delta.text
|
||
}
|
||
} else if (event.type === 'message_delta') {
|
||
if (event.delta.stop_reason) stopReason = event.delta.stop_reason
|
||
if (event.delta.stop_sequence)
|
||
stopSequence = event.delta.stop_sequence
|
||
if (event.usage) usage = { ...usage, ...event.usage }
|
||
} else if (event.type === 'message_stop') {
|
||
break
|
||
}
|
||
}
|
||
|
||
if (!finalResponse || !messageStartEvent) {
|
||
throw new Error('Stream ended without proper message structure')
|
||
}
|
||
|
||
// Construct the final response
|
||
finalResponse = {
|
||
...messageStartEvent.message,
|
||
content: contentBlocks.filter(Boolean),
|
||
stop_reason: stopReason,
|
||
stop_sequence: stopSequence,
|
||
usage: {
|
||
...messageStartEvent.message.usage,
|
||
...usage,
|
||
},
|
||
}
|
||
|
||
return finalResponse
|
||
} else {
|
||
// 🔥 REAL-TIME API CALL DEBUG - uses the global logging system (Anthropic non-streaming)
|
||
debugLogger.api('ANTHROPIC_API_CALL_START_NON_STREAMING', {
|
||
endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
|
||
model,
|
||
provider,
|
||
apiKeyConfigured: !!modelProfile?.apiKey,
|
||
apiKeyPrefix: modelProfile?.apiKey
|
||
? modelProfile.apiKey.substring(0, 8)
|
||
: null,
|
||
maxTokens: params.max_tokens,
|
||
temperature: MAIN_QUERY_TEMPERATURE,
|
||
messageCount: params.messages?.length || 0,
|
||
streamMode: false,
|
||
toolsCount: toolSchemas.length,
|
||
thinkingTokens: maxThinkingTokens,
|
||
timestamp: new Date().toISOString(),
|
||
modelProfileId: modelProfile?.modelName,
|
||
modelProfileName: modelProfile?.name,
|
||
})
|
||
|
||
// 🔧 CRITICAL FIX: Connect AbortSignal to non-streaming API call
|
||
return await anthropic.beta.messages.create(params, {
|
||
signal: signal // ← CRITICAL: Connect the AbortSignal to API call
|
||
})
|
||
}
|
||
}, { signal }) // 🔧 CRITICAL FIX: Pass AbortSignal to withRetry
|
||
|
||
const ttftMs = Date.now() - start
|
||
const durationMs = Date.now() - startIncludingRetries
|
||
|
||
const content = response.content.map((block: ContentBlock) => {
|
||
if (block.type === 'text') {
|
||
return {
|
||
type: 'text' as const,
|
||
text: block.text,
|
||
}
|
||
} else if (block.type === 'tool_use') {
|
||
return {
|
||
type: 'tool_use' as const,
|
||
id: block.id,
|
||
name: block.name,
|
||
input: block.input,
|
||
}
|
||
}
|
||
return block
|
||
})
|
||
|
||
const assistantMessage: AssistantMessage = {
|
||
message: {
|
||
id: response.id,
|
||
content,
|
||
model: response.model,
|
||
role: 'assistant',
|
||
stop_reason: response.stop_reason,
|
||
stop_sequence: response.stop_sequence,
|
||
type: 'message',
|
||
usage: response.usage,
|
||
},
|
||
type: 'assistant',
|
||
uuid: nanoid() as UUID,
|
||
durationMs,
|
||
costUSD: 0, // Will be calculated below
|
||
}
|
||
|
||
// Log the complete LLM interaction for debugging (Anthropic path)
// Note: the Anthropic API keeps the system prompt separate from the messages; rebuild a complete view of the API call here
|
||
const systemMessages = system.map(block => ({
|
||
role: 'system',
|
||
content: block.text,
|
||
}))
|
||
|
||
logLLMInteraction({
|
||
systemPrompt: systemPrompt.join('\n'),
|
||
messages: [...systemMessages, ...anthropicMessages],
|
||
response: response,
|
||
usage: response.usage
|
||
? {
|
||
inputTokens: response.usage.input_tokens,
|
||
outputTokens: response.usage.output_tokens,
|
||
}
|
||
: undefined,
|
||
timing: {
|
||
start: start,
|
||
end: Date.now(),
|
||
},
|
||
apiFormat: 'anthropic',
|
||
})
|
||
|
||
// Calculate cost using native Anthropic usage data
|
||
const inputTokens = response.usage.input_tokens
|
||
const outputTokens = response.usage.output_tokens
|
||
const cacheCreationInputTokens =
|
||
response.usage.cache_creation_input_tokens ?? 0
|
||
const cacheReadInputTokens = response.usage.cache_read_input_tokens ?? 0
|
||
|
||
const costUSD =
|
||
(inputTokens / 1_000_000) * getModelInputTokenCostUSD(model) +
|
||
(outputTokens / 1_000_000) * getModelOutputTokenCostUSD(model) +
|
||
(cacheCreationInputTokens / 1_000_000) *
|
||
getModelInputTokenCostUSD(model) +
|
||
(cacheReadInputTokens / 1_000_000) *
|
||
(getModelInputTokenCostUSD(model) * 0.1) // Cache reads are 10% of input cost
|
||
|
||
assistantMessage.costUSD = costUSD
|
||
addToTotalCost(costUSD, durationMs)
|
||
|
||
logEvent('api_response_anthropic_native', {
|
||
model,
|
||
input_tokens: String(inputTokens),
|
||
output_tokens: String(outputTokens),
|
||
cache_creation_input_tokens: String(cacheCreationInputTokens),
|
||
cache_read_input_tokens: String(cacheReadInputTokens),
|
||
cost_usd: String(costUSD),
|
||
duration_ms: String(durationMs),
|
||
ttft_ms: String(ttftMs),
|
||
attempt_number: String(attemptNumber),
|
||
})
|
||
|
||
return assistantMessage
|
||
} catch (error) {
|
||
return getAssistantMessageFromError(error)
|
||
}
|
||
}
|
||
|
||
function getAssistantMessageFromError(error: unknown): AssistantMessage {
|
||
if (error instanceof Error && error.message.includes('prompt is too long')) {
|
||
return createAssistantAPIErrorMessage(PROMPT_TOO_LONG_ERROR_MESSAGE)
|
||
}
|
||
if (
|
||
error instanceof Error &&
|
||
error.message.includes('Your credit balance is too low')
|
||
) {
|
||
return createAssistantAPIErrorMessage(CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE)
|
||
}
|
||
if (
|
||
error instanceof Error &&
|
||
error.message.toLowerCase().includes('x-api-key')
|
||
) {
|
||
return createAssistantAPIErrorMessage(INVALID_API_KEY_ERROR_MESSAGE)
|
||
}
|
||
if (error instanceof Error) {
|
||
if (process.env.NODE_ENV === 'development') {
|
||
console.log(error)
|
||
}
|
||
return createAssistantAPIErrorMessage(
|
||
`${API_ERROR_MESSAGE_PREFIX}: ${error.message}`,
|
||
)
|
||
}
|
||
return createAssistantAPIErrorMessage(API_ERROR_MESSAGE_PREFIX)
|
||
}
|
||
|
||
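/**
 * Convert conversation messages to Anthropic MessageParams, requesting
 * ephemeral cache_control on the last two messages so prompt caching can
 * reuse the earlier prefix.
 */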
function addCacheBreakpoints(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
): MessageParam[] {
|
||
return messages.map((msg, index) => {
|
||
return msg.type === 'user'
|
||
? userMessageToMessageParam(msg, index > messages.length - 3)
|
||
: assistantMessageToMessageParam(msg, index > messages.length - 3)
|
||
})
|
||
}
|
||
|
||
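/**
 * Query an OpenAI-compatible provider. Builds Chat Completions parameters
 * from the resolved ModelProfile (GPT-5 models use max_completion_tokens and
 * a fixed temperature of 1). When the adapter system is enabled via
 * USE_NEW_ADAPTERS, requests go through ModelAdapterFactory, with
 * GPT-5-style profiles using the Responses API and continuation tokens;
 * otherwise the legacy completion path is used. Results are converted back
 * into Anthropic-style assistant messages.
 */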
async function queryOpenAI(
|
||
messages: (UserMessage | AssistantMessage)[],
|
||
systemPrompt: string[],
|
||
maxThinkingTokens: number,
|
||
tools: Tool[],
|
||
signal: AbortSignal,
|
||
options?: {
|
||
safeMode: boolean
|
||
model: string
|
||
prependCLISysprompt: boolean
|
||
modelProfile?: ModelProfile | null
|
||
toolUseContext?: ToolUseContext
|
||
},
|
||
): Promise<AssistantMessage> {
|
||
const config = getGlobalConfig()
|
||
const modelManager = getModelManager()
|
||
const toolUseContext = options?.toolUseContext
|
||
|
||
// 🔧 Fix: use the ModelProfile passed in, not the hard-coded 'main' pointer
|
||
const modelProfile = options?.modelProfile || modelManager.getModel('main')
|
||
let model: string
|
||
|
||
// 🔍 Debug: record model configuration details
|
||
const currentRequest = getCurrentRequest()
|
||
debugLogger.api('MODEL_CONFIG_OPENAI', {
|
||
modelProfileFound: !!modelProfile,
|
||
modelProfileId: modelProfile?.modelName,
|
||
modelProfileName: modelProfile?.name,
|
||
modelProfileModelName: modelProfile?.modelName,
|
||
modelProfileProvider: modelProfile?.provider,
|
||
modelProfileBaseURL: modelProfile?.baseURL,
|
||
modelProfileApiKeyExists: !!modelProfile?.apiKey,
|
||
optionsModel: options?.model,
|
||
requestId: getCurrentRequest()?.id,
|
||
})
|
||
|
||
if (modelProfile) {
|
||
model = modelProfile.modelName
|
||
} else {
|
||
model = options?.model || modelProfile?.modelName || ''
|
||
}
|
||
// Prepend system prompt block for easy API identification
|
||
if (options?.prependCLISysprompt) {
|
||
// Log stats about first block for analyzing prefix matching config (see https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes)
|
||
const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
|
||
logEvent('tengu_sysprompt_block', {
|
||
snippet: firstSyspromptBlock?.slice(0, 20),
|
||
length: String(firstSyspromptBlock?.length ?? 0),
|
||
hash: firstSyspromptBlock
|
||
? createHash('sha256').update(firstSyspromptBlock).digest('hex')
|
||
: '',
|
||
})
|
||
|
||
systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
|
||
}
|
||
|
||
const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
|
||
_ => ({
|
||
...(PROMPT_CACHING_ENABLED
|
||
? { cache_control: { type: 'ephemeral' } }
|
||
: {}),
|
||
text: _,
|
||
type: 'text',
|
||
}),
|
||
)
|
||
|
||
const toolSchemas = await Promise.all(
|
||
tools.map(
|
||
async _ =>
|
||
({
|
||
type: 'function',
|
||
function: {
|
||
name: _.name,
|
||
description: await _.prompt({
|
||
safeMode: options?.safeMode,
|
||
}),
|
||
// Use tool's JSON schema directly if provided, otherwise convert Zod schema
|
||
parameters:
|
||
'inputJSONSchema' in _ && _.inputJSONSchema
|
||
? _.inputJSONSchema
|
||
: zodToJsonSchema(_.inputSchema),
|
||
},
|
||
}) as OpenAI.ChatCompletionTool,
|
||
),
|
||
)
|
||
|
||
const openaiSystem = system.map(
|
||
s =>
|
||
({
|
||
role: 'system',
|
||
content: s.text,
|
||
}) as OpenAI.ChatCompletionMessageParam,
|
||
)
|
||
|
||
const openaiMessages = convertAnthropicMessagesToOpenAIMessages(messages)
|
||
const startIncludingRetries = Date.now()
|
||
|
||
// Log the system prompt construction process (OpenAI path)
|
||
logSystemPromptConstruction({
|
||
basePrompt: systemPrompt.join('\n'),
|
||
kodeContext: generateKodeContext() || '',
|
||
reminders: [], // Could be populated from generateSystemReminders
|
||
finalPrompt: systemPrompt.join('\n'),
|
||
})
|
||
|
||
let start = Date.now()
|
||
let attemptNumber = 0
|
||
let response
|
||
|
||
try {
|
||
response = await withRetry(async attempt => {
|
||
attemptNumber = attempt
|
||
start = Date.now()
|
||
// 🔥 GPT-5 Enhanced Parameter Construction
|
||
const maxTokens = getMaxTokensFromProfile(modelProfile)
|
||
const isGPT5 = isGPT5Model(model)
|
||
|
||
const opts: OpenAI.ChatCompletionCreateParams = {
|
||
model,
|
||
// 🔧 Use correct parameter name based on model type
|
||
...(isGPT5 ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens }),
|
||
messages: [...openaiSystem, ...openaiMessages],
|
||
// 🔧 GPT-5 temperature constraint: only 1 or undefined
|
||
temperature: isGPT5 ? 1 : MAIN_QUERY_TEMPERATURE,
|
||
}
|
||
if (config.stream) {
|
||
;(opts as OpenAI.ChatCompletionCreateParams).stream = true
|
||
opts.stream_options = {
|
||
include_usage: true,
|
||
}
|
||
}

      if (toolSchemas.length > 0) {
        opts.tools = toolSchemas
        opts.tool_choice = 'auto'
      }
      const reasoningEffort = await getReasoningEffort(modelProfile, messages)
      if (reasoningEffort) {
        logEvent('debug_reasoning_effort', {
          effort: reasoningEffort,
        })
        opts.reasoning_effort = reasoningEffort
      }

      // 🔧 Fix: if a ModelProfile is configured, use it directly (relaxed condition)
      if (modelProfile && modelProfile.modelName) {
        debugLogger.api('USING_MODEL_PROFILE_PATH', {
          modelProfileName: modelProfile.modelName,
          modelName: modelProfile.modelName,
          provider: modelProfile.provider,
          baseURL: modelProfile.baseURL,
          apiKeyExists: !!modelProfile.apiKey,
          requestId: getCurrentRequest()?.id,
        })

        // Enable new adapter system with environment variable
        const USE_NEW_ADAPTER_SYSTEM = process.env.USE_NEW_ADAPTERS !== 'false'
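        // The flag defaults to the new adapter path; only an explicit USE_NEW_ADAPTERS=false
        // environment variable drops back to the legacy completion functions below.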

        if (USE_NEW_ADAPTER_SYSTEM) {
          // New adapter system
          const adapter = ModelAdapterFactory.createAdapter(modelProfile)

          // Build unified request parameters
          const unifiedParams: UnifiedRequestParams = {
            messages: openaiMessages,
            systemPrompt: openaiSystem.map(s => s.content as string),
            tools: tools,
            maxTokens: getMaxTokensFromProfile(modelProfile),
            stream: config.stream,
            reasoningEffort: reasoningEffort as any,
            temperature: isGPT5Model(model) ? 1 : MAIN_QUERY_TEMPERATURE,
            previousResponseId: toolUseContext?.responseState?.previousResponseId,
            verbosity: 'high', // High verbosity for coding tasks
          }

          // Create request using adapter
          const request = adapter.createRequest(unifiedParams)

          // Determine which API to use
          if (ModelAdapterFactory.shouldUseResponsesAPI(modelProfile)) {
            // Use Responses API for GPT-5 and similar models
            const { callGPT5ResponsesAPI } = await import('./openai')
            const response = await callGPT5ResponsesAPI(modelProfile, request, signal)
            const unifiedResponse = adapter.parseResponse(response)

            // Convert unified response back to Anthropic format
            const apiMessage = {
              role: 'assistant' as const,
              content: unifiedResponse.content,
              tool_calls: unifiedResponse.toolCalls,
              usage: {
                prompt_tokens: unifiedResponse.usage.promptTokens,
                completion_tokens: unifiedResponse.usage.completionTokens,
              },
            }
            const assistantMsg: AssistantMessage = {
              type: 'assistant',
              message: apiMessage as any,
              costUSD: 0, // Will be calculated later
              durationMs: Date.now() - start,
              uuid: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}` as any,
              responseId: unifiedResponse.responseId, // For state management
            }
            return assistantMsg
          } else {
            // Use existing Chat Completions flow
            const s = await getCompletionWithProfile(modelProfile, request, 0, 10, signal)
            let finalResponse
            if (config.stream) {
              finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
            } else {
              finalResponse = s
            }
            const r = convertOpenAIResponseToAnthropic(finalResponse)
            return r
          }
        } else {
          // Legacy system (preserved for fallback)
          const completionFunction = isGPT5Model(modelProfile.modelName)
            ? getGPT5CompletionWithProfile
            : getCompletionWithProfile
          const s = await completionFunction(modelProfile, opts, 0, 10, signal)
          let finalResponse
          if (opts.stream) {
            finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
          } else {
            finalResponse = s
          }
          const r = convertOpenAIResponseToAnthropic(finalResponse)
          return r
        }
      } else {
        // 🚨 Warning: ModelProfile unavailable, taking the legacy error path
        debugLogger.api('USING_LEGACY_PATH', {
          modelProfileExists: !!modelProfile,
          modelProfileId: modelProfile?.modelName,
          modelNameExists: !!modelProfile?.modelName,
          fallbackModel: 'main',
          actualModel: model,
          requestId: getCurrentRequest()?.id,
        })

        // 🚨 FALLBACK: without a valid ModelProfile, throw an error instead of using the legacy system
        const errorDetails = {
          modelProfileExists: !!modelProfile,
          modelProfileId: modelProfile?.modelName,
          modelNameExists: !!modelProfile?.modelName,
          requestedModel: model,
          requestId: getCurrentRequest()?.id,
        }
        debugLogger.error('NO_VALID_MODEL_PROFILE', errorDetails)
        throw new Error(
          `No valid ModelProfile available for model: ${model}. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
        )
      }
    }, { signal }) // 🔧 CRITICAL FIX: pass the AbortSignal through to withRetry
  } catch (error) {
    logError(error)
    return getAssistantMessageFromError(error)
  }
  const durationMs = Date.now() - start
  const durationMsIncludingRetries = Date.now() - startIncludingRetries

  const inputTokens = response.usage?.prompt_tokens ?? 0
  const outputTokens = response.usage?.completion_tokens ?? 0
  const cacheReadInputTokens =
    response.usage?.prompt_token_details?.cached_tokens ?? 0
  // OpenAI-style usage only reports cached (read) tokens; cache-write tokens are
  // not exposed, so count them as zero rather than re-billing the cached tokens
  // as writes (this also matches cache_creation_input_tokens: 0 in the return below).
  const cacheCreationInputTokens = 0
  const costUSD =
    (inputTokens / 1_000_000) * SONNET_COST_PER_MILLION_INPUT_TOKENS +
    (outputTokens / 1_000_000) * SONNET_COST_PER_MILLION_OUTPUT_TOKENS +
    (cacheReadInputTokens / 1_000_000) *
      SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS +
    (cacheCreationInputTokens / 1_000_000) *
      SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS
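
  // The estimate above always uses the Claude Sonnet rate constants, regardless of which
  // OpenAI-compatible model actually served the request (the per-model lookups further
  // down are not consulted here). For illustration, 2,000 input tokens and 500 output
  // tokens at hypothetical rates of $3/MTok in and $15/MTok out come to
  // 0.002 * 3 + 0.0005 * 15 ≈ $0.0135.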
  addToTotalCost(costUSD, durationMsIncludingRetries)

  // Log the complete LLM interaction for debugging (OpenAI path)
  logLLMInteraction({
    systemPrompt: systemPrompt.join('\n'),
    messages: [...openaiSystem, ...openaiMessages],
    response: response,
    usage: {
      inputTokens: inputTokens,
      outputTokens: outputTokens,
    },
    timing: {
      start: start,
      end: Date.now(),
    },
    apiFormat: 'openai',
  })

  return {
    message: {
      ...response,
      content: normalizeContentFromAPI(response.content),
      usage: {
        input_tokens: inputTokens,
        output_tokens: outputTokens,
        cache_read_input_tokens: cacheReadInputTokens,
        cache_creation_input_tokens: 0,
      },
    },
    costUSD,
    durationMs,
    type: 'assistant',
    uuid: randomUUID(),
  }
}

function getMaxTokensFromProfile(modelProfile: any): number {
  // Use the ModelProfile's maxTokens or a reasonable default
  return modelProfile?.maxTokens || 8000
}
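
// Note: the `||` fallback (rather than `??`) means a profile that explicitly
// sets maxTokens to 0 also falls back to the 8000-token default.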

function getModelInputTokenCostUSD(model: string): number {
  // Find the model in the models object
  for (const providerModels of Object.values(models)) {
    const modelInfo = providerModels.find((m: any) => m.model === model)
    if (modelInfo) {
      return modelInfo.input_cost_per_token || 0
    }
  }
  // Default fallback for unknown models: $3 per million input tokens
  return 0.000003
}

function getModelOutputTokenCostUSD(model: string): number {
  // Find the model in the models object
  for (const providerModels of Object.values(models)) {
    const modelInfo = providerModels.find((m: any) => m.model === model)
    if (modelInfo) {
      return modelInfo.output_cost_per_token || 0
    }
  }
  // Default fallback for unknown models: $15 per million output tokens
  return 0.000015
}
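
// For example, a hypothetical per-request estimate built on these lookups
// (inputTokens, outputTokens and model are assumed to be in scope):
//
//   const estimateUSD =
//     inputTokens * getModelInputTokenCostUSD(model) +
//     outputTokens * getModelOutputTokenCostUSD(model)
//
// The rates returned here are USD per token, so no per-million scaling is needed,
// unlike the SONNET_* per-million constants used in the main cost calculation.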

// New unified query functions for the model pointer system
export async function queryModel(
  modelPointer: import('../utils/config').ModelPointerType,
  messages: (UserMessage | AssistantMessage)[],
  systemPrompt: string[] = [],
  signal?: AbortSignal,
): Promise<AssistantMessage> {
  // Use queryLLM with the pointer directly
  return queryLLM(
    messages,
    systemPrompt,
    0, // maxThinkingTokens
    [], // tools
    signal || new AbortController().signal,
    {
      safeMode: false,
      model: modelPointer,
      prependCLISysprompt: true,
    },
  )
}
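
// Example (illustrative), assuming a 'main' pointer has been configured via /model:
//
//   const reply = await queryModel('main', messages, ['You answer in one sentence.'])
//   console.log(reply.message.content)
//
// where `messages` is an array of UserMessage/AssistantMessage turns as built elsewhere in this file.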

// Note: Use queryModel(pointer, ...) directly instead of these convenience functions

// Simplified query function using the quick model pointer
export async function queryQuick({
  systemPrompt = [],
  userPrompt,
  assistantPrompt,
  enablePromptCaching = false,
  signal,
}: {
  systemPrompt?: string[]
  userPrompt: string
  assistantPrompt?: string
  enablePromptCaching?: boolean
  signal?: AbortSignal
}): Promise<AssistantMessage> {
  const messages = [
    {
      message: { role: 'user', content: userPrompt },
      type: 'user',
      uuid: randomUUID(),
    },
  ] as (UserMessage | AssistantMessage)[]

  return queryModel('quick', messages, systemPrompt, signal)
}
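
// Example (illustrative): a one-off prompt through the 'quick' pointer:
//
//   const answer = await queryQuick({ userPrompt: 'Summarize the last commit in one line.' })
//
// Note that assistantPrompt and enablePromptCaching are accepted here but are
// not currently forwarded to queryModel.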