Kode-cli/src/services/claude.ts
import '@anthropic-ai/sdk/shims/node'
import Anthropic, { APIConnectionError, APIError } from '@anthropic-ai/sdk'
import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
import { AnthropicVertex } from '@anthropic-ai/vertex-sdk'
import type { BetaUsage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import chalk from 'chalk'
import { createHash, randomUUID, UUID } from 'crypto'
import 'dotenv/config'
import { addToTotalCost } from '../cost-tracker'
import models from '../constants/models'
import type { AssistantMessage, UserMessage } from '../query'
import { Tool } from '../Tool'
import {
getAnthropicApiKey,
getOrCreateUserID,
getGlobalConfig,
ModelProfile,
} from '../utils/config'
import { getProjectDocs } from '../context'
import { logError, SESSION_ID } from '../utils/log'
import { USER_AGENT } from '../utils/http'
import {
createAssistantAPIErrorMessage,
normalizeContentFromAPI,
} from '../utils/messages'
import { countTokens } from '../utils/tokens'
import { logEvent } from './statsig'
import { withVCR } from './vcr'
import {
debug as debugLogger,
markPhase,
getCurrentRequest,
logLLMInteraction,
logSystemPromptConstruction,
logErrorWithDiagnosis,
} from '../utils/debugLogger'
import {
MessageContextManager,
createRetentionStrategy,
} from '../utils/messageContextManager'
import { getModelManager } from '../utils/model'
import { zodToJsonSchema } from 'zod-to-json-schema'
import type { BetaMessageStream } from '@anthropic-ai/sdk/lib/BetaMessageStream.mjs'
import { ModelAdapterFactory } from './modelAdapterFactory'
import { UnifiedRequestParams } from '../types/modelCapabilities'
import { responseStateManager, getConversationId } from './responseStateManager'
import type { ToolUseContext } from '../Tool'
import type {
Message as APIMessage,
MessageParam,
TextBlockParam,
} from '@anthropic-ai/sdk/resources/index.mjs'
import { USE_BEDROCK, USE_VERTEX } from '../utils/model'
import { getCLISyspromptPrefix } from '../constants/prompts'
import { getVertexRegionForModel } from '../utils/model'
import OpenAI from 'openai'
import type { ChatCompletionStream } from 'openai/lib/ChatCompletionStream'
import { ContentBlock } from '@anthropic-ai/sdk/resources/messages/messages'
import { nanoid } from 'nanoid'
import { getCompletionWithProfile, getGPT5CompletionWithProfile } from './openai'
import { getReasoningEffort } from '../utils/thinking'
import { generateSystemReminders } from './systemReminder'
// Helper function to check if a model is GPT-5
function isGPT5Model(modelName: string): boolean {
return modelName.startsWith('gpt-5')
}
// Helper function to extract model configuration for debug logging
function getModelConfigForDebug(model: string): {
modelName: string
provider: string
apiKeyStatus: 'configured' | 'missing' | 'invalid'
baseURL?: string
maxTokens?: number
reasoningEffort?: string
isStream?: boolean
temperature?: number
} {
const config = getGlobalConfig()
const modelManager = getModelManager()
// 🔧 Fix: Use ModelManager to get the actual current model profile
const modelProfile = modelManager.getModel('main')
let apiKeyStatus: 'configured' | 'missing' | 'invalid' = 'missing'
let baseURL: string | undefined
let maxTokens: number | undefined
let reasoningEffort: string | undefined
// 🔧 Fix: Use ModelProfile configuration exclusively
if (modelProfile) {
apiKeyStatus = modelProfile.apiKey ? 'configured' : 'missing'
baseURL = modelProfile.baseURL
maxTokens = modelProfile.maxTokens
reasoningEffort = modelProfile.reasoningEffort
} else {
// 🚨 No ModelProfile available - this should not happen in modern system
apiKeyStatus = 'missing'
maxTokens = undefined
reasoningEffort = undefined
}
return {
modelName: model,
provider: modelProfile?.provider || config.primaryProvider || 'anthropic',
apiKeyStatus,
baseURL,
maxTokens,
reasoningEffort,
isStream: config.stream || false,
temperature: MAIN_QUERY_TEMPERATURE,
}
}
// KodeContext manager - synchronous cache and accessor for project docs
class KodeContextManager {
private static instance: KodeContextManager
private projectDocsCache: string = ''
private cacheInitialized: boolean = false
private initPromise: Promise<void> | null = null
private constructor() {}
public static getInstance(): KodeContextManager {
if (!KodeContextManager.instance) {
KodeContextManager.instance = new KodeContextManager()
}
return KodeContextManager.instance
}
public async initialize(): Promise<void> {
if (this.cacheInitialized) return
if (this.initPromise) {
return this.initPromise
}
this.initPromise = this.loadProjectDocs()
await this.initPromise
}
private async loadProjectDocs(): Promise<void> {
try {
const projectDocs = await getProjectDocs()
this.projectDocsCache = projectDocs || ''
this.cacheInitialized = true
// Log the load result in development mode
if (process.env.NODE_ENV === 'development') {
console.log(
`[KodeContext] Loaded ${this.projectDocsCache.length} characters from project docs`,
)
}
} catch (error) {
console.warn('[KodeContext] Failed to load project docs:', error)
this.projectDocsCache = ''
this.cacheInitialized = true
}
}
public getKodeContext(): string {
if (!this.cacheInitialized) {
// Not initialized yet: kick off async initialization but return an empty string immediately
this.initialize().catch(console.warn)
return ''
}
return this.projectDocsCache
}
public async refreshCache(): Promise<void> {
this.cacheInitialized = false
this.initPromise = null
await this.initialize()
}
}
// Exported functions are kept for backward compatibility
const kodeContextManager = KodeContextManager.getInstance()
// Initialize asynchronously at module load time
kodeContextManager.initialize().catch(console.warn)
export const generateKodeContext = (): string => {
return kodeContextManager.getKodeContext()
}
export const refreshKodeContext = async (): Promise<void> => {
await kodeContextManager.refreshCache()
}
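// Usage sketch (illustrative only): generateKodeContext() is synchronous and returns
// whatever the cache currently holds; call refreshKodeContext() after project docs
// change to force a reload.
//
//   const context = generateKodeContext() // '' until the async init completes
//   if (context) console.log(`Kode context: ${context.length} chars`)
//   await refreshKodeContext()            // re-read project docs on demand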
interface StreamResponse extends APIMessage {
ttftMs?: number
}
export const API_ERROR_MESSAGE_PREFIX = 'API Error'
export const PROMPT_TOO_LONG_ERROR_MESSAGE = 'Prompt is too long'
export const CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE = 'Credit balance is too low'
export const INVALID_API_KEY_ERROR_MESSAGE =
'Invalid API key · Please run /login'
export const NO_CONTENT_MESSAGE = '(no content)'
const PROMPT_CACHING_ENABLED = !process.env.DISABLE_PROMPT_CACHING
// @see https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
const HAIKU_COST_PER_MILLION_INPUT_TOKENS = 0.8
const HAIKU_COST_PER_MILLION_OUTPUT_TOKENS = 4
const HAIKU_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 1
const HAIKU_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.08
const SONNET_COST_PER_MILLION_INPUT_TOKENS = 3
const SONNET_COST_PER_MILLION_OUTPUT_TOKENS = 15
const SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 3.75
const SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.3
export const MAIN_QUERY_TEMPERATURE = 1 // to get more variation for binary feedback
function getMetadata() {
return {
user_id: `${getOrCreateUserID()}_${SESSION_ID}`,
}
}
const MAX_RETRIES = process.env.USER_TYPE === 'SWE_BENCH' ? 100 : 10
const BASE_DELAY_MS = 500
interface RetryOptions {
maxRetries?: number
signal?: AbortSignal
}
// Helper function to create an abortable delay
function abortableDelay(delayMs: number, signal?: AbortSignal): Promise<void> {
return new Promise((resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error('Request was aborted'))
return
}
const timeoutId = setTimeout(() => {
resolve()
}, delayMs)
// If signal is provided, listen for abort event
if (signal) {
const abortHandler = () => {
clearTimeout(timeoutId)
reject(new Error('Request was aborted'))
}
signal.addEventListener('abort', abortHandler, { once: true })
}
})
}
function getRetryDelay(
attempt: number,
retryAfterHeader?: string | null,
): number {
if (retryAfterHeader) {
const seconds = parseInt(retryAfterHeader, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
return Math.min(BASE_DELAY_MS * Math.pow(2, attempt - 1), 32000) // Max 32s delay
}
function shouldRetry(error: APIError): boolean {
// Check for overloaded errors first and only retry for SWE_BENCH
if (error.message?.includes('"type":"overloaded_error"')) {
return process.env.USER_TYPE === 'SWE_BENCH'
}
// Note this is not a standard header.
const shouldRetryHeader = error.headers?.['x-should-retry']
// If the server explicitly says whether or not to retry, obey.
if (shouldRetryHeader === 'true') return true
if (shouldRetryHeader === 'false') return false
if (error instanceof APIConnectionError) {
return true
}
if (!error.status) return false
// Retry on request timeouts.
if (error.status === 408) return true
// Retry on lock timeouts.
if (error.status === 409) return true
// Retry on rate limits.
if (error.status === 429) return true
// Retry internal errors.
if (error.status && error.status >= 500) return true
return false
}
async function withRetry<T>(
operation: (attempt: number) => Promise<T>,
options: RetryOptions = {},
): Promise<T> {
const maxRetries = options.maxRetries ?? MAX_RETRIES
let lastError: unknown
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
try {
return await operation(attempt)
} catch (error) {
lastError = error
// Only retry if the error indicates we should
if (
attempt > maxRetries ||
!(error instanceof APIError) ||
!shouldRetry(error)
) {
throw error
}
// 🔧 CRITICAL FIX: Check abort signal BEFORE showing retry message
if (options.signal?.aborted) {
throw new Error('Request cancelled by user')
}
// Get retry-after header if available
const retryAfter = error.headers?.['retry-after'] ?? null
const delayMs = getRetryDelay(attempt, retryAfter)
console.log(
`${chalk.red(`API ${error.name} (${error.message}) · Retrying in ${Math.round(delayMs / 1000)} seconds… (attempt ${attempt}/${maxRetries})`)}`,
)
logEvent('tengu_api_retry', {
attempt: String(attempt),
delayMs: String(delayMs),
error: error.message,
status: String(error.status),
provider: USE_BEDROCK ? 'bedrock' : USE_VERTEX ? 'vertex' : '1p',
})
try {
await abortableDelay(delayMs, options.signal)
} catch (delayError) {
// If aborted during delay, throw the error to stop retrying
if (delayError.message === 'Request was aborted') {
throw new Error('Request cancelled by user')
}
throw delayError
}
}
}
throw lastError
}
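// Usage sketch (illustrative only): wrap a call in withRetry so retryable APIError
// statuses (408/409/429/5xx) back off exponentially, while an AbortSignal cancels
// both the in-flight attempt and the delay between attempts. `params` here stands
// in for a prepared MessageCreateParams object, not a module export.
//
//   const controller = new AbortController()
//   const message = await withRetry(
//     async attempt => {
//       debugLogger.api('RETRY_ATTEMPT', { attempt: String(attempt) })
//       return getAnthropicClient().beta.messages.create(params, { signal: controller.signal })
//     },
//     { maxRetries: 3, signal: controller.signal },
//   )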
/**
* Fetch available models from Anthropic API
*/
export async function fetchAnthropicModels(
baseURL: string,
apiKey: string,
): Promise<any[]> {
try {
// Use provided baseURL or default to official Anthropic API
const modelsURL = baseURL
? `${baseURL.replace(/\/+$/, '')}/v1/models`
: 'https://api.anthropic.com/v1/models'
const response = await fetch(modelsURL, {
method: 'GET',
headers: {
'x-api-key': apiKey,
'anthropic-version': '2023-06-01',
'User-Agent': USER_AGENT,
},
})
if (!response.ok) {
// Provide user-friendly error messages based on status code
if (response.status === 401) {
throw new Error(
'Invalid API key. Please check your Anthropic API key and try again.',
)
} else if (response.status === 403) {
throw new Error(
'API key does not have permission to access models. Please check your API key permissions.',
)
} else if (response.status === 429) {
throw new Error(
'Too many requests. Please wait a moment and try again.',
)
} else if (response.status >= 500) {
throw new Error(
'Anthropic service is temporarily unavailable. Please try again later.',
)
} else {
throw new Error(
`Unable to connect to Anthropic API (${response.status}). Please check your internet connection and API key.`,
)
}
}
const data = await response.json()
return data.data || []
} catch (error) {
// If it's already our custom error, pass it through
if (
(error instanceof Error && error.message.includes('API key')) ||
(error instanceof Error && error.message.includes('Anthropic'))
) {
throw error
}
// For network errors or other issues
console.error('Failed to fetch Anthropic models:', error)
throw new Error(
'Unable to connect to Anthropic API. Please check your internet connection and try again.',
)
}
}
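// Usage sketch (illustrative only): list model IDs against the official endpoint.
// An empty baseURL falls back to https://api.anthropic.com; each returned entry is
// expected to expose at least an `id` field.
//
//   const available = await fetchAnthropicModels('', process.env.ANTHROPIC_API_KEY ?? '')
//   available.forEach(m => console.log(m.id))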
export async function verifyApiKey(
apiKey: string,
baseURL?: string,
provider?: string,
): Promise<boolean> {
if (!apiKey) {
return false
}
// For non-Anthropic providers, use OpenAI-compatible verification
if (provider && provider !== 'anthropic') {
try {
const headers: Record<string, string> = {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
}
// 🔧 Fix: Proper URL construction for verification
if (!baseURL) {
console.warn(
'No baseURL provided for non-Anthropic provider verification',
)
return false
}
const modelsURL = `${baseURL.replace(/\/+$/, '')}/models`
const response = await fetch(modelsURL, {
method: 'GET',
headers,
})
return response.ok
} catch (error) {
console.warn('API verification failed for non-Anthropic provider:', error)
return false
}
}
// For Anthropic and Anthropic-compatible APIs
const clientConfig: any = {
apiKey,
dangerouslyAllowBrowser: true,
maxRetries: 3,
defaultHeaders: {
'User-Agent': USER_AGENT,
},
}
// Only add baseURL for true Anthropic-compatible APIs
if (
baseURL &&
(provider === 'anthropic' ||
provider === 'bigdream' ||
provider === 'opendev')
) {
clientConfig.baseURL = baseURL
}
const anthropic = new Anthropic(clientConfig)
try {
await withRetry(
async () => {
const model = 'claude-sonnet-4-20250514'
const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
await anthropic.messages.create({
model,
max_tokens: 1000, // Simple test token limit for API verification
messages,
temperature: 0,
metadata: getMetadata(),
})
return true
},
{ maxRetries: 2 }, // Use fewer retries for API key verification
)
return true
} catch (error) {
logError(error)
// Check for authentication error
if (
error instanceof Error &&
error.message.includes(
'{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}',
)
) {
return false
}
throw error
}
}
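// Usage sketch (illustrative only): Anthropic keys can be verified without a baseURL,
// while OpenAI-compatible providers require one. `myKey` is a placeholder value.
//
//   const okAnthropic = await verifyApiKey(process.env.ANTHROPIC_API_KEY ?? '')
//   const okOpenAI = await verifyApiKey(myKey, 'https://api.example.com/v1', 'openai')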
function convertAnthropicMessagesToOpenAIMessages(
messages: (UserMessage | AssistantMessage)[],
): (
| OpenAI.ChatCompletionMessageParam
| OpenAI.ChatCompletionToolMessageParam
)[] {
const openaiMessages: (
| OpenAI.ChatCompletionMessageParam
| OpenAI.ChatCompletionToolMessageParam
)[] = []
const toolResults: Record<string, OpenAI.ChatCompletionToolMessageParam> = {}
for (const message of messages) {
let contentBlocks = []
if (typeof message.message.content === 'string') {
contentBlocks = [
{
type: 'text',
text: message.message.content,
},
]
} else if (!Array.isArray(message.message.content)) {
contentBlocks = [message.message.content]
} else {
contentBlocks = message.message.content
}
for (const block of contentBlocks) {
if (block.type === 'text') {
openaiMessages.push({
role: message.message.role,
content: block.text,
})
} else if (block.type === 'tool_use') {
openaiMessages.push({
role: 'assistant',
content: undefined,
tool_calls: [
{
type: 'function',
function: {
name: block.name,
arguments: JSON.stringify(block.input),
},
id: block.id,
},
],
})
} else if (block.type === 'tool_result') {
// Ensure content is always a string for role:tool messages
let toolContent = block.content
if (typeof toolContent !== 'string') {
// Convert content to string if it's not already
toolContent = JSON.stringify(toolContent)
}
toolResults[block.tool_use_id] = {
role: 'tool',
content: toolContent,
tool_call_id: block.tool_use_id,
}
}
}
}
const finalMessages: (
| OpenAI.ChatCompletionMessageParam
| OpenAI.ChatCompletionToolMessageParam
)[] = []
for (const message of openaiMessages) {
finalMessages.push(message)
if ('tool_calls' in message && message.tool_calls) {
for (const toolCall of message.tool_calls) {
if (toolResults[toolCall.id]) {
finalMessages.push(toolResults[toolCall.id])
}
}
}
}
return finalMessages
}
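// Conversion sketch (illustrative only): an assistant tool_use block becomes an OpenAI
// tool_calls entry, and the matching tool_result is re-ordered to follow it immediately
// as a role:'tool' message keyed by tool_call_id. For example, an Anthropic-style pair
//
//   assistant: [{ type: 'tool_use', id: 'call_1', name: 'bash', input: { cmd: 'ls' } }]
//   user:      [{ type: 'tool_result', tool_use_id: 'call_1', content: 'file.txt' }]
//
// is emitted as
//
//   { role: 'assistant', tool_calls: [{ id: 'call_1', type: 'function',
//       function: { name: 'bash', arguments: '{"cmd":"ls"}' } }] }
//   { role: 'tool', tool_call_id: 'call_1', content: 'file.txt' }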
function messageReducer(
previous: OpenAI.ChatCompletionMessage,
item: OpenAI.ChatCompletionChunk,
): OpenAI.ChatCompletionMessage {
const reduce = (acc: any, delta: OpenAI.ChatCompletionChunk.Choice.Delta) => {
acc = { ...acc }
for (const [key, value] of Object.entries(delta)) {
if (acc[key] === undefined || acc[key] === null) {
acc[key] = value
// OpenAI.Chat.Completions.ChatCompletionMessageToolCall does not have an .index key, so strip it
if (Array.isArray(acc[key])) {
for (const arr of acc[key]) {
delete arr.index
}
}
} else if (typeof acc[key] === 'string' && typeof value === 'string') {
acc[key] += value
} else if (typeof acc[key] === 'number' && typeof value === 'number') {
acc[key] = value
} else if (Array.isArray(acc[key]) && Array.isArray(value)) {
const accArray = acc[key]
for (let i = 0; i < value.length; i++) {
const { index, ...chunkTool } = value[i]
if (index - accArray.length > 1) {
throw new Error(
`Error: An array has an empty value when tool_calls are constructed. tool_calls: ${accArray}; tool: ${value}`,
)
}
accArray[index] = reduce(accArray[index], chunkTool)
}
} else if (typeof acc[key] === 'object' && typeof value === 'object') {
acc[key] = reduce(acc[key], value)
}
}
return acc
}
const choice = item.choices?.[0]
if (!choice) {
// chunk contains information about usage and token counts
return previous
}
return reduce(previous, choice.delta) as OpenAI.ChatCompletionMessage
}
async function handleMessageStream(
stream: ChatCompletionStream,
signal?: AbortSignal, // 🔧 Add AbortSignal support to stream handler
): Promise<OpenAI.ChatCompletion> {
const streamStartTime = Date.now()
let ttftMs: number | undefined
let chunkCount = 0
let errorCount = 0
debugLogger.api('OPENAI_STREAM_START', {
streamStartTime: String(streamStartTime),
})
let message = {} as OpenAI.ChatCompletionMessage
let id, model, created, object, usage
try {
for await (const chunk of stream) {
// 🔧 CRITICAL FIX: Check abort signal in OpenAI streaming loop
if (signal?.aborted) {
debugLogger.flow('OPENAI_STREAM_ABORTED', {
chunkCount,
timestamp: Date.now()
})
throw new Error('Request was cancelled')
}
chunkCount++
try {
if (!id) {
id = chunk.id
debugLogger.api('OPENAI_STREAM_ID_RECEIVED', {
id,
chunkNumber: String(chunkCount),
})
}
if (!model) {
model = chunk.model
debugLogger.api('OPENAI_STREAM_MODEL_RECEIVED', {
model,
chunkNumber: String(chunkCount),
})
}
if (!created) {
created = chunk.created
}
if (!object) {
object = chunk.object
}
if (!usage) {
usage = chunk.usage
}
message = messageReducer(message, chunk)
if (chunk?.choices?.[0]?.delta?.content) {
if (!ttftMs) {
ttftMs = Date.now() - streamStartTime
debugLogger.api('OPENAI_STREAM_FIRST_TOKEN', {
ttftMs: String(ttftMs),
chunkNumber: String(chunkCount),
})
}
}
} catch (chunkError) {
errorCount++
debugLogger.error('OPENAI_STREAM_CHUNK_ERROR', {
chunkNumber: String(chunkCount),
errorMessage:
chunkError instanceof Error
? chunkError.message
: String(chunkError),
errorType:
chunkError instanceof Error
? chunkError.constructor.name
: typeof chunkError,
})
// Continue processing other chunks
}
}
debugLogger.api('OPENAI_STREAM_COMPLETE', {
totalChunks: String(chunkCount),
errorCount: String(errorCount),
totalDuration: String(Date.now() - streamStartTime),
ttftMs: String(ttftMs || 0),
finalMessageId: id || 'undefined',
})
} catch (streamError) {
debugLogger.error('OPENAI_STREAM_FATAL_ERROR', {
totalChunks: String(chunkCount),
errorCount: String(errorCount),
errorMessage:
streamError instanceof Error
? streamError.message
: String(streamError),
errorType:
streamError instanceof Error
? streamError.constructor.name
: typeof streamError,
})
throw streamError
}
return {
id,
created,
model,
object,
choices: [
{
index: 0,
message,
finish_reason: 'stop',
logprobs: undefined,
},
],
usage,
}
}
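// Usage sketch (illustrative only): collapse a streamed completion back into a single
// OpenAI.ChatCompletion. `stream` is assumed to be the return value of an OpenAI
// chat.completions.create call made with stream: true, and `controller` an AbortController.
//
//   const completion = await handleMessageStream(stream, controller.signal)
//   console.log(completion.choices[0].message.content)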
function convertOpenAIResponseToAnthropic(response: OpenAI.ChatCompletion) {
let contentBlocks: ContentBlock[] = []
const message = response.choices?.[0]?.message
if (!message) {
logEvent('weird_response', {
response: JSON.stringify(response),
})
return {
role: 'assistant',
content: [],
stop_reason: response.choices?.[0]?.finish_reason,
type: 'message',
usage: response.usage,
}
}
if (message?.tool_calls) {
for (const toolCall of message.tool_calls) {
const tool = toolCall.function
const toolName = tool.name
let toolArgs = {}
try {
toolArgs = JSON.parse(tool.arguments)
} catch (e) {
// console.log(e)
}
contentBlocks.push({
type: 'tool_use',
input: toolArgs,
name: toolName,
id: toolCall.id?.length > 0 ? toolCall.id : nanoid(),
})
}
}
if ((message as any).reasoning) {
contentBlocks.push({
type: 'thinking',
thinking: (message as any).reasoning,
signature: '',
})
}
// NOTE: For deepseek api, the key for its returned reasoning process is reasoning_content
if ((message as any).reasoning_content) {
contentBlocks.push({
type: 'thinking',
thinking: (message as any).reasoning_content,
signature: '',
})
}
if (message.content) {
contentBlocks.push({
type: 'text',
text: message?.content,
citations: [],
})
}
const finalMessage = {
role: 'assistant',
content: contentBlocks,
stop_reason: response.choices?.[0]?.finish_reason,
type: 'message',
usage: response.usage,
}
return finalMessage
}
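// Conversion sketch (illustrative only): OpenAI fields map onto Anthropic-style content
// blocks in this order: tool_calls → 'tool_use' blocks, reasoning / reasoning_content →
// 'thinking' blocks, content → a 'text' block, with finish_reason carried through as
// stop_reason.
//
//   const anthropicShaped = convertOpenAIResponseToAnthropic(completion)
//   // → { role: 'assistant', type: 'message', content: [...], stop_reason, usage }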
let anthropicClient: Anthropic | AnthropicBedrock | AnthropicVertex | null =
null
/**
* Get the Anthropic client, creating it if it doesn't exist
*/
export function getAnthropicClient(
model?: string,
): Anthropic | AnthropicBedrock | AnthropicVertex {
const config = getGlobalConfig()
const provider = config.primaryProvider
// Reset client if provider has changed to ensure correct configuration
if (anthropicClient && provider) {
// Always recreate client for provider-specific configurations
anthropicClient = null
}
if (anthropicClient) {
return anthropicClient
}
const region = getVertexRegionForModel(model)
const defaultHeaders: { [key: string]: string } = {
'x-app': 'cli',
'User-Agent': USER_AGENT,
}
if (process.env.ANTHROPIC_AUTH_TOKEN) {
defaultHeaders['Authorization'] =
`Bearer ${process.env.ANTHROPIC_AUTH_TOKEN}`
}
const ARGS = {
defaultHeaders,
maxRetries: 0, // Disabled auto-retry in favor of manual implementation
timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
}
if (USE_BEDROCK) {
const client = new AnthropicBedrock(ARGS)
anthropicClient = client
return client
}
if (USE_VERTEX) {
const vertexArgs = {
...ARGS,
region: region || process.env.CLOUD_ML_REGION || 'us-east5',
}
const client = new AnthropicVertex(vertexArgs)
anthropicClient = client
return client
}
// Get appropriate API key and baseURL from ModelProfile
const modelManager = getModelManager()
const modelProfile = modelManager.getModel('main')
let apiKey: string
let baseURL: string | undefined
if (modelProfile) {
apiKey = modelProfile.apiKey || ''
baseURL = modelProfile.baseURL
} else {
// Fallback to default anthropic if no ModelProfile
apiKey = getAnthropicApiKey()
baseURL = undefined
}
if (process.env.USER_TYPE === 'ant' && !apiKey && provider === 'anthropic') {
console.error(
chalk.red(
'[ANT-ONLY] Please set the ANTHROPIC_API_KEY environment variable to use the CLI. To create a new key, go to https://console.anthropic.com/settings/keys.',
),
)
}
// Create client with custom baseURL for BigDream/OpenDev
// Anthropic SDK will append the appropriate paths (like /v1/messages)
const clientConfig = {
apiKey,
dangerouslyAllowBrowser: true,
...ARGS,
...(baseURL && { baseURL }), // Use baseURL directly, SDK will handle API versioning
}
anthropicClient = new Anthropic(clientConfig)
return anthropicClient
}
/**
* Reset the Anthropic client to null, forcing a new client to be created on next use
*/
export function resetAnthropicClient(): void {
anthropicClient = null
}
/**
* Environment variables for different client types:
*
* Direct API:
* - ANTHROPIC_API_KEY: Required for direct API access
*
* AWS Bedrock:
* - AWS credentials configured via aws-sdk defaults
*
* Vertex AI:
* - Model-specific region variables (highest priority):
* - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
* - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
* - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
* - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
* If specific model region not specified above
* - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
* - Standard GCP credentials configured via google-auth-library
*
* Priority for determining region:
* 1. Hardcoded model-specific environment variables
* 2. Global CLOUD_ML_REGION variable
* 3. Default region from config
* 4. Fallback region (us-east5)
*/
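// Resolution sketch (illustrative only; the real logic lives in getVertexRegionForModel):
// the priority order described above roughly amounts to
//
//   const region =
//     process.env.VERTEX_REGION_CLAUDE_3_7_SONNET ?? // 1. model-specific override
//     process.env.CLOUD_ML_REGION ??                 // 2. global region
//     'us-east5'                                     // 3./4. config default or fallback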
export function userMessageToMessageParam(
message: UserMessage,
addCache = false,
): MessageParam {
if (addCache) {
if (typeof message.message.content === 'string') {
return {
role: 'user',
content: [
{
type: 'text',
text: message.message.content,
...(PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}),
},
],
}
} else {
return {
role: 'user',
content: message.message.content.map((_, i) => ({
..._,
...(i === message.message.content.length - 1
? PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}
: {}),
})),
}
}
}
return {
role: 'user',
content: message.message.content,
}
}
export function assistantMessageToMessageParam(
message: AssistantMessage,
addCache = false,
): MessageParam {
if (addCache) {
if (typeof message.message.content === 'string') {
return {
role: 'assistant',
content: [
{
type: 'text',
text: message.message.content,
...(PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}),
},
],
}
} else {
return {
role: 'assistant',
content: message.message.content.map((_, i) => ({
..._,
...(i === message.message.content.length - 1 &&
_.type !== 'thinking' &&
_.type !== 'redacted_thinking'
? PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}
: {}),
})),
}
}
}
return {
role: 'assistant',
content: message.message.content,
}
}
function splitSysPromptPrefix(systemPrompt: string[]): string[] {
// split out the first block of the system prompt as the "prefix" for API
// to match on in https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes
const systemPromptFirstBlock = systemPrompt[0] || ''
const systemPromptRest = systemPrompt.slice(1)
return [systemPromptFirstBlock, systemPromptRest.join('\n')].filter(Boolean)
}
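// Behavior sketch (illustrative only): the first block stays separate so it can be
// matched as a stable prefix, and the remaining blocks collapse into one.
//
//   splitSysPromptPrefix(['You are Kode.', 'Rule A', 'Rule B'])
//   // → ['You are Kode.', 'Rule A\nRule B']
//   splitSysPromptPrefix(['Only block'])
//   // → ['Only block']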
export async function queryLLM(
messages: (UserMessage | AssistantMessage)[],
systemPrompt: string[],
maxThinkingTokens: number,
tools: Tool[],
signal: AbortSignal,
options: {
safeMode: boolean
model: string | import('../utils/config').ModelPointerType
prependCLISysprompt: boolean
toolUseContext?: ToolUseContext
},
): Promise<AssistantMessage> {
// 🔧 Unified model resolution: supports pointers, model IDs, and real model names
const modelManager = getModelManager()
const modelResolution = modelManager.resolveModelWithInfo(options.model)
if (!modelResolution.success || !modelResolution.profile) {
throw new Error(
modelResolution.error || `Failed to resolve model: ${options.model}`,
)
}
const modelProfile = modelResolution.profile
const resolvedModel = modelProfile.modelName
// Initialize response state if toolUseContext is provided
const toolUseContext = options.toolUseContext
if (toolUseContext && !toolUseContext.responseState) {
const conversationId = getConversationId(toolUseContext.agentId, toolUseContext.messageId)
const previousResponseId = responseStateManager.getPreviousResponseId(conversationId)
toolUseContext.responseState = {
previousResponseId,
conversationId
}
}
debugLogger.api('MODEL_RESOLVED', {
inputParam: options.model,
resolvedModelName: resolvedModel,
provider: modelProfile.provider,
isPointer: ['main', 'task', 'reasoning', 'quick'].includes(options.model),
hasResponseState: !!toolUseContext?.responseState,
conversationId: toolUseContext?.responseState?.conversationId,
requestId: getCurrentRequest()?.id,
})
const currentRequest = getCurrentRequest()
debugLogger.api('LLM_REQUEST_START', {
messageCount: messages.length,
systemPromptLength: systemPrompt.join(' ').length,
toolCount: tools.length,
model: resolvedModel,
originalModelParam: options.model,
requestId: getCurrentRequest()?.id,
})
markPhase('LLM_CALL')
try {
const result = await withVCR(messages, () =>
queryLLMWithPromptCaching(
messages,
systemPrompt,
maxThinkingTokens,
tools,
signal,
{ ...options, model: resolvedModel, modelProfile, toolUseContext }, // Pass resolved ModelProfile and toolUseContext
),
)
debugLogger.api('LLM_REQUEST_SUCCESS', {
costUSD: result.costUSD,
durationMs: result.durationMs,
responseLength: result.message.content?.length || 0,
requestId: getCurrentRequest()?.id,
})
// Update response state for GPT-5 Responses API continuation
if (toolUseContext?.responseState?.conversationId && result.responseId) {
responseStateManager.setPreviousResponseId(
toolUseContext.responseState.conversationId,
result.responseId
)
debugLogger.api('RESPONSE_STATE_UPDATED', {
conversationId: toolUseContext.responseState.conversationId,
responseId: result.responseId,
requestId: getCurrentRequest()?.id,
})
}
return result
} catch (error) {
// Record LLM-related errors through the error-diagnosis system
logErrorWithDiagnosis(
error,
{
messageCount: messages.length,
systemPromptLength: systemPrompt.join(' ').length,
model: options.model,
toolCount: tools.length,
phase: 'LLM_CALL',
},
currentRequest?.id,
)
throw error
}
}
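// Usage sketch (illustrative only): callers typically pass a model pointer
// ('main' | 'task' | 'reasoning' | 'quick') or a profile model name; the resolved
// ModelProfile then decides between the Anthropic-native and OpenAI-compatible paths.
// `userMsg` and `myTools` are placeholders, not module exports.
//
//   const reply = await queryLLM(
//     [userMsg],
//     ['You are a coding assistant.'],
//     0,               // maxThinkingTokens
//     myTools,
//     new AbortController().signal,
//     { safeMode: false, model: 'main', prependCLISysprompt: true },
//   )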
export function formatSystemPromptWithContext(
systemPrompt: string[],
context: { [k: string]: string },
agentId?: string,
skipContextReminders = false, // When true, skip direct Kode-context injection into the system prompt
): { systemPrompt: string[]; reminders: string } {
// Build the enhanced system prompt - aligned with the official Claude Code direct-injection approach
const enhancedPrompt = [...systemPrompt]
let reminders = ''
// Step 0: Add GPT-5 Agent persistence support for coding tasks
const modelManager = getModelManager()
const modelProfile = modelManager.getModel('main')
if (modelProfile && isGPT5Model(modelProfile.modelName)) {
// Add coding-specific persistence instructions based on GPT-5 documentation
const persistencePrompts = [
"\n# Agent Persistence for Long-Running Coding Tasks",
"You are working on a coding project that may involve multiple steps and iterations. Please maintain context and continuity throughout the session:",
"- Remember architectural decisions and design patterns established earlier",
"- Keep track of file modifications and their relationships",
"- Maintain awareness of the overall project structure and goals",
"- Reference previous implementations when making related changes",
"- Ensure consistency with existing code style and conventions",
"- Build incrementally on previous work rather than starting from scratch"
]
enhancedPrompt.push(...persistencePrompts)
}
// Only process when context entries exist
const hasContext = Object.entries(context).length > 0
if (hasContext) {
// Step 1: Inject the Kode context directly into the system prompt (aligned with the official design)
if (!skipContextReminders) {
const kodeContext = generateKodeContext()
if (kodeContext) {
// Add separators and a heading so the project docs stand out in the system prompt
enhancedPrompt.push('\n---\n# 项目上下文\n')
enhancedPrompt.push(kodeContext)
enhancedPrompt.push('\n---\n')
}
}
// Step 2: Generate other dynamic reminders and return them to the caller (keeps the existing reminder behavior)
const reminderMessages = generateSystemReminders(hasContext, agentId)
if (reminderMessages.length > 0) {
reminders = reminderMessages.map(r => r.content).join('\n') + '\n'
}
// Step 3: Add the remaining context entries to the system prompt
enhancedPrompt.push(
`\nAs you answer the user's questions, you can use the following context:\n`,
)
// Filter out project docs already handled by the Kode context (avoid duplication)
const filteredContext = Object.fromEntries(
Object.entries(context).filter(
([key]) => key !== 'projectDocs' && key !== 'userDocs',
),
)
enhancedPrompt.push(
...Object.entries(filteredContext).map(
([key, value]) => `<context name="${key}">${value}</context>`,
),
)
}
return { systemPrompt: enhancedPrompt, reminders }
}
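// Usage sketch (illustrative only): the returned systemPrompt already embeds the cached
// Kode project docs, while dynamic reminders are returned separately so the caller can
// inject them into the message stream instead.
//
//   const { systemPrompt: enhanced, reminders } =
//     formatSystemPromptWithContext(['Base prompt'], { gitStatus: 'clean' })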
async function queryLLMWithPromptCaching(
messages: (UserMessage | AssistantMessage)[],
systemPrompt: string[],
maxThinkingTokens: number,
tools: Tool[],
signal: AbortSignal,
options: {
safeMode: boolean
model: string
prependCLISysprompt: boolean
modelProfile?: ModelProfile | null
toolUseContext?: ToolUseContext
},
): Promise<AssistantMessage> {
const config = getGlobalConfig()
const modelManager = getModelManager()
const toolUseContext = options.toolUseContext
// 🔧 Fix: Use the ModelProfile passed in instead of the hard-coded 'main' pointer
const modelProfile = options.modelProfile || modelManager.getModel('main')
let provider: string
if (modelProfile) {
provider = modelProfile.provider || config.primaryProvider || 'anthropic'
} else {
provider = config.primaryProvider || 'anthropic'
}
// Use native Anthropic SDK for Anthropic and some Anthropic-compatible providers
if (
provider === 'anthropic' ||
provider === 'bigdream' ||
provider === 'opendev'
) {
return queryAnthropicNative(
messages,
systemPrompt,
maxThinkingTokens,
tools,
signal,
{ ...options, modelProfile, toolUseContext },
)
}
// Use OpenAI-compatible interface for all other providers
return queryOpenAI(messages, systemPrompt, maxThinkingTokens, tools, signal, {
...options,
modelProfile,
toolUseContext,
})
}
async function queryAnthropicNative(
messages: (UserMessage | AssistantMessage)[],
systemPrompt: string[],
maxThinkingTokens: number,
tools: Tool[],
signal: AbortSignal,
options?: {
safeMode: boolean
model: string
prependCLISysprompt: boolean
modelProfile?: ModelProfile | null
toolUseContext?: ToolUseContext
},
): Promise<AssistantMessage> {
const config = getGlobalConfig()
const modelManager = getModelManager()
const toolUseContext = options?.toolUseContext
// 🔧 Fix: Use the ModelProfile passed in instead of the hard-coded 'main' pointer
const modelProfile = options?.modelProfile || modelManager.getModel('main')
let anthropic: Anthropic | AnthropicBedrock | AnthropicVertex
let model: string
let provider: string
// 🔍 Debug: Log model configuration details
debugLogger.api('MODEL_CONFIG_ANTHROPIC', {
modelProfileFound: !!modelProfile,
modelProfileId: modelProfile?.modelName,
modelProfileName: modelProfile?.name,
modelProfileModelName: modelProfile?.modelName,
modelProfileProvider: modelProfile?.provider,
modelProfileBaseURL: modelProfile?.baseURL,
modelProfileApiKeyExists: !!modelProfile?.apiKey,
optionsModel: options?.model,
requestId: getCurrentRequest()?.id,
})
if (modelProfile) {
// Use the full ModelProfile configuration
model = modelProfile.modelName
provider = modelProfile.provider || config.primaryProvider || 'anthropic'
// Create a dedicated API client based on the ModelProfile
if (
modelProfile.provider === 'anthropic' ||
modelProfile.provider === 'bigdream' ||
modelProfile.provider === 'opendev'
) {
const clientConfig: any = {
apiKey: modelProfile.apiKey,
dangerouslyAllowBrowser: true,
maxRetries: 0,
timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
defaultHeaders: {
'x-app': 'cli',
'User-Agent': USER_AGENT,
},
}
// Use the ModelProfile's baseURL instead of the global config
if (modelProfile.baseURL) {
clientConfig.baseURL = modelProfile.baseURL
}
anthropic = new Anthropic(clientConfig)
} else {
// Handling for other providers
anthropic = getAnthropicClient(model)
}
} else {
// 🚨 No valid ModelProfile available: throw instead of silently degrading
const errorDetails = {
modelProfileExists: !!modelProfile,
modelProfileModelName: modelProfile?.modelName,
requestedModel: options?.model,
requestId: getCurrentRequest()?.id,
}
debugLogger.error('ANTHROPIC_FALLBACK_ERROR', errorDetails)
throw new Error(
`No valid ModelProfile available for Anthropic provider. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
)
}
// Prepend system prompt block for easy API identification
if (options?.prependCLISysprompt) {
// Log stats about first block for analyzing prefix matching config
const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
logEvent('tengu_sysprompt_block', {
snippet: firstSyspromptBlock?.slice(0, 20),
length: String(firstSyspromptBlock?.length ?? 0),
hash: firstSyspromptBlock
? createHash('sha256').update(firstSyspromptBlock).digest('hex')
: '',
})
systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
}
const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
_ => ({
...(PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}),
text: _,
type: 'text',
}),
)
const toolSchemas = tools.map(
tool =>
({
name: tool.name,
description: tool.description,
input_schema: zodToJsonSchema(tool.inputSchema),
}) as unknown as Anthropic.Beta.Messages.BetaTool,
)
const anthropicMessages = addCacheBreakpoints(messages)
const startIncludingRetries = Date.now()
// Log system prompt construction
logSystemPromptConstruction({
basePrompt: systemPrompt.join('\n'),
kodeContext: generateKodeContext() || '',
reminders: [], // Could be populated from generateSystemReminders
finalPrompt: systemPrompt.join('\n'),
})
let start = Date.now()
let attemptNumber = 0
let response
try {
response = await withRetry(async attempt => {
attemptNumber = attempt
start = Date.now()
const params: Anthropic.Beta.Messages.MessageCreateParams = {
model,
max_tokens: getMaxTokensFromProfile(modelProfile),
messages: anthropicMessages,
system,
tools: toolSchemas.length > 0 ? toolSchemas : undefined,
tool_choice: toolSchemas.length > 0 ? { type: 'auto' } : undefined,
}
if (maxThinkingTokens > 0) {
;(params as any).extra_headers = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
}
;(params as any).thinking = { max_tokens: maxThinkingTokens }
}
// 🔥 REAL-TIME API CALL DEBUG - uses the global logging system (Anthropic streaming)
debugLogger.api('ANTHROPIC_API_CALL_START_STREAMING', {
endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
model,
provider,
apiKeyConfigured: !!modelProfile?.apiKey,
apiKeyPrefix: modelProfile?.apiKey
? modelProfile.apiKey.substring(0, 8)
: null,
maxTokens: params.max_tokens,
temperature: MAIN_QUERY_TEMPERATURE,
messageCount: params.messages?.length || 0,
streamMode: true,
toolsCount: toolSchemas.length,
thinkingTokens: maxThinkingTokens,
timestamp: new Date().toISOString(),
modelProfileId: modelProfile?.modelName,
modelProfileName: modelProfile?.name,
})
if (config.stream) {
// 🔧 CRITICAL FIX: Connect AbortSignal to Anthropic API call
const stream = await anthropic.beta.messages.create({
...params,
stream: true,
}, {
signal: signal // ← CRITICAL: Connect the AbortSignal to API call
})
let finalResponse: any | null = null
let messageStartEvent: any = null
const contentBlocks: any[] = []
let usage: any = null
let stopReason: string | null = null
let stopSequence: string | null = null
for await (const event of stream) {
// 🔧 CRITICAL FIX: Check abort signal in streaming loop
if (signal.aborted) {
debugLogger.flow('STREAM_ABORTED', {
eventType: event.type,
timestamp: Date.now()
})
throw new Error('Request was cancelled')
}
if (event.type === 'message_start') {
messageStartEvent = event
finalResponse = {
...event.message,
content: [], // Will be populated from content blocks
}
} else if (event.type === 'content_block_start') {
contentBlocks[event.index] = { ...event.content_block }
} else if (event.type === 'content_block_delta') {
if (!contentBlocks[event.index]) {
contentBlocks[event.index] = {
type: event.delta.type === 'text_delta' ? 'text' : 'unknown',
text: '',
}
}
if (event.delta.type === 'text_delta') {
contentBlocks[event.index].text += event.delta.text
}
} else if (event.type === 'message_delta') {
if (event.delta.stop_reason) stopReason = event.delta.stop_reason
if (event.delta.stop_sequence)
stopSequence = event.delta.stop_sequence
if (event.usage) usage = { ...usage, ...event.usage }
} else if (event.type === 'message_stop') {
break
}
}
if (!finalResponse || !messageStartEvent) {
throw new Error('Stream ended without proper message structure')
}
// Construct the final response
finalResponse = {
...messageStartEvent.message,
content: contentBlocks.filter(Boolean),
stop_reason: stopReason,
stop_sequence: stopSequence,
usage: {
...messageStartEvent.message.usage,
...usage,
},
}
return finalResponse
} else {
// 🔥 REAL-TIME API CALL DEBUG - uses the global logging system (Anthropic non-streaming)
debugLogger.api('ANTHROPIC_API_CALL_START_NON_STREAMING', {
endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
model,
provider,
apiKeyConfigured: !!modelProfile?.apiKey,
apiKeyPrefix: modelProfile?.apiKey
? modelProfile.apiKey.substring(0, 8)
: null,
maxTokens: params.max_tokens,
temperature: MAIN_QUERY_TEMPERATURE,
messageCount: params.messages?.length || 0,
streamMode: false,
toolsCount: toolSchemas.length,
thinkingTokens: maxThinkingTokens,
timestamp: new Date().toISOString(),
modelProfileId: modelProfile?.modelName,
modelProfileName: modelProfile?.name,
})
// 🔧 CRITICAL FIX: Connect AbortSignal to non-streaming API call
return await anthropic.beta.messages.create(params, {
signal: signal // ← CRITICAL: Connect the AbortSignal to API call
})
}
}, { signal }) // 🔧 CRITICAL FIX: Pass AbortSignal to withRetry
const ttftMs = Date.now() - start
const durationMs = Date.now() - startIncludingRetries
const content = response.content.map((block: ContentBlock) => {
if (block.type === 'text') {
return {
type: 'text' as const,
text: block.text,
}
} else if (block.type === 'tool_use') {
return {
type: 'tool_use' as const,
id: block.id,
name: block.name,
input: block.input,
}
}
return block
})
const assistantMessage: AssistantMessage = {
message: {
id: response.id,
content,
model: response.model,
role: 'assistant',
stop_reason: response.stop_reason,
stop_sequence: response.stop_sequence,
type: 'message',
usage: response.usage,
},
type: 'assistant',
uuid: nanoid() as UUID,
durationMs,
costUSD: 0, // Will be calculated below
}
// Log the complete LLM interaction debug info (Anthropic path)
// Note: the Anthropic API keeps the system prompt separate from messages; reconstruct a full API-call view here
const systemMessages = system.map(block => ({
role: 'system',
content: block.text,
}))
logLLMInteraction({
systemPrompt: systemPrompt.join('\n'),
messages: [...systemMessages, ...anthropicMessages],
response: response,
usage: response.usage
? {
inputTokens: response.usage.input_tokens,
outputTokens: response.usage.output_tokens,
}
: undefined,
timing: {
start: start,
end: Date.now(),
},
apiFormat: 'anthropic',
})
// Calculate cost using native Anthropic usage data
const inputTokens = response.usage.input_tokens
const outputTokens = response.usage.output_tokens
const cacheCreationInputTokens =
response.usage.cache_creation_input_tokens ?? 0
const cacheReadInputTokens = response.usage.cache_read_input_tokens ?? 0
const costUSD =
(inputTokens / 1_000_000) * getModelInputTokenCostUSD(model) +
(outputTokens / 1_000_000) * getModelOutputTokenCostUSD(model) +
(cacheCreationInputTokens / 1_000_000) *
getModelInputTokenCostUSD(model) +
(cacheReadInputTokens / 1_000_000) *
(getModelInputTokenCostUSD(model) * 0.1) // Cache reads are 10% of input cost
assistantMessage.costUSD = costUSD
addToTotalCost(costUSD, durationMs)
logEvent('api_response_anthropic_native', {
model,
input_tokens: String(inputTokens),
output_tokens: String(outputTokens),
cache_creation_input_tokens: String(cacheCreationInputTokens),
cache_read_input_tokens: String(cacheReadInputTokens),
cost_usd: String(costUSD),
duration_ms: String(durationMs),
ttft_ms: String(ttftMs),
attempt_number: String(attemptNumber),
})
return assistantMessage
} catch (error) {
return getAssistantMessageFromError(error)
}
}
function getAssistantMessageFromError(error: unknown): AssistantMessage {
if (error instanceof Error && error.message.includes('prompt is too long')) {
return createAssistantAPIErrorMessage(PROMPT_TOO_LONG_ERROR_MESSAGE)
}
if (
error instanceof Error &&
error.message.includes('Your credit balance is too low')
) {
return createAssistantAPIErrorMessage(CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE)
}
if (
error instanceof Error &&
error.message.toLowerCase().includes('x-api-key')
) {
return createAssistantAPIErrorMessage(INVALID_API_KEY_ERROR_MESSAGE)
}
if (error instanceof Error) {
if (process.env.NODE_ENV === 'development') {
console.log(error)
}
return createAssistantAPIErrorMessage(
`${API_ERROR_MESSAGE_PREFIX}: ${error.message}`,
)
}
return createAssistantAPIErrorMessage(API_ERROR_MESSAGE_PREFIX)
}
function addCacheBreakpoints(
messages: (UserMessage | AssistantMessage)[],
): MessageParam[] {
return messages.map((msg, index) => {
return msg.type === 'user'
? userMessageToMessageParam(msg, index > messages.length - 3)
: assistantMessageToMessageParam(msg, index > messages.length - 3)
})
}
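// Behavior sketch (illustrative only): only the last two messages receive cache_control
// breakpoints (index > messages.length - 3), so earlier turns stay byte-stable and
// remain eligible for Anthropic prompt-cache hits.
//
//   addCacheBreakpoints([m1, m2, m3, m4]) // only m3 and m4 are marked ephemeral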
async function queryOpenAI(
messages: (UserMessage | AssistantMessage)[],
systemPrompt: string[],
maxThinkingTokens: number,
tools: Tool[],
signal: AbortSignal,
options?: {
safeMode: boolean
model: string
prependCLISysprompt: boolean
modelProfile?: ModelProfile | null
toolUseContext?: ToolUseContext
},
): Promise<AssistantMessage> {
const config = getGlobalConfig()
const modelManager = getModelManager()
const toolUseContext = options?.toolUseContext
// 🔧 Fix: Use the ModelProfile passed in instead of the hard-coded 'main' pointer
const modelProfile = options?.modelProfile || modelManager.getModel('main')
let model: string
// 🔍 Debug: Log model configuration details
const currentRequest = getCurrentRequest()
debugLogger.api('MODEL_CONFIG_OPENAI', {
modelProfileFound: !!modelProfile,
modelProfileId: modelProfile?.modelName,
modelProfileName: modelProfile?.name,
modelProfileModelName: modelProfile?.modelName,
modelProfileProvider: modelProfile?.provider,
modelProfileBaseURL: modelProfile?.baseURL,
modelProfileApiKeyExists: !!modelProfile?.apiKey,
optionsModel: options?.model,
requestId: getCurrentRequest()?.id,
})
if (modelProfile) {
model = modelProfile.modelName
} else {
model = options?.model || modelProfile?.modelName || ''
}
// Prepend system prompt block for easy API identification
if (options?.prependCLISysprompt) {
// Log stats about first block for analyzing prefix matching config (see https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes)
const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
logEvent('tengu_sysprompt_block', {
snippet: firstSyspromptBlock?.slice(0, 20),
length: String(firstSyspromptBlock?.length ?? 0),
hash: firstSyspromptBlock
? createHash('sha256').update(firstSyspromptBlock).digest('hex')
: '',
})
systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
}
const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
_ => ({
...(PROMPT_CACHING_ENABLED
? { cache_control: { type: 'ephemeral' } }
: {}),
text: _,
type: 'text',
}),
)
const toolSchemas = await Promise.all(
tools.map(
async _ =>
({
type: 'function',
function: {
name: _.name,
description: await _.prompt({
safeMode: options?.safeMode,
}),
// Use tool's JSON schema directly if provided, otherwise convert Zod schema
parameters:
'inputJSONSchema' in _ && _.inputJSONSchema
? _.inputJSONSchema
: zodToJsonSchema(_.inputSchema),
},
}) as OpenAI.ChatCompletionTool,
),
)
const openaiSystem = system.map(
s =>
({
role: 'system',
content: s.text,
}) as OpenAI.ChatCompletionMessageParam,
)
const openaiMessages = convertAnthropicMessagesToOpenAIMessages(messages)
const startIncludingRetries = Date.now()
// Log system prompt construction (OpenAI path)
logSystemPromptConstruction({
basePrompt: systemPrompt.join('\n'),
kodeContext: generateKodeContext() || '',
reminders: [], // Could be populated from generateSystemReminders
finalPrompt: systemPrompt.join('\n'),
})
let start = Date.now()
let attemptNumber = 0
let response
try {
response = await withRetry(async attempt => {
attemptNumber = attempt
start = Date.now()
// 🔥 GPT-5 Enhanced Parameter Construction
const maxTokens = getMaxTokensFromProfile(modelProfile)
const isGPT5 = isGPT5Model(model)
const opts: OpenAI.ChatCompletionCreateParams = {
model,
// 🔧 Use correct parameter name based on model type
...(isGPT5 ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens }),
messages: [...openaiSystem, ...openaiMessages],
// 🔧 GPT-5 temperature constraint: only 1 or undefined
temperature: isGPT5 ? 1 : MAIN_QUERY_TEMPERATURE,
}
if (config.stream) {
;(opts as OpenAI.ChatCompletionCreateParams).stream = true
opts.stream_options = {
include_usage: true,
}
}
if (toolSchemas.length > 0) {
opts.tools = toolSchemas
opts.tool_choice = 'auto'
}
const reasoningEffort = await getReasoningEffort(modelProfile, messages)
if (reasoningEffort) {
logEvent('debug_reasoning_effort', {
effort: reasoningEffort,
})
opts.reasoning_effort = reasoningEffort
}
// 🔧 Fix: If a ModelProfile is configured, use it directly (looser condition)
if (modelProfile && modelProfile.modelName) {
debugLogger.api('USING_MODEL_PROFILE_PATH', {
modelProfileName: modelProfile.modelName,
modelName: modelProfile.modelName,
provider: modelProfile.provider,
baseURL: modelProfile.baseURL,
apiKeyExists: !!modelProfile.apiKey,
requestId: getCurrentRequest()?.id,
})
// Enable new adapter system with environment variable
const USE_NEW_ADAPTER_SYSTEM = process.env.USE_NEW_ADAPTERS !== 'false'
if (USE_NEW_ADAPTER_SYSTEM) {
// New adapter system
const adapter = ModelAdapterFactory.createAdapter(modelProfile)
// Build unified request parameters
const unifiedParams: UnifiedRequestParams = {
messages: openaiMessages,
systemPrompt: openaiSystem.map(s => s.content as string),
tools: tools,
maxTokens: getMaxTokensFromProfile(modelProfile),
stream: config.stream,
reasoningEffort: reasoningEffort as any,
temperature: isGPT5Model(model) ? 1 : MAIN_QUERY_TEMPERATURE,
previousResponseId: toolUseContext?.responseState?.previousResponseId,
verbosity: 'high' // High verbosity for coding tasks
}
// Create request using adapter
const request = adapter.createRequest(unifiedParams)
// Determine which API to use
if (ModelAdapterFactory.shouldUseResponsesAPI(modelProfile)) {
// Use Responses API for GPT-5 and similar models
const { callGPT5ResponsesAPI } = await import('./openai')
const response = await callGPT5ResponsesAPI(modelProfile, request, signal)
const unifiedResponse = adapter.parseResponse(response)
// Convert unified response back to Anthropic format
const apiMessage = {
role: 'assistant' as const,
content: unifiedResponse.content,
tool_calls: unifiedResponse.toolCalls,
usage: {
prompt_tokens: unifiedResponse.usage.promptTokens,
completion_tokens: unifiedResponse.usage.completionTokens,
}
}
const assistantMsg: AssistantMessage = {
type: 'assistant',
message: apiMessage as any,
costUSD: 0, // Will be calculated later
durationMs: Date.now() - start,
uuid: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}` as any,
responseId: unifiedResponse.responseId // For state management
}
return assistantMsg
} else {
// Use existing Chat Completions flow
const s = await getCompletionWithProfile(modelProfile, request, 0, 10, signal)
let finalResponse
if (config.stream) {
finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
} else {
finalResponse = s
}
const r = convertOpenAIResponseToAnthropic(finalResponse)
return r
}
} else {
// Legacy system (preserved for fallback)
const completionFunction = isGPT5Model(modelProfile.modelName)
? getGPT5CompletionWithProfile
: getCompletionWithProfile
const s = await completionFunction(modelProfile, opts, 0, 10, signal)
let finalResponse
if (opts.stream) {
finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
} else {
finalResponse = s
}
const r = convertOpenAIResponseToAnthropic(finalResponse)
return r
}
} else {
// 🚨 Warning: ModelProfile unavailable; this used to fall through to the legacy path
debugLogger.api('USING_LEGACY_PATH', {
modelProfileExists: !!modelProfile,
modelProfileId: modelProfile?.modelName,
modelNameExists: !!modelProfile?.modelName,
fallbackModel: 'main',
actualModel: model,
requestId: getCurrentRequest()?.id,
})
// 🚨 FALLBACK: With no valid ModelProfile, throw an error instead of using the legacy system
const errorDetails = {
modelProfileExists: !!modelProfile,
modelProfileId: modelProfile?.modelName,
modelNameExists: !!modelProfile?.modelName,
requestedModel: model,
requestId: getCurrentRequest()?.id,
}
debugLogger.error('NO_VALID_MODEL_PROFILE', errorDetails)
throw new Error(
`No valid ModelProfile available for model: ${model}. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
)
}
}, { signal }) // 🔧 CRITICAL FIX: Pass AbortSignal to withRetry
} catch (error) {
logError(error)
return getAssistantMessageFromError(error)
}
const durationMs = Date.now() - start
const durationMsIncludingRetries = Date.now() - startIncludingRetries
const inputTokens = response.usage?.prompt_tokens ?? 0
const outputTokens = response.usage?.completion_tokens ?? 0
const cacheReadInputTokens =
response.usage?.prompt_token_details?.cached_tokens ?? 0
// OpenAI-compatible usage only reports cached (read) tokens; cache writes are not
// exposed, so count them as zero rather than double-counting cached_tokens
const cacheCreationInputTokens = 0
const costUSD =
(inputTokens / 1_000_000) * SONNET_COST_PER_MILLION_INPUT_TOKENS +
(outputTokens / 1_000_000) * SONNET_COST_PER_MILLION_OUTPUT_TOKENS +
(cacheReadInputTokens / 1_000_000) *
SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS +
(cacheCreationInputTokens / 1_000_000) *
SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS
addToTotalCost(costUSD, durationMsIncludingRetries)
// Log the complete LLM interaction debug info (OpenAI path)
logLLMInteraction({
systemPrompt: systemPrompt.join('\n'),
messages: [...openaiSystem, ...openaiMessages],
response: response,
usage: {
inputTokens: inputTokens,
outputTokens: outputTokens,
},
timing: {
start: start,
end: Date.now(),
},
apiFormat: 'openai',
})
return {
message: {
...response,
content: normalizeContentFromAPI(response.content),
usage: {
input_tokens: inputTokens,
output_tokens: outputTokens,
cache_read_input_tokens: cacheReadInputTokens,
cache_creation_input_tokens: 0,
},
},
costUSD,
durationMs,
type: 'assistant',
uuid: randomUUID(),
}
}
function getMaxTokensFromProfile(modelProfile: any): number {
// Use ModelProfile maxTokens or reasonable default
return modelProfile?.maxTokens || 8000
}
function getModelInputTokenCostUSD(model: string): number {
// Find the model in the models object
for (const providerModels of Object.values(models)) {
const modelInfo = providerModels.find((m: any) => m.model === model)
if (modelInfo) {
return modelInfo.input_cost_per_token || 0
}
}
// Default fallback cost for unknown models
return 0.000003 // Per-token fallback (≈ $3 per million input tokens, Sonnet-class pricing)
}
function getModelOutputTokenCostUSD(model: string): number {
// Find the model in the models object
for (const providerModels of Object.values(models)) {
const modelInfo = providerModels.find((m: any) => m.model === model)
if (modelInfo) {
return modelInfo.output_cost_per_token || 0
}
}
// Default fallback cost for unknown models
return 0.000015 // Per-token fallback (≈ $15 per million output tokens, Sonnet-class pricing)
}
// New unified query functions for model pointer system
export async function queryModel(
modelPointer: import('../utils/config').ModelPointerType,
messages: (UserMessage | AssistantMessage)[],
systemPrompt: string[] = [],
signal?: AbortSignal,
): Promise<AssistantMessage> {
// Use queryLLM with the pointer directly
return queryLLM(
messages,
systemPrompt,
0, // maxThinkingTokens
[], // tools
signal || new AbortController().signal,
{
safeMode: false,
model: modelPointer,
prependCLISysprompt: true,
},
)
}
// Note: Use queryModel(pointer, ...) directly instead of these convenience functions
// Simplified query function using quick model pointer
export async function queryQuick({
systemPrompt = [],
userPrompt,
assistantPrompt,
enablePromptCaching = false,
signal,
}: {
systemPrompt?: string[]
userPrompt: string
assistantPrompt?: string
enablePromptCaching?: boolean
signal?: AbortSignal
}): Promise<AssistantMessage> {
const messages = [
{
message: { role: 'user', content: userPrompt },
type: 'user',
uuid: randomUUID(),
},
] as (UserMessage | AssistantMessage)[]
return queryModel('quick', messages, systemPrompt, signal)
}
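// Usage sketch (illustrative only): queryQuick routes a single prompt through the
// 'quick' model pointer with no tools and no extended thinking budget.
//
//   const answer = await queryQuick({
//     systemPrompt: ['Answer in one sentence.'],
//     userPrompt: 'Summarize the pending changes.',
//   })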