import { Message } from '../query'
import { countTokens } from './tokens'
import { getMessagesSetter } from '../messages'
import { getContext } from '../context'
import { getCodeStyle } from '../utils/style'
import { resetFileFreshnessSession } from '../services/fileFreshness'
import { createUserMessage, normalizeMessagesForAPI } from '../utils/messages'
import { queryLLM } from '../services/claude'
import { selectAndReadFiles } from './fileRecoveryCore'
import { addLineNumbers } from './file'
import { getModelManager } from './model'

/**
 * Threshold ratio for triggering automatic context compression.
 * When context usage exceeds 92% of the model's limit, auto-compact activates.
 */
const AUTO_COMPACT_THRESHOLD_RATIO = 0.92

/**
 * Retrieves the context length of the main model, which executes compression.
 * Uses ModelManager to read the current model's context length.
 */
async function getCompressionModelContextLimit(): Promise<number> {
  try {
    // Use ModelManager instead of the legacy config
    const modelManager = getModelManager()
    const modelProfile = modelManager.getModel('main')
    if (modelProfile?.contextLength) {
      return modelProfile.contextLength
    }
    // Fall back to a reasonable default
    return 200_000
  } catch (error) {
    return 200_000
  }
}

const COMPRESSION_PROMPT = `Please provide a comprehensive summary of our conversation structured as follows:

## Technical Context
Development environment, tools, frameworks, and configurations in use.
Programming languages, libraries, and technical constraints.
File structure, directory organization, and project architecture.

## Project Overview
Main project goals, features, and scope.
Key components, modules, and their relationships.
Data models, APIs, and integration patterns.

## Code Changes
Files created, modified, or analyzed during our conversation.
Specific code implementations, functions, and algorithms added.
Configuration changes and structural modifications.

## Debugging & Issues
Problems encountered and their root causes.
Solutions implemented and their effectiveness.
Error messages, logs, and diagnostic information.

## Current Status
What we just completed successfully.
Current state of the codebase and any ongoing work.
Test results, validation steps, and verification performed.

## Pending Tasks
Immediate next steps and priorities.
Planned features, improvements, and refactoring.
Known issues, technical debt, and areas needing attention.

## User Preferences
Coding style, formatting, and organizational preferences.
Communication patterns and feedback style.
Tool choices and workflow preferences.

## Key Decisions
Important technical decisions made and their rationale.
Alternative approaches considered and why they were rejected.
Trade-offs accepted and their implications.

Focus on information essential for continuing the conversation effectively, including specific details about code, files, errors, and plans.`
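// Worked example of the threshold arithmetic used below (illustrative numbers,
// not a measurement): with the default contextLimit of 200_000 tokens and
// AUTO_COMPACT_THRESHOLD_RATIO of 0.92, auto-compact triggers once the
// conversation reaches 200_000 * 0.92 = 184_000 tokens; at exactly that point
// calculateThresholds reports percentUsed = 92 and tokensRemaining = 0.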
/**
 * Calculates context usage thresholds based on the main model's capabilities.
 * Uses the main model's context length, since compression requires a capable model.
 */
async function calculateThresholds(tokenCount: number) {
  const contextLimit = await getCompressionModelContextLimit()
  const autoCompactThreshold = contextLimit * AUTO_COMPACT_THRESHOLD_RATIO

  return {
    isAboveAutoCompactThreshold: tokenCount >= autoCompactThreshold,
    percentUsed: Math.round((tokenCount / contextLimit) * 100),
    tokensRemaining: Math.max(0, autoCompactThreshold - tokenCount),
    contextLimit,
  }
}

/**
 * Determines whether auto-compact should trigger based on token usage.
 */
async function shouldAutoCompact(messages: Message[]): Promise<boolean> {
  if (messages.length < 3) return false

  const tokenCount = countTokens(messages)
  const { isAboveAutoCompactThreshold } = await calculateThresholds(tokenCount)
  return isAboveAutoCompactThreshold
}

/**
 * Main entry point for automatic context compression.
 *
 * Called before each query to check whether the conversation has grown too
 * large and needs compression. When triggered, it:
 * - Generates a structured summary of the conversation using the main model,
 *   which is better suited to producing high-quality summaries
 * - Recovers recently accessed files to maintain development context
 * - Resets conversation state while preserving essential information
 *
 * @param messages Current conversation messages
 * @param toolUseContext Execution context with model and tool configuration
 * @returns Updated messages (compressed if needed) and compression status
 */
export async function checkAutoCompact(
  messages: Message[],
  toolUseContext: any,
): Promise<{ messages: Message[]; wasCompacted: boolean }> {
  if (!(await shouldAutoCompact(messages))) {
    return { messages, wasCompacted: false }
  }

  try {
    const compactedMessages = await executeAutoCompact(messages, toolUseContext)
    return {
      messages: compactedMessages,
      wasCompacted: true,
    }
  } catch (error) {
    // Graceful degradation: if auto-compact fails, continue with the original
    // messages so the system remains functional even when compression errors out
    console.error(
      'Auto-compact failed, continuing with original messages:',
      error,
    )
    return { messages, wasCompacted: false }
  }
}
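// Example call site (a sketch only; the real caller lives in the query loop,
// and `currentMessages` / `toolUseContext` are assumed to be in scope there):
//
//   const { messages: next, wasCompacted } = await checkAutoCompact(
//     currentMessages,
//     toolUseContext,
//   )
//   if (wasCompacted) {
//     currentMessages = next // continue the session with the compacted history
//   }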
/**
 * Executes the conversation compression process using the main model.
 *
 * Generates a comprehensive summary with the main model, which is better
 * suited for complex summarization tasks, then automatically recovers
 * recently used files to maintain development context.
 */
async function executeAutoCompact(
  messages: Message[],
  toolUseContext: any,
): Promise<Message[]> {
  const summaryRequest = createUserMessage(COMPRESSION_PROMPT)

  const summaryResponse = await queryLLM(
    normalizeMessagesForAPI([...messages, summaryRequest]),
    [
      'You are a helpful AI assistant tasked with creating comprehensive conversation summaries that preserve all essential context for continuing development work.',
    ],
    0,
    toolUseContext.options.tools,
    toolUseContext.abortController.signal,
    {
      safeMode: false,
      model: 'main', // Model pointer; queryLLM resolves it to the configured main model
      prependCLISysprompt: true,
    },
  )

  const content = summaryResponse.message.content
  const summary =
    typeof content === 'string'
      ? content
      : content.length > 0 && content[0]?.type === 'text'
        ? content[0].text
        : null

  if (!summary) {
    throw new Error(
      'Failed to generate conversation summary - response did not contain valid text content',
    )
  }

  // Zero out input-side usage so the summary response is not double-counted
  // against the context budget
  summaryResponse.message.usage = {
    input_tokens: 0,
    output_tokens: summaryResponse.message.usage.output_tokens,
    cache_creation_input_tokens: 0,
    cache_read_input_tokens: 0,
  }

  // Automatic file recovery: preserve recently accessed development files
  // so coding context survives conversation compression
  const recoveredFiles = await selectAndReadFiles()

  const compactedMessages = [
    createUserMessage(
      'Context automatically compressed due to token limit. Essential information preserved.',
    ),
    summaryResponse,
  ]

  // Append recovered files to maintain development workflow continuity.
  // Files are prioritized by recency and importance, with strict token limits.
  if (recoveredFiles.length > 0) {
    for (const file of recoveredFiles) {
      const contentWithLines = addLineNumbers({
        content: file.content,
        startLine: 1,
      })
      const recoveryMessage = createUserMessage(
        `**Recovered File: ${file.path}**\n\n\`\`\`\n${contentWithLines}\n\`\`\`\n\n` +
          `*Automatically recovered (${file.tokens} tokens)${file.truncated ? ' [truncated]' : ''}*`,
      )
      compactedMessages.push(recoveryMessage)
    }
  }

  // State cleanup to ensure fresh context after compression;
  // mirrors the cleanup sequence of the manual /compact command
  getMessagesSetter()([])
  getContext.cache.clear?.()
  getCodeStyle.cache.clear?.()
  resetFileFreshnessSession()

  return compactedMessages
}
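// Shape of the compacted transcript produced above (illustrative):
//   [0]    user:      "Context automatically compressed due to token limit. ..."
//   [1]    assistant: structured summary following the COMPRESSION_PROMPT headings
//   [2..n] user:      one "**Recovered File: <path>**" message per recovered file,
//                     each with line-numbered content and a token-count footer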