Kode-cli/src/utils/autoCompactCore.ts
2025-08-10 19:57:17 +08:00

224 lines
8.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { Message } from '../query'
import { countTokens } from './tokens'
import { getMessagesGetter, getMessagesSetter } from '../messages'
import { getContext } from '../context'
import { getCodeStyle } from '../utils/style'
import { clearTerminal } from '../utils/terminal'
import { resetFileFreshnessSession } from '../services/fileFreshness'
import { createUserMessage, normalizeMessagesForAPI } from '../utils/messages'
import { queryLLM } from '../services/claude'
import { selectAndReadFiles } from './fileRecoveryCore'
import { addLineNumbers } from './file'
import { getModelManager } from './model'
/**
* Threshold ratio for triggering automatic context compression
* When context usage exceeds 92% of the model's limit, auto-compact activates
*/
const AUTO_COMPACT_THRESHOLD_RATIO = 0.92
/**
* Retrieves the context length for the main model that should execute compression
* Uses ModelManager to get the current model's context length
*/
async function getCompressionModelContextLimit(): Promise<number> {
try {
// 🔧 Fix: Use ModelManager instead of legacy config
const modelManager = getModelManager()
const modelProfile = modelManager.getModel('main')
if (modelProfile?.contextLength) {
return modelProfile.contextLength
}
// Fallback to a reasonable default
return 200_000
} catch (error) {
return 200_000
}
}
const COMPRESSION_PROMPT = `Please provide a comprehensive summary of our conversation structured as follows:
## Technical Context
Development environment, tools, frameworks, and configurations in use. Programming languages, libraries, and technical constraints. File structure, directory organization, and project architecture.
## Project Overview
Main project goals, features, and scope. Key components, modules, and their relationships. Data models, APIs, and integration patterns.
## Code Changes
Files created, modified, or analyzed during our conversation. Specific code implementations, functions, and algorithms added. Configuration changes and structural modifications.
## Debugging & Issues
Problems encountered and their root causes. Solutions implemented and their effectiveness. Error messages, logs, and diagnostic information.
## Current Status
What we just completed successfully. Current state of the codebase and any ongoing work. Test results, validation steps, and verification performed.
## Pending Tasks
Immediate next steps and priorities. Planned features, improvements, and refactoring. Known issues, technical debt, and areas needing attention.
## User Preferences
Coding style, formatting, and organizational preferences. Communication patterns and feedback style. Tool choices and workflow preferences.
## Key Decisions
Important technical decisions made and their rationale. Alternative approaches considered and why they were rejected. Trade-offs accepted and their implications.
Focus on information essential for continuing the conversation effectively, including specific details about code, files, errors, and plans.`
/**
* Calculates context usage thresholds based on the main model's capabilities
* Uses the main model context length since compression tasks require a capable model
*/
async function calculateThresholds(tokenCount: number) {
const contextLimit = await getCompressionModelContextLimit()
const autoCompactThreshold = contextLimit * AUTO_COMPACT_THRESHOLD_RATIO
return {
isAboveAutoCompactThreshold: tokenCount >= autoCompactThreshold,
percentUsed: Math.round((tokenCount / contextLimit) * 100),
tokensRemaining: Math.max(0, autoCompactThreshold - tokenCount),
contextLimit,
}
}
/**
* Determines if auto-compact should trigger based on token usage
* Uses the main model context limit since compression requires a capable model
*/
async function shouldAutoCompact(messages: Message[]): Promise<boolean> {
if (messages.length < 3) return false
const tokenCount = countTokens(messages)
const { isAboveAutoCompactThreshold } = await calculateThresholds(tokenCount)
return isAboveAutoCompactThreshold
}
/**
* Main entry point for automatic context compression
*
* This function is called before each query to check if the conversation
* has grown too large and needs compression. When triggered, it:
* - Generates a structured summary of the conversation using the main model
* - Recovers recently accessed files to maintain development context
* - Resets conversation state while preserving essential information
*
* Uses the main model for compression tasks to ensure high-quality summaries
*
* @param messages Current conversation messages
* @param toolUseContext Execution context with model and tool configuration
* @returns Updated messages (compressed if needed) and compression status
*/
export async function checkAutoCompact(
messages: Message[],
toolUseContext: any,
): Promise<{ messages: Message[]; wasCompacted: boolean }> {
if (!(await shouldAutoCompact(messages))) {
return { messages, wasCompacted: false }
}
try {
const compactedMessages = await executeAutoCompact(messages, toolUseContext)
return {
messages: compactedMessages,
wasCompacted: true,
}
} catch (error) {
// Graceful degradation: if auto-compact fails, continue with original messages
// This ensures system remains functional even if compression encounters issues
console.error(
'Auto-compact failed, continuing with original messages:',
error,
)
return { messages, wasCompacted: false }
}
}
/**
* Executes the conversation compression process using the main model
*
* This function generates a comprehensive summary using the main model
* which is better suited for complex summarization tasks. It also
* automatically recovers important files to maintain development context.
*/
async function executeAutoCompact(
messages: Message[],
toolUseContext: any,
): Promise<Message[]> {
const summaryRequest = createUserMessage(COMPRESSION_PROMPT)
const summaryResponse = await queryLLM(
normalizeMessagesForAPI([...messages, summaryRequest]),
[
'You are a helpful AI assistant tasked with creating comprehensive conversation summaries that preserve all essential context for continuing development work.',
],
0,
toolUseContext.options.tools,
toolUseContext.abortController.signal,
{
safeMode: false,
model: 'main', // 使用模型指针让queryLLM统一解析
prependCLISysprompt: true,
},
)
const content = summaryResponse.message.content
const summary =
typeof content === 'string'
? content
: content.length > 0 && content[0]?.type === 'text'
? content[0].text
: null
if (!summary) {
throw new Error(
'Failed to generate conversation summary - response did not contain valid text content',
)
}
summaryResponse.message.usage = {
input_tokens: 0,
output_tokens: summaryResponse.message.usage.output_tokens,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
}
// Automatic file recovery: preserve recently accessed development files
// This maintains coding context even after conversation compression
const recoveredFiles = await selectAndReadFiles()
const compactedMessages = [
createUserMessage(
'Context automatically compressed due to token limit. Essential information preserved.',
),
summaryResponse,
]
// Append recovered files to maintain development workflow continuity
// Files are prioritized by recency and importance, with strict token limits
if (recoveredFiles.length > 0) {
for (const file of recoveredFiles) {
const contentWithLines = addLineNumbers({
content: file.content,
startLine: 1,
})
const recoveryMessage = createUserMessage(
`**Recovered File: ${file.path}**\n\n\`\`\`\n${contentWithLines}\n\`\`\`\n\n` +
`*Automatically recovered (${file.tokens} tokens)${file.truncated ? ' [truncated]' : ''}*`,
)
compactedMessages.push(recoveryMessage)
}
}
// State cleanup to ensure fresh context after compression
// Mirrors the cleanup sequence from manual /compact command
getMessagesSetter()([])
getContext.cache.clear?.()
getCodeStyle.cache.clear?.()
resetFileFreshnessSession()
return compactedMessages
}