feat: support cache control for Claude

Bing Zhu 2025-08-28 23:06:50 +08:00
parent 2875af774e
commit c45e28a806


@@ -962,6 +962,84 @@ export function resetAnthropicClient(): void {
  * 4. Fallback region (us-east5)
  */
+/**
+ * Manage cache control to ensure it doesn't exceed Claude's 4 cache block limit
+ * Priority:
+ * 1. System prompts (high priority)
+ * 2. Long documents or reference materials (high priority)
+ * 3. Reusable context (medium priority)
+ * 4. Short messages or one-time content (no caching)
+ */
+function applyCacheControlWithLimits(
+  systemBlocks: TextBlockParam[],
+  messageParams: MessageParam[]
+): { systemBlocks: TextBlockParam[]; messageParams: MessageParam[] } {
+  if (!PROMPT_CACHING_ENABLED) {
+    return { systemBlocks, messageParams }
+  }
+
+  const maxCacheBlocks = 4
+  let usedCacheBlocks = 0
+
+  // 1. Prioritize adding cache to system prompts (highest priority)
+  const processedSystemBlocks = systemBlocks.map((block, index) => {
+    if (usedCacheBlocks < maxCacheBlocks && block.text.length > 1000) {
+      usedCacheBlocks++
+      return {
+        ...block,
+        cache_control: { type: 'ephemeral' as const }
+      }
+    }
+    const { cache_control, ...blockWithoutCache } = block
+    return blockWithoutCache
+  })
+
+  // 2. Add cache to message content based on priority
+  const processedMessageParams = messageParams.map((message, messageIndex) => {
+    if (Array.isArray(message.content)) {
+      const processedContent = message.content.map((contentBlock, blockIndex) => {
+        // Determine whether this content block should be cached
+        const shouldCache =
+          usedCacheBlocks < maxCacheBlocks &&
+          contentBlock.type === 'text' &&
+          typeof contentBlock.text === 'string' &&
+          (
+            // Long documents (over 2000 characters)
+            contentBlock.text.length > 2000 ||
+            // Last content block of the last message (may be important context)
+            (messageIndex === messageParams.length - 1 &&
+              blockIndex === message.content.length - 1 &&
+              contentBlock.text.length > 500)
+          )
+
+        if (shouldCache) {
+          usedCacheBlocks++
+          return {
+            ...contentBlock,
+            cache_control: { type: 'ephemeral' as const }
+          }
+        }
+        // Remove existing cache_control
+        const { cache_control, ...blockWithoutCache } = contentBlock as any
+        return blockWithoutCache
+      })
+
+      return {
+        ...message,
+        content: processedContent
+      }
+    }
+    return message
+  })
+
+  return {
+    systemBlocks: processedSystemBlocks,
+    messageParams: processedMessageParams
+  }
+}
 export function userMessageToMessageParam(
   message: UserMessage,
   addCache = false,
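As a quick check on the limiter above (an editorial sketch, not part of this commit): feed five oversized system blocks through applyCacheControlWithLimits and at most four come back with an ephemeral cache_control marker. The contents below are hypothetical, the types are assumed to come from @anthropic-ai/sdk, and PROMPT_CACHING_ENABLED is assumed to be true.

import type { TextBlockParam, MessageParam } from '@anthropic-ai/sdk/resources/messages'

// Five system blocks, each long enough (> 1000 chars) to qualify for caching.
const blocks: TextBlockParam[] = Array.from({ length: 5 }, (_, i) => ({
  type: 'text',
  text: `system prompt section ${i} `.repeat(100), // ~2400 characters
}))

const { systemBlocks } = applyCacheControlWithLimits(blocks, [])
// Only the first four blocks receive the marker; a fifth would exceed
// Claude's 4 cache block limit, so its cache_control is stripped instead.
console.log(systemBlocks.filter(b => 'cache_control' in b).length) // 4

// Message side: a long reference document (> 2000 chars) qualifies,
// a short question does not.
const messages: MessageParam[] = [
  {
    role: 'user',
    content: [
      { type: 'text', text: 'short question' }, // left uncached
      { type: 'text', text: 'reference doc '.repeat(200) }, // cached: > 2000 chars
    ],
  },
]
const { messageParams } = applyCacheControlWithLimits([], messages)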
@@ -974,23 +1052,13 @@ export function userMessageToMessageParam(
         {
           type: 'text',
           text: message.message.content,
-          ...(PROMPT_CACHING_ENABLED
-            ? { cache_control: { type: 'ephemeral' } }
-            : {}),
         },
       ],
     }
   } else {
     return {
       role: 'user',
-      content: message.message.content.map((_, i) => ({
-        ..._,
-        ...(i === message.message.content.length - 1
-          ? PROMPT_CACHING_ENABLED
-            ? { cache_control: { type: 'ephemeral' } }
-            : {}
-          : {}),
-      })),
+      content: message.message.content.map((_) => ({ ..._ })),
     }
   }
 }
@@ -1012,25 +1080,13 @@ export function assistantMessageToMessageParam(
         {
           type: 'text',
           text: message.message.content,
-          ...(PROMPT_CACHING_ENABLED
-            ? { cache_control: { type: 'ephemeral' } }
-            : {}),
         },
       ],
     }
   } else {
     return {
       role: 'assistant',
-      content: message.message.content.map((_, i) => ({
-        ..._,
-        ...(i === message.message.content.length - 1 &&
-          _.type !== 'thinking' &&
-          _.type !== 'redacted_thinking'
-          ? PROMPT_CACHING_ENABLED
-            ? { cache_control: { type: 'ephemeral' } }
-            : {}
-          : {}),
-      })),
+      content: message.message.content.map((_) => ({ ..._ })),
     }
   }
 }
@@ -1383,9 +1439,6 @@ async function queryAnthropicNative(
   const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
     _ => ({
-      ...(PROMPT_CACHING_ENABLED
-        ? { cache_control: { type: 'ephemeral' } }
-        : {}),
       text: _,
       type: 'text',
     }),
@@ -1404,6 +1457,10 @@
   )
   const anthropicMessages = addCacheBreakpoints(messages)
+
+  // apply cache control
+  const { systemBlocks: processedSystem, messageParams: processedMessages } =
+    applyCacheControlWithLimits(system, anthropicMessages)
   const startIncludingRetries = Date.now()
   // Log the system prompt construction process
@@ -1426,8 +1483,8 @@
   const params: Anthropic.Beta.Messages.MessageCreateParams = {
     model,
     max_tokens: getMaxTokensFromProfile(modelProfile),
-    messages: anthropicMessages,
-    system,
+    messages: processedMessages,
+    system: processedSystem,
     tools: toolSchemas.length > 0 ? toolSchemas : undefined,
     tool_choice: toolSchemas.length > 0 ? { type: 'auto' } : undefined,
   }
@@ -1450,6 +1507,7 @@
       : null,
     maxTokens: params.max_tokens,
     temperature: MAIN_QUERY_TEMPERATURE,
+    params: params,
     messageCount: params.messages?.length || 0,
     streamMode: true,
     toolsCount: toolSchemas.length,
@@ -1609,6 +1667,10 @@
     }
   }, { signal })
+
+  debugLogger.api('ANTHROPIC_API_CALL_SUCCESS', {
+    content: response.content
+  })
   const ttftMs = Date.now() - start
   const durationMs = Date.now() - startIncludingRetries
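To confirm the breakpoints actually take effect, the Messages API reports cache activity in the response's usage object; a follow-up log along these lines would surface it (a suggestion, not part of this commit; the event name is made up):

// cache_creation_input_tokens: tokens written to the cache on this call.
// cache_read_input_tokens: tokens served from a previously cached prefix.
debugLogger.api('ANTHROPIC_CACHE_USAGE', {
  cacheCreated: response.usage.cache_creation_input_tokens,
  cacheRead: response.usage.cache_read_input_tokens,
})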