feat(ModelSelector): model context

2025-10-06 13:18:05 +08:00 · 2025-10-06 13:18:05 +08:00 · 70f0d6b109
commit 70f0d6b109
parent 451362256c
1 changed files with 103 additions and 5 deletions
--- a/src/components/ModelSelector.tsx
+++ b/src/components/ModelSelector.tsx
@ -419,7 +419,10 @@ export function ModelSelector({
  function getModelDetails(model: ModelInfo): string {
    const details = []
-    if (model.max_tokens) {
+    // Show context_length if available (Ollama models), otherwise max_tokens
    if (model.context_length) {
      details.push(`${formatNumber(model.context_length)} tokens`)
    } else if (model.max_tokens) {
      details.push(`${formatNumber(model.max_tokens)} tokens`)
    }
@ -1040,6 +1043,7 @@ export function ModelSelector({
      }
      // Transform Ollama models to our format
      // Note: max_tokens here is for OUTPUT tokens, not context length
      const ollamaModels = models.map((model: any) => ({
        model:
          model.id ??
@ -1047,7 +1051,7 @@ export function ModelSelector({
          model.modelName ??
          (typeof model === 'string' ? model : ''),
        provider: 'ollama',
-        max_tokens: 4096, // Default value
+        max_tokens: DEFAULT_MAX_TOKENS, // Default output tokens (8K is reasonable)
        supports_vision: false,
        supports_function_calling: true,
        supports_reasoning_effort: false,
@ -1056,16 +1060,102 @@ export function ModelSelector({
      // Filter out models with empty names
      const validModels = ollamaModels.filter(model => model.model)
-      setAvailableModels(validModels)
+      // Helper: normalize Ollama server root for /api/show (strip trailing /v1)
      const normalizeOllamaRoot = (url: string): string => {
        try {
          const u = new URL(url)
          let pathname = u.pathname.replace(/\/+$|^$/, '')
          if (pathname.endsWith('/v1')) {
            pathname = pathname.slice(0, -3)
          }
          u.pathname = pathname
          return u.toString().replace(/\/+$/, '')
        } catch {
          return url.replace(/\/v1\/?$/, '')
        }
      }
      // Helper: extract num_ctx/context_length from /api/show response
      const extractContextTokens = (data: any): number | null => {
        if (!data || typeof data !== 'object') return null
        // First check model_info for architecture-specific context_length fields
        // Example: qwen2.context_length, llama.context_length, etc.
        if (data.model_info && typeof data.model_info === 'object') {
          const modelInfo = data.model_info
          for (const key of Object.keys(modelInfo)) {
            if (key.endsWith('.context_length') || key.endsWith('_context_length')) {
              const val = modelInfo[key]
              if (typeof val === 'number' && isFinite(val) && val > 0) {
                return val
              }
            }
          }
        }
        // Fallback to other common fields
        const candidates = [
          (data as any)?.parameters?.num_ctx,
          (data as any)?.model_info?.num_ctx,
          (data as any)?.config?.num_ctx,
          (data as any)?.details?.context_length,
          (data as any)?.context_length,
          (data as any)?.num_ctx,
          (data as any)?.max_tokens,
          (data as any)?.max_new_tokens
        ].filter((v: any) => typeof v === 'number' && isFinite(v) && v > 0)
        if (candidates.length > 0) {
          return Math.max(...candidates)
        }
        // parameters may be a string like "num_ctx=4096 ..."
        if (typeof (data as any)?.parameters === 'string') {
          const m = (data as any).parameters.match(/num_ctx\s*[:=]\s*(\d+)/i)
          if (m) {
            const n = parseInt(m[1], 10)
            if (Number.isFinite(n) && n > 0) return n
          }
        }
        return null
      }
      // Enrich each model via /api/show to get accurate context length
      // Store context length separately from max_tokens (output limit)
      const ollamaRoot = normalizeOllamaRoot(ollamaBaseUrl)
      const enrichedModels = await Promise.all(
        validModels.map(async (m: any) => {
          try {
            const showResp = await fetch(`${ollamaRoot}/api/show`, {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({ name: m.model })
            })
            if (showResp.ok) {
              const showData = await showResp.json()
              const ctx = extractContextTokens(showData)
              if (typeof ctx === 'number' && isFinite(ctx) && ctx > 0) {
                // Store context_length separately, don't override max_tokens
                return { ...m, context_length: ctx }
              }
            }
            // Fallback to default if missing
            return m
          } catch {
            return m
          }
        })
      )
      setAvailableModels(enrichedModels)
      // Only navigate if we have models
-      if (validModels.length > 0) {
+      if (enrichedModels.length > 0) {
        navigateTo('model')
      } else {
        setModelLoadError('No models found in your Ollama installation')
      }
-      return validModels
+      return enrichedModels
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
@ -1404,7 +1494,15 @@ export function ModelSelector({
      setReasoningEffort(null)
    }
    // Set context length if available (from Ollama /api/show)
    if (modelInfo?.context_length) {
      setContextLength(modelInfo.context_length)
    } else {
      setContextLength(DEFAULT_CONTEXT_LENGTH)
    }
    // Set max tokens based on model info or default
    // Note: max_tokens is for OUTPUT, not context window
    if (modelInfo?.max_tokens) {
      const modelMaxTokens = modelInfo.max_tokens
      // Check if the model's max tokens matches any of our presets