feat(ModelSelector): model context
This commit is contained in:
parent
451362256c
commit
70f0d6b109
@ -419,7 +419,10 @@ export function ModelSelector({
|
|||||||
function getModelDetails(model: ModelInfo): string {
|
function getModelDetails(model: ModelInfo): string {
|
||||||
const details = []
|
const details = []
|
||||||
|
|
||||||
if (model.max_tokens) {
|
// Show context_length if available (Ollama models), otherwise max_tokens
|
||||||
|
if (model.context_length) {
|
||||||
|
details.push(`${formatNumber(model.context_length)} tokens`)
|
||||||
|
} else if (model.max_tokens) {
|
||||||
details.push(`${formatNumber(model.max_tokens)} tokens`)
|
details.push(`${formatNumber(model.max_tokens)} tokens`)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1040,6 +1043,7 @@ export function ModelSelector({
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Transform Ollama models to our format
|
// Transform Ollama models to our format
|
||||||
|
// Note: max_tokens here is for OUTPUT tokens, not context length
|
||||||
const ollamaModels = models.map((model: any) => ({
|
const ollamaModels = models.map((model: any) => ({
|
||||||
model:
|
model:
|
||||||
model.id ??
|
model.id ??
|
||||||
@ -1047,7 +1051,7 @@ export function ModelSelector({
|
|||||||
model.modelName ??
|
model.modelName ??
|
||||||
(typeof model === 'string' ? model : ''),
|
(typeof model === 'string' ? model : ''),
|
||||||
provider: 'ollama',
|
provider: 'ollama',
|
||||||
max_tokens: 4096, // Default value
|
max_tokens: DEFAULT_MAX_TOKENS, // Default output tokens (8K is reasonable)
|
||||||
supports_vision: false,
|
supports_vision: false,
|
||||||
supports_function_calling: true,
|
supports_function_calling: true,
|
||||||
supports_reasoning_effort: false,
|
supports_reasoning_effort: false,
|
||||||
@ -1056,16 +1060,102 @@ export function ModelSelector({
|
|||||||
// Filter out models with empty names
|
// Filter out models with empty names
|
||||||
const validModels = ollamaModels.filter(model => model.model)
|
const validModels = ollamaModels.filter(model => model.model)
|
||||||
|
|
||||||
setAvailableModels(validModels)
|
// Helper: normalize Ollama server root for /api/show (strip trailing /v1)
|
||||||
|
const normalizeOllamaRoot = (url: string): string => {
|
||||||
|
try {
|
||||||
|
const u = new URL(url)
|
||||||
|
let pathname = u.pathname.replace(/\/+$|^$/, '')
|
||||||
|
if (pathname.endsWith('/v1')) {
|
||||||
|
pathname = pathname.slice(0, -3)
|
||||||
|
}
|
||||||
|
u.pathname = pathname
|
||||||
|
return u.toString().replace(/\/+$/, '')
|
||||||
|
} catch {
|
||||||
|
return url.replace(/\/v1\/?$/, '')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: extract num_ctx/context_length from /api/show response
|
||||||
|
const extractContextTokens = (data: any): number | null => {
|
||||||
|
if (!data || typeof data !== 'object') return null
|
||||||
|
|
||||||
|
// First check model_info for architecture-specific context_length fields
|
||||||
|
// Example: qwen2.context_length, llama.context_length, etc.
|
||||||
|
if (data.model_info && typeof data.model_info === 'object') {
|
||||||
|
const modelInfo = data.model_info
|
||||||
|
for (const key of Object.keys(modelInfo)) {
|
||||||
|
if (key.endsWith('.context_length') || key.endsWith('_context_length')) {
|
||||||
|
const val = modelInfo[key]
|
||||||
|
if (typeof val === 'number' && isFinite(val) && val > 0) {
|
||||||
|
return val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to other common fields
|
||||||
|
const candidates = [
|
||||||
|
(data as any)?.parameters?.num_ctx,
|
||||||
|
(data as any)?.model_info?.num_ctx,
|
||||||
|
(data as any)?.config?.num_ctx,
|
||||||
|
(data as any)?.details?.context_length,
|
||||||
|
(data as any)?.context_length,
|
||||||
|
(data as any)?.num_ctx,
|
||||||
|
(data as any)?.max_tokens,
|
||||||
|
(data as any)?.max_new_tokens
|
||||||
|
].filter((v: any) => typeof v === 'number' && isFinite(v) && v > 0)
|
||||||
|
if (candidates.length > 0) {
|
||||||
|
return Math.max(...candidates)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parameters may be a string like "num_ctx=4096 ..."
|
||||||
|
if (typeof (data as any)?.parameters === 'string') {
|
||||||
|
const m = (data as any).parameters.match(/num_ctx\s*[:=]\s*(\d+)/i)
|
||||||
|
if (m) {
|
||||||
|
const n = parseInt(m[1], 10)
|
||||||
|
if (Number.isFinite(n) && n > 0) return n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enrich each model via /api/show to get accurate context length
|
||||||
|
// Store context length separately from max_tokens (output limit)
|
||||||
|
const ollamaRoot = normalizeOllamaRoot(ollamaBaseUrl)
|
||||||
|
const enrichedModels = await Promise.all(
|
||||||
|
validModels.map(async (m: any) => {
|
||||||
|
try {
|
||||||
|
const showResp = await fetch(`${ollamaRoot}/api/show`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ name: m.model })
|
||||||
|
})
|
||||||
|
if (showResp.ok) {
|
||||||
|
const showData = await showResp.json()
|
||||||
|
const ctx = extractContextTokens(showData)
|
||||||
|
if (typeof ctx === 'number' && isFinite(ctx) && ctx > 0) {
|
||||||
|
// Store context_length separately, don't override max_tokens
|
||||||
|
return { ...m, context_length: ctx }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback to default if missing
|
||||||
|
return m
|
||||||
|
} catch {
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
setAvailableModels(enrichedModels)
|
||||||
|
|
||||||
// Only navigate if we have models
|
// Only navigate if we have models
|
||||||
if (validModels.length > 0) {
|
if (enrichedModels.length > 0) {
|
||||||
navigateTo('model')
|
navigateTo('model')
|
||||||
} else {
|
} else {
|
||||||
setModelLoadError('No models found in your Ollama installation')
|
setModelLoadError('No models found in your Ollama installation')
|
||||||
}
|
}
|
||||||
|
|
||||||
return validModels
|
return enrichedModels
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMessage =
|
const errorMessage =
|
||||||
error instanceof Error ? error.message : String(error)
|
error instanceof Error ? error.message : String(error)
|
||||||
@ -1404,7 +1494,15 @@ export function ModelSelector({
|
|||||||
setReasoningEffort(null)
|
setReasoningEffort(null)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set context length if available (from Ollama /api/show)
|
||||||
|
if (modelInfo?.context_length) {
|
||||||
|
setContextLength(modelInfo.context_length)
|
||||||
|
} else {
|
||||||
|
setContextLength(DEFAULT_CONTEXT_LENGTH)
|
||||||
|
}
|
||||||
|
|
||||||
// Set max tokens based on model info or default
|
// Set max tokens based on model info or default
|
||||||
|
// Note: max_tokens is for OUTPUT, not context window
|
||||||
if (modelInfo?.max_tokens) {
|
if (modelInfo?.max_tokens) {
|
||||||
const modelMaxTokens = modelInfo.max_tokens
|
const modelMaxTokens = modelInfo.max_tokens
|
||||||
// Check if the model's max tokens matches any of our presets
|
// Check if the model's max tokens matches any of our presets
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user