refactor(app): explicitly specify LLM request parameters to allow typing

Sheng Fan 2025-03-18 11:53:47 +08:00
parent aa512fac6e
commit ca612699ec
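
The motivation behind this change: openai-python v1 overloads chat.completions.create on the stream argument, returning a ChatCompletion when stream=False (or omitted) and an AsyncStream[ChatCompletionChunk] when stream=True. Setting params["stream"] inside an untyped dict and splatting it hides that literal from the type checker, so the inferred type of response degrades to a union or Any. Passing stream= as an explicit keyword lets the checker select the correct overload. A minimal sketch of the idea, assuming openai-python v1.x; the params shape and helper names here are illustrative, not taken from the commit:

# Sketch of the typing benefit. AsyncCompletions.create is @overload-ed
# on `stream`:
#   stream=False (or omitted) -> ChatCompletion
#   stream=True               -> AsyncStream[ChatCompletionChunk]
from typing import Any

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion

client = AsyncOpenAI()


async def ask_once(params: dict[str, Any]) -> str:
    # The literal keyword selects the non-streaming overload, so the
    # checker knows `response` has .choices and .usage attributes.
    response: ChatCompletion = await client.chat.completions.create(
        **params, stream=False
    )
    return response.choices[0].message.content or ""


async def ask_streaming(params: dict[str, Any]) -> str:
    # stream=True selects the AsyncStream[ChatCompletionChunk] overload,
    # so `async for chunk in response` type-checks.
    response = await client.chat.completions.create(**params, stream=True)
    pieces: list[str] = []
    async for chunk in response:
        pieces.append(chunk.choices[0].delta.content or "")
    return "".join(pieces)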


@@ -263,14 +263,15 @@ class LLM:
             if not stream:
                 # Non-streaming request
-                params["stream"] = False
-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )
                 if not response.choices or not response.choices[0].message.content:
                     raise ValueError("Empty or invalid response from LLM")
                 # Update token counts
                 if response.usage:
                     self.update_token_count(response.usage.prompt_tokens)
                 return response.choices[0].message.content
@@ -278,8 +279,7 @@ class LLM:
             # Streaming request, For streaming, update estimated token count before making the request
             self.update_token_count(input_tokens)
-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)
             collected_messages = []
             async for chunk in response:
@@ -292,6 +292,8 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")
+
+            # TODO Update token counts
             return full_response
         except TokenLimitExceeded:
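
A side effect worth noting: once stream is passed as an explicit keyword, the old params["stream"] = ... assignments must be deleted, not merely left in place, because Python rejects supplying the same keyword argument twice. A self-contained illustration; the create function below is a hypothetical stand-in, not the SDK method:

# Hypothetical stand-in for client.chat.completions.create, showing why
# the dict assignments had to be removed along with this refactor.
def create(**kwargs: object) -> dict[str, object]:
    return kwargs


params = {"stream": True}  # leftover key, as the old code would have set
try:
    create(**params, stream=True)
except TypeError as exc:
    # create() got multiple values for keyword argument 'stream'
    print(exc)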