refactor(app): explicitly specify LLM request parameters to allow typing
parent aa512fac6e
commit ca612699ec

12  app/llm.py
@@ -263,14 +263,15 @@ class LLM:
            if not stream:
                # Non-streaming request
-                params["stream"] = False

-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )

                if not response.choices or not response.choices[0].message.content:
                    raise ValueError("Empty or invalid response from LLM")

                # Update token counts
                if response.usage:
                    self.update_token_count(response.usage.prompt_tokens)

                return response.choices[0].message.content
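
Why this helps typing: the OpenAI Python SDK overloads chat.completions.create on the literal value of the stream keyword, so an explicit stream=False resolves the call to the overload that returns a ChatCompletion, while a flag buried inside the params dict is only seen as Any and leaves the return type un-narrowed. A minimal sketch of the non-streaming branch under that assumption; the ask_once helper and its signature are illustrative, not part of the commit:

from typing import Any

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion


async def ask_once(client: AsyncOpenAI, params: dict[str, Any]) -> str:
    # The literal stream=False keyword selects the non-streaming overload,
    # so a type checker sees `response` as ChatCompletion rather than a
    # union with the streaming return type.
    response: ChatCompletion = await client.chat.completions.create(
        **params, stream=False
    )

    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise ValueError("Empty or invalid response from LLM")
    return content
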
@@ -278,8 +279,7 @@ class LLM:
            # Streaming request, For streaming, update estimated token count before making the request
            self.update_token_count(input_tokens)

-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)

            collected_messages = []
            async for chunk in response:
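
The streaming branch benefits the same way: a literal stream=True selects the overload that returns an async stream of ChatCompletionChunk objects, so the chunks iterated over are typed instead of Any. A sketch under the same assumptions; ask_streaming is likewise illustrative:

from typing import Any

from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import ChatCompletionChunk


async def ask_streaming(client: AsyncOpenAI, params: dict[str, Any]) -> str:
    # The literal stream=True keyword selects the streaming overload,
    # so `stream` is typed as AsyncStream[ChatCompletionChunk].
    stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
        **params, stream=True
    )

    collected_messages: list[str] = []
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            collected_messages.append(chunk.choices[0].delta.content)

    full_response = "".join(collected_messages).strip()
    if not full_response:
        raise ValueError("Empty response from streaming LLM")
    return full_response
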
@@ -292,6 +292,8 @@ class LLM:
            if not full_response:
                raise ValueError("Empty response from streaming LLM")

+            # TODO Update token counts
+
            return full_response

        except TokenLimitExceeded: