refactor(app): explicitly specify LLM request parameters to allow typing
parent aa512fac6e
commit ca612699ec
app/llm.py (14 changed lines)
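Why the explicit keyword matters: the OpenAI v1 Python SDK types chat.completions.create with @overload signatures keyed on the stream literal, so a checker can only narrow the return type (ChatCompletion vs. AsyncStream[ChatCompletionChunk]) when stream is passed as a literal keyword rather than buried in the **params dict. A minimal, self-contained sketch of that mechanism, using a hypothetical FakeCompletions stand-in rather than the real SDK:

from typing import Any, Literal, overload


class FakeCompletions:
    # Overloads keyed on the `stream` literal, mirroring the shape of the
    # OpenAI v1 SDK's chat.completions.create.
    @overload
    async def create(self, *, stream: Literal[False] = False, **kw: Any) -> str: ...

    @overload
    async def create(self, *, stream: Literal[True], **kw: Any) -> list[str]: ...

    async def create(self, *, stream: bool = False, **kw: Any) -> Any:
        return ["chunk-1", "chunk-2"] if stream else "full response"


async def demo(client: FakeCompletions, params: dict[str, Any]) -> None:
    # `stream` hidden inside **params: the literal is erased, so the checker
    # cannot pick an overload and the return type stays ambiguous.
    ambiguous = await client.create(**params)
    # Explicit literal keyword (params must no longer contain "stream"):
    # the first overload resolves and `narrowed` is inferred as str.
    narrowed = await client.create(**params, stream=False)
    print(ambiguous, narrowed)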
@@ -263,23 +263,23 @@ class LLM:
             if not stream:
                 # Non-streaming request
-                params["stream"] = False
-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )
 
                 if not response.choices or not response.choices[0].message.content:
                     raise ValueError("Empty or invalid response from LLM")
 
                 # Update token counts
-                self.update_token_count(response.usage.prompt_tokens)
+                if response.usage:
+                    self.update_token_count(response.usage.prompt_tokens)
 
                 return response.choices[0].message.content
 
             # Streaming request, For streaming, update estimated token count before making the request
             self.update_token_count(input_tokens)
 
-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)
 
             collected_messages = []
             async for chunk in response:
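On the new response.usage guard: in openai-python v1, ChatCompletion.usage is typed Optional[CompletionUsage], so the old unguarded response.usage.prompt_tokens is a type error under strict checking (and a real AttributeError when a provider omits usage). A small illustrative helper, not part of the codebase, showing the same narrowing:

from openai.types.chat import ChatCompletion


def count_prompt_tokens(response: ChatCompletion) -> int:
    # Truthiness check narrows Optional[CompletionUsage], exactly as the
    # `if response.usage:` added in the diff does.
    if response.usage:
        return response.usage.prompt_tokens
    return 0  # usage may be absent, e.g. from some OpenAI-compatible proxies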
@@ -292,6 +292,8 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")
 
+            # TODO Update token counts
+
             return full_response
 
         except TokenLimitExceeded:
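On the streaming branch: with the literal stream=True, the SDK overload resolves to AsyncStream[ChatCompletionChunk], so the async for loop and chunk.choices[0].delta.content are fully typed. A hedged sketch of that path (hypothetical stream_text helper; completion-token accounting is still the TODO left in the second hunk):

from typing import Any

from openai import AsyncOpenAI


async def stream_text(client: AsyncOpenAI, **params: Any) -> str:
    # stream=True picks the overload returning AsyncStream[ChatCompletionChunk].
    response = await client.chat.completions.create(**params, stream=True)
    collected_messages: list[str] = []
    async for chunk in response:
        if chunk.choices:  # some chunks (e.g. a final usage chunk) carry no choices
            collected_messages.append(chunk.choices[0].delta.content or "")
    full_response = "".join(collected_messages).strip()
    if not full_response:
        raise ValueError("Empty response from streaming LLM")
    return full_response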