From ca612699ec0d2b1942c27a0ccd2c1adae39a1a48 Mon Sep 17 00:00:00 2001
From: Sheng Fan
Date: Tue, 18 Mar 2025 11:53:47 +0800
Subject: [PATCH] refactor(app): explicitly specify LLM request parameters to
 allow typing

---
 app/llm.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/app/llm.py b/app/llm.py
index f78a7f7..b527058 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -263,23 +263,23 @@ class LLM:

             if not stream:
                 # Non-streaming request
-                params["stream"] = False
-
-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )

                 if not response.choices or not response.choices[0].message.content:
                     raise ValueError("Empty or invalid response from LLM")

                 # Update token counts
-                self.update_token_count(response.usage.prompt_tokens)
+                if response.usage:
+                    self.update_token_count(response.usage.prompt_tokens)

                 return response.choices[0].message.content

             # Streaming request, For streaming, update estimated token count before making the request
             self.update_token_count(input_tokens)

-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)

             collected_messages = []
             async for chunk in response:
@@ -292,6 +292,8 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")

+            # TODO Update token counts
+
             return full_response

         except TokenLimitExceeded:
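
Note (not part of the patch): a minimal sketch of the call shape the patch moves to, assuming the official `openai` async SDK. The helper name `ask_non_streaming`, the model name, and the messages are illustrative, not taken from the repository. Passing a literal `stream=False` instead of mutating a params dict lets the SDK's typed overloads resolve the return value to a plain `ChatCompletion` rather than an async stream of chunks, and `response.usage` is guarded because it can be `None`.

```python
# Minimal sketch, not the project's code: shows the typed non-streaming call
# the patch switches to. Helper name, model, and messages are assumptions.
from openai import AsyncOpenAI


async def ask_non_streaming(client: AsyncOpenAI) -> str:
    # Literal stream=False: type checkers pick the overload returning a
    # ChatCompletion instead of an async stream of ChatCompletionChunk.
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
    )
    if not response.choices or not response.choices[0].message.content:
        raise ValueError("Empty or invalid response from LLM")
    # usage may be None, so guard before reading prompt_tokens.
    if response.usage:
        print("prompt tokens:", response.usage.prompt_tokens)
    return response.choices[0].message.content
```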