From ca612699ec0d2b1942c27a0ccd2c1adae39a1a48 Mon Sep 17 00:00:00 2001
From: Sheng Fan
Date: Tue, 18 Mar 2025 11:53:47 +0800
Subject: [PATCH] refactor(app): explicitly specify LLM request parameters to
 allow typing

---
 app/llm.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/app/llm.py b/app/llm.py
index f78a7f7..b527058 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -263,23 +263,23 @@ class LLM:

             if not stream:
                 # Non-streaming request
-                params["stream"] = False
-
-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )

                 if not response.choices or not response.choices[0].message.content:
                     raise ValueError("Empty or invalid response from LLM")

                 # Update token counts
-                self.update_token_count(response.usage.prompt_tokens)
+                if response.usage:
+                    self.update_token_count(response.usage.prompt_tokens)

                 return response.choices[0].message.content

             # Streaming request, For streaming, update estimated token count before making the request
             self.update_token_count(input_tokens)

-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)

             collected_messages = []
             async for chunk in response:
@@ -292,6 +292,8 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")

+            # TODO Update token counts
+
             return full_response

         except TokenLimitExceeded:
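
Note (not part of the patch): a minimal sketch of the call shape the patch moves to, assuming the official `openai` async SDK. The helper name `ask_non_streaming`, the model name, and the messages are illustrative, not taken from the repository. Passing a literal `stream=False` instead of mutating a params dict lets the SDK's typed overloads resolve the return value to a plain `ChatCompletion` rather than an async stream of chunks, and `response.usage` is guarded because it can be `None`.

```python
# Minimal sketch, not the project's code: shows the typed non-streaming call
# the patch switches to. Helper name, model, and messages are assumptions.
from openai import AsyncOpenAI


async def ask_non_streaming(client: AsyncOpenAI) -> str:
    # Literal stream=False: type checkers pick the overload returning a
    # ChatCompletion instead of an async stream of ChatCompletionChunk.
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
    )
    if not response.choices or not response.choices[0].message.content:
        raise ValueError("Empty or invalid response from LLM")
    # usage may be None, so guard before reading prompt_tokens.
    if response.usage:
        print("prompt tokens:", response.usage.prompt_tokens)
    return response.choices[0].message.content
```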