refactor(app): explicitly specify LLM request parameters to allow typing
parent aa512fac6e
commit ca612699ec
app/llm.py (14 changed lines)
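Why the explicit keyword matters: the OpenAI v1 Python SDK types chat.completions.create with @overload signatures keyed on the stream literal, so a checker can only narrow the return type (ChatCompletion vs. AsyncStream[ChatCompletionChunk]) when stream is passed as a literal keyword rather than buried in the **params dict. A minimal, self-contained sketch of that mechanism, using a hypothetical FakeCompletions stand-in rather than the real SDK:

from typing import Any, Literal, overload


class FakeCompletions:
    # Overloads keyed on the `stream` literal, mirroring the shape of the
    # OpenAI v1 SDK's chat.completions.create.
    @overload
    async def create(self, *, stream: Literal[False] = False, **kw: Any) -> str: ...

    @overload
    async def create(self, *, stream: Literal[True], **kw: Any) -> list[str]: ...

    async def create(self, *, stream: bool = False, **kw: Any) -> Any:
        return ["chunk-1", "chunk-2"] if stream else "full response"


async def demo(client: FakeCompletions, params: dict[str, Any]) -> None:
    # `stream` hidden inside **params: the literal is erased, so the checker
    # cannot pick an overload and the return type stays ambiguous.
    ambiguous = await client.create(**params)
    # Explicit literal keyword (params must no longer contain "stream"):
    # the first overload resolves and `narrowed` is inferred as str.
    narrowed = await client.create(**params, stream=False)
    print(ambiguous, narrowed)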
@@ -263,23 +263,23 @@ class LLM:
             if not stream:
                 # Non-streaming request
-                params["stream"] = False
-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )
 
                 if not response.choices or not response.choices[0].message.content:
                     raise ValueError("Empty or invalid response from LLM")
 
                 # Update token counts
-                self.update_token_count(response.usage.prompt_tokens)
+                if response.usage:
+                    self.update_token_count(response.usage.prompt_tokens)
 
                 return response.choices[0].message.content
 
             # Streaming request, For streaming, update estimated token count before making the request
             self.update_token_count(input_tokens)
 
-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)
 
             collected_messages = []
             async for chunk in response:
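On the new response.usage guard: in openai-python v1, ChatCompletion.usage is typed Optional[CompletionUsage], so the old unguarded response.usage.prompt_tokens is a type error under strict checking (and a real AttributeError when a provider omits usage). A small illustrative helper, not part of the codebase, showing the same narrowing:

from openai.types.chat import ChatCompletion


def count_prompt_tokens(response: ChatCompletion) -> int:
    # Truthiness check narrows Optional[CompletionUsage], exactly as the
    # `if response.usage:` added in the diff does.
    if response.usage:
        return response.usage.prompt_tokens
    return 0  # usage may be absent, e.g. from some OpenAI-compatible proxies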
@@ -292,6 +292,8 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")
 
+            # TODO Update token counts
+
             return full_response
 
         except TokenLimitExceeded:
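On the streaming branch: with the literal stream=True, the SDK overload resolves to AsyncStream[ChatCompletionChunk], so the async for loop and chunk.choices[0].delta.content are fully typed. A hedged sketch of that path (hypothetical stream_text helper; completion-token accounting is still the TODO left in the second hunk):

from typing import Any

from openai import AsyncOpenAI


async def stream_text(client: AsyncOpenAI, **params: Any) -> str:
    # stream=True picks the overload returning AsyncStream[ChatCompletionChunk].
    response = await client.chat.completions.create(**params, stream=True)
    collected_messages: list[str] = []
    async for chunk in response:
        if chunk.choices:  # some chunks (e.g. a final usage chunk) carry no choices
            collected_messages.append(chunk.choices[0].delta.content or "")
    full_response = "".join(collected_messages).strip()
    if not full_response:
        raise ValueError("Empty response from streaming LLM")
    return full_response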