refactor(app): explicitly specify LLM request parameters to allow typing
parent aa512fac6e
commit ca612699ec

12  app/llm.py
@@ -263,14 +263,15 @@ class LLM:
            if not stream:
                # Non-streaming request
-                params["stream"] = False

-                response = await self.client.chat.completions.create(**params)
+                response = await self.client.chat.completions.create(
+                    **params, stream=False
+                )

                if not response.choices or not response.choices[0].message.content:
                    raise ValueError("Empty or invalid response from LLM")

                # Update token counts
                if response.usage:
                    self.update_token_count(response.usage.prompt_tokens)

                return response.choices[0].message.content
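
Why this helps typing: the OpenAI Python SDK overloads chat.completions.create on the literal value of the stream keyword, so an explicit stream=False resolves the call to the overload that returns a ChatCompletion, while a flag buried inside the params dict is only seen as Any and leaves the return type un-narrowed. A minimal sketch of the non-streaming branch under that assumption; the ask_once helper and its signature are illustrative, not part of the commit:

from typing import Any

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion


async def ask_once(client: AsyncOpenAI, params: dict[str, Any]) -> str:
    # The literal stream=False keyword selects the non-streaming overload,
    # so a type checker sees `response` as ChatCompletion rather than a
    # union with the streaming return type.
    response: ChatCompletion = await client.chat.completions.create(
        **params, stream=False
    )

    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise ValueError("Empty or invalid response from LLM")
    return content
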
@@ -278,8 +279,7 @@ class LLM:
            # Streaming request, For streaming, update estimated token count before making the request
            self.update_token_count(input_tokens)

-            params["stream"] = True
-            response = await self.client.chat.completions.create(**params)
+            response = await self.client.chat.completions.create(**params, stream=True)

            collected_messages = []
            async for chunk in response:
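
The streaming branch benefits the same way: a literal stream=True selects the overload that returns an async stream of ChatCompletionChunk objects, so the chunks iterated over are typed instead of Any. A sketch under the same assumptions; ask_streaming is likewise illustrative:

from typing import Any

from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import ChatCompletionChunk


async def ask_streaming(client: AsyncOpenAI, params: dict[str, Any]) -> str:
    # The literal stream=True keyword selects the streaming overload,
    # so `stream` is typed as AsyncStream[ChatCompletionChunk].
    stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
        **params, stream=True
    )

    collected_messages: list[str] = []
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            collected_messages.append(chunk.choices[0].delta.content)

    full_response = "".join(collected_messages).strip()
    if not full_response:
        raise ValueError("Empty response from streaming LLM")
    return full_response
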
@@ -292,6 +292,8 @@ class LLM:
            if not full_response:
                raise ValueError("Empty response from streaming LLM")

+            # TODO Update token counts
+
            return full_response

        except TokenLimitExceeded: