From 10ecc91e5e5286025a236a73395b1851c07209dc Mon Sep 17 00:00:00 2001
From: zhiyuanRen <1131876818@qq.com>
Date: Sun, 16 Mar 2025 21:47:46 +0800
Subject: [PATCH 1/3] print the token usage of each step's prompt and
 completion, as well as the cumulative total consumption up to now, which is
 useful for analyzing resource usage.

---
 app/llm.py             | 25 +++++++++++++++++++++----
 app/tool/file_saver.py |  2 +-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/app/llm.py b/app/llm.py
index 18a13af..47e18ab 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -59,6 +59,7 @@ class LLM:
 
         # Add token counting related attributes
         self.total_input_tokens = 0
+        self.total_completion_tokens = 0
         self.max_input_tokens = (
             llm_config.max_input_tokens
             if hasattr(llm_config, "max_input_tokens")
@@ -129,12 +130,15 @@ class LLM:
 
         return token_count
 
-    def update_token_count(self, input_tokens: int) -> None:
+    def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
         """Update token counts"""
         # Only track tokens if max_input_tokens is set
         self.total_input_tokens += input_tokens
+        self.total_completion_tokens += completion_tokens
         logger.info(
-            f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}"
+            f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
+            f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
+            f"Total={input_tokens + completion_tokens}, Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
         )
 
     def check_token_limit(self, input_tokens: int) -> bool:
@@ -271,7 +275,9 @@
                 raise ValueError("Empty or invalid response from LLM")
 
             # Update token counts
-            self.update_token_count(response.usage.prompt_tokens)
+            self.update_token_count(
+                response.usage.prompt_tokens, response.usage.completion_tokens
+            )
 
             return response.choices[0].message.content
 
@@ -282,9 +288,11 @@
             response = await self.client.chat.completions.create(**params)
 
             collected_messages = []
+            completion_text = ""
             async for chunk in response:
                 chunk_message = chunk.choices[0].delta.content or ""
                 collected_messages.append(chunk_message)
+                completion_text += chunk_message
                 print(chunk_message, end="", flush=True)
 
             print()  # Newline after streaming
@@ -292,6 +300,13 @@
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")
 
+            # 对于流式响应,估算completion tokens
+            completion_tokens = self.count_tokens(completion_text)
+            logger.info(
+                f"Estimated completion tokens for streaming response: {completion_tokens}"
+            )
+            self.total_completion_tokens += completion_tokens
+
             return full_response
 
         except TokenLimitExceeded:
@@ -412,7 +427,9 @@
                 raise ValueError("Invalid or empty response from LLM")
 
             # Update token counts
-            self.update_token_count(response.usage.prompt_tokens)
+            self.update_token_count(
+                response.usage.prompt_tokens, response.usage.completion_tokens
+            )
 
             return response.choices[0].message
 
diff --git a/app/tool/file_saver.py b/app/tool/file_saver.py
index 96d64b3..7d92a02 100644
--- a/app/tool/file_saver.py
+++ b/app/tool/file_saver.py
@@ -2,8 +2,8 @@ import os
 
 import aiofiles
 
-from app.tool.base import BaseTool
 from app.config import WORKSPACE_ROOT
+from app.tool.base import BaseTool
 
 
 class FileSaver(BaseTool):

From 6dcd2ca0648cfe20b703ea5c143a0ccec0ec9b48 Mon Sep 17 00:00:00 2001
From: zhiyuanRen <1131876818@qq.com>
Date: Mon, 17 Mar 2025 21:36:04 +0800
Subject: [PATCH 2/3] fix: replace chinese comment with english version

---
 app/llm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/llm.py b/app/llm.py
index 47e18ab..966ec52 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -27,7 +27,6 @@ from app.schema import (
     ToolChoice,
 )
 
-
 REASONING_MODELS = ["o1", "o3-mini"]
 
 
@@ -300,7 +299,7 @@
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")
 
-            # 对于流式响应,估算completion tokens
+            # estimate completion tokens for streaming response
             completion_tokens = self.count_tokens(completion_text)
             logger.info(
                 f"Estimated completion tokens for streaming response: {completion_tokens}"

From 11d1bd77294dfa5ba31be527d7b14f7bb6322ff0 Mon Sep 17 00:00:00 2001
From: zhiyuanRen <1131876818@qq.com>
Date: Mon, 17 Mar 2025 21:39:36 +0800
Subject: [PATCH 3/3] format change for precommit purpose

---
 app/llm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/llm.py b/app/llm.py
index 966ec52..334103a 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -27,6 +27,7 @@ from app.schema import (
     ToolChoice,
 )
 
+
 REASONING_MODELS = ["o1", "o3-mini"]
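
For reviewers, here is a minimal standalone sketch of the accounting pattern these
patches introduce: exact per-step counts taken from response.usage for non-streaming
calls, plus a local tokenizer estimate for streamed text. It assumes the tiktoken
library is available; the TokenTracker name, the model choice, and the example
numbers are illustrative, not part of the patch itself.

import tiktoken


class TokenTracker:
    """Illustrative stand-in for the counters this patch series adds to LLM."""

    def __init__(self, model: str = "gpt-4o"):
        try:
            self.encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model names fall back to a generic encoding
            self.encoding = tiktoken.get_encoding("cl100k_base")
        self.total_input_tokens = 0
        self.total_completion_tokens = 0

    def count_tokens(self, text: str) -> int:
        """Tokenize text locally, mirroring the role of LLM.count_tokens."""
        return len(self.encoding.encode(text)) if text else 0

    def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
        """Record exact per-step counts as reported by the API (response.usage)."""
        self.total_input_tokens += input_tokens
        self.total_completion_tokens += completion_tokens
        print(
            f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
            f"Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
        )

    def estimate_streamed_completion(self, completion_text: str) -> None:
        """Streamed chunks carry no usage block by default, so re-tokenize the
        collected text and add the result to the running total as an estimate."""
        self.total_completion_tokens += self.count_tokens(completion_text)


tracker = TokenTracker()
tracker.update_token_count(input_tokens=1200, completion_tokens=350)  # non-streaming step
tracker.estimate_streamed_completion("Hello, world!")  # streaming step

One design note: the streamed figure is an estimate because chat-completion chunks
omit usage data by default; where the endpoint supports
stream_options={"include_usage": True}, a final chunk with exact usage can replace
the re-tokenization step entirely.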