Print the token usage of each step's prompt and completion, along with the cumulative totals so far, which is useful for analyzing resource consumption.

zhiyuanRen 2025-03-16 21:47:46 +08:00
parent 2a5fa9727f
commit 10ecc91e5e
2 changed files with 22 additions and 5 deletions
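With this change, every LLM call logs a line in the following format (the numbers below are illustrative, not from a real run):

    Token usage: Input=812, Completion=164, Cumulative Input=2450, Cumulative Completion=391, Total=976, Cumulative Total=2841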

View File

@@ -59,6 +59,7 @@ class LLM:
         # Add token counting related attributes
         self.total_input_tokens = 0
+        self.total_completion_tokens = 0
         self.max_input_tokens = (
             llm_config.max_input_tokens
             if hasattr(llm_config, "max_input_tokens")
@@ -129,12 +130,15 @@ class LLM:
         return token_count

-    def update_token_count(self, input_tokens: int) -> None:
+    def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
        """Update token counts"""
         # Only track tokens if max_input_tokens is set
         self.total_input_tokens += input_tokens
+        self.total_completion_tokens += completion_tokens
         logger.info(
-            f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}"
+            f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
+            f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
+            f"Total={input_tokens + completion_tokens}, Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
         )

     def check_token_limit(self, input_tokens: int) -> bool:
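A minimal sketch of how the counters accumulate across successive steps, assuming an LLM instance named llm (the instance name and the literal token numbers are hypothetical):

    # Hypothetical illustration: two consecutive steps feed the same counters.
    llm.update_token_count(input_tokens=500, completion_tokens=120)
    llm.update_token_count(input_tokens=650, completion_tokens=90)
    assert llm.total_input_tokens == 1150       # 500 + 650
    assert llm.total_completion_tokens == 210   # 120 + 90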
@@ -271,7 +275,9 @@ class LLM:
                    raise ValueError("Empty or invalid response from LLM")

                # Update token counts
-                self.update_token_count(response.usage.prompt_tokens)
+                self.update_token_count(
+                    response.usage.prompt_tokens, response.usage.completion_tokens
+                )

                return response.choices[0].message.content
@@ -282,9 +288,11 @@ class LLM:
            response = await self.client.chat.completions.create(**params)

            collected_messages = []
+            completion_text = ""
            async for chunk in response:
                chunk_message = chunk.choices[0].delta.content or ""
                collected_messages.append(chunk_message)
+                completion_text += chunk_message
                print(chunk_message, end="", flush=True)

            print()  # Newline after streaming
@@ -292,6 +300,13 @@
            if not full_response:
                raise ValueError("Empty response from streaming LLM")

+            # Estimate completion tokens for streaming responses
+            completion_tokens = self.count_tokens(completion_text)
+            logger.info(
+                f"Estimated completion tokens for streaming response: {completion_tokens}"
+            )
+            self.total_completion_tokens += completion_tokens
+
            return full_response

        except TokenLimitExceeded:
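Streaming responses from the chat completions API normally do not carry a usage block, so the completion side is estimated by tokenizing the collected text. A rough sketch of such an estimate, assuming a tiktoken-based tokenizer (the repository's own LLM.count_tokens may be implemented differently):

    import tiktoken

    def estimate_completion_tokens(completion_text: str, model: str = "gpt-4o") -> int:
        """Estimate how many tokens a streamed completion used."""
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Fall back to a generic encoding for unknown model names.
            encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(completion_text))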
@@ -412,7 +427,9 @@ class LLM:
                raise ValueError("Invalid or empty response from LLM")

            # Update token counts
-            self.update_token_count(response.usage.prompt_tokens)
+            self.update_token_count(
+                response.usage.prompt_tokens, response.usage.completion_tokens
+            )

            return response.choices[0].message

View File

@@ -2,8 +2,8 @@ import os

 import aiofiles

-from app.tool.base import BaseTool
 from app.config import WORKSPACE_ROOT
+from app.tool.base import BaseTool


 class FileSaver(BaseTool):