Print the token usage of each step's prompt and completion, as well as the cumulative totals so far; this is useful for analyzing resource consumption.
parent 2a5fa9727f
commit 10ecc91e5e

app/llm.py (25 changed lines):
@@ -59,6 +59,7 @@ class LLM:
         # Add token counting related attributes
         self.total_input_tokens = 0
+        self.total_completion_tokens = 0
         self.max_input_tokens = (
             llm_config.max_input_tokens
             if hasattr(llm_config, "max_input_tokens")
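The hasattr guard above makes max_input_tokens an optional config field. Assuming the truncated else branch falls back to None, an equivalent and slightly tighter spelling would be:

# Illustrative only: equivalent to the hasattr/else pattern above,
# assuming the (truncated) else branch yields None
self.max_input_tokens = getattr(llm_config, "max_input_tokens", None)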
@@ -129,12 +130,15 @@ class LLM:
         return token_count

-    def update_token_count(self, input_tokens: int) -> None:
+    def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
         """Update token counts"""
         # Only track tokens if max_input_tokens is set
         self.total_input_tokens += input_tokens
+        self.total_completion_tokens += completion_tokens
         logger.info(
-            f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}"
+            f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
+            f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
+            f"Total={input_tokens + completion_tokens}, Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
         )

     def check_token_limit(self, input_tokens: int) -> bool:
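With the expanded f-strings, each call to update_token_count now emits a single log line of this shape (the numbers here are made up for illustration):

Token usage: Input=1024, Completion=256, Cumulative Input=3072, Cumulative Completion=640, Total=1280, Cumulative Total=3712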
@@ -271,7 +275,9 @@ class LLM:
                 raise ValueError("Empty or invalid response from LLM")

             # Update token counts
-            self.update_token_count(response.usage.prompt_tokens)
+            self.update_token_count(
+                response.usage.prompt_tokens, response.usage.completion_tokens
+            )

             return response.choices[0].message.content

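For non-streaming requests the exact counts come straight from the API: OpenAI-style chat completion responses carry a usage object with prompt_tokens and completion_tokens, which is what the call above forwards. A minimal sketch of the same pattern in isolation (the client setup and model name are assumptions, not part of this commit):

from openai import AsyncOpenAI

client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment

async def ask_once(llm, messages):
    response = await client.chat.completions.create(
        model="gpt-4o-mini",  # hypothetical model choice
        messages=messages,
    )
    # Exact counts reported by the provider, as in the hunk above
    llm.update_token_count(
        response.usage.prompt_tokens, response.usage.completion_tokens
    )
    return response.choices[0].message.content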
@@ -282,9 +288,11 @@ class LLM:
             response = await self.client.chat.completions.create(**params)

             collected_messages = []
+            completion_text = ""
             async for chunk in response:
                 chunk_message = chunk.choices[0].delta.content or ""
                 collected_messages.append(chunk_message)
+                completion_text += chunk_message
                 print(chunk_message, end="", flush=True)

             print()  # Newline after streaming
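The reason for accumulating completion_text becomes clear in the next hunk: a streamed response yields content deltas chunk by chunk and, by default, no usage object, so the completion side has to be reconstructed client-side. Newer OpenAI endpoints can also report usage on the final chunk when asked; a hedged alternative, not what this commit does:

# Alternative: let the API report usage for the stream.
# stream_options is an OpenAI API feature; other providers may not support it.
response = await client.chat.completions.create(
    model="gpt-4o-mini",  # hypothetical
    messages=messages,
    stream=True,
    stream_options={"include_usage": True},
)
usage = None
async for chunk in response:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="", flush=True)
    if chunk.usage is not None:  # populated only on the final chunk
        usage = chunk.usage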
@@ -292,6 +300,13 @@ class LLM:
             if not full_response:
                 raise ValueError("Empty response from streaming LLM")

+            # For streaming responses, estimate the completion tokens
+            completion_tokens = self.count_tokens(completion_text)
+            logger.info(
+                f"Estimated completion tokens for streaming response: {completion_tokens}"
+            )
+            self.total_completion_tokens += completion_tokens
+
             return full_response

         except TokenLimitExceeded:
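The estimate leans on the class's count_tokens helper, of which only the return statement is visible in this diff. A plausible implementation, assuming tiktoken and treating the encoding choice as an assumption:

import tiktoken

def count_tokens(self, text: str) -> int:
    """Estimate the number of tokens in a piece of text."""
    if not text:
        return 0
    # cl100k_base matches GPT-3.5/GPT-4 era models; other models differ
    encoding = tiktoken.get_encoding("cl100k_base")
    token_count = len(encoding.encode(text))
    return token_count

Note that a local estimate can drift slightly from the provider's own accounting, since it ignores message framing and role tokens.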
@@ -412,7 +427,9 @@ class LLM:
                 raise ValueError("Invalid or empty response from LLM")

             # Update token counts
-            self.update_token_count(response.usage.prompt_tokens)
+            self.update_token_count(
+                response.usage.prompt_tokens, response.usage.completion_tokens
+            )

             return response.choices[0].message

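Taken together, the two counters give callers a cheap way to report per-step and cumulative consumption, which is what the commit message promises. An illustrative agent-loop snippet (llm.ask() is a stand-in for whichever request method the class exposes; the attribute names follow this diff):

async def run_step(llm, messages):
    reply = await llm.ask(messages)
    print(
        f"Cumulative: prompt={llm.total_input_tokens}, "
        f"completion={llm.total_completion_tokens}, "
        f"total={llm.total_input_tokens + llm.total_completion_tokens}"
    )
    return reply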
The commit also reorders two imports in the file defining the FileSaver tool, keeping first-party imports alphabetized:

@@ -2,8 +2,8 @@ import os

 import aiofiles

-from app.tool.base import BaseTool
 from app.config import WORKSPACE_ROOT
+from app.tool.base import BaseTool


 class FileSaver(BaseTool):