23 changes: 21 additions & 2 deletions ms_agent/agent/llm_agent.py
@@ -54,6 +54,10 @@ class LLMAgent(Agent):
 
     DEFAULT_MAX_CHAT_ROUND = 20
 
+    TOTAL_PROMPT_TOKENS = 0
+    TOTAL_COMPLETION_TOKENS = 0
+    TOKEN_LOCK = asyncio.Lock()
+
     def __init__(self,
                  config: DictConfig = DictConfig({}),
                  tag: str = DEFAULT_TAG,
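
Reviewer note: these totals are class attributes, not instance attributes, so every LLMAgent in the process accumulates into the same counters, and TOKEN_LOCK serializes the updates across concurrently running agents. A minimal standalone sketch of the pattern (Counter and its names are illustrative, not from ms_agent; creating an asyncio.Lock at class-definition time is safe on Python 3.10+, where the lock binds to the running loop on first use):

import asyncio

class Counter:
    # Class attributes: one shared total for every instance in the process.
    TOTAL = 0
    LOCK = asyncio.Lock()

    async def add(self, n: int) -> None:
        # The lock makes the read-modify-write atomic across concurrent tasks.
        async with Counter.LOCK:
            Counter.TOTAL += n

async def main():
    await asyncio.gather(*(Counter().add(10) for _ in range(5)))
    print(Counter.TOTAL)  # 50: five instances updated one shared total

asyncio.run(main())

Strictly, a bare += with no await inside the critical section cannot be interrupted on a single event loop, so the lock is mostly defensive here; it becomes load-bearing if the update ever spans an await.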
@@ -471,9 +475,24 @@ async def step(
             messages = await self.parallel_tool_call(messages)
 
         await self.after_tool_call(messages)
+
+        # usage
+        prompt_tokens = _response_message.prompt_tokens
+        completion_tokens = _response_message.completion_tokens
+
+        async with LLMAgent.TOKEN_LOCK:
+            LLMAgent.TOTAL_PROMPT_TOKENS += prompt_tokens
+            LLMAgent.TOTAL_COMPLETION_TOKENS += completion_tokens
+
+        # tokens in the current step
         self.log_output(
-            f'[usage] prompt_tokens: {_response_message.prompt_tokens}, '
-            f'completion_tokens: {_response_message.completion_tokens}')
+            f'[usage] prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}'
+        )
+        # total tokens for the process so far
+        self.log_output(
+            f'[usage_total] total_prompt_tokens: {LLMAgent.TOTAL_PROMPT_TOKENS}, '
+            f'total_completion_tokens: {LLMAgent.TOTAL_COMPLETION_TOKENS}')
 
         yield messages
 
     def prepare_llm(self):
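
With this change each step logs two lines: the tokens consumed by the current round, and the running totals for the process so far. A rough self-contained sketch of the resulting log shape, where ResponseMessage is a hypothetical stand-in for ms_agent's real response type:

from dataclasses import dataclass

@dataclass
class ResponseMessage:  # hypothetical stand-in for the real response type
    prompt_tokens: int
    completion_tokens: int

TOTAL_PROMPT = 0
TOTAL_COMPLETION = 0

def log_usage(msg: ResponseMessage) -> None:
    global TOTAL_PROMPT, TOTAL_COMPLETION
    TOTAL_PROMPT += msg.prompt_tokens
    TOTAL_COMPLETION += msg.completion_tokens
    # Per-step line, then cumulative line, mirroring the diff's format strings.
    print(f'[usage] prompt_tokens: {msg.prompt_tokens}, '
          f'completion_tokens: {msg.completion_tokens}')
    print(f'[usage_total] total_prompt_tokens: {TOTAL_PROMPT}, '
          f'total_completion_tokens: {TOTAL_COMPLETION}')

log_usage(ResponseMessage(120, 30))  # step 1
log_usage(ResponseMessage(200, 45))  # step 2: totals become 320 / 75

One thing worth flagging: because the totals live on the class, they never reset between runs inside the same process; callers that want per-run numbers would need to snapshot or reset the counters themselves.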
7 changes: 5 additions & 2 deletions ms_agent/llm/openai_llm.py
@@ -132,8 +132,11 @@ def _call_llm(self,
         """
         messages = self._format_input_message(messages)
 
-        if kwargs.get('stream', False) and self.args.get(
-                'stream_options', {}).get('include_usage', True):
+        is_streaming = kwargs.get('stream', False)
+        stream_options_config = self.args.get('stream_options', {})
+        # For streaming responses, we should request usage statistics by default,
+        # unless it's explicitly disabled in the configuration.
+        if is_streaming and stream_options_config.get('include_usage', True):
             kwargs.setdefault('stream_options', {})['include_usage'] = True
 
         return self.client.chat.completions.create(
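
Context for the include_usage handling: with stream=True, OpenAI-compatible chat endpoints only report token counts when stream_options.include_usage is set, in which case the final chunk carries a usage object and an empty choices list. The setdefault line above keeps any caller-supplied stream_options intact while switching usage reporting on. A sketch of consuming such a stream, assuming the official openai v1.x client; the model name and prompt are placeholders:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
stream = client.chat.completions.create(
    model='gpt-4o-mini',  # placeholder model
    messages=[{'role': 'user', 'content': 'Say hi.'}],
    stream=True,
    stream_options={'include_usage': True},
)
usage = None
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or '', end='')
    if chunk.usage is not None:
        usage = chunk.usage  # only the final chunk carries usage
print()
if usage is not None:
    print(f'prompt_tokens={usage.prompt_tokens}, '
          f'completion_tokens={usage.completion_tokens}')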