23 changes: 21 additions & 2 deletions ms_agent/agent/llm_agent.py
@@ -54,6 +54,10 @@ class LLMAgent(Agent):
 
     DEFAULT_MAX_CHAT_ROUND = 20
 
+    TOTAL_PROMPT_TOKENS = 0
+    TOTAL_COMPLETION_TOKENS = 0
+    TOKEN_LOCK = asyncio.Lock()
+
     def __init__(self,
                  config: DictConfig = DictConfig({}),
                  tag: str = DEFAULT_TAG,
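
Reviewer note: these totals are class attributes, not instance attributes, so every LLMAgent in the process accumulates into the same counters, and TOKEN_LOCK serializes the updates across concurrently running agents. A minimal standalone sketch of the pattern (Counter and its names are illustrative, not from ms_agent; creating an asyncio.Lock at class-definition time is safe on Python 3.10+, where the lock binds to the running loop on first use):

import asyncio

class Counter:
    # Class attributes: one shared total for every instance in the process.
    TOTAL = 0
    LOCK = asyncio.Lock()

    async def add(self, n: int) -> None:
        # The lock makes the read-modify-write atomic across concurrent tasks.
        async with Counter.LOCK:
            Counter.TOTAL += n

async def main():
    await asyncio.gather(*(Counter().add(10) for _ in range(5)))
    print(Counter.TOTAL)  # 50: five instances updated one shared total

asyncio.run(main())

Strictly, a bare += with no await inside the critical section cannot be interrupted on a single event loop, so the lock is mostly defensive here; it becomes load-bearing if the update ever spans an await.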
@@ -471,9 +475,24 @@ async def step(
             messages = await self.parallel_tool_call(messages)
 
         await self.after_tool_call(messages)
+
+        # usage
+        prompt_tokens = _response_message.prompt_tokens
+        completion_tokens = _response_message.completion_tokens
+
+        async with LLMAgent.TOKEN_LOCK:
+            LLMAgent.TOTAL_PROMPT_TOKENS += prompt_tokens
+            LLMAgent.TOTAL_COMPLETION_TOKENS += completion_tokens
+
+        # tokens in the current step
         self.log_output(
-            f'[usage] prompt_tokens: {_response_message.prompt_tokens}, '
-            f'completion_tokens: {_response_message.completion_tokens}')
+            f'[usage] prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}'
+        )
+        # total tokens for the process so far
+        self.log_output(
+            f'[usage_total] total_prompt_tokens: {LLMAgent.TOTAL_PROMPT_TOKENS}, '
+            f'total_completion_tokens: {LLMAgent.TOTAL_COMPLETION_TOKENS}')
 
         yield messages
 
     def prepare_llm(self):
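
With this change each step logs two lines: the tokens consumed by the current round, and the running totals for the process so far. A rough self-contained sketch of the resulting log shape, where ResponseMessage is a hypothetical stand-in for ms_agent's real response type:

from dataclasses import dataclass

@dataclass
class ResponseMessage:  # hypothetical stand-in for the real response type
    prompt_tokens: int
    completion_tokens: int

TOTAL_PROMPT = 0
TOTAL_COMPLETION = 0

def log_usage(msg: ResponseMessage) -> None:
    global TOTAL_PROMPT, TOTAL_COMPLETION
    TOTAL_PROMPT += msg.prompt_tokens
    TOTAL_COMPLETION += msg.completion_tokens
    # Per-step line, then cumulative line, mirroring the diff's format strings.
    print(f'[usage] prompt_tokens: {msg.prompt_tokens}, '
          f'completion_tokens: {msg.completion_tokens}')
    print(f'[usage_total] total_prompt_tokens: {TOTAL_PROMPT}, '
          f'total_completion_tokens: {TOTAL_COMPLETION}')

log_usage(ResponseMessage(120, 30))  # step 1
log_usage(ResponseMessage(200, 45))  # step 2: totals become 320 / 75

One thing worth flagging: because the totals live on the class, they never reset between runs inside the same process; callers that want per-run numbers would need to snapshot or reset the counters themselves.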
7 changes: 5 additions & 2 deletions ms_agent/llm/openai_llm.py
@@ -132,8 +132,11 @@ def _call_llm(self,
         """
         messages = self._format_input_message(messages)
 
-        if kwargs.get('stream', False) and self.args.get(
-                'stream_options', {}).get('include_usage', True):
+        is_streaming = kwargs.get('stream', False)
+        stream_options_config = self.args.get('stream_options', {})
+        # For streaming responses, we should request usage statistics by default,
+        # unless it's explicitly disabled in the configuration.
+        if is_streaming and stream_options_config.get('include_usage', True):
             kwargs.setdefault('stream_options', {})['include_usage'] = True
 
         return self.client.chat.completions.create(
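
Context for the include_usage handling: with stream=True, OpenAI-compatible chat endpoints only report token counts when stream_options.include_usage is set, in which case the final chunk carries a usage object and an empty choices list. The setdefault line above keeps any caller-supplied stream_options intact while switching usage reporting on. A sketch of consuming such a stream, assuming the official openai v1.x client; the model name and prompt are placeholders:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
stream = client.chat.completions.create(
    model='gpt-4o-mini',  # placeholder model
    messages=[{'role': 'user', 'content': 'Say hi.'}],
    stream=True,
    stream_options={'include_usage': True},
)
usage = None
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or '', end='')
    if chunk.usage is not None:
        usage = chunk.usage  # only the final chunk carries usage
print()
if usage is not None:
    print(f'prompt_tokens={usage.prompt_tokens}, '
          f'completion_tokens={usage.completion_tokens}')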