modelscope · hiyuchang · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025
diff --git a/benchmark/bench.py b/benchmark/bench.py
@@ -197,7 +197,7 @@ def prepare_configs(args, rank, current_time):
         )
         eval_taskset_config = config["buffer"]["explorer_input"]["eval_tasksets"]
         if len(eval_taskset_config) > 0:
-            # TODO: support seperately set path for eval taskset
+            # TODO: support setting the eval taskset path separately
             for eval_taskset_config in eval_taskset_config:
                 eval_taskset_config["path"] = taskset_config["path"]
         if args.lr:

diff --git a/benchmark/config/guru_math-template.yaml b/benchmark/config/guru_math-template.yaml
@@ -36,7 +36,7 @@ buffer:
       format:
         prompt_key: question
         response_key: ground_truth
-        system_prompt: "You are a helpful assistant. To answer a query from the user, please first thinks through the question step-by-step inside <think>...</think>, then provides the final response to user."
+        system_prompt: "You are a helpful assistant. To answer a query from the user, please first think through the question step-by-step inside <think>...</think>, then provide the final response to user."
         reply_prefix: "<think>"
       rollout_args:
         temperature: 1.0

diff --git a/examples/agentscope_frozenlake/agent.py b/examples/agentscope_frozenlake/agent.py
@@ -41,7 +41,7 @@ def get_prompt(self, observation: str) -> str:
         )
         if self.current_step > 0 and self.last_action is not None:
             if self.last_observation == observation:
-                prompt += "\nYour last response is invalid. Your position didn't change at all. You may need to recheck your thinking process, action outputted, and the format of response. Remember, you should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```."
+                prompt += "\nYour last response is invalid. Your position didn't change at all. You may need to recheck your thinking process, action outputted, and the format of response. Remember, you should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```."
 
         if self.max_steps is not None and self.max_steps - self.current_step > 0:
             prompt += (

diff --git a/examples/agentscope_frozenlake/utils.py b/examples/agentscope_frozenlake/utils.py
@@ -56,7 +56,7 @@
 
 You will be provided the current observation, please decide on the next Action.
 You should show your thought process and then input the final action in ``` ```.
-You should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```.
+You should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```.
 You should plan ahead and need to achieve it in minimum number of steps.
 You should be aware that frozen tiles can be slippery, but the chance is small and you should not overthink it.
 

diff --git a/examples/grpo_alfworld/README.md b/examples/grpo_alfworld/README.md
@@ -16,7 +16,7 @@ The SFT data should be named as `<TRINITY_SFT_DATASET_PATH>/data.json`, followin
          "messages": [
             {
                 "role": "system", # fixed, align with the grpo workflow: alfworld_workflow.
-                "content": "\nYou are an agent interacting with a virtual test-based environments.\n\n## Notes:\nAt each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the   tag and wrap your action with the   tag.\nYou should ALWAYS take one action each step. \nYou should finish the task and buy the item within 15 steps.\nDONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.\n\n## Action Format:\nBelow are the available commands you can use:\n  look:                             look around your current location\n  inventory:                        check your current inventory(you can only have 1 item in your inventory)\n  go to (receptacle):               move to a receptacle\n  open (receptacle):                open a receptacle\n  close (receptacle):               close a receptacle\n  take (object) from (receptacle):  take an object from a receptacle\n  move (object) to (receptacle):  place an object in or on a receptacle\n  examine (something):              examine a receptacle or an object\n  use (object):                     use an object\n  heat (object) with (receptacle):  heat an object using a receptacle\n  clean (object) with (receptacle): clean an object using a receptacle\n  cool (object) with (receptacle):  cool an object using a receptacle\n  slice (object) with (object):     slice an object using a sharp object\n\nFor example your output should be like this:\n To solve the task, I need first to ... go to cabinet 1\n"
+                "content": "\nYou are an agent interacting with a virtual text-based environment.\n\n## Notes:\nAt each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the   tag and wrap your action with the   tag.\nYou should ALWAYS take one action each step. \nYou should finish the task and buy the item within 15 steps.\nDO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.\n\n## Action Format:\nBelow are the available commands you can use:\n  look:                             look around your current location\n  inventory:                        check your current inventory(you can only have 1 item in your inventory)\n  go to (receptacle):               move to a receptacle\n  open (receptacle):                open a receptacle\n  close (receptacle):               close a receptacle\n  take (object) from (receptacle):  take an object from a receptacle\n  move (object) to (receptacle):  place an object in or on a receptacle\n  examine (something):              examine a receptacle or an object\n  use (object):                     use an object\n  heat (object) with (receptacle):  heat an object using a receptacle\n  clean (object) with (receptacle): clean an object using a receptacle\n  cool (object) with (receptacle):  cool an object using a receptacle\n  slice (object) with (object):     slice an object using a sharp object\n\nFor example your output should be like this:\n To solve the task, I need first to ... go to cabinet 1\n"
             },
             {
                 "role": "user",

diff --git a/examples/mix_chord/get_openr1_data.py b/examples/mix_chord/get_openr1_data.py
@@ -36,7 +36,7 @@
 MAX_TOKEN_LENGTH = 8196
 SFT_SAMPLE_SIZE = 5000
 RL_SAMPLE_SIZE = 20000
-SYSTEM_PROMPT = """You are a helpful assistant that solves MATH problems. You should first thinks about the reasoning process in mind and then provides the user with the answer. You should present your reasoning process using the format: <think>\n ...your reasoning process here... </think>\n first. You should always include your final answer in \\boxed{} as closed-form results."""
+SYSTEM_PROMPT = """You are a helpful assistant that solves MATH problems. You should first think about the reasoning process in mind and then provide the user with the answer. You should present your reasoning process using the format: <think>\n ...your reasoning process here... </think>\n first. You should always include your final answer in \\boxed{} as closed-form results."""
 
 
 def can_convert_to_int(answer):

diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py
@@ -470,7 +470,7 @@ async def test_api(self):
 
 You will be provided the current observation, please decide on the next Action.
 You should show your thought process and then input the final action in ``` ```.
-You should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```.
+You should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```.
 You should plan ahead and need to achieve it in minimum number of steps.
 You should be aware that frozen tiles can be slippery, but the chance is small and you should not overthink it.
 

diff --git a/trinity/common/rewards/dapo_reward.py b/trinity/common/rewards/dapo_reward.py
@@ -53,11 +53,11 @@ def compute_overlong_penalty(self, response_token):
         ), "max_response_length must be greater than cache_length"
 
         response_len = len(response_token)
-        excepted_len = self.max_response_length - self.cache_length
+        expected_len = self.max_response_length - self.cache_length
 
-        if response_len < excepted_len:
+        if response_len < expected_len:
             return 0.0
         elif response_len > self.max_response_length:
             return -self.penalty_factor
         else:
-            return (excepted_len - response_len) / self.cache_length * self.penalty_factor
+            return (expected_len - response_len) / self.cache_length * self.penalty_factor
diff --git a/trinity/common/rewards/naive_dapo_score.py b/trinity/common/rewards/naive_dapo_score.py
@@ -1,5 +1,5 @@
 """
-This file contaims the naive dapo reward function for math tasks.
+This file contains the naive dapo reward function for math tasks.
 Adapted from https://github.com/LLM360/Reasoning360/blob/main/verl/utils/reward_score/naive_dapo.py
 """
 

diff --git a/trinity/common/workflows/envs/alfworld/alfworld_workflow.py b/trinity/common/workflows/envs/alfworld/alfworld_workflow.py
@@ -15,13 +15,13 @@
 Your task is to: put some spraybottle on toilet.
 
 Response:
-<think>To solve the task, I need to find and take a sparybottle, then put it on toilet. First I need to find a spraybottle. A spraybottle is more likely to appear in cabinet (1-4), countertop (1), toilet (1), sinkbasin (1-2), garbagecan (1). I can check one by one, starting with cabinet 1.</think><action>go to cabinet 1</action>
+<think>To solve the task, I need to find and take a spraybottle, then put it on toilet. First I need to find a spraybottle. A spraybottle is more likely to appear in cabinet (1-4), countertop (1), toilet (1), sinkbasin (1-2), garbagecan (1). I can check one by one, starting with cabinet 1.</think><action>go to cabinet 1</action>
 
 Observation:
 On the cabinet 1, you see a cloth 1, a soapbar 1, a soapbottle 1.
 
 Response:
-<think>The sparybottle is not here, I should go to cabinet2 to check one by one</think><action>go to cabinet 2</action>
+<think>The spraybottle is not here, I should go to cabinet2 to check one by one</think><action>go to cabinet 2</action>
 
 Observation:
 The cabinet 2 is closed.
@@ -49,7 +49,7 @@
 """
 
 AlfWORLD_SYSTEM_PROMPT = """
-You are an agent interacting with a virtual test-based environments.
+You are an agent interacting with a virtual text-based environment.
 
 ## Notes:
 At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the <think> </think> tag and wrap your action with the <action> </action> tag.

diff --git a/trinity/common/workflows/envs/sciworld/sciworld_workflow.py b/trinity/common/workflows/envs/sciworld/sciworld_workflow.py
@@ -7,12 +7,12 @@
 from trinity.common.workflows.workflow import MultiTurnWorkflow, Task
 
 SCIWORLD_SYSTEM_PROMPT = """
-You are an agent, you job is to do some scientific experiment in a virtual test-based environments.
+You are an agent, your job is to do some scientific experiment in a virtual text-based environment.
 
 ## Notes:
 At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the <think> </think> tag and wrap your action with the <action> </action> tag.
 You should ALWAYS take one action each step.
-DONOT try to interact with the user at anytime. Finish the task by yourself.
+DO NOT try to interact with the user at anytime. Finish the task by yourself.
 
 ## Action Format:
 Below are the available commands you can use:

diff --git a/trinity/common/workflows/envs/webshop/webshop_workflow.py b/trinity/common/workflows/envs/webshop/webshop_workflow.py
@@ -71,7 +71,7 @@
 
 
 WebShop_SYSTEM_PROMPT_WITH_EXAMPLE = f"""
-You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to find follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruct provided.
+You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruction provided.
 
 ## Action Format:
 You should give both the action_name and action_arg like the format `action_name[action_arg]`. You can execute two types of actions, search and click.
@@ -92,11 +92,11 @@
 At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the <think> </think> tag and wrap your action with the <action> </action> tag.
 You should ALWAYS take one action each step.
 You should finish the task and buy the item within 15 steps.
-DONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
+DO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
 """
 
 WebShop_SYSTEM_PROMPT = """
-You are an agent interacting with a virtual text-baed web shopping environments to testout your ability. Your job is to find follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruct provided.
+You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruction provided.
 
 ## Action Format:
 You should give both the action_name and action_arg like the format `action_name[action_arg]`. You can execute two types of actions, search and click.
@@ -111,7 +111,7 @@
 At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the <think> </think> tag and wrap your action with the <action> </action> tag.
 You should ALWAYS take one action each step.
 You should finish the task and buy the item within 15 steps.
-DONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
+DO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
 """
 
 
@@ -163,7 +163,7 @@ def validate_action(action, available_actions):
         else:
             return (
                 False,
-                "Can not perfrom search action without search bar. Please click the Back to Search button first.",
+                "Can not perform search action without search bar. Please click the Back to Search button first.",
             )
     elif action_name == "click":
         if action_arg not in available_actions["clickables"]:

diff --git a/trinity/common/workflows/math_ruler_workflow.py b/trinity/common/workflows/math_ruler_workflow.py
@@ -96,7 +96,7 @@ def get_ruler_scores(
 
         question_prompt = (
             f"Question: {self.task_desc}\n\n"
-            f"""Solution format requirement: first thinks about the reasoning process in the mind and then provides the final answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
+            f"""Solution format requirement: first think about the reasoning process in the mind and then provide the final answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
 <think> reasoning process here </think>
 <answer> answer here </answer>."""
         )

diff --git a/trinity/common/workflows/math_trainable_ruler_workflow.py b/trinity/common/workflows/math_trainable_ruler_workflow.py
@@ -144,7 +144,7 @@ def get_ruler_responses(
 
         question_prompt = (
             f"Question: {self.task_desc}\n\n"
-            f"""Solution format requirement: first thinks about the reasoning process in the mind and then provides the final answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
+            f"""Solution format requirement: first think about the reasoning process in the mind and then provide the final answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
 <think> reasoning process here </think>
 <answer> answer here </answer>."""
         )