diff --git a/benchmark/bench.py b/benchmark/bench.py
index 07a481c312..c922a21214 100644
--- a/benchmark/bench.py
+++ b/benchmark/bench.py
@@ -197,7 +197,7 @@ def prepare_configs(args, rank, current_time):
)
eval_taskset_config = config["buffer"]["explorer_input"]["eval_tasksets"]
if len(eval_taskset_config) > 0:
- # TODO: support seperately set path for eval taskset
+ # TODO: support setting a separate path for each eval taskset
for eval_taskset_config in eval_taskset_config:
eval_taskset_config["path"] = taskset_config["path"]
if args.lr:
diff --git a/benchmark/config/guru_math-template.yaml b/benchmark/config/guru_math-template.yaml
index 7538171826..b0b32ff164 100644
--- a/benchmark/config/guru_math-template.yaml
+++ b/benchmark/config/guru_math-template.yaml
@@ -36,7 +36,7 @@ buffer:
format:
prompt_key: question
response_key: ground_truth
- system_prompt: "You are a helpful assistant. To answer a query from the user, please first thinks through the question step-by-step inside ..., then provides the final response to user."
+ system_prompt: "You are a helpful assistant. To answer a query from the user, please first think through the question step-by-step inside ..., then provide the final response to the user."
reply_prefix: ""
rollout_args:
temperature: 1.0
diff --git a/examples/agentscope_frozenlake/agent.py b/examples/agentscope_frozenlake/agent.py
index f4788e67f9..f05689d6b2 100644
--- a/examples/agentscope_frozenlake/agent.py
+++ b/examples/agentscope_frozenlake/agent.py
@@ -41,7 +41,7 @@ def get_prompt(self, observation: str) -> str:
)
if self.current_step > 0 and self.last_action is not None:
if self.last_observation == observation:
- prompt += "\nYour last response is invalid. Your position didn't change at all. You may need to recheck your thinking process, action outputted, and the format of response. Remember, you should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```."
+ prompt += "\nYour last response is invalid. Your position didn't change at all. You may need to recheck your thinking process, action outputted, and the format of response. Remember, you should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```."
if self.max_steps is not None and self.max_steps - self.current_step > 0:
prompt += (
diff --git a/examples/agentscope_frozenlake/utils.py b/examples/agentscope_frozenlake/utils.py
index 9cac5de81c..9e62751313 100644
--- a/examples/agentscope_frozenlake/utils.py
+++ b/examples/agentscope_frozenlake/utils.py
@@ -56,7 +56,7 @@
You will be provided the current observation, please decide on the next Action.
You should show your thought process and then input the final action in ``` ```.
-You should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```.
+You should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```.
You should plan ahead and need to achieve it in minimum number of steps.
You should be aware that frozen tiles can be slippery, but the chance is small and you should not overthink it.
diff --git a/examples/grpo_alfworld/README.md b/examples/grpo_alfworld/README.md
index 16c37f7fe5..06c4d36582 100644
--- a/examples/grpo_alfworld/README.md
+++ b/examples/grpo_alfworld/README.md
@@ -16,7 +16,7 @@ The SFT data should be named as `/data.json`, followin
"messages": [
{
"role": "system", # fixed, align with the grpo workflow: alfworld_workflow.
- "content": "\nYou are an agent interacting with a virtual test-based environments.\n\n## Notes:\nAt each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.\nYou should ALWAYS take one action each step. \nYou should finish the task and buy the item within 15 steps.\nDONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.\n\n## Action Format:\nBelow are the available commands you can use:\n look: look around your current location\n inventory: check your current inventory(you can only have 1 item in your inventory)\n go to (receptacle): move to a receptacle\n open (receptacle): open a receptacle\n close (receptacle): close a receptacle\n take (object) from (receptacle): take an object from a receptacle\n move (object) to (receptacle): place an object in or on a receptacle\n examine (something): examine a receptacle or an object\n use (object): use an object\n heat (object) with (receptacle): heat an object using a receptacle\n clean (object) with (receptacle): clean an object using a receptacle\n cool (object) with (receptacle): cool an object using a receptacle\n slice (object) with (object): slice an object using a sharp object\n\nFor example your output should be like this:\n To solve the task, I need first to ... go to cabinet 1\n"
+ "content": "\nYou are an agent interacting with a virtual text-based environment.\n\n## Notes:\nAt each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.\nYou should ALWAYS take one action each step. \nYou should finish the task and buy the item within 15 steps.\nDO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.\n\n## Action Format:\nBelow are the available commands you can use:\n look: look around your current location\n inventory: check your current inventory(you can only have 1 item in your inventory)\n go to (receptacle): move to a receptacle\n open (receptacle): open a receptacle\n close (receptacle): close a receptacle\n take (object) from (receptacle): take an object from a receptacle\n move (object) to (receptacle): place an object in or on a receptacle\n examine (something): examine a receptacle or an object\n use (object): use an object\n heat (object) with (receptacle): heat an object using a receptacle\n clean (object) with (receptacle): clean an object using a receptacle\n cool (object) with (receptacle): cool an object using a receptacle\n slice (object) with (object): slice an object using a sharp object\n\nFor example your output should be like this:\n To solve the task, I need first to ... go to cabinet 1\n"
},
{
"role": "user",
diff --git a/examples/mix_chord/get_openr1_data.py b/examples/mix_chord/get_openr1_data.py
index 80c2f41cfe..b2d75ec3f0 100644
--- a/examples/mix_chord/get_openr1_data.py
+++ b/examples/mix_chord/get_openr1_data.py
@@ -36,7 +36,7 @@
MAX_TOKEN_LENGTH = 8196
SFT_SAMPLE_SIZE = 5000
RL_SAMPLE_SIZE = 20000
-SYSTEM_PROMPT = """You are a helpful assistant that solves MATH problems. You should first thinks about the reasoning process in mind and then provides the user with the answer. You should present your reasoning process using the format: \n ...your reasoning process here... \n first. You should always include your final answer in \\boxed{} as closed-form results."""
+SYSTEM_PROMPT = """You are a helpful assistant that solves MATH problems. You should first think about the reasoning process in mind and then provide the user with the answer. You should present your reasoning process using the format: \n ...your reasoning process here... \n first. You should always include your final answer in \\boxed{} as closed-form results."""
def can_convert_to_int(answer):
diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py
index 18731f361e..61fd03e675 100644
--- a/tests/common/vllm_test.py
+++ b/tests/common/vllm_test.py
@@ -470,7 +470,7 @@ async def test_api(self):
You will be provided the current observation, please decide on the next Action.
You should show your thought process and then input the final action in ``` ```.
-You should only output the NEXT ACTION at each interation in the ``` ```. For example, if you want to move up, you should output ```Up```.
+You should only output the NEXT ACTION at each iteration in the ``` ```. For example, if you want to move up, you should output ```Up```.
You should plan ahead and need to achieve it in minimum number of steps.
You should be aware that frozen tiles can be slippery, but the chance is small and you should not overthink it.
diff --git a/trinity/common/rewards/dapo_reward.py b/trinity/common/rewards/dapo_reward.py
index ff2783e223..7ea4dee516 100644
--- a/trinity/common/rewards/dapo_reward.py
+++ b/trinity/common/rewards/dapo_reward.py
@@ -53,11 +53,11 @@ def compute_overlong_penalty(self, response_token):
), "max_response_length must be greater than cache_length"
response_len = len(response_token)
- excepted_len = self.max_response_length - self.cache_length
+ expected_len = self.max_response_length - self.cache_length
- if response_len < excepted_len:
+ if response_len < expected_len:
return 0.0
elif response_len > self.max_response_length:
return -self.penalty_factor
else:
- return (excepted_len - response_len) / self.cache_length * self.penalty_factor
+ return (expected_len - response_len) / self.cache_length * self.penalty_factor
diff --git a/trinity/common/rewards/naive_dapo_score.py b/trinity/common/rewards/naive_dapo_score.py
index fd791c854d..ed15c89071 100644
--- a/trinity/common/rewards/naive_dapo_score.py
+++ b/trinity/common/rewards/naive_dapo_score.py
@@ -1,5 +1,5 @@
"""
-This file contaims the naive dapo reward function for math tasks.
+This file contains the naive dapo reward function for math tasks.
Adapted from https://github.com/LLM360/Reasoning360/blob/main/verl/utils/reward_score/naive_dapo.py
"""
diff --git a/trinity/common/workflows/envs/alfworld/alfworld_workflow.py b/trinity/common/workflows/envs/alfworld/alfworld_workflow.py
index 7388e156a1..64fe07a6ce 100644
--- a/trinity/common/workflows/envs/alfworld/alfworld_workflow.py
+++ b/trinity/common/workflows/envs/alfworld/alfworld_workflow.py
@@ -15,13 +15,13 @@
Your task is to: put some spraybottle on toilet.
Response:
-To solve the task, I need to find and take a sparybottle, then put it on toilet. First I need to find a spraybottle. A spraybottle is more likely to appear in cabinet (1-4), countertop (1), toilet (1), sinkbasin (1-2), garbagecan (1). I can check one by one, starting with cabinet 1.go to cabinet 1
+To solve the task, I need to find and take a spraybottle, then put it on toilet. First I need to find a spraybottle. A spraybottle is more likely to appear in cabinet (1-4), countertop (1), toilet (1), sinkbasin (1-2), garbagecan (1). I can check one by one, starting with cabinet 1.go to cabinet 1
Observation:
On the cabinet 1, you see a cloth 1, a soapbar 1, a soapbottle 1.
Response:
-The sparybottle is not here, I should go to cabinet2 to check one by onego to cabinet 2
+The spraybottle is not here, I should go to cabinet2 to check one by onego to cabinet 2
Observation:
The cabinet 2 is closed.
@@ -49,7 +49,7 @@
"""
AlfWORLD_SYSTEM_PROMPT = """
-You are an agent interacting with a virtual test-based environments.
+You are an agent interacting with a virtual text-based environment.
## Notes:
At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.
diff --git a/trinity/common/workflows/envs/sciworld/sciworld_workflow.py b/trinity/common/workflows/envs/sciworld/sciworld_workflow.py
index f0862b6f38..c9d9cdc684 100644
--- a/trinity/common/workflows/envs/sciworld/sciworld_workflow.py
+++ b/trinity/common/workflows/envs/sciworld/sciworld_workflow.py
@@ -7,12 +7,12 @@
from trinity.common.workflows.workflow import MultiTurnWorkflow, Task
SCIWORLD_SYSTEM_PROMPT = """
-You are an agent, you job is to do some scientific experiment in a virtual test-based environments.
+You are an agent, your job is to do some scientific experiment in a virtual text-based environment.
## Notes:
At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.
You should ALWAYS take one action each step.
-DONOT try to interact with the user at anytime. Finish the task by yourself.
+DO NOT try to interact with the user at anytime. Finish the task by yourself.
## Action Format:
Below are the available commands you can use:
diff --git a/trinity/common/workflows/envs/webshop/webshop_workflow.py b/trinity/common/workflows/envs/webshop/webshop_workflow.py
index 48a0f464f7..7514965eba 100644
--- a/trinity/common/workflows/envs/webshop/webshop_workflow.py
+++ b/trinity/common/workflows/envs/webshop/webshop_workflow.py
@@ -71,7 +71,7 @@
WebShop_SYSTEM_PROMPT_WITH_EXAMPLE = f"""
-You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to find follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruct provided.
+You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruction provided.
## Action Format:
You should give both the action_name and action_arg like the format `action_name[action_arg]`. You can execute two types of actions, search and click.
@@ -92,11 +92,11 @@
At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.
You should ALWAYS take one action each step.
You should finish the task and buy the item within 15 steps.
-DONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
+DO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
"""
WebShop_SYSTEM_PROMPT = """
-You are an agent interacting with a virtual text-baed web shopping environments to testout your ability. Your job is to find follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruct provided.
+You are an agent interacting with a virtual text-based web shopping environment to test out your ability. Your job is to follow the Instruction provided and mimic the steps to buy the item that are closest to the Instruction provided.
## Action Format:
You should give both the action_name and action_arg like the format `action_name[action_arg]`. You can execute two types of actions, search and click.
@@ -111,7 +111,7 @@
At each step, you should first think then perform action to fulfill the instruction. You should ALWAYS wrap your thinking with the tag and wrap your action with the tag.
You should ALWAYS take one action each step.
You should finish the task and buy the item within 15 steps.
-DONOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
+DO NOT try to interact with the user at anytime. Finish the task and buy the item by yourself.
"""
@@ -163,7 +163,7 @@ def validate_action(action, available_actions):
else:
return (
False,
- "Can not perfrom search action without search bar. Please click the Back to Search button first.",
+ "Can not perform search action without search bar. Please click the Back to Search button first.",
)
elif action_name == "click":
if action_arg not in available_actions["clickables"]:
diff --git a/trinity/common/workflows/math_ruler_workflow.py b/trinity/common/workflows/math_ruler_workflow.py
index 11a7a3a8c6..e04945d2ca 100644
--- a/trinity/common/workflows/math_ruler_workflow.py
+++ b/trinity/common/workflows/math_ruler_workflow.py
@@ -96,7 +96,7 @@ def get_ruler_scores(
question_prompt = (
f"Question: {self.task_desc}\n\n"
- f"""Solution format requirement: first thinks about the reasoning process in the mind and then provides the final answer. The reasoning process and answer are enclosed within and tags, respectively, i.e.,
+ f"""Solution format requirement: first think about the reasoning process in the mind and then provide the final answer. The reasoning process and answer are enclosed within and tags, respectively, i.e.,
reasoning process here
answer here ."""
)
diff --git a/trinity/common/workflows/math_trainable_ruler_workflow.py b/trinity/common/workflows/math_trainable_ruler_workflow.py
index 5c01ec499b..279cf76c87 100644
--- a/trinity/common/workflows/math_trainable_ruler_workflow.py
+++ b/trinity/common/workflows/math_trainable_ruler_workflow.py
@@ -144,7 +144,7 @@ def get_ruler_responses(
question_prompt = (
f"Question: {self.task_desc}\n\n"
- f"""Solution format requirement: first thinks about the reasoning process in the mind and then provides the final answer. The reasoning process and answer are enclosed within and tags, respectively, i.e.,
+ f"""Solution format requirement: first think about the reasoning process in the mind and then provide the final answer. The reasoning process and answer are enclosed within and tags, respectively, i.e.,
reasoning process here
answer here ."""
)