diff --git a/examples/data_preprocess/preprocess_search_r1_dataset.py b/examples/data_preprocess/preprocess_search_r1_dataset.py index a0c10d59b9c..a8d67cac630 100644 --- a/examples/data_preprocess/preprocess_search_r1_dataset.py +++ b/examples/data_preprocess/preprocess_search_r1_dataset.py @@ -30,10 +30,22 @@ # Configuration constants DEFAULT_SYSTEM_CONTENT = "You are a helpful and harmless assistant." +# DEFAULT_USER_CONTENT_PREFIX = ( +# "Answer the given question. You must conduct reasoning inside and " +# "first every time you get new information. After reasoning, if you find you lack " +# "some knowledge, you can call a search engine by query " +# "and it will return the top searched results between and " +# ". You can search as many times as your want. If you find no " +# "further external knowledge needed, you can directly provide the answer inside " +# " and , without detailed illustrations. For example, " +# " Beijing . Question: " +# ) +# To adapt to the tool-call format of Verl's AgentLoop, the prompt is modified as follows: DEFAULT_USER_CONTENT_PREFIX = ( - "Answer the given question. You must conduct reasoning inside and " - "first every time you get new information. After reasoning, if you find you lack " - "some knowledge, you can call a search engine by query " + "Answer the given question. You must conduct step by step reasoning " + "first every time you get new information. If you need external information, " + "call the search tool by returning a JSON object inside tags. " + "For example: {\"name\": \"search\", \"arguments\": {\"query\": \"your query\"}}. " "and it will return the top searched results between and " ". You can search as many times as your want. 
If you find no " "further external knowledge needed, you can directly provide the answer inside " @@ -176,3 +188,4 @@ def apply_process_row(row, split_name=split): user_content_prefix = DEFAULT_USER_CONTENT_PREFIX main() + diff --git a/examples/sglang_multiturn/config/search_multiturn_grpo.yaml b/examples/sglang_multiturn/config/search_multiturn_grpo.yaml index 0e24f62b788..b6c9e831ffc 100644 --- a/examples/sglang_multiturn/config/search_multiturn_grpo.yaml +++ b/examples/sglang_multiturn/config/search_multiturn_grpo.yaml @@ -19,5 +19,5 @@ actor_rollout_ref: name: sglang multi_turn: enable: True - max_assistant_turns: 2 - format: qwen + max_assistant_turns: 4 + format: hermes diff --git a/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh b/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh index 4415e47a953..6812fdf7c57 100644 --- a/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh +++ b/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh @@ -46,7 +46,11 @@ python3 -m verl.trainer.main_ppo \ actor_rollout_ref.rollout.name=sglang \ actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ actor_rollout_ref.rollout.n=5 \ - actor_rollout_ref.rollout.multi_turn.max_assistant_turns=2 \ + actor_rollout_ref.rollout.mode=async \ + actor_rollout_ref.rollout.agent.default_agent_loop=tool_agent \ + actor_rollout_ref.rollout.multi_turn.max_tool_response_length=1024 \ + actor_rollout_ref.rollout.multi_turn.max_assistant_turns=4 \ + actor_rollout_ref.rollout.multi_turn.max_user_turns=4 \ actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=8 \ actor_rollout_ref.ref.fsdp_config.param_offload=True \ algorithm.use_kl_in_reward=False \