diff --git a/examples/data_preprocess/preprocess_search_r1_dataset.py b/examples/data_preprocess/preprocess_search_r1_dataset.py
index a0c10d59b9c..a8d67cac630 100644
--- a/examples/data_preprocess/preprocess_search_r1_dataset.py
+++ b/examples/data_preprocess/preprocess_search_r1_dataset.py
@@ -30,10 +30,22 @@
# Configuration constants
DEFAULT_SYSTEM_CONTENT = "You are a helpful and harmless assistant."
+# DEFAULT_USER_CONTENT_PREFIX = (
+# "Answer the given question. You must conduct reasoning inside and "
+# "first every time you get new information. After reasoning, if you find you lack "
+# "some knowledge, you can call a search engine by query "
+# "and it will return the top searched results between and "
+# ". You can search as many times as your want. If you find no "
+# "further external knowledge needed, you can directly provide the answer inside "
+# " and , without detailed illustrations. For example, "
+# " Beijing . Question: "
+# )
+# To adapt to the tool-call format of verl's AgentLoop, the prompt is modified as follows:
DEFAULT_USER_CONTENT_PREFIX = (
- "Answer the given question. You must conduct reasoning inside and "
- "first every time you get new information. After reasoning, if you find you lack "
- "some knowledge, you can call a search engine by query "
+ "Answer the given question. You must conduct step by step reasoning "
+ "first every time you get new information. If you need external information, "
+ "call the search tool by returning a JSON object inside tags. "
+ "For example: {\"name\": \"search\", \"arguments\": {\"query\": \"your query\"}}. "
"and it will return the top searched results between and "
". You can search as many times as your want. If you find no "
"further external knowledge needed, you can directly provide the answer inside "
@@ -176,3 +188,4 @@ def apply_process_row(row, split_name=split):
user_content_prefix = DEFAULT_USER_CONTENT_PREFIX
main()
+
diff --git a/examples/sglang_multiturn/config/search_multiturn_grpo.yaml b/examples/sglang_multiturn/config/search_multiturn_grpo.yaml
index 0e24f62b788..b6c9e831ffc 100644
--- a/examples/sglang_multiturn/config/search_multiturn_grpo.yaml
+++ b/examples/sglang_multiturn/config/search_multiturn_grpo.yaml
@@ -19,5 +19,5 @@ actor_rollout_ref:
name: sglang
multi_turn:
enable: True
- max_assistant_turns: 2
- format: qwen
+ max_assistant_turns: 4
+ format: hermes
diff --git a/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh b/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh
index 4415e47a953..6812fdf7c57 100644
--- a/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh
+++ b/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh
@@ -46,7 +46,11 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.rollout.n=5 \
- actor_rollout_ref.rollout.multi_turn.max_assistant_turns=2 \
+ actor_rollout_ref.rollout.mode=async \
+ actor_rollout_ref.rollout.agent.default_agent_loop=tool_agent \
+ actor_rollout_ref.rollout.multi_turn.max_tool_response_length=1024 \
+ actor_rollout_ref.rollout.multi_turn.max_assistant_turns=4 \
+ actor_rollout_ref.rollout.multi_turn.max_user_turns=4 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=8 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
algorithm.use_kl_in_reward=False \