[misc] fix: fix list conversion in get_tensordict (#4304)

PeterSH6 · gemini-code-assist[bot] · web-flow · commit 77ef1db32eb7 · 2025-11-26T21:37:37.000+08:00
--- ## What does this PR do? This PR fixes a `ValueError` that occurs when converting `DataProto` containing nested Python structures (lists of lists, lists of dicts, etc.) to `TensorDict`. The issue manifested during distributed training when `non_tensor_batch` fields like `turn_scores`, `reward_extra_info`, `raw_prompt`, and `tool_rewards` contained nested structures that `TensorDict` couldn't handle directly. **Root Cause:** `TensorDict` cannot accept raw nested Python objects like `[[], [0.5, 0.8]]` or `[{"acc": 1.0}, {"acc": 0.0}]`. These must be wrapped using `NonTensorData` and organized into `NonTensorStack` for proper handling. **Solution:** - Explicitly wrap each element in nested lists with `NonTensorData` before creating `NonTensorStack` - Added helper function `assign_non_tensor_stack()` and extended `assign_non_tensor()` (which replaces the removed `assign_non_tensor_dict()`) in `tensordict_utils.py` - Updated `DataProto.to_tensordict()` and `DataProto.from_tensordict()` for proper round-trip conversion - Added automatic nested structure detection in `get_tensordict()` Previous PR: [#4296](#4296) --- ## Test ### Unit Tests Added **`tests/test_protocol_v2_on_cpu.py`** (10 new tests): - `test_assign_non_tensor_stack_with_nested_lists` - Lists of lists - `test_assign_non_tensor_stack_with_nested_dicts` - Lists of dicts - `test_assign_non_tensor_stack_with_complex_nested` - Lists of lists of dicts - `test_assign_non_tensor_handles_wrappers` - Pre-wrapped `NonTensorData` / `NonTensorStack` values - `test_assign_non_tensor_stack_batch_size_check` - Batch-size mismatch raises `RuntimeError` - `test_assign_non_tensor_with_auto_detection` - Auto type detection - `test_get_tensordict_with_nested_lists` - Integration with get_tensordict - `test_get_tensordict_with_nested_dicts` - Integration with get_tensordict - `test_get_tensordict_with_complex_nested_structures` - Complex nested case - `test_get_tensordict_agent_loop_scenario` - Real-world agent loop scenario ### How to Run Tests ```bash # Test tensordict_utils nested structure support pytest tests/test_protocol_v2_on_cpu.py -v ``` ### Validation ✅ All new tests pass ✅ Existing tests remain passing ✅ Successfully handles empty lists in 
nested structures (e.g., `turn_scores = [[], [0.5, 0.8]]`) ✅ Round-trip conversion (DataProto → TensorDict → DataProto) preserves data integrity --- ### Checklist Before Submitting > [!IMPORTANT] > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). - [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).) --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/tests/test_protocol_v2_on_cpu.py b/tests/test_protocol_v2_on_cpu.py
@@ -22,6 +22,7 @@
 import numpy as np
 import pytest
 import torch
+from tensordict.tensorclass import NonTensorData, NonTensorStack
 
 from verl.utils import tensordict_utils as tu
 
@@ -45,10 +46,10 @@ def test_union_tensor_dict():
         # conflict in tensor values
         tu.union_tensor_dict(data1, data_with_copied_obs)
 
-    data1 = tu.assign_non_tensor_dict(data1, meta_info1)
+    data1 = tu.assign_non_tensor(data1, **meta_info1)
     tu.union_tensor_dict(data1, data2)  # works ok
 
-    data2 = tu.assign_non_tensor_dict(data2, meta_info2)
+    data2 = tu.assign_non_tensor(data2, **meta_info2)
 
     with pytest.raises(AssertionError):
         # conflict in NonTensorData
@@ -651,3 +652,203 @@ def test_concat_tensordict():
     # make sure tensordict1 and tensordict2 is untouched
     tu.assert_tensordict_eq(tensordict1, tensordict1_copy)
     tu.assert_tensordict_eq(tensordict2, tensordict2_copy)
+
+
+def test_assign_non_tensor_stack_with_nested_lists():
+    """assign_non_tensor_stack keeps ragged lists of lists, including an empty inner list."""
+    td = tu.get_tensordict({"obs": torch.randn(3, 4)}, non_tensor_dict={})
+
+    # Three inner lists of varying lengths (like turn_scores or tool_rewards)
+    turn_scores = [[], [0.5, 0.8], [0.9]]
+    tu.assign_non_tensor_stack(td, "turn_scores", turn_scores)
+
+    # Indexing the stored entry must give back each original inner list
+    assert len(td["turn_scores"]) == 3
+    assert list(td["turn_scores"][0]) == []
+    assert list(td["turn_scores"][1]) == [0.5, 0.8]
+    assert list(td["turn_scores"][2]) == [0.9]
+
+
+def test_assign_non_tensor_stack_with_nested_dicts():
+    """assign_non_tensor_stack keeps a list of dicts, one dict retrievable per index."""
+    td = tu.get_tensordict({"obs": torch.randn(3, 4)}, non_tensor_dict={})
+
+    # Three dicts sharing the same keys (like reward_extra_info)
+    reward_extra_info = [{"acc": 1.0, "loss": 0.1}, {"acc": 0.0, "loss": 0.9}, {"acc": 1.0, "loss": 0.05}]
+    tu.assign_non_tensor_stack(td, "reward_extra_info", reward_extra_info)
+
+    # Every index must give back a mapping equal to the dict that was passed in
+    assert len(td["reward_extra_info"]) == 3
+    assert dict(td["reward_extra_info"][0]) == {"acc": 1.0, "loss": 0.1}
+    assert dict(td["reward_extra_info"][1]) == {"acc": 0.0, "loss": 0.9}
+    assert dict(td["reward_extra_info"][2]) == {"acc": 1.0, "loss": 0.05}
+
+
+def test_assign_non_tensor_stack_with_complex_nested():
+    """assign_non_tensor_stack keeps lists of lists of dicts with differing inner lengths."""
+    td = tu.get_tensordict({"obs": torch.randn(2, 4)}, non_tensor_dict={})
+
+    # Two message lists of different lengths (like raw_prompt)
+    raw_prompt = [
+        [{"content": "Question 1", "role": "user"}],
+        [{"content": "Question 2", "role": "user"}, {"content": "Answer 2", "role": "assistant"}],
+    ]
+    tu.assign_non_tensor_stack(td, "raw_prompt", raw_prompt)
+
+    # Both nesting levels must remain indexable: outer entry first, then message
+    assert len(td["raw_prompt"]) == 2
+    assert len(td["raw_prompt"][0]) == 1
+    assert dict(td["raw_prompt"][0][0]) == {"content": "Question 1", "role": "user"}
+    assert len(td["raw_prompt"][1]) == 2
+    assert dict(td["raw_prompt"][1][0]) == {"content": "Question 2", "role": "user"}
+
+
+def test_assign_non_tensor_handles_wrappers():
+    """assign_non_tensor accepts plain values as well as pre-built NonTensorData / NonTensorStack."""
+    td = tu.get_tensordict({"obs": torch.randn(3, 4)}, non_tensor_dict={})
+    meta = {"top_p": 0.8}
+    tu.assign_non_tensor(td, **meta)
+    assert td["top_p"] == 0.8
+
+    wrapped = NonTensorData(0.3)
+    stack = NonTensorStack.from_list([NonTensorData(1.0), NonTensorData(2.0), NonTensorData(3.0)])
+    tu.assign_non_tensor(td, wrapped=wrapped, stack=stack)
+
+    assert td["wrapped"] == 0.3
+    assert td["stack"] == [1.0, 2.0, 3.0]
+
+
+def test_assign_non_tensor_stack_batch_size_check():
+    """A 2-item NonTensorStack assigned to a TensorDict built from a (3, 4) tensor raises RuntimeError."""
+    td = tu.get_tensordict({"obs": torch.randn(3, 4)}, non_tensor_dict={})
+    stack = NonTensorStack.from_list([NonTensorData(1.0), NonTensorData(2.0)])
+    with pytest.raises(RuntimeError):
+        tu.assign_non_tensor(td, stack=stack)
+
+
+def test_assign_non_tensor_with_auto_detection():
+    """assign_non_tensor accepts a scalar, a nested list, a list of dicts and a flat list in one call."""
+    td = tu.get_tensordict({"obs": torch.randn(3, 4)}, non_tensor_dict={})
+
+    # One call mixing a non-list value with three list-valued entries
+    tu.assign_non_tensor(
+        td,
+        metadata="experiment_1",  # Simple value
+        turn_scores=[[], [0.5, 0.8], [0.9]],  # Nested list
+        reward_extra_info=[{"acc": 1.0}, {"acc": 0.0}, {"acc": 1.0}],  # List of dicts
+        simple_list=["a", "b", "c"],  # Simple list (also uses NonTensorStack for consistency)
+    )
+
+    # The scalar compares directly; list-valued entries are checked by length and by index
+    assert td["metadata"] == "experiment_1"
+    assert len(td["turn_scores"]) == 3
+    assert list(td["turn_scores"][1]) == [0.5, 0.8]
+    assert len(td["reward_extra_info"]) == 3
+    assert dict(td["reward_extra_info"][0]) == {"acc": 1.0}
+    assert len(td["simple_list"]) == 3
+    assert td["simple_list"][0] == "a"
+
+
+def test_get_tensordict_with_nested_lists():
+    """get_tensordict accepts a ragged list of lists next to a tensor in tensor_dict."""
+    obs = torch.randn(3, 4)
+    turn_scores = [[], [0.5, 0.8], [0.9]]
+
+    # The list is passed inside tensor_dict; it should be converted to NonTensorStack automatically
+    td = tu.get_tensordict({"obs": obs, "turn_scores": turn_scores})
+
+    # The tensor must be unchanged and each inner list retrievable by index
+    assert torch.all(torch.eq(td["obs"], obs))
+    assert len(td["turn_scores"]) == 3
+    assert list(td["turn_scores"][0]) == []
+    assert list(td["turn_scores"][1]) == [0.5, 0.8]
+
+
+def test_get_tensordict_with_nested_dicts():
+    """get_tensordict accepts a list of dicts next to a tensor and keeps each dict retrievable."""
+    obs = torch.randn(3, 4)
+    reward_extra_info = [{"acc": 1.0}, {"acc": 0.0}, {"acc": 1.0}]
+
+    td = tu.get_tensordict({"obs": obs, "reward_extra_info": reward_extra_info})
+
+    assert torch.all(torch.eq(td["obs"], obs))
+    assert len(td["reward_extra_info"]) == 3
+    assert dict(td["reward_extra_info"][0]) == {"acc": 1.0}
+
+
+def test_get_tensordict_with_complex_nested_structures():
+    """get_tensordict accepts lists of lists of dicts whose inner lists differ in length."""
+    obs = torch.randn(2, 4)
+    raw_prompt = [
+        [{"content": "Q1", "role": "user"}],
+        [{"content": "Q2", "role": "user"}, {"content": "A2", "role": "assistant"}],
+    ]
+
+    td = tu.get_tensordict({"obs": obs, "raw_prompt": raw_prompt})
+
+    assert torch.all(torch.eq(td["obs"], obs))
+    assert len(td["raw_prompt"]) == 2
+    assert dict(td["raw_prompt"][0][0]) == {"content": "Q1", "role": "user"}
+
+
+def test_get_tensordict_agent_loop_scenario():
+    """Test the complete agent loop scenario with all nested types.
+
+    Builds one TensorDict from two tensors plus these list-valued fields:
+    - data_source / uid: flat lists of strings
+    - turn_scores / tool_rewards: lists of lists (some of them empty)
+    - reward_extra_info: lists of dicts; raw_prompt: lists of lists of dicts
+    global_steps is passed separately through non_tensor_dict.
+    """
+    prompts = torch.randn(2, 10)
+    responses = torch.randn(2, 5)
+
+    # Non-tensor fields from agent loop (flat and nested), two entries each
+    data_source = ["lighteval/MATH", "lighteval/MATH"]
+    uid = ["uuid-1", "uuid-2"]
+    turn_scores = [[], [0.5, 0.8]]  # Lists of varying lengths
+    reward_extra_info = [{"acc": 1.0, "loss": 0.1}, {"acc": 0.0, "loss": 0.9}]
+    raw_prompt = [
+        [{"content": "Compute 4 @ 2", "role": "user"}],
+        [{"content": "Compute 8 @ 7", "role": "user"}],
+    ]
+    tool_rewards = [[0.0], []]  # List of lists
+
+    # Tensors and lists go in together via tensor_dict; the scalar goes via non_tensor_dict
+    td = tu.get_tensordict(
+        tensor_dict={
+            "prompts": prompts,
+            "responses": responses,
+            "data_source": data_source,
+            "uid": uid,
+            "turn_scores": turn_scores,
+            "reward_extra_info": reward_extra_info,
+            "raw_prompt": raw_prompt,
+            "tool_rewards": tool_rewards,
+        },
+        non_tensor_dict={"global_steps": 42},
+    )
+
+    # Tensors are unchanged and flat string lists compare equal as whole lists
+    assert torch.all(torch.eq(td["prompts"], prompts))
+    assert torch.all(torch.eq(td["responses"], responses))
+    assert td["data_source"] == data_source
+    assert td["uid"] == uid
+
+    # Nested structures are checked by length and by index
+    assert len(td["turn_scores"]) == 2
+    assert list(td["turn_scores"][0]) == []
+    assert list(td["turn_scores"][1]) == [0.5, 0.8]
+
+    assert len(td["reward_extra_info"]) == 2
+    assert dict(td["reward_extra_info"][0]) == {"acc": 1.0, "loss": 0.1}
+
+    assert len(td["raw_prompt"]) == 2
+    assert dict(td["raw_prompt"][0][0]) == {"content": "Compute 4 @ 2", "role": "user"}
+
+    assert len(td["tool_rewards"]) == 2
+    assert list(td["tool_rewards"][0]) == [0.0]
+    assert list(td["tool_rewards"][1]) == []
+
+    # The non_tensor_dict entry is readable as a plain value
+    assert td["global_steps"] == 42
diff --git a/verl/utils/tensordict_utils.py b/verl/utils/tensordict_utils.py
@@ -20,20 +20,71 @@
 from tensordict.tensorclass import NonTensorData, NonTensorStack
 
 
-def assign_non_tensor_dict(tensor_dict: TensorDict, non_tensor_dict: dict):
-    for key, val in non_tensor_dict.items():
-        assign_non_tensor_data(tensor_dict=tensor_dict, key=key, val=val)
-    return tensor_dict
-
-
 def assign_non_tensor_data(tensor_dict: TensorDict, key, val):
+    assert isinstance(tensor_dict, TensorDict), "input dict must be a TensorDict"  # NOTE(review): no-op under -O
     tensor_dict[key] = NonTensorData(val)
 
 
-def assign_non_tensor(tensordict: TensorDict, **kwargs):
+def assign_non_tensor_stack(tensor_dict: TensorDict, key, val: list):
+    """Assign a list with potentially nested structures (lists, dicts, etc.) to TensorDict.
+
+    This function handles complex nested data structures like:
+    - Lists of lists: [[], [0.5, 0.8], [0.9]]
+    - Lists of dicts: [{"acc": 1.0}, {"acc": 0.0}]
+    - Lists of lists of dicts: [[{"content": "...", "role": "user"}]]
+
+    Each top-level item is wrapped in NonTensorData and the items are stored as one NonTensorStack.
+
+    Args:
+        tensor_dict: The TensorDict to assign to (must be a TensorDict instance; modified in place)
+        key: The key to assign the value under
+        val: The list to store; its items may themselves be lists, dicts or other objects
+
+    Example:
+        >>> td = TensorDict({}, batch_size=[])
+        >>> turn_scores = [[], [0.5, 0.8], [0.9]]
+        >>> assign_non_tensor_stack(td, "turn_scores", turn_scores)
+        >>> # Now td["turn_scores"] contains the nested data
+    """
+    # Wrap every item in NonTensorData before stacking so that nested objects
+    # (inner lists, dicts) are preserved instead of being handed to TensorDict raw
+    # TODO(petersh6): can convert back to val directly if we are not accessing .data from the NonTensorStack
+    assert isinstance(tensor_dict, TensorDict), "input dict must be a TensorDict"
+    tensor_dict[key] = NonTensorStack.from_list([NonTensorData(item) for item in val])
+
+
+def assign_non_tensor(tensor_dict: TensorDict, **kwargs):
+    """Assign non-tensor data to a TensorDict.
+
+    Dispatches on the type of each value: NonTensorData / NonTensorStack instances
+    are stored as-is, any Python list (nested or flat) goes through
+    assign_non_tensor_stack, and everything else is wrapped in NonTensorData.
+
+    Args:
+        tensor_dict: The TensorDict to assign to (modified in place and returned)
+        **kwargs: Key-value pairs where values can be:
+            - NonTensorData / NonTensorStack (assigned unchanged) or lists (stored as NonTensorStack)
+            - Any other value (stored as NonTensorData)
+
+    Example:
+        >>> td = TensorDict({"obs": torch.randn(3, 4)}, batch_size=[3])
+        >>> assign_non_tensor(
+        ...     tensor_dict=td,
+        ...     metadata="experiment_1",  # Simple value
+        ...     turn_scores=[[], [0.5, 0.8], [0.9]]  # Nested list
+        ... )
+    """
+    assert isinstance(tensor_dict, TensorDict), "input dict must be a TensorDict"
     for key, val in kwargs.items():
-        assign_non_tensor_data(tensor_dict=tensordict, key=key, val=val)
-    return tensordict
+        if isinstance(val, (NonTensorData | NonTensorStack)):
+            tensor_dict[key] = val
+        elif isinstance(val, list):
+            # Any list (flat or nested) is stored item-wise as a NonTensorStack
+            assign_non_tensor_stack(tensor_dict=tensor_dict, key=key, val=val)
+        else:
+            # Everything else is wrapped as a single NonTensorData
+            assign_non_tensor_data(tensor_dict=tensor_dict, key=key, val=val)
+    return tensor_dict
 
 
 def unwrap_non_tensor_data(data):
@@ -92,15 +143,31 @@ def concat_tensordict(data: list[TensorDict]) -> TensorDict:
 
 
 def get_tensordict(tensor_dict: dict[str, torch.Tensor | list], non_tensor_dict: dict = None) -> TensorDict:
-    """
+    """Create a TensorDict from tensors and non-tensor data.
+
+    Automatically handles nested structures in lists by converting them to NonTensorStack.
+    This enables support for:
+    - Lists of lists: [[], [0.5, 0.8], [0.9]]
+    - Lists of dicts: [{"acc": 1.0}, {"acc": 0.0}]
+    - Lists of lists of dicts: [[{"content": "...", "role": "user"}]]
 
     Args:
-        data_dict:
-        meta_info:
+        tensor_dict: Dictionary of tensors and lists to include in the TensorDict
+        non_tensor_dict: Dictionary of metadata to store as NonTensorData
 
     Returns:
-
+        TensorDict with proper handling of nested structures
+
+    Example:
+        >>> td = get_tensordict(
+        ...     tensor_dict={
+        ...         "obs": torch.randn(3, 4),
+        ...         "turn_scores": [[], [0.5, 0.8], [0.9]]  # Nested list
+        ...     },
+        ...     non_tensor_dict={"experiment": "test"}
+        ... )
     """
+    tensor_dict = tensor_dict.copy()
     if non_tensor_dict is None:
         non_tensor_dict = {}
 
@@ -127,6 +194,9 @@ def get_tensordict(tensor_dict: dict[str, torch.Tensor | list], non_tensor_dict:
                     "Passing a list makes the data NonTensorStack, "
                     "which doesn't support torch.Tensor. Please convert to numpy first"
                 )
+            # Convert to NonTensorStack to handle nested structures
+            tensor_dict[key] = NonTensorStack.from_list([NonTensorData(item) for item in val])
+
         assert isinstance(val, torch.Tensor | list)
 
         if batch_size is None: