diff --git a/components/src/dynamo/sglang/request_handlers/llm/decode_handler.py b/components/src/dynamo/sglang/request_handlers/llm/decode_handler.py index 2e5777c5ff..7b8fe1302f 100644 --- a/components/src/dynamo/sglang/request_handlers/llm/decode_handler.py +++ b/components/src/dynamo/sglang/request_handlers/llm/decode_handler.py @@ -164,26 +164,24 @@ async def _process_token_stream( Yields: Dict with token_ids and optional finish_reason. - - Raises: - ValueError: If response missing output_ids. """ num_output_tokens_so_far = 0 async for res in stream_source: + out = {} finish_reason = res["meta_info"]["finish_reason"] if finish_reason: - out = {"token_ids": [], "finish_reason": finish_reason["type"]} - else: - try: - next_total_toks = len(res["output_ids"]) - except KeyError: - raise ValueError( - f"Missing 'output_ids' in response. Response keys: {list(res.keys())}" - ) - out = {"token_ids": res["output_ids"][num_output_tokens_so_far:]} - num_output_tokens_so_far = next_total_toks + out["finish_reason"] = finish_reason["type"] + + output_ids = res.get("output_ids", []) + # If request is not finished yet, but there are no outputs, return an error. + if not output_ids and not finish_reason: + yield {"finish_reason": "error", "token_ids": []} + break + next_total_toks = len(output_ids) + out["token_ids"] = output_ids[num_output_tokens_so_far:] + num_output_tokens_so_far = next_total_toks yield out async def _process_text_stream(