diff --git a/fastchat/serve/vllm_worker.py b/fastchat/serve/vllm_worker.py index 8e255b79c..1a57dc660 100644 --- a/fastchat/serve/vllm_worker.py +++ b/fastchat/serve/vllm_worker.py @@ -110,7 +110,21 @@ async def generate_stream(self, params): text_outputs = [output.text for output in request_output.outputs] text_outputs = " ".join(text_outputs) # Note: usage is not supported yet - ret = {"text": text_outputs, "error_code": 0, "usage": {}} + ret = { + "text": text_outputs, + "error_code": 0, + "usage": {}, + "cumulative_logprob": [ + output.cumulative_logprob for output in request_output.outputs + ], + "prompt_token_len": len(request_output.prompt_token_ids), + "output_token_len": [ + len(output.token_ids) for output in request_output.outputs + ], + "finish_reason": [ + output.finish_reason for output in request_output.outputs + ], + } yield (json.dumps(ret) + "\0").encode() async def generate(self, params):