Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4aba7a0
Update GPT OSS parser and related components
zhongdaor-nv Sep 10, 2025
de1a915
tmp
zhongdaor-nv Sep 10, 2025
da62e0c
feat(parsers): enhance harmony tool calling parser and add debug stat…
zhongdaor-nv Sep 10, 2025
471f1cb
tmp
zhongdaor-nv Sep 10, 2025
63d639a
remove code for debugging
zhongdaor-nv Sep 10, 2025
244d31c
resolve coderabbit comment
zhongdaor-nv Sep 10, 2025
f50ed91
cargo fmt
zhongdaor-nv Sep 10, 2025
855fb29
coderabbit
zhongdaor-nv Sep 10, 2025
97f8f65
fix unit test
zhongdaor-nv Sep 10, 2025
bd56609
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 10, 2025
33e7286
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 11, 2025
c9485d7
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 15, 2025
9a8ce48
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 15, 2025
fbc5155
resolve comment
zhongdaor-nv Sep 15, 2025
031c965
Resolve merge conflicts in preprocessor.rs
zhongdaor-nv Sep 15, 2025
40c1d03
make ci/cd happy
zhongdaor-nv Sep 16, 2025
80b2ac8
Merge remote-tracking branch 'origin/main' into zhongdaor/gpt-oss-fro…
zhongdaor-nv Sep 18, 2025
9392353
merge to main
zhongdaor-nv Sep 18, 2025
489019a
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
a733a8b
cargo fmt
zhongdaor-nv Sep 18, 2025
cdb9e7f
cargo test
zhongdaor-nv Sep 18, 2025
c0e22d7
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
8c5d62b
add test for test_parse_tool_calls_harmony_complete_basic
zhongdaor-nv Sep 18, 2025
67995d7
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
eb7855f
add more comment
zhongdaor-nv Sep 18, 2025
ca70608
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
e579b3a
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
fdc9f0d
Merge branch 'main' into zhongdaor/gpt-oss-frontend
zhongdaor-nv Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat(parsers): enhance harmony tool calling parser and add debug state output

- Add state JSON output to gpt_oss_parser for debugging purposes
- Refactor harmony parser to use StreamableParser for better token processing
- Add parse_tool_calls_harmony_chunk function for chunked parsing
- Update module exports to include new harmony chunk parser
- Improve error handling and token processing in harmony parser
  • Loading branch information
zhongdaor-nv committed Sep 10, 2025
commit da62e0ca58c5f29fb178d766f9c34e615b42f803
20 changes: 20 additions & 0 deletions lib/parsers/src/reasoning/gpt_oss_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ impl ReasoningParser for GptOssReasoningParser {
return ParserResult::default();
}
}

// Debug: append tokens and text to two files
// Write tokens to "tokens.txt" and text to "text.txt"
use std::io::Write;
Expand Down Expand Up @@ -205,6 +206,25 @@ impl ReasoningParser for GptOssReasoningParser {
{
tracing::warn!("Failed to write text to file: {}", e);
}
// write the state json to file
if let Err(e) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open("state.json")
.and_then(|mut file| match self.parser.state_json() {
Ok(state_json) => writeln!(file, "{}", state_json),
Err(json_err) => {
tracing::warn!("Failed to serialize parser state: {}", json_err);
writeln!(
file,
"{{\"error\": \"Failed to serialize state: {}\"}}",
json_err
)
}
})
{
tracing::warn!("Failed to write state json to file: {}", e);
}

if let Some(channel) = self.parser.current_channel() {
tracing::debug!("Current channel {}", channel);
Expand Down
183 changes: 129 additions & 54 deletions lib/parsers/src/tool_calling/harmony/harmony_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use super::config::JsonParserConfig;
use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
use openai_harmony::chat::{Content::Text, Role};
use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, load_harmony_encoding};
use openai_harmony::{load_harmony_encoding, HarmonyEncoding, HarmonyEncodingName, StreamableParser};
use serde_json::Value;
use std::sync::OnceLock;

Expand All @@ -22,73 +22,59 @@ pub fn parse_tool_calls_harmony(
text: &str,
config: &JsonParserConfig,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
// let mut trimmed = text.trim().to_string();
// let original_text = trimmed.clone();

// // Check if tool call start tokens are present, if not return everything as normal text
// // Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present
// // End Token: "<|call|>"
// eprintln!("harmony parser text[+++] {:?}", text);
// if !detect_tool_call_start_harmony(text, config) {
// return Ok((vec![], Some(trimmed)));
// }

// // Workaround to add <|call|> token to the end of the text if it is not present. Otherwise, StreamableParser will not be able to parse the text.
// let end_token = config
// .tool_call_end_tokens
// .first()
// .map(String::as_str)
// .unwrap_or("<|call|>");
// if !trimmed.ends_with(end_token) {
// trimmed.push_str(end_token);
// }
let mut trimmed = text.trim().to_string();
let original_text = trimmed.clone();

// Check if tool call start tokens are present, if not return everything as normal text
// Start Token: "<|start|>assistant<|channel|>commentary" should be present in the text if tool calls are present
// End Token: "<|call|>"
if !detect_tool_call_start_harmony(text, config) {
return Ok((vec![], Some(trimmed)));
}

// Workaround to add <|call|> token to the end of the text if it is not present. Otherwise, StreamableParser will not be able to parse the text.
let end_token = config
.tool_call_end_tokens
.first()
.map(String::as_str)
.unwrap_or("<|call|>");
if !trimmed.ends_with(end_token) {
trimmed.push_str(end_token);
}

let enc = match get_harmony_encoding().as_ref() {
Ok(e) => e,
Err(e) => {
tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
return Ok((vec![], Some(text.to_string())));
return Ok((vec![], Some(original_text)));
}
};

// // Encode the text into tokens using harmony encoding
let tokens = enc.tokenizer().encode_with_special_tokens(text);
eprintln!("tokens[+++] {:?}", tokens);
// let messages = enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)).unwrap();
let messages = match enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)) {
Ok(messages) => messages,
// Encode the text into tokens using harmony encoding
let tokens = enc.tokenizer().encode_with_special_tokens(&trimmed);

// Create StreamableParser to process each token and create Harmony Format messages
// Set Role to Assistant because we are parsing tool calls from an assistant message
let mut parser = match StreamableParser::new(enc.clone(), Some(Role::Assistant)) {
Ok(p) => p,
Err(e) => {
tracing::debug!(
"Failed to parse messages from completion tokens: {e}. Tool calls will not be parsed."
"Failed to create harmony streamable parser: {e}. Tool calls will not be parsed."
);
return Ok((vec![], Some(text.to_string())));
return Ok((vec![], Some(original_text)));
}
};
eprintln!("messages[+++] {:?}", messages);

// Create StreamableParser to process each token and create Harmony Format messages
// Set Role to Assistant because we are parsing tool calls from an assistant message
// let mut parser = match StreamableParser::new(enc.clone(), Some(Role::Assistant)) {
// Ok(p) => p,
// Err(e) => {
// tracing::debug!(
// "Failed to create harmony streamable parser: {e}. Tool calls will not be parsed."
// );
// return Ok((vec![], Some(text.to_string())));
// }
// };

// // Process each token to create Harmony Format messages
// for token in tokens {
// if parser.process(token).is_err() {
// // Skip the token if it causes an error. Some special tokens are not supported by the parser.
// continue;
// }
// }

// // Get the Harmony Format messages
// let messages = parser.messages();
// eprintln!("message[+++] {:?}", messages);
// Process each token to create Harmony Format messages
for token in tokens {
if parser.process(token).is_err() {
// Skip the token if it causes an error. Some special tokens are not supported by the parser.
continue;
}
}

// Get the Harmony Format messages
let messages = parser.messages();

let mut normal_text = String::new();

Expand All @@ -112,6 +98,95 @@ pub fn parse_tool_calls_harmony(
// ],
// channel: Some("commentary"),
// content_type: Some("<|constrain|>json")
for message in messages.iter() {
if message.author.role == Role::Assistant
&& message.channel.as_deref() == Some("commentary")
&& message
.recipient
.as_deref()
.unwrap_or_default()
.starts_with("functions.")
{
let Some(fname) = message
.recipient
.as_ref()
.and_then(|r| r.split('.').nth(1))
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
else {
continue;
};

let args = match message.content.first() {
Some(Text(text)) => match serde_json::from_str::<Value>(text.text.trim()) {
Ok(value) => value,
Err(_) => {
Value::Null // Set args to null if it's not valid JSON
}
},
_ => {
Value::Null // Set args to null if it's not a text content
}
};
// Add tool call to result if args is valid JSON
if !args.is_null() {
call_idx += 1;
res.push(ToolCallResponse {
id: format!("call-{}", call_idx),
tp: ToolCallType::Function,
function: CalledFunction {
name: fname.to_string(),
// Safety: `Value::Object` is always valid JSON, so serialization cannot fail
arguments: serde_json::to_string(&args).unwrap(),
},
});
}
}
if message.author.role == Role::Assistant && message.channel.as_deref() == Some("analysis")
{
normal_text.push_str(match &message.content[0] {
Text(t) => &t.text,
_ => "",
});
}
}
Ok((res, Some(normal_text.to_string())))
}

pub fn parse_tool_calls_harmony_chunk(
text: &str,
config: &JsonParserConfig,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
let _ = config;
let enc = match get_harmony_encoding().as_ref() {
Ok(e) => e,
Err(e) => {
tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
return Ok((vec![], Some(text.to_string())));
}
};

// // Encode the text into tokens using harmony encoding
let tokens = enc.tokenizer().encode_with_special_tokens(text);
eprintln!("tokens[+++] {:?}", tokens);
// let messages = enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)).unwrap();
let messages = match enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)) {
Ok(messages) => messages,
Err(e) => {
tracing::debug!(
"Failed to parse messages from completion tokens: {e}. Tool calls will not be parsed."
);
return Ok((vec![], Some(text.to_string())));
}
};
eprintln!("messages[+++] {:?}", messages);


let mut normal_text = String::new();

let mut res = Vec::with_capacity(messages.len());
let mut call_idx = 0usize; // Index of the tool call

for message in messages.iter() {
eprintln!("message[+++] {:?}", message);
if message.author.role == Role::Assistant
Expand Down
4 changes: 3 additions & 1 deletion lib/parsers/src/tool_calling/harmony/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@
pub mod harmony_parser;

pub use super::{config, response};
pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
pub use harmony_parser::{
detect_tool_call_start_harmony, parse_tool_calls_harmony, parse_tool_calls_harmony_chunk,
};
2 changes: 1 addition & 1 deletion lib/parsers/src/tool_calling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub mod tools;

// Re-export main types and functions for convenience
pub use config::{JsonParserConfig, ToolCallConfig, ToolCallParserType};
pub use harmony::parse_tool_calls_harmony;
pub use harmony::{parse_tool_calls_harmony, parse_tool_calls_harmony_chunk};
pub use json::try_tool_call_parse_json;
pub use parsers::{detect_and_parse_tool_call, try_tool_call_parse};
pub use pythonic::try_tool_call_parse_pythonic;
Expand Down
4 changes: 2 additions & 2 deletions lib/parsers/src/tool_calling/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use super::config::{ToolCallConfig, ToolCallParserType};
use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony_chunk};
use super::json::{detect_tool_call_start_json, try_tool_call_parse_json};
use super::pythonic::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic};
use super::response::ToolCallResponse;
Expand Down Expand Up @@ -43,7 +43,7 @@ pub fn try_tool_call_parse(
Ok((results, normal_content))
}
ToolCallParserType::Harmony => {
let (results, normal_content) = parse_tool_calls_harmony(message)?;
let (results, normal_content) = parse_tool_calls_harmony_chunk(message, &config.json)?;
Ok((results, normal_content))
}
ToolCallParserType::Pythonic => {
Expand Down