Merged

Commits (28)
4aba7a0  Update GPT OSS parser and related components (zhongdaor-nv, Sep 10, 2025)
de1a915  tmp (zhongdaor-nv, Sep 10, 2025)
da62e0c  feat(parsers): enhance harmony tool calling parser and add debug stat… (zhongdaor-nv, Sep 10, 2025)
471f1cb  tmp (zhongdaor-nv, Sep 10, 2025)
63d639a  remove code for debugging (zhongdaor-nv, Sep 10, 2025)
244d31c  resolve coderabit comment (zhongdaor-nv, Sep 10, 2025)
f50ed91  cargo fmt (zhongdaor-nv, Sep 10, 2025)
855fb29  coderabbit (zhongdaor-nv, Sep 10, 2025)
97f8f65  fix unit test (zhongdaor-nv, Sep 10, 2025)
bd56609  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 10, 2025)
33e7286  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 11, 2025)
c9485d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 15, 2025)
9a8ce48  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 15, 2025)
fbc5155  resolve comment (zhongdaor-nv, Sep 15, 2025)
031c965  Resolve merge conflicts in preprocessor.rs (zhongdaor-nv, Sep 15, 2025)
40c1d03  make ci/cd happy (zhongdaor-nv, Sep 16, 2025)
80b2ac8  Merge remote-tracking branch 'origin/main' into zhongdaor/gpt-oss-fro… (zhongdaor-nv, Sep 18, 2025)
9392353  merge to main (zhongdaor-nv, Sep 18, 2025)
489019a  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
a733a8b  cargo fmt (zhongdaor-nv, Sep 18, 2025)
cdb9e7f  cargo test (zhongdaor-nv, Sep 18, 2025)
c0e22d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
8c5d62b  add test for test_parse_tool_calls_harmony_complete_basic (zhongdaor-nv, Sep 18, 2025)
67995d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
eb7855f  add more comment (zhongdaor-nv, Sep 18, 2025)
ca70608  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
e579b3a  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
fdc9f0d  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
Cargo.toml (1 addition, 1 deletion)
@@ -81,4 +81,4 @@ opt-level = 3
[profile.release]
# These make the build much slower but shrink the binary, and could help performance
codegen-units = 1
lto = true
lto = true
lib/llm/src/discovery/watcher.rs (6 additions)
@@ -287,6 +287,12 @@ impl ModelWatcher {
let Some(mut card) = card else {
anyhow::bail!("Missing model deployment card");
};

// Ensure runtime_config is populated: prefer entry value if present
if let Some(rc) = model_entry.runtime_config.clone() {
card.runtime_config = rc;
}

// Download tokenizer.json etc to local disk
// This cache_dir is a tempfile::TempDir that will be deleted on drop. I _think_
// OpenAIPreprocessor::new loads the files, so we can delete them after this
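A self-contained sketch of the precedence rule this hunk introduces: a runtime config carried on the discovered model entry wins over whatever the deployment card already had. The stub types and the `"gpt_oss"` parser name below are assumptions for illustration, not the crate's real definitions; only the `runtime_config` / `reasoning_parser` field names are taken from the diff.

```rust
#[derive(Clone, Debug, Default, PartialEq)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>, // field name taken from the diff
}

struct ModelEntry {
    runtime_config: Option<ModelRuntimeConfig>,
}

struct ModelDeploymentCard {
    runtime_config: ModelRuntimeConfig,
}

// Prefer the entry's config when present; otherwise keep the card's value.
fn apply_entry_config(entry: &ModelEntry, card: &mut ModelDeploymentCard) {
    if let Some(rc) = entry.runtime_config.clone() {
        card.runtime_config = rc;
    }
}

fn main() {
    let entry = ModelEntry {
        runtime_config: Some(ModelRuntimeConfig {
            reasoning_parser: Some("gpt_oss".to_string()), // hypothetical parser name
        }),
    };
    let mut card = ModelDeploymentCard::default_card();
    apply_entry_config(&entry, &mut card);
    assert_eq!(card.runtime_config.reasoning_parser.as_deref(), Some("gpt_oss"));
}

impl ModelDeploymentCard {
    fn default_card() -> Self {
        Self { runtime_config: ModelRuntimeConfig::default() }
    }
}
```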
lib/llm/src/preprocessor.rs (9 additions)
@@ -95,6 +95,8 @@ pub struct OpenAIPreprocessor {
formatter: Arc<dyn OAIPromptFormatter>,
tokenizer: Arc<dyn Tokenizer>,
model_info: Arc<dyn ModelInfo>,
/// Per-model runtime configuration propagated to the response generator (e.g., reasoning/tool parser selection)
runtime_config: crate::local_model::runtime_config::ModelRuntimeConfig,
}

impl OpenAIPreprocessor {
@@ -120,11 +122,15 @@ impl OpenAIPreprocessor {
};
let model_info = model_info.get_model_info()?;

// Initialize runtime config from the model deployment card
let runtime_config = mdc.runtime_config.clone();

Ok(Arc::new(Self {
formatter,
tokenizer,
model_info,
mdcsum,
runtime_config,
}))
}
/// Encode a string to its tokens
@@ -580,6 +586,9 @@ impl
let response_generator = request.response_generator(context.id().to_string());
let mut response_generator = Box::new(response_generator);

// set the runtime configuration
response_generator.set_runtime_config(self.runtime_config.clone());

// convert the chat completion request to a common completion request
let (common_request, annotations) = self.preprocess_request(&request)?;

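Taken together, the preprocessor changes thread the config along one path: deployment card, then `OpenAIPreprocessor.runtime_config`, then `set_runtime_config` on each response generator before any deltas stream out. A minimal runnable model of that ordering, with everything reduced to stubs (the real `response_generator` takes a context id and more):

```rust
#[derive(Clone, Default)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>,
}

struct OpenAIPreprocessorStub {
    runtime_config: ModelRuntimeConfig, // cloned from the card in `new`
}

#[derive(Default)]
struct ResponseGeneratorStub {
    runtime_config: ModelRuntimeConfig,
}

impl ResponseGeneratorStub {
    fn set_runtime_config(&mut self, rc: ModelRuntimeConfig) {
        self.runtime_config = rc;
    }
}

impl OpenAIPreprocessorStub {
    // Mirrors the diff: configure the generator first, then preprocess the request.
    fn begin_request(&self) -> ResponseGeneratorStub {
        let mut generator = ResponseGeneratorStub::default();
        generator.set_runtime_config(self.runtime_config.clone());
        generator
    }
}

fn main() {
    let pre = OpenAIPreprocessorStub {
        runtime_config: ModelRuntimeConfig { reasoning_parser: Some("gpt_oss".into()) },
    };
    let generator = pre.begin_request();
    assert!(generator.runtime_config.reasoning_parser.is_some());
}
```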
lib/llm/src/protocols/openai/chat_completions/delta.rs (14 additions)
@@ -120,6 +120,20 @@ impl DeltaGenerator {
}
}

/// Update runtime configuration and reconfigure the reasoning parser accordingly.
pub fn set_runtime_config(&mut self, runtime_config: ModelRuntimeConfig) {
self.options.runtime_config = runtime_config.clone();
match self.options.runtime_config.reasoning_parser.as_deref() {
Some(name) => {
self.reasoning_parser =
Some(ReasoningParserType::get_reasoning_parser_from_name(name));
}
None => {
self.reasoning_parser = None;
}
}
}

/// Updates the prompt token usage count.
///
/// # Arguments
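The rule this method encodes: `Some(name)` swaps in the named reasoning parser, `None` clears any previously configured one. A runnable stub model of that behavior; the real `ReasoningParserType::get_reasoning_parser_from_name` returns a parser object, so a plain string stands in for it here, and the `"gpt_oss"` name is hypothetical.

```rust
#[derive(Clone, Default)]
struct ModelRuntimeConfig {
    reasoning_parser: Option<String>,
}

#[derive(Default)]
struct DeltaGeneratorStub {
    runtime_config: ModelRuntimeConfig,
    reasoning_parser: Option<String>, // stand-in for the parser object
}

impl DeltaGeneratorStub {
    fn set_runtime_config(&mut self, runtime_config: ModelRuntimeConfig) {
        self.runtime_config = runtime_config.clone();
        // Some(name) selects a parser by name; None clears it.
        self.reasoning_parser = runtime_config.reasoning_parser;
    }
}

fn main() {
    let mut generator = DeltaGeneratorStub::default();
    generator.set_runtime_config(ModelRuntimeConfig {
        reasoning_parser: Some("gpt_oss".into()),
    });
    assert_eq!(generator.reasoning_parser.as_deref(), Some("gpt_oss"));

    // A config without a parser name resets any earlier choice.
    generator.set_runtime_config(ModelRuntimeConfig::default());
    assert!(generator.reasoning_parser.is_none());
}
```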
lib/parsers/src/reasoning/gpt_oss_parser.rs (49 additions, 2 deletions)
@@ -173,9 +173,8 @@ impl ReasoningParser for GptOssReasoningParser {
}

if let Some(channel) = self.parser.current_channel() {
tracing::debug!("Current channel: {}", channel);
tracing::debug!("Current channel {}", channel);
if channel == "final" {
tracing::debug!("In final channel, processing normal text");
// If we're in the final channel, we should not parse reasoning
if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
tracing::debug!("Got normal text delta of {} chars", current.len());
@@ -186,6 +185,54 @@
}
tracing::debug!("No content delta in final channel");
ParserResult::default()
} else if channel == "commentary" {
// If we're in the commentary channel, return the raw token content and recover
// content that has already been consumed by the parser, so that the tool parser
// can process it properly
if let Ok(enc) = get_harmony_encoding() {
let raw_content = self.parser.current_content().unwrap_or_default();
let mut final_text = _text.to_string();

// Need to recover commentary content that has already been consumed by the parser
if raw_content.is_empty() {
let tokens = self.parser.tokens();

// Get the token id for "<|channel|>"
let start_token_id = enc
.tokenizer()
.encode_with_special_tokens("<|channel|>")
.last()
.copied();

// Find the last occurrence of the <|channel|> token (id 20005) in the tokens vector
let last_channel_token_idx = start_token_id
.and_then(|token_id| {
tokens.iter().rposition(|token| *token == token_id)
})
.unwrap_or(0);

// Then take the generated text from the last <|channel|> to the end of self.parser.tokens()
let end_token_idx = self.parser.tokens().len();
// Use harmony's decode_utf8 to translate the tokens back to text
let generated_text = enc
.tokenizer()
.decode_utf8(
&self.parser.tokens()[last_channel_token_idx..end_token_idx],
)
.unwrap();

final_text = generated_text;

}

ParserResult {
normal_text: final_text,
reasoning_text: String::new(),
}
} else {
tracing::warn!("Failed to get harmony encoding for raw token decoding");
ParserResult::default()
}
} else {
tracing::debug!("In reasoning channel: {}", channel);
if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
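The recovery logic above boils down to one operation: find the last occurrence of the `<|channel|>` special token in the accumulated stream and re-decode everything from that index, since the streaming parser has already consumed that text as structure. A self-contained sketch of the index arithmetic, using plain `u32` ids (20005 stands in for `<|channel|>`, per the comment in the diff; the other ids are arbitrary):

```rust
/// Return the token suffix starting at the last occurrence of `marker`
/// (inclusive), or the whole slice if the marker never appeared.
fn suffix_from_last_marker(tokens: &[u32], marker: u32) -> &[u32] {
    let start = tokens.iter().rposition(|&t| t == marker).unwrap_or(0);
    &tokens[start..]
}

fn main() {
    let tokens = [101, 20005, 7, 8, 20005, 42, 43];
    let suffix = suffix_from_last_marker(&tokens, 20005);
    assert_eq!(suffix, &[20005, 42, 43][..]);
    // In the parser, this suffix is what decode_utf8 turns back into text.
}
```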
lib/parsers/src/tool_calling/harmony/harmony_parser.rs (106 additions, 2 deletions)
@@ -3,9 +3,10 @@

use super::config::JsonParserConfig;
use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
use openai_harmony::StreamableParser;
use openai_harmony::chat::{Content::Text, Role};
use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, load_harmony_encoding};
use openai_harmony::{
HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
};
use serde_json::Value;
use std::sync::OnceLock;

@@ -154,6 +155,109 @@ pub fn parse_tool_calls_harmony(
Ok((res, Some(normal_text.to_string())))
}

/// Parse tool calls from a complete Harmony Format text chunk using direct token parsing.
///
/// This function is optimized for parsing complete text chunks where the entire content
/// is available at once. It uses `parse_messages_from_completion_tokens` to directly
/// parse all tokens into Harmony Format messages, then extracts tool calls from messages
/// with the "commentary" channel and "functions.*" recipients.
///
/// Unlike `parse_tool_calls_harmony`, this function doesn't perform start token detection
/// or token-by-token streaming, making it more efficient for complete chunks.
///
/// # Arguments
/// * `text` - The complete Harmony Format text to parse
/// * `config` - Parser configuration (currently unused but kept for API consistency)
///
/// # Returns
/// * `Ok((tool_calls, normal_text))` - Tuple containing extracted tool calls and any normal text
/// * `Err(e)` - If parsing fails due to encoding or tokenization errors
///
/// # Example input
/// `<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}`
pub fn parse_tool_calls_harmony_complete(
text: &str,
config: &JsonParserConfig,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
let _ = config;
let enc = match get_harmony_encoding().as_ref() {
Ok(e) => e,
Err(e) => {
tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
return Ok((vec![], Some(text.to_string())));
}
};

// Encode the text into tokens using harmony encoding
let tokens: Vec<u32> = enc.tokenizer().encode_with_special_tokens(text);
let messages = match enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)) {
Ok(messages) => messages,
Err(e) => {
tracing::debug!(
"Failed to parse messages from completion tokens: {e}. Tool calls will not be parsed."
);
return Ok((vec![], Some(text.to_string())));
}
};

let mut normal_text = String::new();

let mut res = Vec::with_capacity(messages.len());
let mut call_idx = 0usize; // Index of the tool call

for message in messages.iter() {
if message.author.role == Role::Assistant
&& message.channel.as_deref() == Some("commentary")
&& message
.recipient
.as_deref()
.unwrap_or_default()
.starts_with("functions.")
{
let Some(fname) = message
.recipient
.as_ref()
.and_then(|r| r.split('.').nth(1))
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
else {
continue;
};

let args = match message.content.first() {
Some(Text(text)) => match serde_json::from_str::<Value>(text.text.trim()) {
Ok(value) => value,
Err(_) => {
Value::Null // Set args to null if it's not valid JSON
}
},
_ => {
Value::Null // Set args to null if it's not a text content
}
};
// Add tool call to result if args is valid JSON
if !args.is_null() {
call_idx += 1;
res.push(ToolCallResponse {
id: format!("call-{}", call_idx),
tp: ToolCallType::Function,
function: CalledFunction {
name: fname.to_string(),
// Safety: serializing a serde_json::Value that was parsed from JSON cannot fail
arguments: serde_json::to_string(&args).unwrap(),
},
});
}
}
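// Analysis-channel text is surfaced as the normal (non-tool-call) text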
if message.author.role == Role::Assistant && message.channel.as_deref() == Some("analysis")
{
// Use .first() so an empty content vector cannot panic
if let Some(Text(t)) = message.content.first() {
normal_text.push_str(&t.text);
}
}
}
Ok((res, Some(normal_text.to_string())))
}

pub fn detect_tool_call_start_harmony(chunk: &str, config: &JsonParserConfig) -> bool {
let trimmed = chunk.trim();
if trimmed.is_empty() {
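A hedged usage sketch of the new entry point, reusing the example line from the doc comment. The crate path in the `use` line is assumed (the re-exports themselves appear in the `mod.rs` diffs below), `JsonParserConfig::default()` assumes a `Default` impl, and the output depends on the harmony encoding being loadable at runtime, so the expected result is shown as a comment rather than asserted.

```rust
// Crate name is assumed for illustration; the re-exported items match the diff.
use parsers::tool_calling::{JsonParserConfig, parse_tool_calls_harmony_complete};

fn main() -> anyhow::Result<()> {
    let text = r#"<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}"#;
    let config = JsonParserConfig::default(); // assumed Default impl
    let (calls, normal_text) = parse_tool_calls_harmony_complete(text, &config)?;
    for call in &calls {
        // Expected: get_current_weather -> {"location":"San Francisco"}
        println!("{} -> {}", call.function.name, call.function.arguments);
    }
    println!("normal text: {:?}", normal_text); // analysis-channel text, if any
    Ok(())
}
```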
lib/parsers/src/tool_calling/harmony/mod.rs (3 additions, 1 deletion)
@@ -4,4 +4,6 @@
pub mod harmony_parser;

pub use super::{config, response};
pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
pub use harmony_parser::{
detect_tool_call_start_harmony, parse_tool_calls_harmony, parse_tool_calls_harmony_complete,
};
lib/parsers/src/tool_calling/mod.rs (1 addition, 1 deletion)
@@ -11,7 +11,7 @@ pub mod tools;

// Re-export main types and functions for convenience
pub use config::{JsonParserConfig, ToolCallConfig, ToolCallParserType};
pub use harmony::parse_tool_calls_harmony;
pub use harmony::{parse_tool_calls_harmony, parse_tool_calls_harmony_complete};
pub use json::try_tool_call_parse_json;
pub use parsers::{detect_and_parse_tool_call, try_tool_call_parse};
pub use pythonic::try_tool_call_parse_pythonic;
lib/parsers/src/tool_calling/parsers.rs (3 additions, 2 deletions)
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use super::config::{ToolCallConfig, ToolCallParserType};
use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony_complete};
use super::json::{detect_tool_call_start_json, try_tool_call_parse_json};
use super::pythonic::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic};
use super::response::ToolCallResponse;
@@ -43,7 +43,8 @@ pub fn try_tool_call_parse(
Ok((results, normal_content))
}
ToolCallParserType::Harmony => {
let (results, normal_content) = parse_tool_calls_harmony(message, &config.json)?;
let (results, normal_content) =
parse_tool_calls_harmony_complete(message, &config.json)?;
Ok((results, normal_content))
}
ToolCallParserType::Pythonic => {