Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fix UTF-8 hex escapes in string literals
Co-authored-by: Raj_Pabnani <RajPabnani03@users.noreply.github.com>
  • Loading branch information
cursoragent and RajPabnani03 committed Mar 8, 2026
commit 9287d4f99d1e8ac05461b31ef42ca730297abed9
12 changes: 10 additions & 2 deletions cli/src/template_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ fn parse_term_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode> {
let span = expr.as_span();
let primary = match expr.as_rule() {
Rule::string_literal => {
let text = STRING_LITERAL_PARSER.parse(expr.into_inner());
let text = STRING_LITERAL_PARSER
.parse(expr.into_inner())
.map_err(|err| TemplateParseError::expression(err.to_string(), span))?;
ExpressionNode::new(ExpressionKind::String(text), span)
}
Rule::raw_string_literal => {
Expand Down Expand Up @@ -1122,9 +1124,15 @@ mod tests {
Ok(ExpressionKind::String("aeiou".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
parse_into_kind(r#""\xc3\xa0\xc3\xa8\xc3\xac\xc3\xb0\xc3\xb9""#),
Ok(ExpressionKind::String("àèìðù".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xc3""#),
Err(TemplateParseErrorKind::Expression(
"Invalid UTF-8 sequence".to_owned(),
)),
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(TemplateParseErrorKind::SyntaxError),
Expand Down
2 changes: 1 addition & 1 deletion docs/templates.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ A double-quoted string literal supports the following escape sequences:
* `\n`: new line
* `\0`: null
* `\e`: escape (i.e., `\x1b`)
* `\xHH`: byte with hex value `HH`
* `\xHH`: byte with hex value `HH` (must be part of a valid UTF-8 string)

Other escape sequences are not supported. Any UTF-8 characters are allowed
inside a string literal, with two exceptions: unescaped `"`-s and uses of `\`
Expand Down
39 changes: 25 additions & 14 deletions lib/src/dsl_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

use std::ascii;
use std::collections::HashMap;
use std::error;
use std::fmt;
use std::slice;

Expand Down Expand Up @@ -397,34 +398,44 @@ pub struct StringLiteralParser<R> {
pub escape_rule: R,
}

#[expect(missing_docs)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StringLiteralParseError;

impl fmt::Display for StringLiteralParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("Invalid UTF-8 sequence")
}
}

impl error::Error for StringLiteralParseError {}

impl<R: RuleType> StringLiteralParser<R> {
/// Parses the given string literal `pairs` into string.
pub fn parse(&self, pairs: Pairs<R>) -> String {
let mut result = String::new();
pub fn parse(&self, pairs: Pairs<R>) -> Result<String, StringLiteralParseError> {
let mut result = Vec::new();
for part in pairs {
if part.as_rule() == self.content_rule {
result.push_str(part.as_str());
result.extend_from_slice(part.as_str().as_bytes());
} else if part.as_rule() == self.escape_rule {
match &part.as_str()[1..] {
"\"" => result.push('"'),
"\\" => result.push('\\'),
"t" => result.push('\t'),
"r" => result.push('\r'),
"n" => result.push('\n'),
"0" => result.push('\0'),
"e" => result.push('\x1b'),
"\"" => result.push(b'"'),
"\\" => result.push(b'\\'),
"t" => result.push(b'\t'),
"r" => result.push(b'\r'),
"n" => result.push(b'\n'),
"0" => result.push(b'\0'),
"e" => result.push(b'\x1b'),
hex if hex.starts_with('x') => {
result.push(char::from(
u8::from_str_radix(&hex[1..], 16).expect("hex characters"),
));
result.push(u8::from_str_radix(&hex[1..], 16).expect("hex characters"));
}
char => panic!("invalid escape: \\{char:?}"),
}
} else {
panic!("unexpected part of string: {part:?}");
}
}
result
String::from_utf8(result).map_err(|_| StringLiteralParseError)
}
}

Expand Down
23 changes: 16 additions & 7 deletions lib/src/fileset_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,17 @@ fn parse_function_call_node(pair: Pair<Rule>) -> FilesetParseResult<FunctionCall
})
}

fn parse_as_string_literal(pair: Pair<Rule>) -> String {
fn parse_as_string_literal(pair: Pair<Rule>) -> FilesetParseResult<String> {
let span = pair.as_span();
match pair.as_rule() {
Rule::identifier => pair.as_str().to_owned(),
Rule::string_literal => STRING_LITERAL_PARSER.parse(pair.into_inner()),
Rule::identifier => Ok(pair.as_str().to_owned()),
Rule::string_literal => STRING_LITERAL_PARSER
.parse(pair.into_inner())
.map_err(|err| FilesetParseError::expression(err.to_string(), span)),
Rule::raw_string_literal => {
let [content] = pair.into_inner().collect_array().unwrap();
assert_eq!(content.as_rule(), Rule::raw_string_content);
content.as_str().to_owned()
Ok(content.as_str().to_owned())
}
r => panic!("unexpected string literal rule: {r:?}"),
}
Expand All @@ -264,12 +267,12 @@ fn parse_primary_node(pair: Pair<Rule>) -> FilesetParseResult<ExpressionNode> {
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
let kind = lhs.as_str();
let value = parse_as_string_literal(rhs);
let value = parse_as_string_literal(rhs)?;
ExpressionKind::StringPattern { kind, value }
}
Rule::identifier => ExpressionKind::Identifier(first.as_str()),
Rule::string_literal | Rule::raw_string_literal => {
ExpressionKind::String(parse_as_string_literal(first))
ExpressionKind::String(parse_as_string_literal(first)?)
}
r => panic!("unexpected primary rule: {r:?}"),
};
Expand Down Expand Up @@ -519,9 +522,15 @@ mod tests {
Ok(ExpressionKind::String("aeiou".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
parse_into_kind(r#""\xc3\xa0\xc3\xa8\xc3\xac\xc3\xb0\xc3\xb9""#),
Ok(ExpressionKind::String("àèìðù".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xc3""#),
Err(FilesetParseErrorKind::Expression(
"Invalid UTF-8 sequence".to_owned(),
)),
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(FilesetParseErrorKind::SyntaxError),
Expand Down
27 changes: 18 additions & 9 deletions lib/src/revset_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -636,14 +636,14 @@ fn parse_primary_node(pair: Pair<Rule>) -> Result<ExpressionNode, RevsetParseErr
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
let kind = lhs.as_str();
let value = parse_as_string_literal(rhs);
let value = parse_as_string_literal(rhs)?;
ExpressionKind::StringPattern { kind, value }
}
// Identifier without "@" may be substituted by aliases. Primary expression including "@"
// is considered an indecomposable unit, and no alias substitution would be made.
Rule::identifier if pairs.peek().is_none() => ExpressionKind::Identifier(first.as_str()),
Rule::identifier | Rule::string_literal | Rule::raw_string_literal => {
let name = parse_as_string_literal(first);
let name = parse_as_string_literal(first)?;
match pairs.next() {
None => ExpressionKind::String(name),
Some(op) => {
Expand All @@ -654,7 +654,7 @@ fn parse_primary_node(pair: Pair<Rule>) -> Result<ExpressionNode, RevsetParseErr
// infix "<name>@<remote>"
Some(second) => {
let name: RefNameBuf = name.into();
let remote: RemoteNameBuf = parse_as_string_literal(second).into();
let remote: RemoteNameBuf = parse_as_string_literal(second)?.into();
ExpressionKind::RemoteSymbol(RemoteRefSymbolBuf { name, remote })
}
}
Expand All @@ -669,14 +669,17 @@ fn parse_primary_node(pair: Pair<Rule>) -> Result<ExpressionNode, RevsetParseErr
}

/// Parses part of compound symbol to string.
fn parse_as_string_literal(pair: Pair<Rule>) -> String {
fn parse_as_string_literal(pair: Pair<Rule>) -> Result<String, RevsetParseError> {
let span = pair.as_span();
match pair.as_rule() {
Rule::identifier => pair.as_str().to_owned(),
Rule::string_literal => STRING_LITERAL_PARSER.parse(pair.into_inner()),
Rule::identifier => Ok(pair.as_str().to_owned()),
Rule::string_literal => STRING_LITERAL_PARSER
.parse(pair.into_inner())
.map_err(|err| RevsetParseError::expression(err.to_string(), span)),
Rule::raw_string_literal => {
let [content] = pair.into_inner().collect_array().unwrap();
assert_eq!(content.as_rule(), Rule::raw_string_content);
content.as_str().to_owned()
Ok(content.as_str().to_owned())
}
_ => {
panic!("unexpected string literal rule: {:?}", pair.as_str());
Expand All @@ -697,7 +700,7 @@ pub fn parse_symbol(text: &str) -> Result<String, RevsetParseError> {
let mut pairs = RevsetParser::parse(Rule::symbol_name, text)?;
let first = pairs.next().unwrap();
let span = first.as_span();
let name = parse_as_string_literal(first);
let name = parse_as_string_literal(first)?;
if name.is_empty() {
Err(RevsetParseError::expression(
"Expected non-empty string",
Expand Down Expand Up @@ -1284,9 +1287,15 @@ mod tests {
Ok(ExpressionKind::String("aeiou".to_owned()))
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
parse_into_kind(r#""\xc3\xa0\xc3\xa8\xc3\xac\xc3\xb0\xc3\xb9""#),
Ok(ExpressionKind::String("àèìðù".to_owned()))
);
assert_eq!(
parse_into_kind(r#""\xc3""#),
Err(RevsetParseErrorKind::Expression(
"Invalid UTF-8 sequence".to_owned()
))
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(RevsetParseErrorKind::SyntaxError)
Expand Down
Loading