Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
change errors
  • Loading branch information
sevenzing committed Dec 5, 2024
commit 4fb080d316901a45b16500044ab8a685ffba4a61
2 changes: 1 addition & 1 deletion examples/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ fn main() {
let normalizer = EnsNameNormalizer::default();

let name = "Nàme‍🧙‍♂.eth";
let result = normalizer.tokenize(name).unwrap();
let result = normalizer.tokenize(name);

for token in result.tokens {
if token.is_disallowed() {
Expand Down
2 changes: 1 addition & 1 deletion src/code_points/specs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl CodePointsSpecs {
.unwrap_or(false)
}

pub fn finditer_emoji<'a>(&'a self, s: &'a str) -> impl Iterator<Item = regex::Match<'_>> {
pub fn finditer_emoji<'a>(&'a self, s: &'a str) -> impl Iterator<Item = regex::Match<'a>> {
self.emoji_regex.find_iter(s)
}

Expand Down
8 changes: 4 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@ use crate::CodePoint;
/// Errors that can occur during processing of an ENS name.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum ProcessError {
#[error("contains visually confusing characters from multiple scripts: {0}")]
Confused(String),
#[error("contains visually confusing characters from {group1} and {group2} scripts")]
ConfusedGroups { group1: String, group2: String },
#[error("invalid character ('{sequence}') at position {index}: {inner}")]
CurrableError {
inner: CurrableError,
Expand Down Expand Up @@ -35,6 +31,8 @@ pub enum CurrableError {
FencedTrailing,
#[error("consecutive sequence of fenced characters")]
FencedConsecutive,
#[error("contains visually confusing characters from multiple scripts: character with code '{cp}' not in group '{group_name}'")]
Confused { group_name: String, cp: CodePoint },
}

/// Errors regarding disallowed sequences.
Expand All @@ -50,4 +48,6 @@ pub enum DisallowedSequence {
NsmTooMany,
#[error("nsm repeated")]
NsmRepeated,
#[error("contains visually confusing characters from {group1} and {group2} scripts")]
ConfusedGroups { group1: String, group2: String },
}
6 changes: 3 additions & 3 deletions src/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ impl EnsNameNormalizer {
}

/// Tokenize the input string, return a `TokenizedName` object with `Vec<EnsNameToken>` inside
pub fn tokenize(&self, input: impl AsRef<str>) -> Result<TokenizedName, ProcessError> {
pub fn tokenize(&self, input: impl AsRef<str>) -> TokenizedName {
TokenizedName::from_input(input.as_ref(), &self.specs, true)
}

/// Process the input string, return a `ProcessedName` object with `Vec<ValidatedLabel>` inside
/// This function will tokenize and validate the name. Processed name can be normalized and beautified.
pub fn process(&self, input: impl AsRef<str>) -> Result<ProcessedName, ProcessError> {
let input = input.as_ref();
let tokenized = self.tokenize(input)?;
let tokenized = self.tokenize(input);
let labels = validate_name(&tokenized, &self.specs)?;
Ok(ProcessedName { tokenized, labels })
}
Expand All @@ -60,7 +60,7 @@ impl ProcessedName {
}

/// `no-cache` version of [`EnsNameNormalizer::tokenize`]
pub fn tokenize(input: impl AsRef<str>) -> Result<TokenizedName, ProcessError> {
pub fn tokenize(input: impl AsRef<str>) -> TokenizedName {
EnsNameNormalizer::default().tokenize(input)
}

Expand Down
30 changes: 11 additions & 19 deletions src/tokens/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
CollapsedEnsNameToken, EnsNameToken, TokenDisallowed, TokenEmoji, TokenIgnored,
TokenMapped, TokenNfc, TokenStop, TokenValid,
},
utils, CodePoint, CodePointsSpecs, ProcessError,
utils, CodePoint, CodePointsSpecs,
};

/// Represents a full ENS name, including the original input and the sequence of tokens
Expand All @@ -28,11 +28,7 @@ impl TokenizedName {
}

/// Tokenizes an input string, applying NFC normalization if requested.
pub fn from_input(
input: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<Self, ProcessError> {
pub fn from_input(input: impl AsRef<str>, specs: &CodePointsSpecs, apply_nfc: bool) -> Self {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Reconsider removing error handling from the `from_input` method

Changing `from_input` to return `Self` instead of `Result<Self, ProcessError>` might suppress errors that can occur during tokenization. To prevent unexpected behavior, consider keeping error propagation so that invalid inputs are handled gracefully.

tokenize_name(input, specs, apply_nfc)
}

Expand Down Expand Up @@ -135,27 +131,23 @@ where
}
}

fn tokenize_name(
name: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<TokenizedName, ProcessError> {
fn tokenize_name(name: impl AsRef<str>, specs: &CodePointsSpecs, apply_nfc: bool) -> TokenizedName {
let name = name.as_ref();
if name.is_empty() {
return Ok(TokenizedName::empty());
return TokenizedName::empty();
}
let tokens = tokenize_input(name, specs, apply_nfc)?;
Ok(TokenizedName {
let tokens = tokenize_input(name, specs, apply_nfc);
TokenizedName {
input: name.to_string(),
tokens,
})
}
}

fn tokenize_input(
input: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<Vec<EnsNameToken>, ProcessError> {
) -> Vec<EnsNameToken> {
let input = input.as_ref();
let emojis = specs.finditer_emoji(input).collect::<Vec<_>>();

Expand Down Expand Up @@ -184,7 +176,7 @@ fn tokenize_input(
perform_nfc_transform(&mut tokens, specs);
}
collapse_valid_tokens(&mut tokens);
Ok(tokens)
tokens
}

fn perform_nfc_transform(tokens: &mut Vec<EnsNameToken>, specs: &CodePointsSpecs) {
Expand Down Expand Up @@ -462,7 +454,7 @@ mod tests {
#[case] expected: Vec<EnsNameToken>,
specs: &CodePointsSpecs,
) {
let tokens = tokenize_input(input, specs, apply_nfc).expect("tokenize");
let tokens = tokenize_input(input, specs, apply_nfc);
assert_eq!(tokens, expected);
}

Expand All @@ -486,7 +478,7 @@ mod tests {
#[case] expected: Vec<CollapsedEnsNameToken>,
specs: &CodePointsSpecs,
) {
let tokens = tokenize_input(input, specs, true).expect("tokenize");
let tokens = tokenize_input(input, specs, true);
let label = TokenizedLabel::from(&tokens);
let result = label.collapse_into_text_or_emoji();
assert_eq!(result, expected);
Expand Down
71 changes: 52 additions & 19 deletions src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ fn check_and_get_group(
.collect::<HashSet<_>>()
.into_iter()
.collect::<Vec<_>>();
let group = determine_group(&unique_cps, specs).cloned()?;
let group = determine_group(label, &unique_cps, specs).cloned()?;
check_group(&group, &cps, specs)?;
check_whole(&group, &unique_cps, specs)?;
Ok(group)
Expand All @@ -223,13 +223,17 @@ fn check_group(
cps: &[CodePoint],
specs: &CodePointsSpecs,
) -> Result<(), ProcessError> {
for cp in cps.iter() {
for (i, cp) in cps.iter().enumerate() {
if !group.contains_cp(*cp) {
return Err(ProcessError::Confused(format!(
"symbol {} not present in group {}",
utils::cp2str(*cp),
group.name
)));
return Err(ProcessError::CurrableError {
inner: CurrableError::Confused {
group_name: group.name.to_string(),
cp: *cp,
},
index: i,
sequence: utils::cps2str(cps),
maybe_suggest: Some("".to_string()),
});
}
}
if group.cm_absent {
Expand Down Expand Up @@ -271,10 +275,12 @@ fn check_whole(
for group_name in maker {
let confused_group_candidate = specs.group_by_name(group_name).expect("group must exist");
if confused_group_candidate.contains_all_cps(&shared) {
return Err(ProcessError::ConfusedGroups {
group1: group.name.to_string(),
group2: confused_group_candidate.name.to_string(),
});
return Err(ProcessError::DisallowedSequence(
DisallowedSequence::ConfusedGroups {
group1: group.name.to_string(),
group2: confused_group_candidate.name.to_string(),
},
));
}
}
Ok(())
Expand Down Expand Up @@ -317,16 +323,43 @@ fn get_groups_candidates_and_shared_cps(
}

fn determine_group<'a>(
label: &TokenizedLabel,
unique_cps: &'a [CodePoint],
specs: &'a CodePointsSpecs,
) -> Result<&'a ParsedGroup, ProcessError> {
specs
.groups_for_cps(unique_cps)
.next()
.ok_or(ProcessError::Confused(format!(
"no group found for {:?}",
unique_cps
)))
if let Some(group) = specs.groups_for_cps(unique_cps).next() {
Ok(group)
} else {
let mut maybe_group = None;
for last_cp_index in 0..unique_cps.len() {
let cps = &unique_cps[..last_cp_index + 1];
if let Some(group) = specs.groups_for_cps(cps).next() {
maybe_group = Some(group);
continue;
} else {
let cp = unique_cps[last_cp_index];
let index_of_cp = label
.iter_cps()
.position(|cp_in_label| cp_in_label == cp)
.expect("cp must exist in label");
match maybe_group {
Some(group) => {
return Err(ProcessError::CurrableError {
inner: CurrableError::Confused {
group_name: group.name.to_string(),
cp,
},
index: index_of_cp,
sequence: utils::cp2str(cp),
maybe_suggest: Some("".to_string()),
});
}
None => unreachable!(),
}
}
}
unreachable!("")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Avoid using the `unreachable!()` macro; handle all match arms explicitly

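In the `determine_group` function, the use of `unreachable!()` within the `match` statement can lead to panics if assumptions about the data change. For example, if the very first code point in `unique_cps` belongs to no group, `maybe_group` is still `None` when the `match` runs, so the `None => unreachable!()` arm is hit and the call panics instead of returning a `ProcessError`. To make the code more robust, consider explicitly handling the `None` case (for instance by returning an error) or adding logic to ensure that all possible cases are accounted for.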

}
}

#[cfg(test)]
Expand Down Expand Up @@ -386,7 +419,7 @@ mod tests {
#[case] expected: Result<LabelType, ProcessError>,
specs: &CodePointsSpecs,
) {
let name = TokenizedName::from_input(input, specs, true).unwrap();
let name = TokenizedName::from_input(input, specs, true);
let label = name.iter_labels().next().unwrap();
let result = validate_label(label, specs);
assert_eq!(
Expand Down
Loading