Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# ens-normalize-rs

![tests](https://github.com/sevenzing/ens-normalize-rs/actions/workflows/tests.yml/badge.svg)
![Crates.io Version](https://img.shields.io/crates/v/ens-normalize-rs)


A Rust implementation of ENS (Ethereum Name Service) name normalization.

Expand Down
2 changes: 1 addition & 1 deletion examples/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ fn main() {
let normalizer = EnsNameNormalizer::default();

let name = "Nàme‍🧙‍♂.eth";
let result = normalizer.tokenize(name).unwrap();
let result = normalizer.tokenize(name);

for token in result.tokens {
if token.is_disallowed() {
Expand Down
3 changes: 2 additions & 1 deletion src/code_points/specs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl CodePointsSpecs {
.collect();
let valid = compute_valid(&groups, &decomp);
let whole_map = compute_whole_map(spec.whole_map);

let emoji_str_list = emoji
.iter()
.map(|cps| utils::cps2str(cps))
Expand Down Expand Up @@ -95,7 +96,7 @@ impl CodePointsSpecs {
.unwrap_or(false)
}

pub fn finditer_emoji<'a>(&'a self, s: &'a str) -> impl Iterator<Item = regex::Match<'_>> {
pub fn finditer_emoji<'a>(&'a self, s: &'a str) -> impl Iterator<Item = regex::Match<'a>> {
self.emoji_regex.find_iter(s)
}

Expand Down
2 changes: 2 additions & 0 deletions src/code_points/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ impl ParsedGroup {
pub type ParsedWholeMap = HashMap<CodePoint, ParsedWholeValue>;

pub enum ParsedWholeValue {
#[allow(dead_code)]
Number(u32),
WholeObject(ParsedWholeObject),
}
Expand All @@ -59,6 +60,7 @@ impl TryFrom<spec_json::WholeValue> for ParsedWholeValue {
}

pub struct ParsedWholeObject {
#[allow(dead_code)]
pub v: HashSet<CodePoint>,
pub m: HashMap<CodePoint, HashSet<String>>,
}
Expand Down
13 changes: 9 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
use crate::CodePoint;

/// Errors that can occur during processing of an ENS name.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum ProcessError {
#[error("contains visually confusing characters from multiple scripts: {0}")]
Confused(String),
#[error("contains visually confusing characters from {group1} and {group2} scripts")]
ConfusedGroups { group1: String, group2: String },
#[error("invalid character ('{sequence}') at position {index}: {inner}")]
CurrableError {
inner: CurrableError,
Expand All @@ -17,6 +14,7 @@ pub enum ProcessError {
DisallowedSequence(#[from] DisallowedSequence),
}

/// Errors that can be cured by the normalizer.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum CurrableError {
#[error("underscore in middle")]
Expand All @@ -33,8 +31,13 @@ pub enum CurrableError {
FencedTrailing,
#[error("consecutive sequence of fenced characters")]
FencedConsecutive,
#[error("contains visually confusing characters from multiple scripts: character with code '{cp}' not in group '{group_name}'")]
Confused { group_name: String, cp: CodePoint },
#[error("contains a disallowed character")]
Disallowed,
}

/// Errors regarding disallowed sequences.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum DisallowedSequence {
#[error("disallowed character: {0}")]
Expand All @@ -47,4 +50,6 @@ pub enum DisallowedSequence {
NsmTooMany,
#[error("nsm repeated")]
NsmRepeated,
#[error("contains visually confusing characters from {group1} and {group2} scripts")]
ConfusedGroups { group1: String, group2: String },
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod tokens;
mod utils;
mod validate;

pub use code_points::*;
pub(crate) use code_points::*;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Changing visibility to pub(crate) may impact external users

Modifying pub use code_points::*; to pub(crate) use code_points::*; restricts the visibility of code_points exports to within the crate. If any external code depends on these exports, this change can introduce breaking changes. Please verify that no public APIs are affected or consider deprecating before removal.

pub use error::{CurrableError, DisallowedSequence, ProcessError};
pub use normalizer::{beautify, normalize, process, tokenize, EnsNameNormalizer, ProcessedName};
pub use tokens::*;
Expand Down
20 changes: 17 additions & 3 deletions src/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@ use crate::{
ProcessError, TokenizedName, ValidatedLabel,
};

/// Main struct to handle ENS name normalization including
/// tokenization, validation, beautification and normalization
#[derive(Default)]
pub struct EnsNameNormalizer {
specs: CodePointsSpecs,
}

/// Result of processing an ENS name.
/// Contains the tokenized name as an intermediate processing result, and the validated labels.
/// Validated labels can be normalized and beautified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessedName {
pub labels: Vec<ValidatedLabel>,
Expand All @@ -19,21 +24,26 @@ impl EnsNameNormalizer {
Self { specs }
}

pub fn tokenize(&self, input: impl AsRef<str>) -> Result<TokenizedName, ProcessError> {
/// Tokenize the input string, return a `TokenizedName` object with `Vec<EnsNameToken>` inside
pub fn tokenize(&self, input: impl AsRef<str>) -> TokenizedName {
TokenizedName::from_input(input.as_ref(), &self.specs, true)
}

/// Process the input string, return a `ProcessedName` object with `Vec<ValidatedLabel>` inside
/// This function will tokenize and validate the name. Processed name can be normalized and beautified.
pub fn process(&self, input: impl AsRef<str>) -> Result<ProcessedName, ProcessError> {
let input = input.as_ref();
let tokenized = self.tokenize(input)?;
let tokenized = self.tokenize(input);
let labels = validate_name(&tokenized, &self.specs)?;
Ok(ProcessedName { tokenized, labels })
}

/// Normalize the input string, return a normalized version of ENS name
pub fn normalize(&self, input: impl AsRef<str>) -> Result<String, ProcessError> {
self.process(input).map(|processed| processed.normalize())
}

    /// Beautify the input string, return a beautified version of ENS name
pub fn beautify(&self, input: impl AsRef<str>) -> Result<String, ProcessError> {
self.process(input).map(|processed| processed.beautify())
}
Expand All @@ -49,18 +59,22 @@ impl ProcessedName {
}
}

pub fn tokenize(input: impl AsRef<str>) -> Result<TokenizedName, ProcessError> {
/// `no-cache` version of [`EnsNameNormalizer::tokenize`]
pub fn tokenize(input: impl AsRef<str>) -> TokenizedName {
EnsNameNormalizer::default().tokenize(input)
}

/// `no-cache` version of [`EnsNameNormalizer::process`]
pub fn process(input: impl AsRef<str>) -> Result<ProcessedName, ProcessError> {
EnsNameNormalizer::default().process(input)
}

/// `no-cache` version of [`EnsNameNormalizer::normalize`]
pub fn normalize(input: impl AsRef<str>) -> Result<String, ProcessError> {
EnsNameNormalizer::default().normalize(input)
}

/// `no-cache` version of [`EnsNameNormalizer::beautify`]
pub fn beautify(input: impl AsRef<str>) -> Result<String, ProcessError> {
EnsNameNormalizer::default().beautify(input)
}
38 changes: 11 additions & 27 deletions src/tokens/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,17 @@ use crate::{
CollapsedEnsNameToken, EnsNameToken, TokenDisallowed, TokenEmoji, TokenIgnored,
TokenMapped, TokenNfc, TokenStop, TokenValid,
},
utils, CodePoint, CodePointsSpecs, ProcessError,
utils, CodePoint, CodePointsSpecs,
};

/// Represents a full ENS name, including the original input and the sequence of tokens
/// vitalik.eth
/// ^^^^^^^^^^^
/// name
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizedName {
pub input: String,
pub tokens: Vec<EnsNameToken>,
}

/// Represents a tokenized ENS label (part of a name separated by periods), including sequence of tokens
/// vitalik.eth
/// ^^^^^^^
/// label 1
/// ^^^
/// label 2
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizedLabel<'a> {
pub tokens: &'a [EnsNameToken],
Expand All @@ -36,11 +28,7 @@ impl TokenizedName {
}

/// Tokenizes an input string, applying NFC normalization if requested.
pub fn from_input(
input: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<Self, ProcessError> {
pub fn from_input(input: impl AsRef<str>, specs: &CodePointsSpecs, apply_nfc: bool) -> Self {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Reconsider removing error handling from from_input method

Changing from_input to return Self instead of Result<Self, ProcessError> might suppress errors that can occur during tokenization. To prevent unexpected behaviors, consider maintaining error propagation to handle invalid inputs gracefully.

tokenize_name(input, specs, apply_nfc)
}

Expand Down Expand Up @@ -143,27 +131,23 @@ where
}
}

fn tokenize_name(
name: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<TokenizedName, ProcessError> {
fn tokenize_name(name: impl AsRef<str>, specs: &CodePointsSpecs, apply_nfc: bool) -> TokenizedName {
let name = name.as_ref();
if name.is_empty() {
return Ok(TokenizedName::empty());
return TokenizedName::empty();
}
let tokens = tokenize_input(name, specs, apply_nfc)?;
Ok(TokenizedName {
let tokens = tokenize_input(name, specs, apply_nfc);
TokenizedName {
input: name.to_string(),
tokens,
})
}
}

fn tokenize_input(
input: impl AsRef<str>,
specs: &CodePointsSpecs,
apply_nfc: bool,
) -> Result<Vec<EnsNameToken>, ProcessError> {
) -> Vec<EnsNameToken> {
let input = input.as_ref();
let emojis = specs.finditer_emoji(input).collect::<Vec<_>>();

Expand Down Expand Up @@ -192,7 +176,7 @@ fn tokenize_input(
perform_nfc_transform(&mut tokens, specs);
}
collapse_valid_tokens(&mut tokens);
Ok(tokens)
tokens
}

fn perform_nfc_transform(tokens: &mut Vec<EnsNameToken>, specs: &CodePointsSpecs) {
Expand Down Expand Up @@ -470,7 +454,7 @@ mod tests {
#[case] expected: Vec<EnsNameToken>,
specs: &CodePointsSpecs,
) {
let tokens = tokenize_input(input, specs, apply_nfc).expect("tokenize");
let tokens = tokenize_input(input, specs, apply_nfc);
assert_eq!(tokens, expected);
}

Expand All @@ -494,7 +478,7 @@ mod tests {
#[case] expected: Vec<CollapsedEnsNameToken>,
specs: &CodePointsSpecs,
) {
let tokens = tokenize_input(input, specs, true).expect("tokenize");
let tokens = tokenize_input(input, specs, true);
let label = TokenizedLabel::from(&tokens);
let result = label.collapse_into_text_or_emoji();
assert_eq!(result, expected);
Expand Down
16 changes: 15 additions & 1 deletion src/tokens/types.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{constants, utils, CodePoint};

/// Represents a token in an ENS name.
/// see https://docs.ens.domains/ensip/15#tokenize for more details.
/// see <https://docs.ens.domains/ensip/15#tokenize> for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EnsNameToken {
Valid(TokenValid),
Expand Down Expand Up @@ -72,35 +72,48 @@ impl EnsNameToken {
}
}

/// A valid vector of code points
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenValid {
pub cps: Vec<CodePoint>,
}

/// A code point that should be mapped to a vector of code points
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenMapped {
pub cps: Vec<CodePoint>,
pub cp: CodePoint,
}

/// A code point that should be ignored
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenIgnored {
pub cp: CodePoint,
}

/// A code point that is disallowed
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenDisallowed {
pub cp: CodePoint,
}

/// Represents a stop token (.)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenStop {
pub cp: CodePoint,
}

/// Represents a vector of code points that should be normalized using NFC
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenNfc {
pub cps: Vec<CodePoint>,
pub input: Vec<CodePoint>,
}

/// Represents a vector of code points of emoji
/// `cps_input` contains the vector of code points from the input string
/// `emoji` contains vector of beautified emoji code points
/// `cps_no_fe0f` contains vector of code points of emoji without `FE0F`
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenEmoji {
pub input: String,
Expand All @@ -109,6 +122,7 @@ pub struct TokenEmoji {
pub cps_no_fe0f: Vec<CodePoint>,
}

/// Represents a collapsed token in an ENS name: either text or emoji
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CollapsedEnsNameToken {
Text(TokenValid),
Expand Down
Loading
Loading