|
| 1 | +use std::collections::{HashMap, HashSet}; |
| 2 | + |
| 3 | +use litrs::StringLit; |
| 4 | +use proc_macro2::TokenStream; |
| 5 | +use quote::{quote, ToTokens}; |
| 6 | +use regex_syntax::hir::Hir; |
| 7 | +use syn::{self, parse_macro_input, DataStruct, Ident, Type}; |
| 8 | + |
| 9 | +struct Data<'a> { |
| 10 | + captures: Vec<&'a Hir>, |
| 11 | + named_captures: HashMap<String, usize>, |
| 12 | +} |
| 13 | + |
| 14 | +impl<'a> Data<'a> { |
| 15 | + fn walk_re(&mut self, r: &'a Hir) { |
| 16 | + match r.kind() { |
| 17 | + regex_syntax::hir::HirKind::Empty => (), |
| 18 | + regex_syntax::hir::HirKind::Literal(_) => (), |
| 19 | + regex_syntax::hir::HirKind::Class(_) => (), |
| 20 | + regex_syntax::hir::HirKind::Anchor(_) => (), |
| 21 | + regex_syntax::hir::HirKind::WordBoundary(_) => (), |
| 22 | + regex_syntax::hir::HirKind::Repetition(e) => self.walk_re(&e.hir), |
| 23 | + regex_syntax::hir::HirKind::Group(g) => match &g.kind { |
| 24 | + regex_syntax::hir::GroupKind::CaptureIndex(i) => { |
| 25 | + let i = *i as usize - 1; |
| 26 | + if self.captures.len() != i { |
| 27 | + panic!("ASSERTION cap len {} but index {}", self.captures.len(), i) |
| 28 | + } |
| 29 | + self.captures.push(&g.hir); |
| 30 | + } |
| 31 | + regex_syntax::hir::GroupKind::CaptureName { name, index } => { |
| 32 | + let index = *index as usize - 1; |
| 33 | + if self.captures.len() != index { |
| 34 | + panic!( |
| 35 | + "ASSERTION cap len {} but index {}", |
| 36 | + self.captures.len(), |
| 37 | + index |
| 38 | + ) |
| 39 | + } |
| 40 | + self.captures.push(&g.hir); |
| 41 | + self.named_captures.insert(name.clone(), index); |
| 42 | + } |
| 43 | + regex_syntax::hir::GroupKind::NonCapturing => self.walk_re(&g.hir), |
| 44 | + }, |
| 45 | + regex_syntax::hir::HirKind::Concat(c) => { |
| 46 | + for c in c { |
| 47 | + self.walk_re(c); |
| 48 | + } |
| 49 | + } |
| 50 | + regex_syntax::hir::HirKind::Alternation(_) => (), |
| 51 | + } |
| 52 | + } |
| 53 | + |
| 54 | + fn gen_struct_tuple( |
| 55 | + &self, |
| 56 | + i: &mut usize, |
| 57 | + fields: &mut dyn Iterator<Item = &Type>, |
| 58 | + ) -> Vec<TokenStream> { |
| 59 | + let mut out = vec![]; |
| 60 | + for ty in fields { |
| 61 | + match ty { |
| 62 | + Type::Tuple(t) => { |
| 63 | + let child = self.gen_struct_tuple(i, &mut t.elems.iter()); |
| 64 | + out.push(quote!((#(#child),*))); |
| 65 | + } |
| 66 | + ty => { |
| 67 | + *i += 1; |
| 68 | + out.push( |
| 69 | + quote!(#ty::from_str(caps_.get(#i).map(|m| m.as_str()).unwrap_or(""))?), |
| 70 | + ); |
| 71 | + } |
| 72 | + } |
| 73 | + } |
| 74 | + out |
| 75 | + } |
| 76 | + |
| 77 | + fn gen_struct(&self, ident: &Ident, d: &DataStruct) -> TokenStream { |
| 78 | + match &d.fields { |
| 79 | + syn::Fields::Named(n) => { |
| 80 | + let mut field_tokens = vec![]; |
| 81 | + let mut keys = self |
| 82 | + .named_captures |
| 83 | + .keys() |
| 84 | + .into_iter() |
| 85 | + .map(&String::to_string) |
| 86 | + .collect::<HashSet<String>>(); |
| 87 | + for field in &n.named { |
| 88 | + let name = field.ident.as_ref().unwrap(); |
| 89 | + let i = match self.named_captures.get(&name.to_string()) { |
| 90 | + Some(c) => *c, |
| 91 | + None => panic!("No named capture for field {}", name), |
| 92 | + }; |
| 93 | + keys.remove(&name.to_string()); |
| 94 | + let ty = &field.ty; |
| 95 | + let i = i + 1; |
| 96 | + field_tokens.push(quote!(#name: #ty::from_str(caps_.get(#i).map(|m| m.as_str()).unwrap_or(""))?)); |
| 97 | + } |
| 98 | + if !keys.is_empty() { |
| 99 | + panic!("No fields for named captures: {:?}", keys); |
| 100 | + } |
| 101 | + if self.captures.len() > self.named_captures.len() { |
| 102 | + panic!("This is a struct with named fields but there are some unused unnamed captures"); |
| 103 | + } |
| 104 | + quote!(Ok(#ident { |
| 105 | + #(#field_tokens),* |
| 106 | + })) |
| 107 | + } |
| 108 | + syn::Fields::Unnamed(u) => { |
| 109 | + if !self.named_captures.is_empty() { |
| 110 | + panic!( |
| 111 | + "Tuples must have only unnamed captures, but named captures are present" |
| 112 | + ); |
| 113 | + } |
| 114 | + let mut i = 0usize; |
| 115 | + let field_tokens = |
| 116 | + self.gen_struct_tuple(&mut i, &mut u.unnamed.iter().map(|e| &e.ty)); |
| 117 | + if i != self.captures.len() { |
| 118 | + panic!( |
| 119 | + "Struct has {} fields but only {} captures", |
| 120 | + u.unnamed.len(), |
| 121 | + self.captures.len() |
| 122 | + ); |
| 123 | + } |
| 124 | + quote!(Ok(#ident ( |
| 125 | + #(#field_tokens),* |
| 126 | + ))) |
| 127 | + } |
| 128 | + syn::Fields::Unit => { |
| 129 | + if !self.captures.is_empty() { |
| 130 | + panic!("This is an empty struct but regex has captures") |
| 131 | + } |
| 132 | + quote!(Ok(#ident ())) |
| 133 | + } |
| 134 | + } |
| 135 | + } |
| 136 | +} |
| 137 | + |
| 138 | +fn gen_value(regex_raw: &str, ast: &syn::DeriveInput) -> TokenStream { |
| 139 | + let regex = regex_syntax::Parser::new().parse(regex_raw).unwrap(); |
| 140 | + let mut data = Data { |
| 141 | + captures: Default::default(), |
| 142 | + named_captures: Default::default(), |
| 143 | + }; |
| 144 | + data.walk_re(®ex); |
| 145 | + match &ast.data { |
| 146 | + syn::Data::Struct(d) => data.gen_struct(&ast.ident, d), |
| 147 | + syn::Data::Enum(_) => panic!("enum not supported yet"), |
| 148 | + syn::Data::Union(_) => panic!("union not supported"), |
| 149 | + } |
| 150 | +} |
| 151 | + |
| 152 | +fn gen_impls(regex_raw: &str, ast: syn::DeriveInput) -> TokenStream { |
| 153 | + let value = gen_value(regex_raw, &ast); |
| 154 | + let name = &ast.ident; |
| 155 | + let mut out = vec![ast.to_token_stream()]; |
| 156 | + #[cfg(feature = "unicode")] |
| 157 | + out.push(quote! { |
| 158 | + impl std::str::FromStr for #name { |
| 159 | + type Err = structre::Error; |
| 160 | + fn from_str(input: &str) -> Result<Self, Self::Err> { |
| 161 | + #[allow(unused_imports)] |
| 162 | + use std::str::FromStr; |
| 163 | + static re: structre::Lazy<structre::UnicodeRegex> = structre::Lazy::new( |
| 164 | + || structre::UnicodeRegex::new(#regex_raw).unwrap()); |
| 165 | + let caps_ = re.captures(input).ok_or_else(|| structre::Error::msg("No match"))?; |
| 166 | + #value |
| 167 | + } |
| 168 | + } |
| 169 | + }); |
| 170 | + #[cfg(feature = "bytes")] |
| 171 | + out.push(quote! { |
| 172 | + impl std::str::FromU8Str for #name { |
| 173 | + fn from_str(input: &[u8]) -> structre::Result<Self> { |
| 174 | + #[allow(unused_imports)] |
| 175 | + use structre::FromU8Str; |
| 176 | + static re: structre::Lazy<structre::BytesRegex> = structre::Lazy::new( |
| 177 | + || structre::BytesRegex::new(#regex_raw).unwrap()); |
| 178 | + let caps_ = re.captures(input).ok_or_else(|| "No match".into())?; |
| 179 | + #value |
| 180 | + } |
| 181 | + } |
| 182 | + }); |
| 183 | + TokenStream::from_iter(out) |
| 184 | +} |
| 185 | + |
| 186 | +#[proc_macro_attribute] |
| 187 | +pub fn structre( |
| 188 | + args: proc_macro::TokenStream, |
| 189 | + body: proc_macro::TokenStream, |
| 190 | +) -> proc_macro::TokenStream { |
| 191 | + let mut args = proc_macro2::TokenStream::from(args).into_iter(); |
| 192 | + let regex_raw = match args.next().unwrap() { |
| 193 | + proc_macro2::TokenTree::Literal(l) => match StringLit::try_from(&l) { |
| 194 | + Ok(l) => l.value().to_string(), |
| 195 | + Err(_) => panic!("First arg must be literal string, got {}", l), |
| 196 | + }, |
| 197 | + t => panic!("First arg must be literal, got {}", t), |
| 198 | + }; |
| 199 | + if args.next().is_some() { |
| 200 | + panic!("Only takes one arg, got more than one"); |
| 201 | + } |
| 202 | + let ast = parse_macro_input!(body as syn::DeriveInput); |
| 203 | + gen_impls(®ex_raw, ast).into() |
| 204 | +} |
| 205 | + |
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use proc_macro2::TokenStream;
    use quote::quote;

    use crate::gen_value;

    /// Runs `gen_value` for `regex` against the item in `item_src` and compares
    /// the rendered tokens with `expected`.
    fn assert_generates(regex: &str, item_src: &str, expected: TokenStream) {
        let item: syn::DeriveInput =
            syn::parse2(TokenStream::from_str(item_src).unwrap()).unwrap();
        let actual = gen_value(regex, &item);
        assert_eq!(actual.to_string(), expected.to_string());
    }

    // A single unnamed capture fills a newtype struct.
    #[test]
    fn newtype_string() {
        assert_generates(
            "(a)",
            "struct Parsed(String);",
            quote!(Ok(Parsed(String::from_str(
                caps_.get(1usize).map(|m| m.as_str()).unwrap_or("")
            )?))),
        );
    }

    // A tuple-typed field consumes one capture per tuple element.
    #[test]
    fn tuple() {
        assert_generates(
            "(a)(b)",
            "struct Parsed((String, u32));",
            quote!(Ok(Parsed((
                String::from_str(caps_.get(1usize).map(|m| m.as_str()).unwrap_or(""))?,
                u32::from_str(caps_.get(2usize).map(|m| m.as_str()).unwrap_or(""))?
            )))),
        );
    }

    // Named captures are matched to struct fields by name.
    #[test]
    fn struct_() {
        assert_generates(
            "(?P<a>a)(?P<b>b)",
            "struct Parsed { a: String, b: u32 }",
            quote!(Ok(Parsed {
                a: String::from_str(caps_.get(1usize).map(|m| m.as_str()).unwrap_or(""))?,
                b: u32::from_str(caps_.get(2usize).map(|m| m.as_str()).unwrap_or(""))?
            })),
        );
    }
}
0 commit comments