Skip to content

Commit a7c0f89

Browse files
author
andrew
committed
Initial commit
0 parents  commit a7c0f89

File tree

7 files changed

+352
-0
lines changed

7 files changed

+352
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/target
2+
/Cargo.lock

Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "structre"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[workspace]
7+
members = ["src/proc_macros"]
8+
9+
[features]
10+
default = ["unicode", "bytes"]
11+
unicode = ["structre_proc_macros/unicode", "regex/unicode"]
12+
bytes = ["atoi"]
13+
14+
[dependencies]
15+
anyhow = "1.0.66"
16+
atoi = { version = "2.0.0", optional = true }
17+
once_cell = "1.16.0"
18+
regex = { version = "1.7.0", default-features = false, features = ["std"] }
19+
structre_proc_macros = { path = "src/proc_macros" }

readme.md

Whitespace-only changes.

src/lib.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
pub use anyhow::{Error, Result};
2+
pub use once_cell::sync::Lazy;
3+
pub use structre_proc_macros::structre;
4+
5+
#[cfg(feature = "bytes")]
6+
pub mod structre_bytes {
7+
use atoi::{FromRadix10, FromRadix10Signed};
8+
pub use regex::bytes::Regex as BytesRegex;
9+
10+
trait FromU8Str: Sized {
11+
fn from_str(str: &[u8]) -> Option<Self>;
12+
}
13+
14+
impl FromU8Str for Box<[u8]> {
15+
fn from_str(str: &[u8]) -> Option<Self> {
16+
Box::new(str)
17+
}
18+
}
19+
20+
impl<T: FromRadix10> FromU8Str for T {
21+
fn from_str(str: &[u8]) -> Option<Self> {
22+
atoi::atoi(str)
23+
}
24+
}
25+
26+
impl<T: FromRadix10Signed> FromU8Str for T {
27+
fn from_str(str: &[u8]) -> Option<Self> {
28+
atoi::atoi(str)
29+
}
30+
}
31+
}
32+
#[cfg(feature = "bytes")]
33+
pub use structre_bytes::*;
34+
35+
#[cfg(feature = "unicode")]
36+
pub use regex::Regex as UnicodeRegex;

src/proc_macros/Cargo.toml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[package]
2+
name = "structre_proc_macros"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[lib]
7+
proc-macro = true
8+
path = "mod.rs"
9+
10+
[features]
11+
default = ["unicode"]
12+
unicode = []
13+
bytes = []
14+
15+
[dependencies]
16+
litrs = "0.2.3"
17+
proc-macro2 = "1.0.47"
18+
quote = "1.0.21"
19+
regex-syntax = "0.6.28"
20+
syn = "1.0.103"

src/proc_macros/mod.rs

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
use std::collections::{HashMap, HashSet};
2+
3+
use litrs::StringLit;
4+
use proc_macro2::TokenStream;
5+
use quote::{quote, ToTokens};
6+
use regex_syntax::hir::Hir;
7+
use syn::{self, parse_macro_input, DataStruct, Ident, Type};
8+
9+
struct Data<'a> {
10+
captures: Vec<&'a Hir>,
11+
named_captures: HashMap<String, usize>,
12+
}
13+
14+
impl<'a> Data<'a> {
15+
fn walk_re(&mut self, r: &'a Hir) {
16+
match r.kind() {
17+
regex_syntax::hir::HirKind::Empty => (),
18+
regex_syntax::hir::HirKind::Literal(_) => (),
19+
regex_syntax::hir::HirKind::Class(_) => (),
20+
regex_syntax::hir::HirKind::Anchor(_) => (),
21+
regex_syntax::hir::HirKind::WordBoundary(_) => (),
22+
regex_syntax::hir::HirKind::Repetition(e) => self.walk_re(&e.hir),
23+
regex_syntax::hir::HirKind::Group(g) => match &g.kind {
24+
regex_syntax::hir::GroupKind::CaptureIndex(i) => {
25+
let i = *i as usize - 1;
26+
if self.captures.len() != i {
27+
panic!("ASSERTION cap len {} but index {}", self.captures.len(), i)
28+
}
29+
self.captures.push(&g.hir);
30+
}
31+
regex_syntax::hir::GroupKind::CaptureName { name, index } => {
32+
let index = *index as usize - 1;
33+
if self.captures.len() != index {
34+
panic!(
35+
"ASSERTION cap len {} but index {}",
36+
self.captures.len(),
37+
index
38+
)
39+
}
40+
self.captures.push(&g.hir);
41+
self.named_captures.insert(name.clone(), index);
42+
}
43+
regex_syntax::hir::GroupKind::NonCapturing => self.walk_re(&g.hir),
44+
},
45+
regex_syntax::hir::HirKind::Concat(c) => {
46+
for c in c {
47+
self.walk_re(c);
48+
}
49+
}
50+
regex_syntax::hir::HirKind::Alternation(_) => (),
51+
}
52+
}
53+
54+
fn gen_struct_tuple(
55+
&self,
56+
i: &mut usize,
57+
fields: &mut dyn Iterator<Item = &Type>,
58+
) -> Vec<TokenStream> {
59+
let mut out = vec![];
60+
for ty in fields {
61+
match ty {
62+
Type::Tuple(t) => {
63+
let child = self.gen_struct_tuple(i, &mut t.elems.iter());
64+
out.push(quote!((#(#child),*)));
65+
}
66+
ty => {
67+
*i += 1;
68+
out.push(
69+
quote!(#ty::from_str(caps_.get(#i).map(|m| m.as_str()).unwrap_or(""))?),
70+
);
71+
}
72+
}
73+
}
74+
out
75+
}
76+
77+
fn gen_struct(&self, ident: &Ident, d: &DataStruct) -> TokenStream {
78+
match &d.fields {
79+
syn::Fields::Named(n) => {
80+
let mut field_tokens = vec![];
81+
let mut keys = self
82+
.named_captures
83+
.keys()
84+
.into_iter()
85+
.map(&String::to_string)
86+
.collect::<HashSet<String>>();
87+
for field in &n.named {
88+
let name = field.ident.as_ref().unwrap();
89+
let i = match self.named_captures.get(&name.to_string()) {
90+
Some(c) => *c,
91+
None => panic!("No named capture for field {}", name),
92+
};
93+
keys.remove(&name.to_string());
94+
let ty = &field.ty;
95+
let i = i + 1;
96+
field_tokens.push(quote!(#name: #ty::from_str(caps_.get(#i).map(|m| m.as_str()).unwrap_or(""))?));
97+
}
98+
if !keys.is_empty() {
99+
panic!("No fields for named captures: {:?}", keys);
100+
}
101+
if self.captures.len() > self.named_captures.len() {
102+
panic!("This is a struct with named fields but there are some unused unnamed captures");
103+
}
104+
quote!(Ok(#ident {
105+
#(#field_tokens),*
106+
}))
107+
}
108+
syn::Fields::Unnamed(u) => {
109+
if !self.named_captures.is_empty() {
110+
panic!(
111+
"Tuples must have only unnamed captures, but named captures are present"
112+
);
113+
}
114+
let mut i = 0usize;
115+
let field_tokens =
116+
self.gen_struct_tuple(&mut i, &mut u.unnamed.iter().map(|e| &e.ty));
117+
if i != self.captures.len() {
118+
panic!(
119+
"Struct has {} fields but only {} captures",
120+
u.unnamed.len(),
121+
self.captures.len()
122+
);
123+
}
124+
quote!(Ok(#ident (
125+
#(#field_tokens),*
126+
)))
127+
}
128+
syn::Fields::Unit => {
129+
if !self.captures.is_empty() {
130+
panic!("This is an empty struct but regex has captures")
131+
}
132+
quote!(Ok(#ident ()))
133+
}
134+
}
135+
}
136+
}
137+
138+
fn gen_value(regex_raw: &str, ast: &syn::DeriveInput) -> TokenStream {
139+
let regex = regex_syntax::Parser::new().parse(regex_raw).unwrap();
140+
let mut data = Data {
141+
captures: Default::default(),
142+
named_captures: Default::default(),
143+
};
144+
data.walk_re(&regex);
145+
match &ast.data {
146+
syn::Data::Struct(d) => data.gen_struct(&ast.ident, d),
147+
syn::Data::Enum(_) => panic!("enum not supported yet"),
148+
syn::Data::Union(_) => panic!("union not supported"),
149+
}
150+
}
151+
152+
fn gen_impls(regex_raw: &str, ast: syn::DeriveInput) -> TokenStream {
153+
let value = gen_value(regex_raw, &ast);
154+
let name = &ast.ident;
155+
let mut out = vec![ast.to_token_stream()];
156+
#[cfg(feature = "unicode")]
157+
out.push(quote! {
158+
impl std::str::FromStr for #name {
159+
type Err = structre::Error;
160+
fn from_str(input: &str) -> Result<Self, Self::Err> {
161+
#[allow(unused_imports)]
162+
use std::str::FromStr;
163+
static re: structre::Lazy<structre::UnicodeRegex> = structre::Lazy::new(
164+
|| structre::UnicodeRegex::new(#regex_raw).unwrap());
165+
let caps_ = re.captures(input).ok_or_else(|| structre::Error::msg("No match"))?;
166+
#value
167+
}
168+
}
169+
});
170+
#[cfg(feature = "bytes")]
171+
out.push(quote! {
172+
impl std::str::FromU8Str for #name {
173+
fn from_str(input: &[u8]) -> structre::Result<Self> {
174+
#[allow(unused_imports)]
175+
use structre::FromU8Str;
176+
static re: structre::Lazy<structre::BytesRegex> = structre::Lazy::new(
177+
|| structre::BytesRegex::new(#regex_raw).unwrap());
178+
let caps_ = re.captures(input).ok_or_else(|| "No match".into())?;
179+
#value
180+
}
181+
}
182+
});
183+
TokenStream::from_iter(out)
184+
}
185+
186+
#[proc_macro_attribute]
187+
pub fn structre(
188+
args: proc_macro::TokenStream,
189+
body: proc_macro::TokenStream,
190+
) -> proc_macro::TokenStream {
191+
let mut args = proc_macro2::TokenStream::from(args).into_iter();
192+
let regex_raw = match args.next().unwrap() {
193+
proc_macro2::TokenTree::Literal(l) => match StringLit::try_from(&l) {
194+
Ok(l) => l.value().to_string(),
195+
Err(_) => panic!("First arg must be literal string, got {}", l),
196+
},
197+
t => panic!("First arg must be literal, got {}", t),
198+
};
199+
if args.next().is_some() {
200+
panic!("Only takes one arg, got more than one");
201+
}
202+
let ast = parse_macro_input!(body as syn::DeriveInput);
203+
gen_impls(&regex_raw, ast).into()
204+
}
205+
206+
#[cfg(test)]
207+
mod tests {
208+
use std::str::FromStr;
209+
210+
use proc_macro2::TokenStream;
211+
212+
use crate::gen_value;
213+
use quote::quote;
214+
215+
#[test]
216+
fn newtype_string() {
217+
assert_eq!(
218+
gen_value(
219+
"(a)",
220+
&syn::parse2(TokenStream::from_str("struct Parsed(String);").unwrap()).unwrap(),
221+
)
222+
.to_string(),
223+
quote!(Ok(Parsed(String::from_str(
224+
caps_.get(1usize).map(|m| m.as_str()).unwrap_or("")
225+
)?)))
226+
.to_string()
227+
);
228+
}
229+
230+
#[test]
231+
fn tuple() {
232+
assert_eq!(
233+
gen_value(
234+
"(a)(b)",
235+
&syn::parse2(TokenStream::from_str("struct Parsed((String, u32));").unwrap())
236+
.unwrap(),
237+
)
238+
.to_string(),
239+
quote!(Ok(Parsed((
240+
String::from_str(caps_.get(1usize).map(|m| m.as_str()).unwrap_or(""))?,
241+
u32::from_str(caps_.get(2usize).map(|m| m.as_str()).unwrap_or(""))?
242+
))))
243+
.to_string()
244+
);
245+
}
246+
247+
#[test]
248+
fn struct_() {
249+
assert_eq!(
250+
gen_value(
251+
"(?P<a>a)(?P<b>b)",
252+
&syn::parse2(TokenStream::from_str("struct Parsed { a: String, b: u32 }").unwrap())
253+
.unwrap(),
254+
)
255+
.to_string(),
256+
quote!(Ok(Parsed {
257+
a: String::from_str(caps_.get(1usize).map(|m| m.as_str()).unwrap_or(""))?,
258+
b: u32::from_str(caps_.get(2usize).map(|m| m.as_str()).unwrap_or(""))?
259+
}))
260+
.to_string()
261+
);
262+
}
263+
}

tests/test.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#[cfg(feature = "unicode")]
2+
#[test]
3+
fn match_() {
4+
use std::str::FromStr;
5+
6+
use structre::structre;
7+
#[structre("(a)(44)")]
8+
struct Parsed(String, u32);
9+
let v = Parsed::from_str("a44").unwrap();
10+
assert_eq!(v.0, "a");
11+
assert_eq!(v.1, 44);
12+
}

0 commit comments

Comments
 (0)