Skip to content

Commit 41734af

Browse files
joe-conigliaro authored and
medvednikov committed
compiler: detect typos in function/variable/module names
1 parent 5055ac4 commit 41734af

4 files changed

Lines changed: 147 additions & 0 deletions

File tree

compiler/fn.v

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,3 +1023,23 @@ fn (f &Fn) str_args(table &Table) string {
10231023
}
10241024
return s
10251025
}
1026+
1027+
// find_misspelled_local_var returns the local variable of `f` whose qualified
// name (`mod.var`) is most similar to `name`.
// `min_match` is the minimum dice coefficient (0.0 - 1.0) a candidate must
// reach; '' is returned when no candidate reaches it.
fn (f &Fn) find_misspelled_local_var(name string, min_match f64) string {
	// only names inside this function's module are considered; the prefix
	// is the same for every variable, so test it once up front
	if !name.starts_with(f.mod) {
		return ''
	}
	mut closest := f64(0)
	mut closest_var := ''
	for var in f.local_vars {
		// entries with an empty name are skipped
		if var.name == '' {
			continue
		}
		n := '${f.mod}.$var.name'
		// candidates whose length differs by more than 3 are skipped
		if n.len - name.len > 3 || name.len - n.len > 3 {
			continue
		}
		p := strings.dice_coefficient(name, n)
		if p > closest {
			closest = p
			closest_var = n
		}
	}
	if closest >= min_match {
		return closest_var
	}
	return ''
}

compiler/parser.v

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,11 @@ fn (p mut Parser) name_expr() string {
16481648
f = p.table.find_fn(name)
16491649
}
16501650
if f.name == '' {
1651+
// check for misspelled function / variable / module
1652+
suggested := p.table.identify_typo(name, p.cur_fn, p.import_table)
1653+
if suggested != '' {
1654+
p.error('undefined: `$name`. did you mean:$suggested')
1655+
}
16511656
// If orig_name is a mod, then printing undefined: `mod` tells us nothing
16521657
// if p.table.known_mod(orig_name) {
16531658
if p.table.known_mod(orig_name) || p.import_table.known_alias(orig_name) {

compiler/table.v

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,3 +926,66 @@ fn (t &Type) contains_field_type(typ string) bool {
926926
}
927927
return false
928928
}
929+
930+
// identify_typo looks for a function, a local variable of `current_fn` and an
// imported module (from `fit`) whose name closely resembles `name`.
// Returns one suggestion line per kind that matched, or '' when none did.
fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTable) string {
	// names shorter than 2 characters are not checked
	if name.len < 2 {
		return ''
	}
	// minimum dice coefficient (0.0 - 1.0) a candidate has to reach
	min_match := 0.8
	// the lookups compare against dotted `mod.name` candidates, so turn the
	// `__` and `_dot_` separators into `.` first
	name_orig := name.replace('__', '.').replace('_dot_', '.')
	mut output := ''
	// closest function
	fn_match := table.find_misspelled_fn(name_orig, min_match)
	if fn_match != '' {
		output += '\n * function: `$fn_match`'
	}
	// closest local variable of the current function
	var_match := current_fn.find_misspelled_local_var(name_orig, min_match)
	if var_match != '' {
		output += '\n * variable: `$var_match`'
	}
	// closest imported module
	mod_match := table.find_misspelled_imported_mod(name_orig, fit, min_match)
	if mod_match != '' {
		output += '\n * module: `$mod_match`'
	}
	return output
}
954+
955+
// find_misspelled_fn returns the qualified name (`mod.fn`) of the function in
// the table that is most similar to `name`, or '' when the best dice
// coefficient is below `min_match`.
fn (table &Table) find_misspelled_fn(name string, min_match f64) string {
	mut best_score := f64(0)
	mut best_name := ''
	for _, f in table.fns {
		// only functions whose module prefixes `name` are candidates
		if !name.starts_with(f.mod) {
			continue
		}
		candidate := '${f.mod}.$f.name'
		// candidates whose length differs by more than 3 are skipped
		len_diff := candidate.len - name.len
		if len_diff > 3 || len_diff < -3 {
			continue
		}
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_name = candidate
		}
	}
	if best_score >= min_match {
		return best_name
	}
	return ''
}
973+
974+
// find_misspelled_imported_mod returns the import of `fit` whose qualified
// alias (`module_name.alias`) is most similar to `name`, formatted as
// `alias (module)`, or '' when the best dice coefficient is below `min_match`.
fn (table &Table) find_misspelled_imported_mod(name string, fit &FileImportTable, min_match f64) string {
	// the prefix is the same for every import; if it does not match,
	// no candidate can be selected
	if !name.starts_with(fit.module_name) {
		return ''
	}
	mut best_score := f64(0)
	mut best_mod := ''
	for alias, mod_path in fit.imports {
		candidate := '${fit.module_name}.$alias'
		// candidates whose length differs by more than 3 are skipped
		len_diff := candidate.len - name.len
		if len_diff > 3 || len_diff < -3 {
			continue
		}
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_mod = '$alias ($mod_path)'
		}
	}
	if best_score >= min_match {
		return best_mod
	}
	return ''
}

vlib/strings/similarity.v

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
module strings
2+
3+
// levenshtein_distance uses the levenshtein distance algorithm to calculate
// the distance between two strings (lower is closer).
// The result is the number of single-byte insertions, deletions and
// substitutions needed to turn `a` into `b`.
pub fn levenshtein_distance(a, b string) int {
	// f[j] holds the distance between the processed prefix of `a` and b[..j]
	mut f := [int(0); b.len+1]
	// before any byte of `a` is consumed, reaching b[..j] takes j insertions
	for j := 0; j < f.len; j++ {
		f[j] = j
	}
	for ca in a {
		mut j := 1
		// fj1 is the value f[j-1] had in the previous row (the diagonal)
		mut fj1 := f[0]
		f[0]++
		for cb in b {
			// cheapest of deletion and insertion
			mut mn := if f[j]+1 <= f[j-1]+1 { f[j]+1 } else { f[j-1]+1 }
			if cb != ca {
				// substitution
				mn = if mn <= fj1+1 { mn } else { fj1+1 }
			} else {
				// bytes match, the diagonal carries over at no cost
				mn = if mn <= fj1 { mn } else { fj1 }
			}
			// remember the old f[j] as the next diagonal before overwriting it
			fj1 = f[j]
			f[j] = mn
			j++
		}
	}
	return f[f.len-1]
}
25+
26+
// levenshtein_distance_percentage uses the levenshtein distance algorithm to
// calculate how similar two strings are as a percentage (higher is closer).
// Returns 100.0 for identical strings, including two empty strings.
pub fn levenshtein_distance_percentage(a, b string) f64 {
	l := if a.len >= b.len { a.len } else { b.len }
	// both strings are empty: they are identical, and dividing by the
	// zero length below would yield NaN
	if l == 0 {
		return 100.00
	}
	d := levenshtein_distance(a, b)
	return (1.00 - f64(d)/f64(l)) * 100.00
}
33+
34+
// dice_coefficient is an implementation of the Sørensen–Dice coefficient.
// It measures the similarity between two strings by comparing their
// bigrams (pairs of adjacent bytes).
// Returns an f64 between 0.0 (not similar) and 1.0 (exact match).
pub fn dice_coefficient(s1, s2 string) f64 {
	if s1.len == 0 || s2.len == 0 { return 0.0 }
	if s1 == s2 { return 1.0 }
	// a string shorter than 2 bytes has no bigrams
	if s1.len < 2 || s2.len < 2 { return 0.0 }
	// count how often each bigram occurs in s1
	mut first_bigrams := map[string]int
	for i := 0; i < s1.len-1; i++ {
		// key on the two-byte substring itself so that order matters
		// ('ab' and 'ba' are different bigrams)
		bigram := s1.substr(i, i+2)
		first_bigrams[bigram] = if bigram in first_bigrams { first_bigrams[bigram]+1 } else { 1 }
	}
	mut intersection_size := 0
	for i := 0; i < s2.len-1; i++ {
		bigram := s2.substr(i, i+2)
		count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 }
		if count > 0 {
			// consume the matched occurrence so that a repeated bigram in s2
			// cannot be matched more often than it occurs in s1
			first_bigrams[bigram] = count - 1
			intersection_size++
		}
	}
	// each string of length n has n-1 bigrams
	return (2.0 * f64(intersection_size)) / (f64(s1.len) + f64(s2.len) - 2)
}

0 commit comments

Comments
 (0)