-
Notifications
You must be signed in to change notification settings - Fork 182
Expand file tree
/
Copy pathPattern.carp
More file actions
152 lines (123 loc) · 5.53 KB
/
Pattern.carp
File metadata and controls
152 lines (123 loc) · 5.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
(system-include "carp_pattern.h")
(defmodule Pattern
  ;; Pattern matching on strings.
  ;;
  ;; The functions introduced with `register` have no Carp body in this
  ;; module; they are bound to external implementations (presumably the ones
  ;; provided by the `carp_pattern.h` header this file includes).

  (doc find "finds the index of a pattern in a string.
Returns `-1` if it doesn’t find a matching pattern.")
  (register find (Fn [&Pattern &String] Int))

  (doc find-all "finds all indices of a pattern in a string.
Returns `[]` if it doesn’t find a matching pattern.")
  (register find-all (Fn [&Pattern &String] (Array Int)))

  (doc match-groups "finds the match groups of the first match of a pattern in
a string.
Returns `[]` if it doesn’t find a matching pattern.")
  (register match-groups (Fn [&Pattern &String] (Array String)))

  ;; The return type is `String`, not an array, so the "no match" result is
  ;; documented as an empty string rather than `[]`.
  (doc match-str "finds the first match of a pattern in a string.
Returns an empty string if it doesn’t find a matching pattern.")
  (register match-str (Fn [&Pattern &String] String))

  (doc global-match "finds all matches of a pattern in a string as a nested
array.
Returns `[]` if it doesn’t find a matching pattern.")
  (register global-match (Fn [&Pattern &String] (Array (Array String))))

  (doc substitute "finds all matches of a pattern in a string and replaces it
by another pattern `n` times.
The substitute pattern can reference the original pattern by match group
indices, such as `\1`. This means that backslashes need to be double escaped.
If you want to replace all occurrences of the pattern, use `-1`.")
  (register substitute (Fn [&Pattern &String &String Int] String))

  (doc matches? "checks whether a pattern matches a string.")
  ;; A match exists exactly when `find` reports something other than its
  ;; "not found" value `-1`.
  (defn matches? [pat s] (/= (find pat s) -1))

  ;; String conversion, registered and declared as the `str` / `prn`
  ;; interface implementations for Pattern.
  (register str (Fn [&Pattern] String))
  (implements str Pattern.str)
  (register prn (Fn [&Pattern] String))
  (implements prn Pattern.prn)

  ;; Creates a Pattern from a string.
  (register init (Fn [&String] Pattern))

  ;; Equality, destruction and copying, each declared as the implementation
  ;; of the corresponding interface (`=`, `delete`, `copy`).
  (register = (Fn [&Pattern &Pattern] Bool))
  (implements = Pattern.=)
  (register delete (Fn [Pattern] ()))
  (implements delete Pattern.delete)
  (register copy (Fn [&Pattern] Pattern))
  (implements copy Pattern.copy)

  (doc from-chars "creates a pattern that matches a group of characters from a
list of those characters.")
  ;; The characters are placed verbatim between `[` and `]`.
  ;; NOTE(review): nothing is escaped here, so characters that are special
  ;; inside a character class presumably need care by the caller - confirm.
  (defn from-chars [chars]
    (Pattern.init &(str* @"[" (String.from-chars chars) @"]")))

  (doc global-match-str "finds all matches of a pattern in a string as an array
of strings.
Each entry is the first element of the corresponding `global-match` result.
Returns `[]` if it doesn’t find a matching pattern.")
  ;; NOTE(review): `unsafe-first` takes the first match group of each match;
  ;; presumably that is the whole match when the pattern has no capture
  ;; groups - confirm the behavior for patterns that do capture.
  (defn global-match-str [p s]
    (Array.copy-map &(fn [x] @(Array.unsafe-first x)) &(global-match p s)))

  (doc split "splits a string by a pattern.")
  (defn split [p s]
    (let-do [idx (find-all p s)           ; start index of every match
             strs (global-match-str p s)  ; text of every match, used for its length
             lidx (Array.length &idx)
             ;; n matches cut the string into n + 1 pieces
             result (Array.allocate (Int.inc lidx))]
      ;; Piece 0: everything before the first match, or the whole string when
      ;; there is no match at all.
      (Array.aset-uninitialized! &result 0
        (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
      ;; Pieces 1 .. n-1: the text between the end of match i and the start of
      ;; match i + 1.
      (for [i 0 (Int.dec lidx)]
        (let [plen (length (Array.unsafe-nth &strs i))]
          (Array.aset-uninitialized! &result (Int.inc i)
            (slice s (+ @(Array.unsafe-nth &idx i) plen)
                     @(Array.unsafe-nth &idx (Int.inc i))))))
      ;; Piece n: everything after the last match.
      (when (> lidx 0)
        (let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
          (Array.aset-uninitialized! &result lidx
            (suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
                         plen)))))
      result))
)
(defmodule String
  ;; String helpers built on top of the Pattern module: containment and
  ;; character-class predicates, trimming, and splitting.

  (doc in? "checks whether a string contains another string.")
  ;; `sub` is turned into a Pattern via `Pattern.init`, so it is matched as a
  ;; pattern rather than compared as literal text.
  (defn in? [s sub]
    (let [needle (Pattern.init sub)]
      (Pattern.matches? &needle s)))

  ;; The predicates below anchor a single character class between `^` and `$`
  ;; and repeat it with `*`.
  ;; NOTE(review): with `*` the empty string presumably satisfies every one of
  ;; them - confirm that this is intended.

  (doc upper? "checks whether a string is all uppercase.")
  (defn upper? [s]
    (Pattern.matches? #"^[\u\s\p]*$" s))

  (doc lower? "checks whether a string is all lowercase.")
  (defn lower? [s]
    (Pattern.matches? #"^[\l\s\p]*$" s))

  (doc num? "checks whether a string is numerical.")
  (defn num? [s]
    (Pattern.matches? #"^[0-9]*$" s))

  (doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
  (defn alpha? [s]
    (Pattern.matches? #"^[\u\l]*$" s))

  (doc alphanum? "checks whether a string is alphanumerical.")
  (defn alphanum? [s]
    (Pattern.matches? #"^[\w]*$" s))

  (doc hex? "checks whether a string is hexadecimal.")
  (defn hex? [s]
    (Pattern.matches? #"^[\x]*$" s))

  (doc trim-left "trims whitespace from the left of a string.")
  ;; One substitution of the whitespace run anchored at the start.
  (defn trim-left [s]
    (Pattern.substitute #"^\s+" s "" 1))

  (doc trim-right "trims whitespace from the right of a string.")
  ;; One substitution of the whitespace run anchored at the end.
  (defn trim-right [s]
    (Pattern.substitute #"\s+$" s "" 1))

  (doc trim "trims whitespace from both sides of a string.")
  ;; Right side first, then the left side of that intermediate result.
  (defn trim [s]
    (let [right-trimmed (trim-right s)]
      (trim-left &right-trimmed)))

  (doc chomp "trims a newline from the end of a string.")
  ;; Drops a trailing `\n` first, then a trailing `\r` from what remains.
  (defn chomp [s]
    (let [without-lf (Pattern.substitute #"\n$" s "" 1)]
      (Pattern.substitute #"\r$" &without-lf "" 1)))

  (doc collapse-whitespace "collapses groups of whitespace into single spaces.")
  ;; `-1` asks `Pattern.substitute` to replace every occurrence.
  (defn collapse-whitespace [s]
    (Pattern.substitute #"\s+" s " " -1))

  (doc split-by "splits a string by separators.")
  (defn split-by [s separators]
    (let [sep-pat (Pattern.from-chars separators)
          hits (Pattern.find-all &sep-pat s)  ; index of every separator found
          n (Array.length &hits)
          ;; n separators cut the string into n + 1 pieces
          parts (Array.allocate (+ n 1))]
      (do
        ;; Piece 0: everything before the first separator, or the whole string
        ;; when no separator occurs.
        (Array.aset-uninitialized! &parts 0
          (slice s 0 (if (= n 0) (length s) @(Array.unsafe-nth &hits 0))))
        ;; Pieces 1 .. n-1: the text strictly between separator k-1 and
        ;; separator k; each separator is skipped as one position.
        (for [k 1 n]
          (Array.aset-uninitialized! &parts k
            (slice s (Int.inc @(Array.unsafe-nth &hits (Int.dec k)))
                     @(Array.unsafe-nth &hits k))))
        ;; Piece n: everything after the last separator.
        (unless (= n 0)
          (Array.aset-uninitialized! &parts n
            (suffix s (Int.inc @(Array.unsafe-nth &hits (Int.dec n))))))
        parts)))

  (doc words "splits a string into words.")
  ;; Splits on tab, space and newline, then discards the empty pieces.
  (defn words [s]
    (let [pieces (split-by s &[\tab \space \newline])]
      (Array.endo-filter &(fn [piece] (not (empty? piece))) pieces)))

  (doc lines "splits a string into lines.")
  ;; Splits on `\newline` only; empty pieces are kept.
  (defn lines [s]
    (split-by s &[\newline]))
)