Skip to content

Commit c7072c8

Browse files
author
extempore
committed
Documentation and examples for Regex.
Contributed by Daniel C. Sobral git-svn-id: http://lampsvn.epfl.ch/svn-repos/scala/scala/trunk@25305 5e8d7ff9-d8ef-0310-90f0-a4852d11357a
1 parent 0e66914 commit c7072c8

File tree

1 file changed

+156
-9
lines changed

1 file changed

+156
-9
lines changed

src/library/scala/util/matching/Regex.scala

Lines changed: 156 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,41 @@ import java.util.regex.{ Pattern, Matcher }
1515
/** This class provides methods for creating and using regular expressions.
1616
* It is based on the regular expressions of the JDK since 1.4.
1717
*
18+
* Its main goal is to extract strings that match a pattern, or the subgroups
19+
* that make it up. For that reason, it is usually used with for comprehensions
20+
* and matching (see methods for examples).
21+
*
22+
* Because regex patterns make extensive use of the backslash character (`\`),
23+
* it is usually defined with triple quotes so that backslash characters won't
24+
* need to be quoted. Also, an implicit conversion is available through
25+
* [[scala.Predef]] that makes converting a [[java.lang.String]] into a Regex
26+
* as easy as calling the method `r` on it. For example:
27+
*
28+
* {{{
29+
* val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
30+
* }}}
31+
*
32+
* Regex provides methods to find and replace patterns, but also provides
33+
* extractors for pattern subgroups. Note, however, that extractors require
34+
* that the whole text be matched, or they fail.
35+
*
36+
* For example, the subgroups in the pattern above can be obtained in the following ways:
37+
*
38+
* {{{
39+
* val datePattern(year, month, day) = "2011-07-15"
40+
*
41+
* // val datePattern(year, month, day) = "Date 2011-07-15" // throws an exception at runtime
42+
*
43+
* val copyright: String = datePattern findFirstIn "Date of this document: 2011-07-15" match {
44+
* case Some(datePattern(year, month, day)) => "Copyright "+year
45+
* case None => "No copyright"
46+
* }
47+
*
48+
* val copyright: Option[String] = for {
49+
* datePattern(year, month, day) <- datePattern findFirstIn "Last modified 2011-07-15"
50+
* } yield year
51+
* }}}
52+
*
1853
* You can use special pattern syntax constructs like `(?idmsux-idmsux)`¹ to switch
1954
* various regex compilation options like `CASE_INSENSITIVE` or `UNICODE_CASE`.
2055
*
@@ -37,7 +72,29 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
3772
/** The compiled pattern */
3873
val pattern = Pattern.compile(regex)
3974

40-
/** Tries to match target (whole match) and returns the matches.
75+
/** Tries to match target (whole match) and returns the matching subgroups.
76+
* If the pattern has no subgroups, then it returns an empty list on a
77+
* successful match.
78+
*
79+
* Note, however, that if some subgroup has not been matched, a `null` will
80+
* be returned for that subgroup.
81+
*
82+
* For example:
83+
*
84+
* {{{
85+
* val p1 = "ab*c".r
86+
* val p2 = "a(b*)c".r
87+
*
88+
* val p1Matches = "abbbc" match {
89+
* case p1() => true
90+
* case _ => false
91+
* }
92+
*
93+
* val numberOfB = "abbbc" match {
94+
* case p2(b) => Some(b.length)
95+
* case _ => None
96+
* }
97+
* }}}
4198
*
4299
* @param target The string to match
43100
* @return The matches
@@ -53,20 +110,42 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
53110
None
54111
}
55112

56-
/** Return all matches of this regexp in given character sequence as an iterator
113+
/** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
114+
* which is a special [[scala.collection.Iterator]] that returns the
115+
* matched strings, but can also be converted into a normal iterator
116+
* that returns objects of type [[scala.util.matching.Regex.Match]]
117+
* that can be queried for data such as the text that precedes the
118+
* match, subgroups, etc.
119+
*
120+
* @param source The text to match against.
121+
* @return A [[scala.util.matching.Regex.MatchIterator]] of all matches.
122+
* @example {{{for (words <- """\w+""".r findAllIn "A simple example.") yield words}}}
57123
*/
58124
def findAllIn(source: java.lang.CharSequence) = new Regex.MatchIterator(source, this, groupNames)
59125

60126
/** Return optionally first matching string of this regexp in given character sequence,
61-
* None if it does not exist.
127+
* or None if it does not exist.
128+
*
129+
* @param source The text to match against.
130+
* @return An [[scala.Option]] of the first matching string in the text.
131+
* @example {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}}
62132
*/
63133
def findFirstIn(source: java.lang.CharSequence): Option[String] = {
64134
val m = pattern.matcher(source)
65135
if (m.find) Some(m.group) else None
66-
}
136+
}
67137

68138
/** Return optionally first match of this regexp in given character sequence,
69-
* None if it does not exist.
139+
* or None if it does not exist.
140+
*
141+
* The main difference between this method and `findFirstIn` is that the (optional) return
142+
* type for this is [[scala.util.matching.Regex.Match]], through which more
143+
* data can be obtained about the match, such as the strings that precede and follow it,
144+
* or subgroups.
145+
*
146+
* @param source The text to match against.
147+
* @return A [[scala.Option]] of [[scala.util.matching.Regex.Match]] of the first matching string in the text.
148+
* @example {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}}
70149
*/
71150
def findFirstMatchIn(source: java.lang.CharSequence): Option[Match] = {
72151
val m = pattern.matcher(source)
@@ -76,6 +155,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
76155
/** Return optionally match of this regexp at the beginning of the
77156
* given character sequence, or None if regexp matches no prefix
78157
* of the character sequence.
158+
*
159+
* The main difference between this method and `findFirstIn` is that this
160+
* method will not return any matches that do not begin at the start
161+
* of the text being matched against.
162+
*
163+
* @param source The text to match against.
164+
* @return A [[scala.Option]] of the matched prefix.
165+
* @example {{{"""[a-z]""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}}
79166
*/
80167
def findPrefixOf(source: java.lang.CharSequence): Option[String] = {
81168
val m = pattern.matcher(source)
@@ -85,6 +172,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
85172
/** Return optionally match of this regexp at the beginning of the
86173
* given character sequence, or None if regexp matches no prefix
87174
* of the character sequence.
175+
*
176+
* The main difference between this method and `findFirstMatchIn` is that
177+
* this method will not return any matches that do not begin at the
178+
* start of the text being matched against.
179+
*
180+
* @param source The text to match against.
181+
* @return A [[scala.Option]] of the [[scala.util.matching.Regex.Match]] of the matched string.
182+
* @example {{{"""\w+""".r findPrefixMatchOf "A simple example." map (_.after) // returns Some(" simple example.")}}}
88183
*/
89184
def findPrefixMatchOf(source: java.lang.CharSequence): Option[Match] = {
90185
val m = pattern.matcher(source)
@@ -96,14 +191,24 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
96191
* @param target The string to match
97192
* @param replacement The string that will replace each match
98193
* @return The resulting string
194+
* @example {{{"""\d+""".r replaceAllIn ("July 15", "<NUMBER>") // returns "July <NUMBER>"}}}
99195
*/
100196
def replaceAllIn(target: java.lang.CharSequence, replacement: String): String = {
101197
val m = pattern.matcher(target)
102198
m.replaceAll(replacement)
103199
}
104200

105201
/**
106-
* Replaces all matches using a replacer function.
202+
* Replaces all matches using a replacer function. The replacer function takes a
203+
* [[scala.util.matching.Regex.Match]] so that extra information can be obtained
204+
* from the match. For example:
205+
*
206+
* {{{
207+
* import scala.util.matching.Regex
208+
* val datePattern = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day")
209+
* val text = "From 2011-07-15 to 2011-07-17"
210+
* val repl = datePattern replaceAllIn (text, m => m.group("month")+"/"+m.group("day"))
211+
* }}}
107212
*
108213
* @param target The string to match.
109214
* @param replacer The function which maps a match to another string.
@@ -114,7 +219,26 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
114219
it foreach (md => it replace replacer(md))
115220
it.replaced
116221
}
117-
222+
223+
/**
224+
* Replaces some of the matches using a replacer function that returns an [[scala.Option]].
225+
* The replacer function takes a [[scala.util.matching.Regex.Match]] so that extra
226+
* information can be obtained from the match. For example:
227+
*
228+
* {{{
229+
* import scala.util.matching.Regex._
230+
*
231+
* val map = Map("x" -> "a var", "y" -> "another var")
232+
* val text = "A text with variables %x, %y and %z."
233+
* val varPattern = """%(\w+)""".r
234+
* val mapper = (m: Match) => map get (m group 1)
235+
* val repl = varPattern replaceSomeIn (text, mapper)
236+
* }}}
237+
*
238+
* @param target The string to match.
239+
* @param replacer The function which optionally maps a match to another string.
240+
* @return The target string after replacements.
241+
*/
118242
def replaceSomeIn(target: java.lang.CharSequence, replacer: Match => Option[String]): String = {
119243
val it = new Regex.MatchIterator(target, this, groupNames).replacementData
120244
for (matchdata <- it ; replacement <- replacer(matchdata))
@@ -274,12 +398,35 @@ object Regex {
274398
def force: this.type = { starts; ends; this }
275399
}
276400

277-
/** An extractor object for Matches, yielding the matched string */
401+
/** An extractor object for Matches, yielding the matched string
402+
*
403+
* This can be used to help writing replacer functions when you
404+
* are not interested in match data. For example:
405+
*
406+
* {{{
407+
* import scala.util.matching.Regex.Match
408+
* """\w+""".r replaceAllIn ("A simple example.", _ match { case Match(s) => s.toUpperCase })
409+
* }}}
410+
*
411+
*/
278412
object Match {
279413
def unapply(m: Match): Some[String] = Some(m.matched)
280414
}
281415

282-
/** An extractor object that yields groups in the match. */
416+
/** An extractor object that yields groups in the match. The main
417+
* advantage of using this extractor instead of using the original
418+
* regex is that this avoids rematching the string.
419+
*
420+
* For example:
421+
*
422+
* {{{
423+
* import scala.util.matching.Regex.Groups
424+
*
425+
* val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
426+
* val text = "The doc spree happened on 2011-07-15."
427+
* val day = datePattern replaceAllIn(text, _ match { case Groups(year, month, day) => month+"/"+day })
428+
* }}}
429+
*/
283430
object Groups {
284431
def unapplySeq(m: Match): Option[Seq[String]] = if (m.groupCount > 0) Some(1 to m.groupCount map m.group) else None
285432
}

0 commit comments

Comments
 (0)