@@ -15,6 +15,41 @@ import java.util.regex.{ Pattern, Matcher }
1515/** This class provides methods for creating and using regular expressions.
1616 * It is based on the regular expressions of the JDK since 1.4.
1717 *
18+ * Its main goal is to extract strings that match a pattern, or the subgroups
19+ * that make it up. For that reason, it is usually used with for comprehensions
20+ * and matching (see methods for examples).
21+ *
22+ * Because regex patterns make extensive use of the backslash character (`\`),
23+ * it is usually defined with triple quotes so that backslash characters won't
24+ * need to be quoted. Also, an implicit conversion is available through
25+ * [[scala.Predef ]] that makes converting a [[java.lang.String ]] into a Regex
26+ * as easy as calling the method `r` on it. For example:
27+ *
28+ * {{{
29+ * val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
30+ * }}}
31+ *
32+ * Regex provides methods to find and replace patterns, but also provides
33+ * extractors for pattern subgroups. Note, however, that extractors require
34+ * that the whole text be matched, or they fail.
35+ *
36+ * For example, the subgroups in the pattern above can be obtained in the following ways:
37+ *
38+ * {{{
39+ * val datePattern(year, month, day) = "2011-07-15"
40+ *
41+ * // val datePattern(year, month, day) = "Date 2011-07-15" // throws an exception at runtime
42+ *
43+ * val copyright: String = datePattern findFirstIn "Date of this document: 2011-07-15" match {
44+ * case Some(datePattern(year, month, day)) => "Copyright "+year
45+ * case None => "No copyright"
46+ * }
47+ *
48+ * val copyright: Option[String] = for {
49+ * datePattern(year, month, day) <- datePattern findFirstIn "Last modified 2011-07-15"
50+ * } yield year
51+ * }}}
52+ *
1853 * You can use special pattern syntax constructs like `(?idmsux-idmsux)`¹ to switch
1954 * various regex compilation options like `CASE_INSENSITIVE` or `UNICODE_CASE`.
2055 *
@@ -37,7 +72,29 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
3772 /** The compiled pattern */
3873 val pattern = Pattern .compile(regex)
3974
40- /** Tries to match target (whole match) and returns the matches.
75+ /** Tries to match target (whole match) and returns the matching subgroups.
76+ * If the pattern has no subgroups, then it returns an empty list on a
77+ * successful match.
78+ *
79+ * Note, however, that if some subgroup has not been matched, a `null` will
80+ * be returned for that subgroup.
81+ *
82+ * For example:
83+ *
84+ * {{{
85+ * val p1 = "ab*c".r
86+ * val p2 = "a(b*)c".r
87+ *
88+ * val p1Matches = "abbbc" match {
89+ * case p1() => true
90+ * case _ => false
91+ * }
92+ *
93+ * val numberOfB = "abbbc" match {
94+ * case p2(b) => Some(b.length)
95+ * case _ => None
96+ * }
97+ * }}}
4198 *
4299 * @param target The string to match
43100 * @return The matches
@@ -53,20 +110,42 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
53110 None
54111 }
55112
56- /** Return all matches of this regexp in given character sequence as an iterator
113+ /** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator ]],
114+ * which is a special [[scala.collection.Iterator ]] that returns the
115+ * matched strings, but can also be converted into a normal iterator
116+ * that returns objects of type [[scala.util.matching.Regex.Match ]]
117+ * that can be queried for data such as the text that precedes the
118+ * match, subgroups, etc.
119+ *
120+ * @param source The text to match against.
121+ * @return A [[scala.util.matching.Regex.MatchIterator ]] of all matches.
122+ * @example {{{for (words <- """\w+""".r findAllIn "A simple example.") yield words}}}
57123 */
58124 def findAllIn (source : java.lang.CharSequence ) = new Regex .MatchIterator (source, this , groupNames) // result type is inferred as Regex.MatchIterator; the iterator is handed this regex and its groupNames
59125
60126 /** Return optionally first matching string of this regexp in given character sequence,
61- * None if it does not exist.
127+ * or None if it does not exist.
128+ *
129+ * @param source The text to match against.
130+ * @return An [[scala.Option ]] of the first matching string in the text.
131+ * @example {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}}
62132 */
63133 def findFirstIn (source : java.lang.CharSequence ): Option [String ] = {
 // A new java.util.regex.Matcher is created on every call, so no match state is kept between calls.
64134 val m = pattern.matcher(source)
 // `find` looks for a match anywhere in `source` (it is not anchored to the start);
 // `group` with no argument is the text of the whole match.
65135 if (m.find) Some (m.group) else None
66- }
136+ }
67137
68138 /** Return optionally first match of this regexp in given character sequence,
69- * None if it does not exist.
139+ * or None if it does not exist.
140+ *
141+ * The main difference between this method and `findFirstIn` is that the (optional) return
142+ * type for this is [[scala.util.matching.Regex.Match ]], through which more
143+ * data can be obtained about the match, such as the strings that precede and follow it,
144+ * or subgroups.
145+ *
146+ * @param source The text to match against.
147+ * @return A [[scala.Option ]] of [[scala.util.matching.Regex.Match ]] of the first matching string in the text.
148+ * @example {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}}
70149 */
71150 def findFirstMatchIn (source : java.lang.CharSequence ): Option [Match ] = {
72151 val m = pattern.matcher(source)
@@ -76,6 +155,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
76155 /** Return optionally match of this regexp at the beginning of the
77156 * given character sequence, or None if regexp matches no prefix
78157 * of the character sequence.
158+ *
159+ * The main difference between this method and `findFirstIn` is that this
160+ * method will not return any matches that do not begin at the start
161+ * of the text being matched against.
162+ *
163+ * @param source The text to match against.
164+ * @return A [[scala.Option ]] of the matched prefix.
165+ * @example {{{"""[a-z]""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}}
79166 */
80167 def findPrefixOf (source : java.lang.CharSequence ): Option [String ] = {
81168 val m = pattern.matcher(source)
@@ -85,6 +172,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
85172 /** Return optionally match of this regexp at the beginning of the
86173 * given character sequence, or None if regexp matches no prefix
87174 * of the character sequence.
175+ *
176+ * The main difference between this method and `findFirstMatchIn` is that
177+ * this method will not return any matches that do not begin at the
178+ * start of the text being matched against.
179+ *
180+ * @param source The text to match against.
181+ * @return A [[scala.Option ]] of the [[scala.util.matching.Regex.Match ]] of the matched string.
182+ * @example {{{"""\w+""".r findPrefixMatchOf "A simple example." map (_.after) // returns Some(" simple example.")}}}
88183 */
89184 def findPrefixMatchOf (source : java.lang.CharSequence ): Option [Match ] = {
90185 val m = pattern.matcher(source)
@@ -96,14 +191,24 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
96191 * @param target The string to match
97192 * @param replacement The string that will replace each match
98193 * @return The resulting string
194+ * @example {{{"""\d+""".r replaceAllIn ("July 15", "<NUMBER>") // returns "July <NUMBER>"}}}
99195 */
100196 def replaceAllIn (target : java.lang.CharSequence , replacement : String ): String = {
101197 val m = pattern.matcher(target)
 // Delegates to java.util.regex.Matcher.replaceAll, so `$` (group reference) and `\` (escape)
 // inside `replacement` are interpreted by the JDK rather than inserted literally.
102198 m.replaceAll(replacement)
103199 }
104200
105201 /**
106- * Replaces all matches using a replacer function.
202+ * Replaces all matches using a replacer function. The replacer function takes a
203+ * [[scala.util.matching.Regex.Match ]] so that extra information can be obtained
204+ * from the match. For example:
205+ *
206+ * {{{
207+ * import scala.util.matching.Regex
208+ * val datePattern = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day")
209+ * val text = "From 2011-07-15 to 2011-07-17"
210+ * val repl = datePattern replaceAllIn (text, m => m.group("month")+"/"+m.group("day"))
211+ * }}}
107212 *
108213 * @param target The string to match.
109214 * @param replacer The function which maps a match to another string.
@@ -114,7 +219,26 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
114219 it foreach (md => it replace replacer(md))
115220 it.replaced
116221 }
117-
222+
223+ /**
224+ * Replaces some of the matches using a replacer function that returns an [[scala.Option ]].
225+ * The replacer function takes a [[scala.util.matching.Regex.Match ]] so that extra
226+ * information can be obtained from the match. For example:
227+ *
228+ * {{{
229+ * import scala.util.matching.Regex._
230+ *
231+ * val map = Map("x" -> "a var", "y" -> "another var")
232+ * val text = "A text with variables %x, %y and %z."
233+ * val varPattern = """%(\w+)""".r
234+ * val mapper = (m: Match) => map get (m group 1)
235+ * val repl = varPattern replaceSomeIn (text, mapper)
236+ * }}}
237+ *
238+ * @param target The string to match.
239+ * @param replacer The function which optionally maps a match to another string.
240+ * @return The target string after replacements.
241+ */
118242 def replaceSomeIn (target : java.lang.CharSequence , replacer : Match => Option [String ]): String = {
119243 val it = new Regex .MatchIterator (target, this , groupNames).replacementData
120244 for (matchdata <- it ; replacement <- replacer(matchdata))
@@ -274,12 +398,35 @@ object Regex {
274398 def force : this .type = { starts; ends; this }
275399 }
276400
277- /** An extractor object for Matches, yielding the matched string */
401+ /** An extractor object for Matches, yielding the matched string.
402+ *
403+ * This can be used to help writing replacer functions when you
404+ * are not interested in match data. For example:
405+ *
406+ * {{{
407+ * import scala.util.matching.Regex.Match
408+ * """\w+""".r replaceAllIn ("A simple example.", _ match { case Match(s) => s.toUpperCase })
409+ * }}}
410+ *
411+ */
278412 object Match {
 // The result type is Some[String] rather than Option[String]: this extraction always succeeds,
 // yielding `m.matched`.
279413 def unapply (m : Match ): Some [String ] = Some (m.matched)
280414 }
281415
282- /** An extractor object that yields groups in the match. */
416+ /** An extractor object that yields groups in the match. The main
417+ * advantage of using this extractor instead of using the original
418+ * regex is that this avoids rematching the string.
419+ *
420+ * For example:
421+ *
422+ * {{{
423+ * import scala.util.matching.Regex.Groups
424+ *
425+ * val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
426+ * val text = "The doc spree happened on 2011-07-15."
427+ * val day = datePattern replaceAllIn(text, _ match { case Groups(year, month, day) => month+"/"+day })
428+ * }}}
429+ */
283430 object Groups {
 // Yields `m.group(i)` for i in 1..groupCount (index 0 is never requested).
 // When the match has no groups the result is None, so `case Groups(...)` does not match at all.
284431 def unapplySeq (m : Match ): Option [Seq [String ]] = if (m.groupCount > 0 ) Some (1 to m.groupCount map m.group) else None
285432 }
0 commit comments