@@ -30,9 +30,38 @@ use crate::unicode;
3030//
3131// Tests on this are relegated to the public API of HIR in src/hir.rs.
3232
33- #[ derive( Clone , Debug , Eq , PartialEq ) ]
33+ #[ derive( Clone , Debug ) ]
3434pub struct IntervalSet < I > {
35+ /// A sorted set of non-overlapping ranges.
3536 ranges : Vec < I > ,
37+ /// While not required at all for correctness, we keep track of whether an
38+ /// interval set has been case folded or not. This helps us avoid doing
39+ /// redundant work if, for example, a set has already been case folded.
40+ /// And note that whether a set is folded or not is preserved through
41+ /// all of the pairwise set operations. That is, if both interval sets
42+ /// have been case folded, then any of difference, union, intersection or
43+ /// symmetric difference all produce a case folded set.
44+ ///
45+ /// Note that when this is true, it *must* be the case that the set is case
46+ /// folded. But when it's false, the set *may* be case folded. In other
47+ /// words, we only set this to true when we know it to be the case, but we're
48+ /// okay with it being false if it would otherwise be costly to determine
49+ /// whether it should be true. This means code cannot assume that a false
50+ /// value necessarily indicates that the set is not case folded.
51+ ///
52+ /// Bottom line: this is a performance optimization.
53+ folded : bool ,
54+ }
55+
56+ impl < I : Interval > Eq for IntervalSet < I > { }
57+
58+ // We implement PartialEq manually so that we don't consider the set's internal
59+ // 'folded' property to be part of its identity. The 'folded' property is
60+ // strictly an optimization, so two sets with equal ranges compare equal even if their 'folded' flags differ.
61+ impl < I : Interval > PartialEq for IntervalSet < I > {
62+ fn eq ( & self , other : & IntervalSet < I > ) -> bool {
63+ self . ranges . eq ( & other. ranges )
64+ }
3665}
3766
3867impl < I : Interval > IntervalSet < I > {
@@ -42,7 +71,10 @@ impl<I: Interval> IntervalSet<I> {
4271 /// The given ranges do not need to be in any specific order, and ranges
4372 /// may overlap.
4473 pub fn new < T : IntoIterator < Item = I > > ( intervals : T ) -> IntervalSet < I > {
45- let mut set = IntervalSet { ranges : intervals. into_iter ( ) . collect ( ) } ;
74+ let ranges: Vec < I > = intervals. into_iter ( ) . collect ( ) ;
75+ // An empty set is trivially case folded; any non-empty input starts out conservatively flagged as not folded.
76+ let folded = ranges. is_empty ( ) ;
77+ let mut set = IntervalSet { ranges, folded } ;
4678 set. canonicalize ( ) ;
4779 set
4880 }
@@ -53,6 +85,10 @@ impl<I: Interval> IntervalSet<I> {
5385 // it preserves canonicalization.
5486 self . ranges . push ( interval) ;
5587 self . canonicalize ( ) ;
88+ // We don't know whether the new interval added here is considered
89+ // case folded, so we conservatively assume that the entire set is
90+ // no longer case folded if it was previously.
91+ self . folded = false ;
5692 }
5793
5894 /// Return an iterator over all intervals in this set.
@@ -77,6 +113,9 @@ impl<I: Interval> IntervalSet<I> {
77113 /// This returns an error if the necessary case mapping data is not
78114 /// available.
79115 pub fn case_fold_simple ( & mut self ) -> Result < ( ) , unicode:: CaseFoldError > {
116+ if self . folded {
117+ return Ok ( ( ) ) ;
118+ }
80119 let len = self . ranges . len ( ) ;
81120 for i in 0 ..len {
82121 let range = self . ranges [ i] ;
@@ -86,14 +125,19 @@ impl<I: Interval> IntervalSet<I> {
86125 }
87126 }
88127 self . canonicalize ( ) ;
128+ self . folded = true ;
89129 Ok ( ( ) )
90130 }
91131
92132 /// Union this set with the given set, in place. Unioning with an empty set is a no-op; otherwise the result is flagged as case folded only if both sets were.
93133 pub fn union ( & mut self , other : & IntervalSet < I > ) {
134+ if other. ranges . is_empty ( ) {
135+ return ;
136+ }
94137 // This could almost certainly be done more efficiently.
95138 self . ranges . extend ( & other. ranges ) ;
96139 self . canonicalize ( ) ;
140+ self . folded = self . folded && other. folded ;
97141 }
98142
99143 /// Intersect this set with the given set, in place.
@@ -103,6 +147,8 @@ impl<I: Interval> IntervalSet<I> {
103147 }
104148 if other. ranges . is_empty ( ) {
105149 self . ranges . clear ( ) ;
150+ // An empty set is case folded.
151+ self . folded = true ;
106152 return ;
107153 }
108154
@@ -132,6 +178,7 @@ impl<I: Interval> IntervalSet<I> {
132178 }
133179 }
134180 self . ranges . drain ( ..drain_end) ;
181+ self . folded = self . folded && other. folded ;
135182 }
136183
137184 /// Subtract the given set from this set, in place.
@@ -224,6 +271,7 @@ impl<I: Interval> IntervalSet<I> {
224271 a += 1 ;
225272 }
226273 self . ranges . drain ( ..drain_end) ;
274+ self . folded = self . folded && other. folded ;
227275 }
228276
229277 /// Compute the symmetric difference of the two sets, in place.
@@ -249,6 +297,8 @@ impl<I: Interval> IntervalSet<I> {
249297 if self . ranges . is_empty ( ) {
250298 let ( min, max) = ( I :: Bound :: min_value ( ) , I :: Bound :: max_value ( ) ) ;
251299 self . ranges . push ( I :: create ( min, max) ) ;
300+ // The set containing everything must be case folded.
301+ self . folded = true ;
252302 return ;
253303 }
254304
@@ -274,6 +324,19 @@ impl<I: Interval> IntervalSet<I> {
274324 self . ranges . push ( I :: create ( lower, I :: Bound :: max_value ( ) ) ) ;
275325 }
276326 self . ranges . drain ( ..drain_end) ;
327+ // We don't need to update whether this set is folded or not, because
328+ // it is conservatively preserved through negation. Namely, if a set
329+ // is not folded, then it is possible that its negation is folded, for
330+ // example, [^☃]. But we're fine with assuming that the set is not
331+ // folded in that case. (`folded` permits false negatives but not false
332+ // positives.)
333+ //
334+ // But what about when a set is folded, is its negation also
335+ // necessarily folded? Yes. Because if a set is folded, then for every
336+ // character in the set, it necessarily included its equivalence class
337+ // of case folded characters. Negating it in turn means that all
338+ // equivalence classes in the set are negated, and any equivalence
339+ // class that was previously not in the set is now entirely in the set.
277340 }
278341
279342 /// Converts this set into a canonical ordering.
0 commit comments