Skip to content

Commit 3920656

Browse files
committed
SPARK-8309: Support for more than 12M items in OpenHashMap
1 parent 424b007 commit 3920656

File tree

2 files changed: +12 lines added, -1 line removed

core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag](
223223
*/
224224
private def rehash(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) {
225225
val newCapacity = _capacity * 2
226+
require(newCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements")
226227
allocateFunc(newCapacity)
227228
val newBitset = new BitSet(newCapacity)
228229
val newData = new Array[T](newCapacity)
@@ -278,7 +279,7 @@ object OpenHashSet {
278279

279280
val INVALID_POS = -1
280281
val NONEXISTENCE_MASK = 0x80000000
281-
val POSITION_MASK = 0xEFFFFFF
282+
val POSITION_MASK = 0x1FFFFFFF
282283

283284
/**
284285
* A set of specialized hash function implementation to avoid boxing hash code computation

core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,4 +186,14 @@ class OpenHashMapSuite extends SparkFunSuite with Matchers {
186186
map(null) = 0
187187
assert(map.contains(null))
188188
}
189+
190+
test("support for more than 12M items") {
191+
val cnt = 12000000 // 12M
192+
val map = new OpenHashMap[Int, Int](cnt)
193+
for (i <- 0 until cnt) {
194+
map(i) = 1
195+
}
196+
val numInvalidValues = map.iterator.count(_._2 == 0)
197+
assertResult(0)(numInvalidValues)
198+
}
189199
}

0 commit comments

Comments (0)