Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag](
loadFactor: Double)
extends Serializable {

require(initialCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements")
require(initialCapacity <= OpenHashSet.MAX_CAPACITY,
s"Can't make capacity bigger than ${OpenHashSet.MAX_CAPACITY} elements")
require(initialCapacity >= 1, "Invalid initial capacity")
require(loadFactor < 1.0, "Load factor must be less than 1.0")
require(loadFactor > 0.0, "Load factor must be greater than 0.0")
Expand Down Expand Up @@ -223,6 +224,8 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag](
*/
private def rehash(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) {
val newCapacity = _capacity * 2
require(newCapacity > 0 && newCapacity <= OpenHashSet.MAX_CAPACITY,
s"Can't contain more than ${(loadFactor * OpenHashSet.MAX_CAPACITY).toInt} elements")
allocateFunc(newCapacity)
val newBitset = new BitSet(newCapacity)
val newData = new Array[T](newCapacity)
Expand Down Expand Up @@ -276,9 +279,10 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag](
private[spark]
object OpenHashSet {

val MAX_CAPACITY = 1 << 30
val INVALID_POS = -1
val NONEXISTENCE_MASK = 0x80000000
val POSITION_MASK = 0xEFFFFFF
val NONEXISTENCE_MASK = 1 << 31
val POSITION_MASK = (1 << 31) - 1

/**
* A set of specialized hash function implementation to avoid boxing hash code computation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class OpenHashMapSuite extends SparkFunSuite with Matchers {
val goodMap3 = new OpenHashMap[String, String](256)
assert(goodMap3.size === 0)
intercept[IllegalArgumentException] {
new OpenHashMap[String, Int](1 << 30) // Invalid map size: bigger than 2^29
// Parenthesize the shift: in Scala `+` binds tighter than `<<`, so `1 << 30 + 1` is
// `1 << 31` (a negative Int) and never exercises the 2^30 upper capacity bound.
new OpenHashMap[String, Int]((1 << 30) + 1) // Invalid map size: bigger than 2^30
}
intercept[IllegalArgumentException] {
new OpenHashMap[String, Int](-1)
Expand Down Expand Up @@ -186,4 +186,14 @@ class OpenHashMapSuite extends SparkFunSuite with Matchers {
map(null) = 0
assert(map.contains(null))
}

test("support for more than 12M items") {
  // Fill the map with 12M distinct Int keys, each mapped to 1, then confirm that
  // no entry reads back with the value 0.
  val itemCount = 12000000 // 12M
  val map = new OpenHashMap[Int, Int](itemCount)
  var key = 0
  while (key < itemCount) {
    map(key) = 1
    key += 1
  }
  val zeroValued = map.iterator.count { case (_, value) => value == 0 }
  assertResult(0)(zeroValued)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class PrimitiveKeyOpenHashMapSuite extends SparkFunSuite with Matchers {
val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256)
assert(goodMap3.size === 0)
intercept[IllegalArgumentException] {
new PrimitiveKeyOpenHashMap[Int, Int](1 << 30) // Invalid map size: bigger than 2^29
// Parenthesize the shift: in Scala `+` binds tighter than `<<`, so `1 << 30 + 1` is
// `1 << 31` (a negative Int) and never exercises the 2^30 upper capacity bound.
new PrimitiveKeyOpenHashMap[Int, Int]((1 << 30) + 1) // Invalid map size: bigger than 2^30
}
intercept[IllegalArgumentException] {
new PrimitiveKeyOpenHashMap[Int, Int](-1)
Expand Down