Skip to content

Commit b9ee07a

Browse files
author
Antonio Cunei
committed
Port of MurmurHash to 2.8.x
1 parent 21209bf commit b9ee07a

File tree

1 file changed

+196
-0
lines changed

1 file changed

+196
-0
lines changed
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/* __ *\
2+
** ________ ___ / / ___ Scala API **
3+
** / __/ __// _ | / / / _ | (c) 2003-2011, LAMP/EPFL **
4+
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
5+
** /____/\___/_/ |_/____/_/ | | **
6+
** |/ **
7+
\* */
8+
9+
package scala.util
10+
11+
/** An implementation of Austin Appleby's MurmurHash 3.0 algorithm
12+
* (32 bit version); reference: http://code.google.com/p/smhasher
13+
*
14+
* This is the hash used by collections and case classes (including
15+
* tuples).
16+
*
17+
* @author Rex Kerr
18+
* @version 2.9
19+
* @since 2.9
20+
*/
21+
22+
import java.lang.Integer.{ rotateLeft => rotl }
23+
import scala.collection.Iterator
24+
25+
/** An incremental hasher producing well-distributed, non-cryptographic
 *  hash codes.  Since it is a `T => Unit` function it can be handed
 *  straight to a collection's `foreach`; hash values that are already
 *  known can instead be fed in one at a time through `append`.  The
 *  instance's own `hashCode` is the hash of everything consumed so far.
 *
 *  @tparam T     the type of the items whose `##` values are mixed in
 *  @param  seed  the value the running hash is initialised from
 */
class MurmurHash[@specialized(Int,Long,Float,Double) T](seed: Int) extends (T => Unit) {
  import MurmurHash._

  // Running hash, before the final mixing step has been applied.
  private var state = startHash(seed)
  // Current positions in the two magic-integer streams.
  private var magicA = hiddenMagicA
  private var magicB = hiddenMagicB
  // `memo` holds the finalized hash and is only trusted while `upToDate` is true.
  private var upToDate = false
  private var memo = state

  /** Mix one integer into the running hash, advance both magic streams
   *  and invalidate the memoised result.
   */
  private def absorb(value: Int): Unit = {
    state = extendHash(state, value, magicA, magicB)
    magicA = nextMagicA(magicA)
    magicB = nextMagicB(magicB)
    upToDate = false
  }

  /** Start over with the seed this instance was constructed with. */
  def reset(): Unit = {
    state = startHash(seed)
    magicA = hiddenMagicA
    magicB = hiddenMagicB
    upToDate = false
  }

  /** Mix in the hash value (`##`) of a single item. */
  def apply(t: T): Unit = absorb(t.##)

  /** Mix in a hash value that has already been computed. */
  def append(i: Int): Unit = absorb(i)

  /** The finalized hash of everything consumed so far.  The final mixing
   *  is performed on first request and remembered until more input
   *  arrives or `reset()` is called.
   */
  def hash: Int =
    if (upToDate) memo
    else {
      memo = finalizeHash(state)
      upToDate = true
      memo
    }

  override def hashCode: Int = hash
}
73+
74+
/** Building blocks for well-distributed, non-cryptographic hashes of a
 *  sequence of integers.  Besides the integers being hashed, two streams
 *  of "magic" integers are threaded through the computation to improve
 *  the distribution of repetitive input sequences.  Each step (starting,
 *  or incorporating one more integer) therefore involves three calls:
 *  one to update the hash and one to advance each magic stream.  A single
 *  call to `finalizeHash` completes the hash.
 */
object MurmurHash {
  // Magic values used for MurmurHash's 32 bit hash.
  // Don't change these without consulting a hashing expert!
  // They are left without a type ascription on purpose, so that they
  // remain constant value definitions.
  final private val visibleMagic = 0x971e137b
  final private val hiddenMagicA = 0x95543787
  final private val hiddenMagicB = 0x2ad7eb25
  final private val visibleMixer = 0x52dce729
  final private val hiddenMixerA = 0x7b7d159c
  final private val hiddenMixerB = 0x6bce6396
  final private val finalMixer1 = 0x85ebca6b
  final private val finalMixer2 = 0xc2b2ae35

  // Arbitrary values used to seed the hashes of particular classes.
  final private val seedString = 0xf7ca7fd2
  final private val seedArray = 0x3c074a61

  /** The first 23 magic integers of the first stream. */
  val storedMagicA: Array[Int] =
    Iterator.iterate(hiddenMagicA)(nextMagicA).take(23).toArray

  /** The first 23 magic integers of the second stream. */
  val storedMagicB: Array[Int] =
    Iterator.iterate(hiddenMagicB)(nextMagicB).take(23).toArray

  /** Begin a new hash from a seed value.
   *
   *  @param seed the seed to start from
   *  @return     the initial hash value
   */
  def startHash(seed: Int): Int = seed ^ visibleMagic

  /** The initial magic integer of the first stream. */
  def startMagicA: Int = hiddenMagicA

  /** The initial magic integer of the second stream. */
  def startMagicB: Int = hiddenMagicB

  /** Incorporate a new value into an existing hash.
   *
   *  @param   hash    the prior hash value
   *  @param   value   the new value to incorporate
   *  @param   magicA  a magic integer from the first stream
   *  @param   magicB  a magic integer from the second stream
   *  @return          the updated hash value
   */
  def extendHash(hash: Int, value: Int, magicA: Int, magicB: Int): Int = {
    val scrambled = rotl(value * magicA, 11) * magicB
    (hash ^ scrambled) * 3 + visibleMixer
  }

  /** Given a magic integer from the first stream, compute the next one. */
  def nextMagicA(magicA: Int): Int = magicA * 5 + hiddenMixerA

  /** Given a magic integer from the second stream, compute the next one. */
  def nextMagicB(magicB: Int): Int = magicB * 5 + hiddenMixerB

  /** Final mixing step, applied once every value has been incorporated.
   *
   *  @param hash the running hash value
   *  @return     the finished hash code
   */
  def finalizeHash(hash: Int): Int = {
    val folded = hash ^ (hash >>> 16)
    val firstMul = folded * finalMixer1
    val refolded = firstMul ^ (firstMul >>> 13)
    val secondMul = refolded * finalMixer2
    secondMul ^ (secondMul >>> 16)
  }

  /** Compute a hash of an array from the `##` values of its elements,
   *  taken in index order.
   */
  def arrayHash[T](a: Array[T]): Int = {
    val len = a.length
    var acc = startHash(len * seedArray)
    var magicA = hiddenMagicA
    var magicB = hiddenMagicB
    var idx = 0
    while (idx < len) {
      acc = extendHash(acc, a(idx).##, magicA, magicB)
      magicA = nextMagicA(magicA)
      magicB = nextMagicB(magicB)
      idx += 1
    }
    finalizeHash(acc)
  }

  /** Compute a hash of a string.  Characters are consumed in pairs, each
   *  pair packed into one integer; a trailing unpaired character is mixed
   *  in on its own.
   */
  def stringHash(s: String): Int = {
    val len = s.length
    var acc = startHash(len * seedString)
    var magicA = hiddenMagicA
    var magicB = hiddenMagicB
    var pos = 0
    while (pos + 1 < len) {
      // First character of the pair in the high 16 bits, second added below it.
      val packed = (s.charAt(pos) << 16) + s.charAt(pos + 1)
      acc = extendHash(acc, packed, magicA, magicB)
      magicA = nextMagicA(magicA)
      magicB = nextMagicB(magicB)
      pos += 2
    }
    if (pos < len) acc = extendHash(acc, s.charAt(pos), magicA, magicB)
    finalizeHash(acc)
  }

  /** Compute a hash that is symmetric in its arguments, i.e. one for
   *  which the order in which the elements appear does not matter.
   *  This is useful for hashing sets, for example.
   *
   *  @param xs   the elements to hash
   *  @param seed the seed, which is multiplied by the number of elements
   *              before the hash is started
   */
  def symmetricHash[T](xs: collection.TraversableOnce[T], seed: Int): Int = {
    // Three order-independent summaries of the element hashes, plus a count.
    var sum = 0
    var xor = 0
    var product = 1
    var count = 0
    xs.foreach { elem =>
      val elemHash = elem.##
      sum += elemHash
      xor ^= elemHash
      // Zero hashes are left out of the product so they cannot wipe it out.
      if (elemHash != 0) product *= elemHash
      count += 1
    }
    val seeded = startHash(seed * count)
    val withSum = extendHash(seeded, sum, storedMagicA(0), storedMagicB(0))
    val withXor = extendHash(withSum, xor, storedMagicA(1), storedMagicB(1))
    val withProduct = extendHash(withXor, product, storedMagicA(2), storedMagicB(2))
    finalizeHash(withProduct)
  }
}

0 commit comments

Comments
 (0)