Python实现Jenkins哈希?
有没有原生的Python实现的Jenkins hash算法?
我需要一个哈希算法,它可以把任意字符串转换成一个32位的整数。对于同一个字符串,它必须保证在不同的平台上返回相同的整数。
我看过ELF哈希算法,找到了一个Python的实现。根据上面的要求,这个算法可以替代Jenkins哈希吗?(http://www.partow.net/programming/hashfunctions/#ELFHashFunction)
6 个回答
4
最后我自己实现了这个功能。根据原来的版权声明发布(见下文)。使用时请自行承担风险,祝你玩得开心 :-)
'''Implements a straight Jenkins lookup hash - http://burtleburtle.net/bob/hash/doobs.html
Usage:
from jhash import jhash
print jhash('My hovercraft is full of eels')
Returns: unsigned 32 bit integer value
Prereqs: None
Tested with Python 2.6.
Version 1.00 - der@dod.no - 23.08.2010
Partly based on the Perl module Digest::JHash
http://search.cpan.org/~shlomif/Digest-JHash-0.06/lib/Digest/JHash.pm
Original copyright notice:
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. It's free.
See http://burtleburtle.net/bob/hash/evahash.html
Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable. Do NOT use for cryptographic purposes.
'''
def mix(a, b, c):
'''mix() -- mix 3 32-bit values reversibly.
For every delta with one or two bits set, and the deltas of all three
high bits or all three low bits, whether the original value of a,b,c
is almost all zero or is uniformly distributed,
* If mix() is run forward or backward, at least 32 bits in a,b,c
have at least 1/4 probability of changing.
* If mix() is run forward, every bit of c will change between 1/3 and
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)'''
# Need to constrain U32 to only 32 bits using the & 0xffffffff
# since Python has no native notion of integers limited to 32 bit
# http://docs.python.org/library/stdtypes.html#numeric-types-int-float-long-complex
a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff
a -= b; a -= c; a ^= (c>>13); a &= 0xffffffff
b -= c; b -= a; b ^= (a<<8); b &= 0xffffffff
c -= a; c -= b; c ^= (b>>13); c &= 0xffffffff
a -= b; a -= c; a ^= (c>>12); a &= 0xffffffff
b -= c; b -= a; b ^= (a<<16); b &= 0xffffffff
c -= a; c -= b; c ^= (b>>5); c &= 0xffffffff
a -= b; a -= c; a ^= (c>>3); a &= 0xffffffff
b -= c; b -= a; b ^= (a<<10); b &= 0xffffffff
c -= a; c -= b; c ^= (b>>15); c &= 0xffffffff
return a, b, c
def jhash(data, initval = 0):
'''hash() -- hash a variable-length key into a 32-bit value
data : the key (the unaligned variable-length array of bytes)
initval : can be any 4-byte value, defaults to 0
Returns a 32-bit value. Every bit of the key affects every bit of
the return value. Every 1-bit and 2-bit delta achieves avalanche.'''
length = lenpos = len(data)
# empty string returns 0
if length == 0:
return 0
# Set up the internal state
a = b = 0x9e3779b9 # the golden ratio; an arbitrary value
c = initval # the previous hash value
p = 0 # string offset
# ------------------------- handle most of the key in 12 byte chunks
while lenpos >= 12:
a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24))
b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24))
c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16) + (ord(data[p+11])<<24))
a, b, c = mix(a, b, c)
p += 12
lenpos -= 12
# ------------------------- handle the last 11 bytes
c += length
if lenpos >= 11: c += ord(data[p+10])<<24
if lenpos >= 10: c += ord(data[p+9])<<16
if lenpos >= 9: c += ord(data[p+8])<<8
# the first byte of c is reserved for the length
if lenpos >= 8: b += ord(data[p+7])<<24
if lenpos >= 7: b += ord(data[p+6])<<16
if lenpos >= 6: b += ord(data[p+5])<<8
if lenpos >= 5: b += ord(data[p+4])
if lenpos >= 4: a += ord(data[p+3])<<24
if lenpos >= 3: a += ord(data[p+2])<<16
if lenpos >= 2: a += ord(data[p+1])<<8
if lenpos >= 1: a += ord(data[p+0])
a, b, c = mix(a, b, c)
# ------------------------- report the result
return c
if __name__ == "__main__":
hashstr = 'My hovercraft is full of eels'
myhash = jhash(hashstr)
print 'jhash("%s"): %d' % (hashstr, myhash)
6
你可以只使用md5sum的前32位。
>>> from hashlib import md5
>>> from struct import unpack
>>> unpack("<IIII",md5("thestring").digest())[0]
1515985217
14
这段原生的Python代码应该能给你和原来的lookup3.c得到一样的哈希值。
# Need to constrain U32 to only 32 bits using the & 0xffffffff
# since Python has no native notion of integers limited to 32 bit
# http://docs.python.org/library/stdtypes.html#numeric-types-int-float-long-complex
'''Original copyright notice:
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. Its free.
'''
def rot(x,k):
return (((x)<<(k)) | ((x)>>(32-(k))))
def mix(a, b, c):
a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff
a -= c; a &= 0xffffffff; a ^= rot(c,4); a &= 0xffffffff; c += b; c &= 0xffffffff
b -= a; b &= 0xffffffff; b ^= rot(a,6); b &= 0xffffffff; a += c; a &= 0xffffffff
c -= b; c &= 0xffffffff; c ^= rot(b,8); c &= 0xffffffff; b += a; b &= 0xffffffff
a -= c; a &= 0xffffffff; a ^= rot(c,16); a &= 0xffffffff; c += b; c &= 0xffffffff
b -= a; b &= 0xffffffff; b ^= rot(a,19); b &= 0xffffffff; a += c; a &= 0xffffffff
c -= b; c &= 0xffffffff; c ^= rot(b,4); c &= 0xffffffff; b += a; b &= 0xffffffff
return a, b, c
def final(a, b, c):
a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff
c ^= b; c &= 0xffffffff; c -= rot(b,14); c &= 0xffffffff
a ^= c; a &= 0xffffffff; a -= rot(c,11); a &= 0xffffffff
b ^= a; b &= 0xffffffff; b -= rot(a,25); b &= 0xffffffff
c ^= b; c &= 0xffffffff; c -= rot(b,16); c &= 0xffffffff
a ^= c; a &= 0xffffffff; a -= rot(c,4); a &= 0xffffffff
b ^= a; b &= 0xffffffff; b -= rot(a,14); b &= 0xffffffff
c ^= b; c &= 0xffffffff; c -= rot(b,24); c &= 0xffffffff
return a, b, c
def hashlittle2(data, initval = 0, initval2 = 0):
length = lenpos = len(data)
a = b = c = (0xdeadbeef + (length) + initval)
c += initval2; c &= 0xffffffff
p = 0 # string offset
while lenpos > 12:
a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); a &= 0xffffffff
b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); b &= 0xffffffff
c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16) + (ord(data[p+11])<<24)); c &= 0xffffffff
a, b, c = mix(a, b, c)
p += 12
lenpos -= 12
if lenpos == 12: c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16) + (ord(data[p+11])<<24)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 11: c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 10: c += (ord(data[p+8]) + (ord(data[p+9])<<8)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 9: c += (ord(data[p+8])); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 8: b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 7: b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 6: b += ((ord(data[p+5])<<8) + ord(data[p+4])); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24))
if lenpos == 5: b += (ord(data[p+4])); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24));
if lenpos == 4: a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24))
if lenpos == 3: a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16))
if lenpos == 2: a += (ord(data[p+0]) + (ord(data[p+1])<<8))
if lenpos == 1: a += ord(data[p+0])
a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff
if lenpos == 0: return c, b
a, b, c = final(a, b, c)
return c, b
def hashlittle(data, initval=0):
c, b = hashlittle2(data, initval, 0)
return c
if __name__ == "__main__":
import sys
hashstr = 'Four score and seven years ago'
hash, hash2 = hashlittle2(hashstr, 0xdeadbeef, 0xdeadbeef)
print '"%s": %x %x' % (hashstr, hash, hash2)
hash = hashlittle(hashstr, 0)
print '"%s": %x' % (hashstr, hash)