布隆过滤器结合了位图和Hash表二者的优势:位图的优势是节省空间,但只能处理整型值一类的问题,无法处理字符串一类的问题;Hash表恰好解决了位图无法解决的问题,然而Hash表太浪费空间。针对这个问题,布隆提出了一种基于二进制向量和一系列随机函数的数据结构——布隆过滤器。它的空间利用率和时间效率是很多算法无法企及的,但它也有一些缺点:会有一定的误判率,而且不支持删除操作。
实现代码:
#include"comm.h"
#include"BitMap.h"
//布隆过滤器
template <class T, class HashFun1 = __HashFunc1<T>,
class HashFun2 = __HashFunc2<T>, class HashFun3 = __HashFunc3<T>, class HashFun4 = __HashFunc4<T>, class HashFun5 = __HashFunc5<T> > class BloomFilter { public: BloomFilter(size_t size) : _bmp(size) , _capacity(size) {} void Insert(string str) { size_t idx1 = _HashFunc1()(str) % _capacity; size_t idx2 = _HashFunc2()(str) % _capacity; size_t idx3 = _HashFunc3()(str) % _capacity; size_t idx4 = _HashFunc4()(str) % _capacity; size_t idx5 = _HashFunc5()(str) % _capacity; _bmp.set(idx1); _bmp.set(idx2); _bmp.set(idx3); _bmp.set(idx4); _bmp.set(idx5); } bool Find(string str) { size_t idx1 = _HashFunc1()(str) % _capacity; size_t idx2 = _HashFunc2()(str) % _capacity; size_t idx3 = _HashFunc3()(str) % _capacity; size_t idx4 = _HashFunc4()(str) % _capacity; size_t idx5 = _HashFunc5()(str) % _capacity; if (!_bmp.Test(idx1) || !_bmp.Test(idx2) || !_bmp.Test(idx3) || !_bmp.Test(idx4) || !_bmp.Test(idx5)) return false; else if (_bmp.set(idx1) && _bmp.set(idx2) && _bmp.set(idx3) && _bmp.set(idx4) && _bmp.set(idx5)) return true; } private: Bitmap _bmp; size_t _capacity; };
comm.h:
#include<string>
// BKDR string hash: folds each character of the NUL-terminated string `str`
// into an unsigned accumulator using a multiplicative seed, then clears the
// top bit of the 32-bit result. `str` must not be null.
//
// Defined ahead of HashFunDef<std::string>, which calls it; a non-template
// specialization cannot use a function that is declared only later.
static size_t BKDRHash(const char* str)
{
    const unsigned int seed = 131; // alternatives noted by the author: 31 131 1313 13131 131313
    unsigned int hash = 0;
    while (*str)
    {
        hash = hash * seed + (*str++);
    }
    return (hash & 0x7FFFFFFF);
}

// Default hash functor: returns the key itself converted to size_t.
// K must be implicitly convertible to size_t.
template<class K>
class HashFunDef
{
public:
    size_t operator()(const K& key) const
    {
        return key;
    }
};

// Hash functor for std::string keys: hashes the character data with BKDRHash.
template<>
class HashFunDef<std::string>
{
public:
    size_t operator()(const std::string& key) const
    {
        return BKDRHash(key.c_str());
    }
};
// SDBM string hash over the NUL-terminated string `str` (must not be null).
// Each character updates the accumulator as hash = 65599 * hash + ch, with
// size_t wrap-around on overflow. Returns 0 for the empty string.
//
// `inline` because this is defined in a header; the `register` specifier was
// dropped because it is not permitted from C++17 onwards.
inline size_t SDBMHash(const char* str)
{
    size_t hash = 0;
    while (size_t ch = static_cast<size_t>(*str++))
    {
        // Equivalent shift form: ch + (hash << 6) + (hash << 16) - hash
        hash = 65599 * hash + ch;
    }
    return hash;
}
// RS string hash over the NUL-terminated string `str` (must not be null).
// Each character updates the accumulator as hash = hash * magic + ch, and the
// multiplier itself is rescaled by 378551 after every character. Arithmetic
// wraps in size_t. Returns 0 for the empty string.
//
// `inline` because this is defined in a header; the `register` specifier was
// dropped because it is not permitted from C++17 onwards.
inline size_t RSHash(const char* str)
{
    size_t hash = 0;
    size_t magic = 63689;
    while (size_t ch = static_cast<size_t>(*str++))
    {
        hash = hash * magic + ch;
        magic *= 378551;
    }
    return hash;
}
// AP string hash over the NUL-terminated string `str` (must not be null).
// Characters at even and odd positions are mixed with different shift/xor
// formulas. Returns 0 for the empty string.
//
// The even-position formula must include `ch`; without it, characters at
// even positions (including the first one) have no effect on the result, so
// e.g. "a" and "b" would both hash to 0.
//
// `inline` because this is defined in a header; the `register` specifier was
// dropped because it is not permitted from C++17 onwards.
inline size_t APHash(const char* str)
{
    size_t hash = 0;
    size_t ch;
    for (long i = 0; (ch = static_cast<size_t>(*str++)) != 0; i++)
    {
        if (0 == (i & 1))
        {
            hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
        }
    }
    return hash;
}
// JS string hash over the NUL-terminated string `str` (must not be null).
// Starts from the constant 1315423911 and, for every character, xors in
// (hash << 5) + ch + (hash >> 2). Arithmetic wraps in size_t.
// The empty string is special-cased to hash to 0.
//
// `inline` because this is defined in a header; the `register` specifier was
// dropped because it is not permitted from C++17 onwards.
inline size_t JSHash(const char* str)
{
    if (!*str)
        return 0;
    size_t hash = 1315423911;
    while (size_t ch = static_cast<size_t>(*str++))
    {
        hash ^= ((hash << 5) + ch + (hash >> 2));
    }
    return hash;
}
// Hash functor #1 (the default HashFun1 of BloomFilter): hashes the key's
// character data with BKDRHash. K must provide c_str().
// operator() is const so the functor can be invoked through a const object.
template<class K>
struct __HashFunc1
{
    size_t operator()(const K& key) const
    {
        return BKDRHash(key.c_str());
    }
};
// Hash functor #2 (the default HashFun2 of BloomFilter): hashes the key's
// character data with SDBMHash. K must provide c_str().
// operator() is const so the functor can be invoked through a const object.
template<class K>
struct __HashFunc2
{
    size_t operator()(const K& key) const
    {
        return SDBMHash(key.c_str());
    }
};
// Hash functor #3 (the default HashFun3 of BloomFilter): hashes the key's
// character data with RSHash. K must provide c_str().
// operator() is const so the functor can be invoked through a const object.
template<class K>
struct __HashFunc3
{
    size_t operator()(const K& key) const
    {
        return RSHash(key.c_str());
    }
};
// Hash functor #4 (the default HashFun4 of BloomFilter): hashes the key's
// character data with APHash. K must provide c_str().
// operator() is const so the functor can be invoked through a const object.
template<class K>
struct __HashFunc4
{
    size_t operator()(const K& key) const
    {
        return APHash(key.c_str());
    }
};
// Hash functor #5 (the default HashFun5 of BloomFilter): hashes the key's
// character data with JSHash. K must provide c_str().
// operator() is const so the functor can be invoked through a const object.
template<class K>
struct __HashFunc5
{
    size_t operator()(const K& key) const
    {
        return JSHash(key.c_str());
    }
};
BitMap.h:
#include<iostream>
using namespace std;
#include<vector>
// Bit set addressed by non-negative integer index, stored as 32-bit words.
//
// Words are unsigned so that setting or testing bit 31 never shifts into the
// sign bit of a signed int. All accessors are bounds-checked: set() grows the
// storage when needed, while ReSet() and test() treat an index beyond the
// current storage as a bit that is already clear.
class Bitmap
{
public:
    // Creates an empty bitmap; storage grows on the first set().
    Bitmap()
    {}

    // Creates a bitmap with room for at least `size` bits, all clear.
    Bitmap(size_t size)
    {
        _table.resize((size >> 5) + 1);
    }

    // Sets bit `data` to 1, enlarging the storage if `data` lies beyond it.
    // A very large `data` therefore triggers a proportionally large allocation.
    void set(size_t data)
    {
        const size_t word = data >> 5;          // index of the 32-bit word
        if (word >= _table.size())
            _table.resize(word + 1);
        _table[word] |= 1u << (data % 32);      // bit position inside the word
    }

    // Clears bit `data` to 0. Indices beyond the storage are already clear.
    void ReSet(size_t data)
    {
        const size_t word = data >> 5;
        if (word < _table.size())
            _table[word] &= ~(1u << (data % 32));
    }

    // Returns true if bit `data` is set, false otherwise (including when
    // `data` lies beyond the current storage).
    bool test(size_t data) const
    {
        const size_t word = data >> 5;
        if (word >= _table.size())
            return false;
        // Mask out every bit of the word except the requested one.
        return (_table[word] & (1u << (data % 32))) != 0;
    }

private:
    std::vector<unsigned int> _table;  // 32 bits used per element
};