实现布隆过滤器

布隆过滤器结合了位图和Hash表二者的优势。位图的优势是节省空间,但只能处理整型值一类的问题,无法处理字符串一类的问题;Hash表恰好解决了位图无法解决的问题,然而Hash表太浪费空间。针对这个问题,布隆提出了一种基于二进制向量和一系列随机映射函数(哈希函数)的数据结构——布隆过滤器。它的空间利用率和时间效率是很多算法无法企及的,但它也有一些缺点:存在一定的误判率,而且不支持删除操作。
实现代码:

#include"comm.h"
#include"BitMap.h"
//布隆过滤器
template <class T, class HashFun1 =  __HashFunc1<T>,
 class HashFun2 = __HashFunc2<T>, class HashFun3 = __HashFunc3<T>, class HashFun4 = __HashFunc4<T>, class HashFun5 = __HashFunc5<T> > class BloomFilter { public: BloomFilter(size_t size) : _bmp(size) , _capacity(size) {} void Insert(string str) { size_t idx1 = _HashFunc1()(str) % _capacity; size_t idx2 = _HashFunc2()(str) % _capacity; size_t idx3 = _HashFunc3()(str) % _capacity; size_t idx4 = _HashFunc4()(str) % _capacity; size_t idx5 = _HashFunc5()(str) % _capacity; _bmp.set(idx1); _bmp.set(idx2); _bmp.set(idx3); _bmp.set(idx4); _bmp.set(idx5); } bool Find(string str) { size_t idx1 = _HashFunc1()(str) % _capacity; size_t idx2 = _HashFunc2()(str) % _capacity; size_t idx3 = _HashFunc3()(str) % _capacity; size_t idx4 = _HashFunc4()(str) % _capacity; size_t idx5 = _HashFunc5()(str) % _capacity; if (!_bmp.Test(idx1) || !_bmp.Test(idx2) || !_bmp.Test(idx3) || !_bmp.Test(idx4) || !_bmp.Test(idx5)) return false; else if (_bmp.set(idx1) && _bmp.set(idx2) && _bmp.set(idx3) && _bmp.set(idx4) && _bmp.set(idx5)) return true; } private: Bitmap _bmp; size_t _capacity; };

comm.h

#include<string>
// Default hash functor: uses the key's own value as its hash.
// K must be implicitly convertible to size_t.
template<class K>
class HashFunDef
{
public:
    // Returns `key` converted to size_t.
    size_t operator()(const K& key)
    {
        const size_t value = key;
        return value;
    }
};
// Forward declaration: BKDRHash is defined further down in this header, but
// the full specialization below is ordinary (non-template) code, so the name
// has to be declared before it is used.
static size_t BKDRHash(const char * str);

// Specialization of HashFunDef for std::string keys: hashes the string's
// characters with BKDRHash. The type is spelled std::string because this
// header includes <string> but has no using-directive of its own.
template<>
class HashFunDef<std::string>
{
public:
    // Returns the BKDR hash of `key`'s NUL-terminated character data.
    size_t operator()(const std::string& key)
    {
        return BKDRHash(key.c_str());
    }
};

// BKDR string hash: multiplies the running value by a fixed seed and adds
// each character of the NUL-terminated string, in unsigned int arithmetic.
//
// str  NUL-terminated C string; must not be null.
// Returns the accumulated value with the top bit masked off (0 for "").
static size_t BKDRHash(const char * str)
{
    const unsigned int multiplier = 131; // other common seeds: 31 131 1313 13131 131313
    unsigned int acc = 0;
    for (const char* cursor = str; *cursor != '\0'; ++cursor)
    {
        acc = acc * multiplier + *cursor;
    }
    // Keep only the low 31 bits.
    return acc & 0x7FFFFFFF;
}
// SDBM string hash: folds each character of a NUL-terminated string into the
// running value as hash = hash * 65599 + ch.
//
// str  NUL-terminated C string; must not be null.
// Returns the hash of the characters before the terminator (0 for "").
//
// Declared inline because it is defined in a header that may be included by
// several translation units. The `register` specifier was dropped: it was
// removed from the language in C++17.
inline size_t SDBMHash(const char* str)
{
    size_t hash = 0;
    while (size_t ch = static_cast<size_t>(*str++))
    {
        // Same as ch + (hash << 6) + (hash << 16) - hash.
        hash = 65599 * hash + ch;
    }

    return hash;
}

// RS string hash: like a multiplicative hash, but the multiplier itself is
// scaled by 378551 after every character.
//
// str  NUL-terminated C string; must not be null.
// Returns the hash of the characters before the terminator (0 for "").
//
// Declared inline because it is defined in a header that may be included by
// several translation units. The `register` specifier was dropped: it was
// removed from the language in C++17.
inline size_t RSHash(const char *str)
{
    size_t hash = 0;
    size_t magic = 63689;   // initial multiplier
    while (size_t ch = static_cast<size_t>(*str++))
    {
        hash = hash * magic + ch;
        magic *= 378551;
    }

    return hash;
}

// AP string hash: mixes each character into the running value, alternating
// between two shift/xor formulas for even and odd character positions.
//
// str  NUL-terminated C string; must not be null.
// Returns the hash of the characters before the terminator (0 for "").
//
// Both branches mix `ch` into the hash. Without it in the even branch,
// characters at even positions would never influence the result.
//
// Declared inline because it is defined in a header that may be included by
// several translation units. The `register` specifier was dropped: it was
// removed from the language in C++17.
inline size_t APHash(const char* str)
{
    size_t hash = 0;
    size_t ch;
    for (long i = 0; (ch = static_cast<size_t>(*str++)) != 0; i++)
    {
        if (0 == (i & 1))
        {
            // Even position.
            hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
        }
        else
        {
            // Odd position.
            hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
        }
    }

    return hash;
}

// JS string hash: starts from a fixed seed and xors in a shift/add mix of the
// running value and each character.
//
// str  NUL-terminated C string; must not be null.
// Returns 0 for the empty string, otherwise the mixed hash value.
//
// Declared inline because it is defined in a header that may be included by
// several translation units. The `register` specifier was dropped: it was
// removed from the language in C++17.
inline size_t JSHash(const char* str)
{
    // The empty string is special-cased to 0 instead of returning the seed.
    if (!*str)
        return 0;

    size_t hash = 1315423911;
    while (size_t ch = static_cast<size_t>(*str++))
    {
        hash ^= ((hash << 5) + ch + (hash >> 2));
    }

    return hash;
}

// Hash functor #1: hashes a key with BKDRHash.
// K must provide c_str() returning a NUL-terminated character string.
template<class K>
struct __HashFunc1
{
    // Returns BKDRHash of the key's character data.
    size_t operator()(const K& key)
    {
        const size_t digest = BKDRHash(key.c_str());
        return digest;
    }
};

// Hash functor #2: hashes a key with SDBMHash.
// K must provide c_str() returning a NUL-terminated character string.
template<class K>
struct __HashFunc2
{
    // Returns SDBMHash of the key's character data.
    size_t operator()(const K& key)
    {
        const size_t digest = SDBMHash(key.c_str());
        return digest;
    }
};

// Hash functor #3: hashes a key with RSHash.
// K must provide c_str() returning a NUL-terminated character string.
template<class K>
struct __HashFunc3
{
    // Returns RSHash of the key's character data.
    size_t operator()(const K& key)
    {
        const size_t digest = RSHash(key.c_str());
        return digest;
    }
};

// Hash functor #4: hashes a key with APHash.
// K must provide c_str() returning a NUL-terminated character string.
template<class K>
struct __HashFunc4
{
    // Returns APHash of the key's character data.
    size_t operator()(const K& key)
    {
        const size_t digest = APHash(key.c_str());
        return digest;
    }
};

// Hash functor #5: hashes a key with JSHash.
// K must provide c_str() returning a NUL-terminated character string.
template<class K>
struct __HashFunc5
{
    // Returns JSHash of the key's character data.
    size_t operator()(const K& key)
    {
        const size_t digest = JSHash(key.c_str());
        return digest;
    }
};

BitMap.h

#include<iostream>
using namespace std;
#include<vector>
// Bitmap storing one flag per index, packed 32 flags per table word
// (word = index >> 5, bit = index % 32).
//
// The table holds unsigned words so that selecting bit 31 with a shift is
// well defined; `1 << 31` overflows a signed int. Indices beyond the current
// table are handled without touching memory out of range: set() grows the
// table, while test() and ReSet() treat such indices as cleared bits.
class Bitmap
{
public:
    // Creates a bitmap with an empty table; it grows on the first set().
    Bitmap()
    {}

    // Creates a bitmap with room for at least `size` bits, all cleared.
    Bitmap(size_t size)
    {
        _table.resize((size >> 5) + 1);
    }

    // Sets the bit for `data` to 1, enlarging the table when the index lies
    // beyond it.
    void set(size_t data)
    {
        const size_t word = data >> 5;
        const size_t bit = data % 32;
        if (word >= _table.size())
            _table.resize(word + 1);
        _table[word] |= 1u << bit;
    }

    // Clears the bit for `data` to 0. An index beyond the table is already
    // clear, so nothing is done for it.
    void ReSet(size_t data)
    {
        const size_t word = data >> 5;
        const size_t bit = data % 32;
        if (word < _table.size())
            _table[word] &= ~(1u << bit);
    }

    // Returns true when the bit for `data` is set, false otherwise
    // (including for indices beyond the table).
    bool test(size_t data) const
    {
        const size_t word = data >> 5;
        const size_t bit = data % 32;
        if (word >= _table.size())
            return false;
        // (1u << bit) masks out every bit except the requested one.
        return ((1u << bit) & _table[word]) != 0;
    }

private:
    std::vector<unsigned int> _table;   // packed flags, 32 per element
};