HASH算法的本质是特征提取——将某种不太好表示的特征,经过某种压缩的方式映射成一个值。这样,就能够优雅地解决一部分难以解决的特征统计问题。
同时考虑到hash算法本质上是个概率算法,并不能保证所有的数据都不发生冲突(冲突是指两个不同的特征计算出了同一个HASH值),所以可以考虑使用双hash的形式:用两个不同的HASH算法算出来的一对HASH值来表示一个特征量——pair<ull,ull>就是一种实现方式。
一种常用的hash算法来自一个递推式:hash[i] = ( hash[i-1] * HASH_P + val[i] ) % HASH_MOD;
这种方式实际上可以比喻成一个在%HASH_MOD意义下的P进制大数,每次都添加一个新的个位数进入hash值中。
因此,实际使用中可以支持%HASH_MOD意义下的加法、减法。
另外hash算法好想好写,可以十分暴力地解决相当一部分的问题。(甚至可以直接使用优雅的#define来完成模板的编写)
Similarity of Subtrees
https://vjudge.net/problem/Aizu-2784
题意:给出一棵树,询问以1作为树根的树中,结构相同的子树有多少对。结构相同定义为:以某点为根节点的子树,其每一层的节点个数都与以另一节点为根的子树对应层的节点个数相同。
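Define the depth of a node in a rooted tree by applying the following rules recursively: the depth of the root node is 0, and the depth of a child node is the depth of its parent node plus one.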
Let $S(T,d)$ be the number of nodes of $T$ with depth $d$. Two rooted trees $T$ and $T'$ are similar if and only if $S(T,d)$ equals $S(T',d)$ for all non-negative integers $d$.
You are given a rooted tree $T$ with $N$ nodes. The nodes of $T$ are numbered from 1 to $N$. Node 1 is the root node of $T$. Let $T_i$ be the rooted subtree of $T$ whose root is node $i$. Your task is to write a program which calculates the number of pairs $(i,j)$ such that $T_i$ and $T_j$ are similar and $i<j$.
https://cn.vjudge.net/problem/Aizu-2784
题解:可以发现,子树的结构实际上可以通过HASH算法形式的递推得到——hash[now] = (∑(hash[child]) * HASH_P + num[now]) % HASH_MOD,其中num[now]是当前节点自身这一层的节点数(在下面的代码中恒为1)。
该递推式实际上表现了hash值的加法过程。
那么,如果支持dfs且不爆栈的话,可以使用dfs一发搞定,相当的优雅。
但是反过来,如果不支持dfs,则必须用bfs的方式来搞定树的遍历和递推。实际上也很好想:因为记录了每个节点的父节点,也记录了每个节点的子节点数量,就可以很容易地判断某个节点的所有子节点是否已经完成了递推计算。提供两个版本的代码:dfs实现和bfs实现。
dfs:
// Similarity of Subtrees (Aizu 2784) -- recursive DFS version.
//
// For the subtree rooted at v define the polynomial hash
//     H(v) = 1 + P * sum(H(child))      (mod M)
// which equals sum_d S(T_v, d) * P^d, i.e. it encodes the number of nodes on
// every depth level.  Two subtrees are "similar" exactly when these level
// counts agree, so (up to hash collisions) when their hashes agree.
// Two independent (P, M) pairs are used to make collisions unlikely.
#include<math.h>
#include<algorithm>
#include<vector>
#include<stdlib.h>
#include<string.h>
#include<string>
#include<set>
#include<map>
#include<queue>
#include<stack>
#include <iostream>
using namespace std;

typedef unsigned long long ull;
typedef long long ll;
typedef vector<int> veci;
typedef pair<ull,ull> pp;

const int MAXN = 200233;
const ull HASH_MOD1 = 1000000007;
const ull HASH_MOD2 = 1000000009;
const ull HASH_P1 = 100003;
const ull HASH_P2 = 100019;

// One step of the recurrence: (x * P + b) mod M.  Arguments are parenthesised
// so that expressions can be passed safely.
#define hash1(x,b) ((((ull)(x)) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x,b) ((((ull)(x)) * HASH_P2 + (b)) % HASH_MOD2)

veci G[MAXN];          // undirected adjacency lists, nodes are 1-based
int n;                 // number of nodes of the current test case
map<ull,int> mapp;     // packed double hash -> number of subtrees seen so far
ll ans;                // number of similar pairs found so far

// Computes the double hash of the subtree rooted at `now` (entered from
// `father`), and adds to `ans` the number of previously finished subtrees
// that have the same hash.
//
// Recursion depth equals the height of the tree; use the BFS variant when
// the stack is too small for a path-shaped tree.
pp dfs_count(int now,int father){
    pp ret(0ULL,0ULL);
    for(size_t i=0;i<G[now].size();++i){
        int tar = G[now][i];
        if(tar == father)continue;
        pp tmp = dfs_count(tar,now);
        // Every summand is < 2^30 and there are fewer than MAXN of them,
        // so the raw sums cannot overflow 64 bits before the reduction.
        ret.first += tmp.first;
        ret.second += tmp.second;
    }
    ret.first = hash1(ret.first % HASH_MOD1,1);
    ret.second = hash2(ret.second % HASH_MOD2,1);

    // Pack both hashes into one key.  The multiplier must be HASH_MOD2 (the
    // range of `second`); multiplying by HASH_MOD1 would map the distinct
    // pairs (a, HASH_MOD1) and (a+1, 0) to the same key.
    ull hash_tmp = ret.first * HASH_MOD2 + ret.second;

    int &seen = mapp[hash_tmp];   // value-initialised to 0 on first access
    ans += seen;                  // pairs (earlier subtree, this subtree)
    ++seen;
    return ret;
}

// Reads one tree (n is already read), solves it and prints the answer.
void init(){
    ans = 0;
    mapp.clear();
    for(int i=0;i<n+23 && i<MAXN;++i)G[i].clear();
    for(int i=1;i<n;++i){
        int a,b;
        cin>>a>>b;
        G[a].push_back(b);
        G[b].push_back(a);
    }
    dfs_count(1,0);
    cout<<ans<<"\n";
}

int main(){
    cin.sync_with_stdio(false);
    while(cin>>n)init();
    return 0;
}
bfs:
// Similarity of Subtrees (Aizu 2784) -- iterative BFS version (no recursion).
//
// Same hash as the DFS version: H(v) = 1 + P * sum(H(child)) (mod M), computed
// with two independent (P, M) pairs.  A first BFS from the root records each
// node's parent and number of children; a second queue pass then processes
// nodes bottom-up, releasing a node once all of its children are finished.
#include<math.h>
#include<algorithm>
#include<vector>
#include<stdlib.h>
#include<string.h>
#include<string>
#include<set>
#include<map>
#include<queue>
#include<stack>
#include <iostream>
using namespace std;

typedef unsigned long long ull;
typedef long long ll;
typedef vector<int> veci;
typedef pair<ull,ull> pp;

const int MAXN = 200233;
const ull HASH_MOD1 = 1000000007;
const ull HASH_MOD2 = 1000000009;
const ull HASH_P1 = 100003;
const ull HASH_P2 = 100019;

// One step of the recurrence: (x * P + b) mod M.
#define hash1(x,b) ((((ull)(x)) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x,b) ((((ull)(x)) * HASH_P2 + (b)) % HASH_MOD2)

veci G[MAXN];          // undirected adjacency lists, nodes are 1-based
int n;                 // number of nodes of the current test case
map<ull,int> mapp;     // packed double hash -> number of subtrees seen so far
ll ans;                // number of similar pairs found so far
int fa[MAXN];          // parent of each node, 0 for the root
pp anss[MAXN];         // double hash of the subtree rooted at each node
int times[MAXN];       // number of children whose hash is still pending

// Roots the tree at node 1: fills fa[] and sets times[v] to the number of
// children of v for every node reachable from the root.
void bfs(){
    queue<int> que;
    fa[1] = 0;
    que.push(1);
    while(!que.empty()){
        int now = que.front();
        que.pop();
        times[now] = 0;
        for(size_t i=0;i<G[now].size();++i){
            int tar = G[now][i];
            if(tar == fa[now])continue;
            times[now]++;
            fa[tar] = now;
            que.push(tar);
        }
    }
}

// Bottom-up pass: starts from the leaves and finishes a node only after all
// of its children, accumulating the number of similar pairs in `ans`.
void deal(){
    queue<int> que;
    // For n >= 2 the root always has a child, so leaves are among 2..n.
    // For n == 1 nothing is queued and the answer stays 0.
    for(int i=2;i<=n;++i){
        if(times[i] == 0)que.push(i);
    }
    while(!que.empty()){
        int now = que.front();
        que.pop();

        ull sum1 = 0, sum2 = 0;   // < MAXN summands, each < 2^30: no overflow
        for(size_t i=0;i<G[now].size();++i){
            int tar = G[now][i];
            if(tar == fa[now])continue;
            sum1 += anss[tar].first;
            sum2 += anss[tar].second;
        }
        anss[now].first = hash1(sum1 % HASH_MOD1,1);
        anss[now].second = hash2(sum2 % HASH_MOD2,1);

        // Pack with HASH_MOD2 (the range of `second`) so the key is
        // injective; HASH_MOD1 would let (a, HASH_MOD1) and (a+1, 0) collide.
        ull key = anss[now].first * HASH_MOD2 + anss[now].second;
        int &seen = mapp[key];    // value-initialised to 0 on first access
        ans += seen;
        ++seen;

        // Release the parent once its last child is done (the root has none).
        int parent = fa[now];
        if(parent != 0 && --times[parent] == 0)que.push(parent);
    }
}

// Reads one tree (n is already read), solves it and prints the answer.
void init(){
    ans = 0;
    mapp.clear();
    for(int i=0;i<n+23 && i<MAXN;++i)G[i].clear();
    for(int i=1;i<n;++i){
        int a,b;
        cin>>a>>b;
        G[a].push_back(b);
        G[b].push_back(a);
    }
    bfs();    // (re)initialises fa[] and times[] for every node of this tree
    deal();
    cout<<ans<<"\n";
}

int main(){
    cin.sync_with_stdio(false);
    while(cin>>n)init();
    return 0;
}
Stammering Aliens
https://cn.vjudge.net/problem/UVALive-4513
题意:给一个长串,问至少出现m次的最长连续子串的长度,以及该子串最右一次出现的起始位置。
题解:
这道题实际上是刘汝佳蓝书上的一道例题,在做的过程中用到了对hash串做减法的思路。
考虑答案中的两个量:最长长度和最右起始位置。最长长度具有某种意义上的单调性:如果长度为n的子串可以符合题目条件,则长度为n-1的也可以(n>1);因此考虑使用二分的形式来枚举子串的长度。最右起始位置可以直观地求解。
考虑递推式:hash[i] = (hash[i-1] * HASH_P + str[i]) % HASH_MOD
若简化为十进制数字则能够有以下样例:
以数字串 3129741938274 为例(下标从0开始),求下标3到下标7的子串(即去掉前缀hash[2]后剩下的部分)的hash值,记作hash[2-7]:
hash[7] = 31297419
hash[2] = 312
hash[2-7] = 97419
观察可得:hash[2-7] = hash[7] - hash[2]*10^(7-2);
则实际上只要保证上式在%HASH_MOD意义上成立即可。
// Stammering Aliens (UVALive 4513).
//
// Binary-searches the longest length L such that some substring of length L
// occurs at least m times, and reports the rightmost start of such a
// substring.  Substring hashes are obtained from prefix hashes by
//     hash(s[i-L+1..i]) = hash[i] - hash[i-L] * P^L   (mod M)
// with two independent (P, M) pairs.
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <utility>
using namespace std;

typedef long long ll;
typedef unsigned long long ull;
typedef pair<ull,ull> pp;

const int MAXN = 1000233;
const ull HASH_P1 = 233;
const ull HASH_P2 = 241;
const ull HASH_MOD1 = 1000000037;
const ull HASH_MOD2 = 1000000049;

// One step of the recurrence: (x * P + b) mod M.
#define hash1(x,b) ((((ull)(x)) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x,b) ((((ull)(x)) * HASH_P2 + (b)) % HASH_MOD2)
#define get_next_hash(tmp,b) (make_pair(hash1((tmp).first,(b)),hash2((tmp).second,(b))))

pp hashs[MAXN];      // hashs[i] = double hash of the prefix str[0..i]
pp hash_hex[MAXN];   // hash_hex[k] = (P1^k mod M1, P2^k mod M2)
int m;               // required number of occurrences
char str[MAXN];
int str_len,pos;     // pos: rightmost start found by the last check(), or -1
ull mapp[MAXN];      // mapp[s] = packed hash of the window starting at s
int anss[MAXN];      // window start positions, sorted by (hash, position)
int mapp_num;        // number of windows of the current length

// Orders window start positions by hash value, ties broken by position.
bool cmp(int a,int b){
    if(mapp[a] == mapp[b])return a<b;
    return mapp[a]<mapp[b];
}

// Returns true when some substring of the given length occurs at least m
// times; `pos` is then the rightmost start among all such substrings.
// Lengths outside 1..str_len yield false (and pos == -1).
bool check(int length){
    pos = -1;
    mapp_num = 0;
    if(length < 1 || length > str_len)return false;

    for(int i=length-1;i<str_len;++i){
        ull a = hashs[i].first;
        ull b = hashs[i].second;
        if(i >= length){
            // Remove the prefix that precedes the window; adding the modulus
            // first keeps the unsigned subtraction non-negative.
            ull cut1 = hashs[i-length].first * hash_hex[length].first % HASH_MOD1;
            ull cut2 = hashs[i-length].second * hash_hex[length].second % HASH_MOD2;
            a = (a + HASH_MOD1 - cut1) % HASH_MOD1;
            b = (b + HASH_MOD2 - cut2) % HASH_MOD2;
        }
        // Pack both hashes losslessly into 64 bits (a < M1, b < M2, and
        // M1 * M2 < 2^64).  Reducing the pair modulo M1 again would throw
        // away the second hash and make collisions likely for long inputs.
        anss[mapp_num] = mapp_num;
        mapp[mapp_num++] = a * HASH_MOD2 + b;
    }

    sort(anss,anss+mapp_num,cmp);

    int cntt = 0;   // size of the current run of equal hashes
    for(int i=0;i<mapp_num;++i){
        if(i > 0 && mapp[anss[i]] == mapp[anss[i-1]])cntt++;
        else cntt = 1;
        if(cntt >= m)pos = max(pos,anss[i]);
    }
    return pos != -1;
}

// Largest length in [a, b) for which check() holds, assuming check(a) is
// (vacuously) true, check(b) is false and check is monotone in between.
int bin_search(int a,int b){
    while(b - a > 1){
        int mid = (a+b)/2;
        if(check(mid))a = mid;
        else b = mid;
    }
    return a;
}

// Reads the string of one test case (m is already read), solves and prints.
void init(){
    // std::gets was removed from the language in C++14; fgets is bounded.
    if(!fgets(str,MAXN,stdin))str[0] = '\0';
    str_len = (int)strlen(str);
    while(str_len > 0 && (str[str_len-1] == '\n' || str[str_len-1] == '\r'))
        str[--str_len] = '\0';

    pp tmp = make_pair(0ULL,0ULL);
    for(int i=0;i<str_len;++i){
        tmp = get_next_hash(tmp,(unsigned char)str[i]);
        hashs[i] = tmp;
    }

    int ans = bin_search(0,str_len+1);
    if(ans){
        check(ans);   // recompute pos for the final length
        printf("%d %d\n",ans,pos);
    }else{
        puts("none");
    }
}

int main(){
    hash_hex[0] = make_pair(1ULL,1ULL);
    for(int i=1;i<MAXN;++i){
        hash_hex[i] = get_next_hash(hash_hex[i-1],0);
    }
    // The "\n" in the format skips the line break after m so that fgets in
    // init() starts at the string.  Stop on m == 0, EOF or malformed input.
    while(scanf("%d\n",&m) == 1 && m)init();
    return 0;
}
Hidden Anagrams
AIZU:https://cn.vjudge.net/problem/Aizu-1370
Gym:https://cn.vjudge.net/problem/Gym-101158D
UVALive:https://cn.vjudge.net/problem/UVALive-7592
题意:给出两个字符串,求出最大的长度,满足:两个字符串中各存在一个该长度的子串,且这两个子串包含的字母的种类和个数完全相同。
题解:思路很简单,就是枚举长度,并检查上面的字符串中是否存在与下面的串中某个等长子串HASH值一致的子串。如果有,则检查通过,没有则不通过;从高往低枚举,找到第一个通过的就跳出循环(也许会有个常数优化)。
此时HASH算法应当做一个简单的变化:统计每个字母出现的个数。HASH[I] = HASH[I-1] + HASH_HEX[STR[I]-'a']
此时HASH_HEX代表了HASH_P在对HASH_MOD取模的前提下的若干次方。
这道题有4个来源可以提交:Gym和AIZU可以让N²logN甚至更慢的代码通过;UVALive允许N²的代码加入邻接链表优化后通过(此时我已经开了IO挂);HOJ……需要再进一步取消HASH2的取模操作。
// Hidden Anagrams (Aizu 1370 / Gym 101158D / UVALive 7592).
//
// Finds the largest length L such that both strings contain a substring of
// length L with identical letter multisets.  A multiset is hashed as
//     H = sum over characters c of P^(c - 'a')
// (first component mod HASH_MOD1, second component mod 2^64 by natural
// overflow), so prefix sums give every window hash by one subtraction.
// For each L (largest first) all windows of the first string are put into a
// chained hash table and every window of the second string is looked up.
#include<math.h>
#include<algorithm>
#include<vector>
#include<stdlib.h>
#include<string.h>
#include<string>
#include<stdio.h>
#include<set>
#include<map>
#include<queue>
#include<stack>
#include <iostream>
#include <limits.h>
using namespace std;

typedef unsigned long long ull;
typedef pair<ull,ull> pp;

const ull MAXN = 1000249;
const ull HASH_P1 = 109;
const ull HASH_P2 = 4007;
const ull HASH_MOD1 = 1000249;
const ull HASH_MOD2 = 1000000037;   // unused: the second hash wraps mod 2^64
const int HEX_COUNT = 233;          // number of precomputed powers of P

#define idx(x) ((x)-'a')
#define hash1(x,b) ((((ull)(x)) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x,b) ((((ull)(x)) * HASH_P2 + (b)))
#define next_hash(tmp,b) (make_pair(hash1((tmp).first,(b)),hash2((tmp).second,(b))))
// Multiset hash after adding character b to the multiset hashed as tmp.
#define add_hash(tmp,b) (make_pair(((tmp).first + hash_hex[idx(b)].first) % HASH_MOD1,((tmp).second + hash_hex[idx(b)].second)))
// Multiset hash of the difference of two nested prefixes.
#define sub_hash(tmpa,tmpb) (make_pair(((tmpa).first + HASH_MOD1 - (tmpb).first) % HASH_MOD1,((tmpa).second - (tmpb).second)))

// Minimal buffered I/O.  Only the two entry points this program uses are
// kept from the original pasted template; the dropped helpers called
// gets() (removed in C++14) and contained a non-void function without a
// return statement.
namespace fastIO{
    const int BUF_SIZE = 100000;
    bool IOerror = 0;   // set once stdin is exhausted

    // Next byte of stdin through a block buffer; returns -1 and sets IOerror
    // at end of input.
    inline char nc(){
        static char buf[BUF_SIZE],*p1=buf+BUF_SIZE,*pend=buf+BUF_SIZE;
        if(p1==pend){
            p1=buf;
            pend=buf+fread(buf,1,BUF_SIZE,stdin);
            if(pend==p1){IOerror=1;return -1;}
        }
        return *p1++;
    }

    inline bool blank(char ch){return ch==' '||ch=='\n'||ch=='\r'||ch=='\t';}

    // Reads one whitespace-delimited token into s (NUL-terminated).
    // Returns 0 on success and -1 when no token is left.  The caller must
    // provide a buffer large enough for the longest token.
    inline int read(char *s){
        char ch=nc();
        while(!IOerror && blank(ch))ch=nc();
        if(IOerror)return -1;
        for(;!IOerror && !blank(ch);ch=nc())*s++=ch;
        *s=0;
        return 0;
    }

    // Buffered writer; pending output is flushed when the object is
    // destroyed at program exit.
    struct Ostream_fwrite{
        char buf[BUF_SIZE];
        char *p1,*pend;
        Ostream_fwrite():p1(buf),pend(buf+BUF_SIZE){}
        void out(char ch){
            if(p1==pend){fwrite(buf,1,BUF_SIZE,stdout);p1=buf;}
            *p1++=ch;
        }
        void println(int x){
            char digits[12];
            int len=0;
            unsigned int v=(unsigned int)x;
            if(x<0){out('-');v=0u-v;}   // safe for INT_MIN as well
            do{digits[len++]=(char)('0'+v%10);v/=10;}while(v);
            while(len)out(digits[--len]);
            out('\n');
        }
        void flush(){if(p1!=buf){fwrite(buf,1,p1-buf,stdout);p1=buf;}}
        ~Ostream_fwrite(){flush();}
    }Ostream;

    inline void println(int x){Ostream.println(x);}
    inline void flush(){Ostream.flush();}
}

char str1[MAXN];
char str2[MAXN];
int str1_len,str2_len;
pp hash_hex[HEX_COUNT];   // hash_hex[k] = (P1^k mod M1, P2^k mod 2^64)
pp str1_hash[MAXN];       // prefix multiset hashes of str1
pp str2_hash[MAXN];       // prefix multiset hashes of str2

// Chained hash table: bucket = first hash component, stored key = second.
class hash_node{
public:
    ull val;
    int next;
};
hash_node hash_nodes[MAXN];
int hash_nodes_num;
int hash_table[MAXN];     // head node of each bucket, -1 when empty

// Allocates a node holding `key` that links to the current head of bucket idx.
inline int new_hash_nodes(int idx,ull key){
    hash_nodes[hash_nodes_num].next = hash_table[idx];
    hash_nodes[hash_nodes_num].val = key;
    return hash_nodes_num++;
}
// True when bucket idx contains `key`.
inline bool hash_find_key(int idx,ull key){
    for(int now=hash_table[idx];now!=-1;now=hash_nodes[now].next){
        if(hash_nodes[now].val == key)return true;
    }
    return false;
}
// Pushes `key` onto the front of bucket idx.
inline void hash_insert(int idx,ull key){
    hash_table[idx] = new_hash_nodes(idx,key);
}
// Empties bucket idx.
inline void hash_clear(int idx){
    hash_table[idx] = -1;
}

// Multiset hash of the window of the given length that ends at index `last`,
// computed from the prefix hashes in `prefix`.
inline pp window_hash(const pp *prefix,int last,int length){
    if(last < length)return prefix[last];   // window starts at index 0
    return sub_hash(prefix[last],prefix[last-length]);
}

// True when str1 and str2 each contain a window of the given length whose
// multiset hashes are equal.  The bucket table is left empty on return, so
// it never has to be reset as a whole between calls or test cases.
inline bool check(int length){
    if(length < 1)return false;
    hash_nodes_num = 0;
    for(int i=length-1;i<str1_len;++i){
        pp w = window_hash(str1_hash,i,length);
        hash_insert((int)w.first,w.second);
    }
    bool found = false;
    for(int i=length-1;i<str2_len && !found;++i){
        pp w = window_hash(str2_hash,i,length);
        found = hash_find_key((int)w.first,w.second);
    }
    for(int i=length-1;i<str1_len;++i){
        hash_clear((int)window_hash(str1_hash,i,length).first);
    }
    return found;
}

// Solves one test case (str1 and str2 are already read) and prints the answer.
void init(){
    str1_len = (int)strlen(str1);
    str2_len = (int)strlen(str2);
    // Input is assumed to consist of lowercase letters, so idx() is in 0..25.
    str1_hash[0] = hash_hex[idx(str1[0])];
    str2_hash[0] = hash_hex[idx(str2[0])];
    for(int i=1;i<str1_len;++i)str1_hash[i] = add_hash(str1_hash[i-1],str1[i]);
    for(int i=1;i<str2_len;++i)str2_hash[i] = add_hash(str2_hash[i-1],str2[i]);

    int limit = min(str1_len,str2_len);
    int ans = 0;
    for(int i=limit;i;i--){
        if(check(i)){
            ans = i;
            break;
        }
    }
    fastIO::println(ans);
}

int main(){
    hash_hex[0] = make_pair(1ULL,1ULL);
    for(int i=1;i<HEX_COUNT;++i)
        hash_hex[i] = next_hash(hash_hex[i-1],0);
    memset(hash_table,-1,sizeof(hash_table));   // all buckets start empty
    while(fastIO::read(str1) == 0 && fastIO::read(str2) == 0)init();
    return 0;
}