/* ** This is a C++ representation of Aho-Corasick's dictionary searcher. ** Runtime: O(n+m+z) [m is total character count in the dictionary] ** Source: www.geeksforgeeks.org/aho-corasick-algorithm-pattern-searching */ #include <cstdio> #include <cstring> #include <string> #include <iostream> #include <queue> #include <bitset> using namespace std; #define MAXS 100000 // Total character count in patterns (=m). #define MAXC 26 // Total character in alphabet. #define MAXW 500 // Total number of patterns. int f[MAXS], g[MAXS][MAXC]; bitset<MAXW> out[MAXS]; int buildAutomata(string arr[], int k) { memset(g, -1, sizeof g); int states=1; for(int i=0;i<k;i++) { const string &word = arr[i]; int currentState = 0; for (int j=0;j<word.size();j++) { int ch=word[j]-'a'; if(g[currentState][ch]==-1) g[currentState][ch]=states++; currentState=g[currentState][ch]; } out[currentState].set(i); } for(int ch=0;ch<MAXC;ch++) if(g[0][ch]==-1) g[0][ch]=0; memset(f, -1, sizeof f); queue<int> q; for(int ch=0;ch<MAXC;ch++) if(g[0][ch]!=0) f[g[0][ch]]=0, q.push(g[0][ch]); while(q.size()) { int state=q.front(); q.pop(); for (int ch=0;ch<=MAXC;ch++) { if(g[state][ch]!=-1) { int failure=f[state]; while(g[failure][ch]==-1) failure=f[failure]; failure=g[failure][ch]; f[g[state][ch]]=failure; out[g[state][ch]] |= out[failure]; q.push(g[state][ch]); } } } return states; } int nextState(int currentState, char nextInput) { int answer=currentState, ch=nextInput-'a'; while(g[answer][ch]==-1) answer=f[answer]; return g[answer][ch]; } void searchWords(string arr[], int k, string text) { buildAutomata(arr, k); int currentState=0; for(int i=0;i<text.size();i++) { currentState=nextState(currentState, text[i]); if(out[currentState]==0) continue; for(int j=0;j<k;j++) if(out[currentState].test(j)) cout<<"Word "<<arr[j]<<" appears from "<<i-arr[j].size()+1<<" to "<<i<<endl; } } int main() { string arr[]={"he","she","hers","his"}; string text="ahehishers"; int k=sizeof(arr)/sizeof(arr[0]); searchWords(arr, k, text); return 0; }
Aho-Corasick (C++)
Subscribe to:
Posts (Atom)