AC自动机(多模式串匹配)
程序员文章站
2022-03-12 21:51:32
...
虚线部分代表fail指针
AC自动机模板题链接
静态数组版本(推荐:代码量少易写,更快易写):
#include <bits/stdc++.h>
using namespace std;
class Aho_Corasick
{
public:
void init()
{
idx = 0;
memset(cnt, 0, sizeof cnt);
memset(son, 0, sizeof son);
memset(fail, 0, sizeof fail);
}
/*存储结构:Trie树*/
void insert(const char* str)
{
int p = 0;
int len = strlen(str);
for (int i = 0; i < len; i++)
{
int u = str[i] - 'a';
if (!son[p][u]) son[p][u] = ++idx;
p = son[p][u];
}
cnt[p]++;
}
void build_fail()
{
queue<int> q;
for (int i = 0; i < childCnt; i++) {
if (son[0][i]) q.push(son[0][i]);
}
while (!q.empty())
{
int t = q.front(); q.pop();
for (int i = 0; i < childCnt; i++)
{
//类似于状态压缩,不至于每次fail指针跳转很多次,只需每次跳转一次,相当于构建了图
if (!son[t][i]) son[t][i] = son[fail[t]][i];
else
{
fail[son[t][i]] = son[fail[t]][i];
q.push(son[t][i]);
}
}
}
}
int query(const char* str)
{
int p = 0, ans = 0;
int len = strlen(str);
for (int i = 0; i < len; i++)
{
p = son[p][str[i] - 'a'];
int temp = p;
while (temp)
{
ans += cnt[temp];
cnt[temp] = 0;
temp = fail[temp];
}
}
return ans;
}
private:
static const int childCnt = 26;
static const int N = 500005;
int son[N][childCnt], cnt[N], idx; // 0既是root又是null
int fail[N];
};
const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;
Aho_Corasick ac;
int main()
{
int T;
scanf("%d", &T);
while (T--)
{
ac.init();
scanf("%d", &N);
getchar();
for (int i = 1; i <= N; i++)
{
scanf("%s", key);
ac.insert(key);
}
scanf("%s", pattern);
ac.build_fail();
printf("%d\n", ac.query(pattern));
}
return 0;
}
指针版本:
#include <bits/stdc++.h>
using namespace std;
class Aho_Corasick
{
public:
Aho_Corasick()
{
root = new Node();
}
/*存储结构:Trie树*/
void insert(const char* word)
{
Node* tmp = root;
for (int i = 0; i < strlen(word); i++)
{
int c = word[i] - 'a';
if (!tmp->child[c]) //不存在就创建该节点
tmp->child[c] = new Node();
tmp = tmp->child[c];
}
tmp->sum++;
}
void build_fail()
{
Node* p = root;
queue<Node*> q;
/*root的儿子fail指针均指向root*/
for (int i = 0; i < childCnt; i++)
{
if (!p->child[i])continue;
q.push(p->child[i]);
p->child[i]->fail = root;
}
/*fail指针构建规则:首先在fatherFail指针寻找当前字符是否存在
如果不存在则继续跳fail指针直到直到该字符或者fatherFail为空
如果fatherFail最后为空则设置当前节点的fail为root
否则就设置为fatherFail->child[i]*/
while (!q.empty())
{
Node* cur = q.front();
q.pop();
for (int i = 0; i < 26; i++)
{
if (cur->child[i])
{
p = cur->fail;
while (p)
{
if (p->child[i])
{
cur->child[i]->fail = p->child[i];
break;
}
p = p->fail;
}
if (!p) cur->child[i]->fail = root;
q.push(cur->child[i]);
}
}
}
}
int query(char* ch)
{
int cnt = 0;
Node* p = root;
int len = strlen(ch);
for (int i = 0; i < len; i++)
{
int v = ch[i] - 'a';
while (!p->child[v] && p != root)p = p->fail;
p = p->child[v];
if (!p)p = root;
Node* tmp = p;
while (tmp != root)
{
if (tmp->sum >= 0)
{
cnt += tmp->sum;
tmp->sum = -1; // 防止重复计数
}
else break;
tmp = tmp->fail;
}
}
return cnt;
}
private:
static const int childCnt = 26;
struct Node
{
Node* fail; //指向当前节点的最长后缀模板串的节点
int sum;
Node* child[childCnt];
Node()
{
fail = NULL;
sum = 0;
memset(child, NULL, sizeof child);
}
};
Node* root;
};
const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;
int main()
{
int T;
scanf("%d", &T);
while (T--)
{
Aho_Corasick ac;
scanf("%d", &N);
getchar();
for (int i = 1; i <= N; i++)
{
scanf("%s", key);
ac.insert(key);
}
scanf("%s", pattern);
ac.build_fail();
printf("%d\n", ac.query(pattern));
}
return 0;
}
上一篇: Defense against Common Web Attacks
下一篇: 正则表达式