欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

AC自动机(多模式串匹配)

程序员文章站 2022-03-12 21:51:32
...

AC自动机(多模式串匹配)

虚线部分代表fail指针

AC自动机模板题链接
静态数组版本(推荐:代码量少易写,更快易写):

#include <bits/stdc++.h>
using namespace std;

class Aho_Corasick
{
public:
	void init()
	{
		idx = 0;
		memset(cnt, 0, sizeof cnt);
		memset(son, 0, sizeof son);
		memset(fail, 0, sizeof fail);
	}

	/*存储结构:Trie树*/
	void insert(const char* str)
	{
		int p = 0;
		int len = strlen(str);
		for (int i = 0; i < len; i++) 
		{
			int u = str[i] - 'a';
			if (!son[p][u]) son[p][u] = ++idx;
			p = son[p][u];
		}
		cnt[p]++;
	}

	void build_fail()
	{
		queue<int> q;
		for (int i = 0; i < childCnt; i++) {
			if (son[0][i]) q.push(son[0][i]);
		}
		while (!q.empty())
		{
			int t = q.front(); q.pop();
			for (int i = 0; i < childCnt; i++)
			{
			    //类似于状态压缩,不至于每次fail指针跳转很多次,只需每次跳转一次,相当于构建了图
				if (!son[t][i]) son[t][i] = son[fail[t]][i];  
				else
				{
					fail[son[t][i]] = son[fail[t]][i];
					q.push(son[t][i]);
				}
			}
		}
	}

	int query(const char* str)
	{
		int p = 0, ans = 0;
		int len = strlen(str);
		for (int i = 0; i < len; i++) 
		{
			p = son[p][str[i] - 'a'];
			int temp = p;
			while (temp) 
			{
				ans += cnt[temp];
				cnt[temp] = 0;
				temp = fail[temp];
			}
		}
		return ans;
	}

private:
	static const int childCnt = 26;
	static const int N = 500005;
	int son[N][childCnt], cnt[N], idx;  // 0既是root又是null
	int fail[N];
};

const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;
Aho_Corasick ac;

int main()
{
	int T;
	scanf("%d", &T);
	while (T--)
	{
		ac.init();
		scanf("%d", &N);
		getchar();
		for (int i = 1; i <= N; i++)
		{
			scanf("%s", key);
			ac.insert(key);
		}
		scanf("%s", pattern);
		ac.build_fail();
		printf("%d\n", ac.query(pattern));
	}
	return 0;
}

指针版本:

#include <bits/stdc++.h>
using namespace std;

class Aho_Corasick
{
public:
	Aho_Corasick()
	{
		root = new Node();
	}

	/*存储结构:Trie树*/
	void insert(const char* word)
	{
		Node* tmp = root;

		for (int i = 0; i < strlen(word); i++)
		{
			int c = word[i] - 'a';
			if (!tmp->child[c])  //不存在就创建该节点
				tmp->child[c] = new Node();
			tmp = tmp->child[c];
		}
		tmp->sum++;
	}

	void build_fail()
	{
		Node* p = root;
		queue<Node*> q;
		/*root的儿子fail指针均指向root*/
		for (int i = 0; i < childCnt; i++)
		{
			if (!p->child[i])continue;
			q.push(p->child[i]);
			p->child[i]->fail = root;
		}

		/*fail指针构建规则:首先在fatherFail指针寻找当前字符是否存在
		如果不存在则继续跳fail指针直到直到该字符或者fatherFail为空
		如果fatherFail最后为空则设置当前节点的fail为root
		否则就设置为fatherFail->child[i]*/
		while (!q.empty())
		{
			Node* cur = q.front();
			q.pop();

			for (int i = 0; i < 26; i++)
			{
				if (cur->child[i])
				{
					p = cur->fail;
					while (p)
					{
						if (p->child[i])
						{
							cur->child[i]->fail = p->child[i];
							break;
						}
						p = p->fail;
					}
					if (!p)  cur->child[i]->fail = root;
					q.push(cur->child[i]);
				}

			}
		}
	}

	int query(char* ch)
	{
		int cnt = 0;
		Node* p = root;
		int len = strlen(ch);

		for (int i = 0; i < len; i++)
		{
			int v = ch[i] - 'a';
			while (!p->child[v] && p != root)p = p->fail;
			p = p->child[v];
			if (!p)p = root;

			Node* tmp = p;
			while (tmp != root)
			{
				if (tmp->sum >= 0)
				{
					cnt += tmp->sum;
					tmp->sum = -1;  // 防止重复计数
				}
				else break;
				tmp = tmp->fail;
			}
		}
		return cnt;
	}

private:
	static const int childCnt = 26;
	struct Node
	{
		Node* fail;        //指向当前节点的最长后缀模板串的节点
		int sum;
		Node* child[childCnt];
		Node()
		{
			fail = NULL;
			sum = 0;
			memset(child, NULL, sizeof child);
		}
	};
	Node* root;
};

const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;

int main()
{
	int T;
	scanf("%d", &T);
	while (T--)
	{
		Aho_Corasick ac;
		scanf("%d", &N);
		getchar();
		for (int i = 1; i <= N; i++)
		{
			scanf("%s", key);
			ac.insert(key);
		}
		scanf("%s", pattern);
		ac.build_fail();
		printf("%d\n", ac.query(pattern));
	}
	return 0;
}