欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

SPOJ Distinct Substrings (后缀数组,不相同的子串个数)

程序员文章站 2022-04-30 20:36:50
...

描述

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Sample Input:

2
CCCCC
ABABA

Sample Output:

5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

思路

给出一个字符串,求其中不同的子串个数.

一个串的子串个数为n*(n+1)/2,减去后缀的相同前缀所作的贡献每一个height[i]即为答案.

引用论文:

SPOJ Distinct Substrings (后缀数组,不相同的子串个数)

代码

#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
using namespace std;
#define mem(a, b) memset(a, b, sizeof(a))
const int N = 1e5 + 10;
int t1[N], t2[N], c[N], r[N], sa[N], rak[N], height[N];
bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}
void da(int str[], int sa[], int rak[], int height[], int n, int m)
{
    n++;
    int i, j, p, *x = t1, *y = t2;
    for (i = 0; i < m; i++)
        c[i] = 0;
    for (i = 0; i < n; i++)
        c[x[i] = str[i]]++;
    for (i = 1; i < m; i++)
        c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--)
        sa[--c[x[i]]] = i;
    for (j = 1; j <= n; j <<= 1)
    {
        p = 0;
        for (i = n - j; i < n; i++)
            y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j)
                y[p++] = sa[i] - j;
        for (i = 0; i < m; i++)
            c[i] = 0;
        for (i = 0; i < n; i++)
            c[x[y[i]]]++;
        for (i = 1; i < m; i++)
            c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--)
            sa[--c[x[y[i]]]] = y[i];
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        if (p >= n)
            break;
        m = p;
    }
    int k = 0;
    n--;
    for (i = 0; i <= n; i++)
        rak[sa[i]] = i;
    for (i = 0; i < n; i++)
    {
        if (k)
            k--;
        j = sa[rak[i] - 1];
        while (str[i + k] == str[j + k])
            k++;
        height[rak[i]] = k;
    }
}
char s[N];
int a[N];
int main()
{
    //freopen("in.txt", "r", stdin);
    int t;
    scanf("%d", &t);
    while (t--)
    {
        scanf("%s", s);
        int n = strlen(s);
        for (int i = 0; i < n; i++)
            a[i] = s[i];
        a[n] = 0;
        da(a, sa, rak, height, n, 128);
        int ans = n * (n + 1) / 2;
        for (int i = 2; i <= n; i++)
            ans -= height[i];
        printf("%d\n", ans);
    }
    return 0;
}
相关标签: 后缀数组