SP705 SUBST1 - New Distinct Substrings

知识点: SAM,后缀树,SA

原题面 Luogu

分析题意

SAM

SAM 板子背诵检查。
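每个本质不同的子串唯一对应 SAM 中的一个状态,而状态 \(i\) 恰好包含 \(\operatorname{len}(i)-\operatorname{len}(\operatorname{link}(i))\) 个子串,故 \(ans = \sum\limits_i{\left(\operatorname{len}(i)-\operatorname{len}(\operatorname{link}(i))\right)}\)(对除初始状态外的所有状态求和)。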

后缀树

一种显然的做法是建出后缀树,答案即未压缩的后缀树(即后缀 Trie)中除根节点以外的节点个数。
在缩链成边的同时维护边中包含的节点个数即可。
总复杂度 \(O(n)\)

后缀数组

另一种想法是用所有子串的个数 \(\frac{n(n+1)}{2}\) 减去重复子串的个数,显然重复的串一定出现在某两个后缀的公共前缀部分。
考虑后缀树上重复统计部分的位置,有下图所示:

ugly

观察其中的单色区域的形态,可以考虑增量法统计答案,按照字典序依次将所有后缀加入到后缀树中。
考虑加入 \(sa_i\) 后新增的本质不同的子串的数量,显然即 \(\operatorname{dep}(sa_i) - \operatorname{dep}(\operatorname{lca}(sa_i, sa_{i-1}))\),即 \(sa_i\) 的所有前缀中,未作为此前已加入后缀的前缀出现过的前缀数量。
字典序相邻的节点的 \(lca\) 的深度即为 SA 中的 \(\operatorname{height}\),则最终答案即:

\[\frac{n(n+1)}{2} - \sum_{i = 2}^{n}\operatorname{height}_i \]

SA 简单实现即可,总复杂度 \(O(n)\sim O(n\log n)\),依赖于实现。

代码实现

SAM

//知识点:SAM 
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
// Shorthand for long long; not referenced by the code visible in this listing.
#define ll long long
// Capacity of the input buffer S (5e4 plus a small margin).
const int kMaxn = 5e4 + 10;
// Width of one transition row in ch; Insert indexes a row by the character code.
const int kMaxm = 300 + 10;
//=============================================================
// n: length of the current string; last: state that Insert extends from;
// node_num: index of the most recently allocated state (state 1 exists up front);
// ans: running answer for the current test case.
int n, last = 1, node_num = 1, ans;
// ch[u][c]: target of the transition from state u on character code c (0 = none).
// link[u], len[u]: per-state values maintained by Insert.
// All three arrays hold twice kMaxn states.
int ch[kMaxn << 1][kMaxm], link[kMaxn << 1], len[kMaxn << 1];
// Input string; main reads it starting at index 1.
char S[kMaxn];
//=============================================================
// Reads one decimal integer from stdin.
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated. The character that ends
// the digit run is consumed.
// Returns 0 when the input ends before any digit is seen.
inline int read() {
  int f = 1, w = 0;
  // getchar() returns an int so that EOF stays distinguishable from every
  // byte value; keeping it as int also makes the isdigit() argument valid.
  int c = getchar();
  // Stop at EOF, otherwise exhausted input would spin here forever.
  for (; c != EOF && !isdigit(c); c = getchar())
    if (c == '-') f = -1;
  for (; c != EOF && isdigit(c); c = getchar()) w = w * 10 + (c - '0');
  return f * w;
}
// Raises fir to sec when sec is strictly larger; otherwise fir keeps its value.
void GetMax(int &fir, int sec) {
  fir = (fir < sec) ? sec : fir;
}
// Lowers fir to sec when sec is strictly smaller; otherwise fir keeps its value.
void GetMin(int &fir, int sec) {
  if (!(sec < fir)) return;
  fir = sec;
}
// Extends the automaton by one character with code c_.
// Allocates a new state after `last`, adds the missing c_ transitions along
// the link chain, and then sets the new state's link, cloning the transition
// target when its len does not equal len of the chain state plus one.
void Insert(int c_) {
  const int cur = ++node_num;  // state for the whole extended string
  int walk = last;             // cursor moving along the link chain
  last = cur;
  len[cur] = len[walk] + 1;

  // Every chain state without a c_ transition now points to the new state.
  while (walk != 0 && ch[walk][c_] == 0) {
    ch[walk][c_] = cur;
    walk = link[walk];
  }

  // The chain ran out: link the new state to state 1.
  if (walk == 0) {
    link[cur] = 1;
    return;
  }

  const int target = ch[walk][c_];
  if (len[target] == len[walk] + 1) {
    link[cur] = target;
    return;
  }

  // Otherwise split: the clone copies target's transitions and link, but
  // takes len[walk] + 1 as its length.
  const int clone = ++node_num;
  memcpy(ch[clone], ch[target], sizeof(ch[clone]));
  len[clone] = len[walk] + 1;
  link[clone] = link[target];
  link[target] = clone;
  link[cur] = clone;

  // Redirect the remaining chain transitions from target to the clone.
  while (walk != 0 && ch[walk][c_] == target) {
    ch[walk][c_] = clone;
    walk = link[walk];
  }
}

// Resets the automaton to its initial single-state form before a test case.
// Only the states used so far (indices 0..node_num) are cleared: Insert never
// writes beyond node_num and the arrays start zero-initialised, so the rest of
// each table is already zero. Clearing the full transition table every time
// would touch the whole array even for tiny inputs.
void Init() {
  size_t used = (size_t) node_num + 1;
  const size_t capacity = sizeof (len) / sizeof (len[0]);
  if (used > capacity) used = capacity;  // never clear past the arrays

  memset(ch, 0, used * sizeof (ch[0]));
  memset(link, 0, used * sizeof (link[0]));
  memset(len, 0, used * sizeof (len[0]));

  last = node_num = 1;
  ans = 0;
}
//=============================================================
// Reads the number of test cases, then for each string builds the automaton
// and prints the sum of len[i] - len[link[i]] over all states except state 1.
int main() {
  int T = read();
  while (T --) {
    Init();
    // Stop when the input ends early instead of re-processing the string that
    // is still in the buffer from the previous test case.
    if (scanf("%s", S + 1) != 1) break;
    n = (int) strlen(S + 1);
    // Insert uses its argument as an array index; converting through
    // unsigned char keeps bytes >= 0x80 from becoming negative indices on
    // platforms where plain char is signed.
    for (int i = 1; i <= n; ++ i) Insert((unsigned char) S[i]);
    for (int i = 2; i <= node_num; ++ i) ans += len[i] - len[link[i]];
    printf("%d\n", ans);
  }

  return 0;
}

SA

//知识点:SA 
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
// Shorthand for long long; main uses it for the answer.
#define LL long long
// Capacity of the per-position arrays (1e5 plus a small margin).
const int kN = 1e5 + 10;
//=============================================================
// Input string; SuffixSort reads it starting at index 1.
char s[kN];
// n: length of the current string; m: largest key value for the counting sort.
// sa[i]: start position of the suffix placed i-th by SuffixSort.
// rk[p]: rank key of the suffix starting at p; oldrk: copy of rk taken before
// a round reassigns ranks. Both are twice kN long because cmp reads index p + w.
// height[r]: match length GetHeight stores for rank r (suffix sa[r] against sa[r - 1]).
int n, m, sa[kN], rk[kN << 1], oldrk[kN << 1], height[kN];
// Scratch arrays for the counting sort: id holds positions ordered by second
// key, cnt holds bucket counts, rkid caches rk[id[i]].
int id[kN], cnt[kN], rkid[kN];
//=============================================================
// Reads one decimal integer from stdin.
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated. The character that ends
// the digit run is consumed.
// Returns 0 when the input ends before any digit is seen.
inline int read() {
  int f = 1, w = 0;
  // Keep the getchar() result as int: EOF must stay distinguishable from
  // every byte value, and isdigit() requires an unsigned char value or EOF.
  int c = getchar();
  // Without the EOF test this loop would never finish on exhausted input.
  for (; c != EOF && !isdigit(c); c = getchar())
    if (c == '-') f = -1;
  for (; c != EOF && isdigit(c); c = getchar()) {
    w = w * 10 + (c - '0');
  }
  return f * w;
}
// Stores the larger of fir_ and sec_ back into fir_.
void Chkmax(int &fir_, int sec_) {
  fir_ = std::max(fir_, sec_);
}
// Stores the smaller of fir_ and sec_ back into fir_.
void Chkmin(int &fir_, int sec_) {
  fir_ = std::min(fir_, sec_);
}
// Reports whether positions x_ and y_ carried the same pair of previous-round
// ranks: the rank at the position itself and the rank w_ places further on.
bool cmp(int x_, int y_, int w_) {
  if (oldrk[x_] != oldrk[y_]) return false;
  return oldrk[x_ + w_] == oldrk[y_ + w_];
}
void GetHeight() {
  for (int i = 1, k = 0; i <= n; ++ i) {
    if (rk[i] == 1) k = 0;
    else {
      if (k > 0) -- k;
      int j = sa[rk[i] - 1];
      while (i + k <= n && j + k <=n && 
             s[i + k] == s[j + k]) {
               ++ k;
      }
    }
    height[rk[i]] = k;
  }
}
// Reads one string into s[1..n], sorts its suffixes by prefix doubling with
// counting sorts, and then calls GetHeight().
// On return sa[1..n] lists the suffix start positions in sorted order and
// rk[] is the inverse of sa[].
// A failed read is treated as the empty string.
void SuffixSort() {
  if (scanf("%s", s + 1) != 1) s[1] = '\0';
  m = 300;
  n = (int) strlen(s + 1);

  // Ranks past position n act as the "no second key" value in cmp() and must
  // be 0. After a longer earlier test case they still hold that case's ranks,
  // which can make two different suffixes compare equal and end with the same
  // rank. Clear the table before every run.
  memset(rk, 0, sizeof (rk));

  // Round 0: counting sort by the first character. Going through unsigned
  // char keeps the bucket index non-negative for bytes >= 0x80.
  memset(cnt, 0, sizeof (cnt));
  for (int i = 1; i <= n; ++ i) cnt[rk[i] = (unsigned char) s[i]] ++;
  for (int i = 1; i <= m; ++ i) cnt[i] += cnt[i - 1];
  for (int i = n; i >= 1; -- i) sa[cnt[rk[i]] --] = i;

  for (int p, w = 1; w < n; w <<= 1) {
    // Order positions by second key: those with no character w places ahead
    // come first, the rest follow in the order their second half has in sa.
    p = 0;
    for (int i = n; i > n - w; -- i) id[++ p] = i;
    for (int i = 1; i <= n; ++ i) {
      if (sa[i] > w) id[++ p] = sa[i] - w;
    }

    // Stable counting sort by first key over the id order.
    memset(cnt, 0, sizeof (cnt));
    for (int i = 1; i <= n; ++ i) cnt[rkid[i] = rk[id[i]]] ++;
    for (int i = 1; i <= m; ++ i) cnt[i] += cnt[i - 1];
    for (int i = n; i >= 1; -- i) sa[cnt[rkid[i]] --] = id[i];

    // Reassign ranks; neighbours in sa with identical key pairs share a rank.
    m = 0;
    memcpy(oldrk, rk, sizeof (rk));
    for (int i = 1; i <= n; ++ i) {
      m += (cmp(sa[i], sa[i - 1], w) ^ 1);
      rk[sa[i]] = m;
    }
  }

  // The final ranks are the inverse of sa. Writing them explicitly also covers
  // n == 1, where the loop above never runs and rk[1] would otherwise still
  // hold a character code.
  for (int i = 1; i <= n; ++ i) rk[sa[i]] = i;
  GetHeight();
}
//=============================================================
int main() {
  int T = read();
  while (T --) {
    SuffixSort();
    LL ans = 1ll * n * (n + 1) / 2ll; 
    for (int i = 1; i <= n; ++ i) ans -= height[i];
    printf("%lld\n", ans);
  }
  return 0;
}
posted @ 2020-08-17 08:13  Luckyblock  阅读(132)  评论(0编辑  收藏  举报