Codeforces Gym 103409 J. Suffix Automaton

Codeforces Gym 103409 J. Suffix Automaton

容易将询问转化为求给定长度的本质不同字典序第 k 大的子串, 对于每一个长度离线处理所有询问即可.

这里使用后缀数组去解决该问题. 求出 height 数组后, 长度为 l 的本质不同子串数量等于 height 数组中小于 l 的位置中除去长度小于 l 的后缀的个数. 于是可以维护 height 数组中小于 l 且后缀长度大于等于 l 的位置集合 S , 由于 height 数组按字典序排列, 于是找到集合 S 中的第 k 小和第 k+1 小的 Sk,Sk+1 , 答案就是区间 [Sk,Sk+1) 中长度等于 l 对应的字符串. 由于需要找下标最小, 因此需要求该区间中后缀起点的最小值, 用一个 ST 表维护即可.

至于维护区间第 k 小, 区间插入, 区间删除, 只需一个平衡树/树状数组即可, 复杂度为 O(nlogn) .

于是总时间复杂度为 O(nlogn) .

参考代码

#include <bits/stdc++.h>
using namespace std;

static constexpr int Maxn = 1e6 + 5, LOG = 21;

int n, Q;
pair<int, int> ans[Maxn];
char s[Maxn];
struct query {
  int64_t k;
  int id;
  query() = default;
  query(int64_t k, int id) : k(k), id(id) { }
  friend bool operator <(const query &lhs, const query &rhs) {
    return make_pair(lhs.k, lhs.id) < make_pair(rhs.k, rhs.id);
  }
} q[Maxn];

class suffix_array {
 public:
  typedef int index_t;
 private:
 template<class T>
  static int sz(T &&arg) {
    using std::size; return int(size(std::forward<T>(arg)));
  }
 public:
  suffix_array() { }
  const index_t &sa(int k) const { return _sa[k]; }
  const index_t &rank(int k) const { return _rank[k]; }
  const index_t &lcp(int k) const { return _lcp[k]; }
 std::tuple<std::vector<index_t>, std::vector<index_t>, std::vector<index_t>>
  to() { return std::make_tuple(_sa, _rank, _lcp); }
 template<typename string_t>
  static suffix_array construct(const string_t &S) {
    int N = sz(S);
    suffix_array _sa(N);
    _sa.build_sa(S);
    _sa.build_rank();
    _sa.build_lcp(S);
    return _sa;
  }
 template<typename string_t, typename F>
  static suffix_array map_and_construct(const string_t &S, const F &f) {
    std::vector<decltype(f(S[0]))> mapped(sz(S));
    for (int i = 0; i < sz(S); i++) mapped[i] = f(S[i]);
    return construct(mapped);
  }
 template<typename string_t>
  static suffix_array compress_and_construct(const string_t &S) {
    using std::begin; using std::end;
    std::vector<decltype(S[0])> vals(begin(S), end(S));
    std::sort(vals.begin(), vals.end());
    vals.resize(unique(vals.begin(), vals.end()) - vals.begin());
    std::vector<decltype(S[0])> compressed_s(sz(S));
    for (int i = 0; i < sz(S); i++)
      compressed_s[i] = lower_bound(vals.begin(), vals.end(), S[i]) - vals.begin();
    return construct(compressed_s);
  }
  static suffix_array construct_lower_alpha(const std::string &s) {
    return suffix_array::map_and_construct(s, [](char c) -> char { return char(c - 'a'); });
  }
  static suffix_array construct_upper_alpha(const std::string &s) {
    return suffix_array::map_and_construct(s, [](char c) -> char { return char(c - 'A'); });
  }
 private:
  explicit suffix_array(int N_) : N(N_) { }
 template<typename string_t>
  void build_sa(const string_t &S) {
    _sa = std::vector<index_t>(N + 1);
    for (auto s : S) assert(index_t(s) >= 0);
    int sigma = N ? *max_element(S.begin(), S.end()) + 1 : 0;
    std::vector<index_t> tmp(std::max(N, sigma * 2));
    suffix_array::sais<string_t>(N, S, _sa.data(), sigma, tmp.data());
    _sa.erase(_sa.begin());
  }
 template<typename string_t>
  static void sais(int N, const string_t &S, index_t *_sa, int sigma, index_t *tmp) {
    if (N == 0) return void(_sa[0] = 0);
    else if (N == 1) return _sa[0] = 1, _sa[1] = 0, void();
    index_t *freq = tmp; tmp += sigma;
    memset(freq, 0, sizeof(*freq) * sigma);
    for (int i = 0; i < N; i++) ++freq[index_t(S[i])];
    auto build_bucket_start = [&]() {
      for (int v = 0, cur = 1; v < sigma; v++) tmp[v] = cur, cur += freq[v];
    };
    auto build_bucket_end = [&]() {
      for (int v = 0, cur = 1; v < sigma; v++) cur += freq[v], tmp[v] = cur;
    };
    int num_pieces = 0, first_endpoint = 0;
    build_bucket_end(); _sa[0] = N;
    index_t c0 = S[N - 1], c1 = -1;
    bool isS = false;
    for (int i = N - 2; i >= 0; i--) {
      c1 = c0; c0 = S[i];
      if (c0 < c1) isS = true;
      else if (c0 > c1 && isS)
        isS = false, _sa[first_endpoint = --tmp[c1]] = i + 1, ++num_pieces;
    }
    if (num_pieces > 1) {
      _sa[first_endpoint] = 0;
      build_bucket_start();
      for (int z = 0; z <= N; z++) {
        int v = _sa[z];
        if (v <= 0) continue;
        _sa[z] = 0; --v;
        index_t c0 = S[v - 1], c1 = S[v];
        _sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
      }
      index_t *const sa_end = _sa + N + 1;
      index_t *pieces = sa_end;
      build_bucket_end();
      for (int z = N; z >= 0; z--) {
        int v = _sa[z];
        if (!v) continue;
        _sa[z] = 0;
        if (v > 0) { *--pieces = v; continue; }
        v = ~v; --v;
        index_t c0 = S[v - 1], c1 = S[v];
        _sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
      }
      int prv_start = N;
      index_t c0 = S[N - 1], c1 = -1;
      bool isS = false;
      for (int i = N - 2; i >= 0; i--) {
        c1 = c0; c0 = S[i];
        if (c0 < c1) isS = true;
        else if (c0 > c1 && isS) {
          isS = false;
          int v = i + 1;
          _sa[v >> 1] = prv_start == N ? 0 : prv_start - v;
          prv_start = v;
        }
      }
      int next_sigma = 0;
      int prv_len = -1, prv_v = 0;
      for (int i = 0; i < num_pieces; i++) {
        int v = pieces[i];
        int len = _sa[v >> 1];
        bool eq = prv_len == len;
        for (int a = 0; eq && a < len; ++a) eq = S[v + a] == S[prv_v + a];
        if (!eq) next_sigma++, prv_len = len, prv_v = v;
        _sa[v >> 1] = next_sigma;
      }
      if (next_sigma == num_pieces) {
        _sa[0] = N;
        memcpy(_sa + 1, pieces, sizeof(*_sa) * num_pieces);
      } else {
        index_t *next_S = sa_end;
        for (int i = (N - 1) >> 1; i >= 0; i--) {
          int v = _sa[i];
          if (v) *--next_S = v - 1;
          _sa[i] = 0;
        }
        memset(_sa, 0, sizeof(*_sa) * (num_pieces + 1));
        sais<const index_t *>(num_pieces, next_S, _sa, next_sigma, tmp);
        next_S = sa_end;
        index_t c0 = S[N - 1], c1 = -1;
        bool isS = false;
        for (int i = N - 2; i >= 0; i--) {
          c1 = c0; c0 = S[i];
          if (c0 < c1) isS = true;
          else if (c0 > c1 && isS)
            isS = false, *--next_S = i + 1;
        }
        _sa[0] = N;
        for (int i = 1; i <= num_pieces; i++)
          _sa[i] = next_S[_sa[i]];
      }
      memset(_sa + num_pieces + 1, 0, sizeof(*_sa) * (N - num_pieces));
      build_bucket_end();
      for (int i = num_pieces; i > 0; i--) {
        int v = _sa[i];
        _sa[i] = 0;
        index_t c1 = S[v];
        _sa[--tmp[c1]] = v;
      }
    }
    build_bucket_start();
    for (int z = 0; z <= N; z++) {
      int v = _sa[z];
      if (v <= 0) continue;
      index_t c1 = S[--v];
      index_t c0 = v ? S[v - 1] : c1;
      _sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
    }
    build_bucket_end();
    for (int z = N; z >= 0; z--) {
      int v = _sa[z];
      if (v >= 0) continue;
      _sa[z] = v = ~v;
      index_t c1 = S[--v];
      index_t c0 = v ? S[v - 1] : c1 + 1;
      _sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
    }
  }
  void build_rank() {
    _rank = std::vector<index_t>(N);
    for (int i = 0; i < N; i++) _rank[_sa[i]] = i;
  }
 template<typename string_t>
  void build_lcp(const string_t &S) {
    assert(sz(S) == N);
    _lcp = std::vector<index_t>(N);
    for (int i = 0, j = 0; i < N; ++i) {
      if (_rank[i]) {
        if (j) --j;
        for (; S[_sa[_rank[i]] + j] == S[_sa[_rank[i] - 1] + j]; ++j);
        _lcp[_rank[i]] = j;
      }
    }
  }
 private:
  int N;
  std::vector<index_t> _sa;
  std::vector<index_t> _rank;
  std::vector<index_t> _lcp;
};
vector<int> sa, rnk, lcp;
int S[LOG][Maxn], lg2[Maxn], S2[LOG][Maxn];
int ask(int l, int r) {
  int k = lg2[r - l + 1];
  return min(S[k][l], S[k][r - (1 << k) + 1]);
} // ask
int bit[Maxn];
inline void upd(int x, int y) {
  for (++x; x <= n; x += x & -x) bit[x] += y;
} // upd
inline int qry(int x) {
  int r = 0;
  for (++x; x; x -= x & -x) r += bit[x];
  return r;
} // qry
inline int get_rank(int k) {
  int x = 0, i;
  for (i = LOG - 1, ++k; i >= 0; --i)
    if (x + (1 << i) <= n && k > bit[x + (1 << i)])
      x += (1 << i), k -= bit[x];
  return x;
} // get_rank

int main (void) {

  static char ss[Maxn];
  string s;
  scanf("%s", ss); s = ss;
  n = s.length();
  tie(sa, rnk, lcp) = suffix_array::construct(s).to();
  scanf("%d", &Q);
  for (int64_t i = 0, k; i < Q; ++i)
    scanf("%lld", &k), q[i] = query(k, i), q[i].id = i;
  sort(q, q + Q);
  static vector<int> hlcp[Maxn];
  for (int i = 0; i < n; ++i)
    hlcp[lcp[i]].push_back(i);
  lg2[0] = -1;
  for (int i = 1; i <= n; ++i) lg2[i] = lg2[i >> 1] + 1;
  memset(S, 0x3f, sizeof(S));
  for (int i = 0; i < n; ++i) S[0][i] = sa[i];
  for (int j = 1; j < LOG; ++j)
    for (int i = 0; i + (1 << j) <= n; ++i)
      S[j][i] = min(S[j - 1][i], S[j - 1][i + (1 << j - 1)]);
  int j = 0;
  int64_t now = 0;
  for (int i = 0; i < n; ++i) {
    for (const int &x: hlcp[i]) if (n - sa[x] >= i) upd(x, 1);
    if (i > 0 && lcp[rnk[n - i]] <= i) upd(rnk[n - i], -1);
    int sz = qry(n - 1);
    int64_t ns = sz + now;
    for (; j < Q && q[j].k <= ns; ++j) {
      int k = get_rank(q[j].k - now - 1);
      int r = (q[j].k - now == sz ? n - 1 : get_rank(q[j].k - now) - 1);
      k = ask(k, r);
      ans[q[j].id] = {k, k + i};
    }
    now = ns;
  }
  while (j < Q) ans[q[j].id] = {-2, -2}, ++j;
  for (int i = 0; i < Q; ++i)
    printf("%d %d\n", ans[i].first + 1, ans[i].second + 1);

  exit(EXIT_SUCCESS);
} // main

posted @   cutx64  阅读(249)  评论(0编辑  收藏  举报
编辑推荐:
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
阅读排行:
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
点击右上角即可分享
微信分享提示