线性求 $i^i$ 的做法

线性求 \(i^i\) 的做法

方便起见,我们记 \(f_i=i^i\),\(i\) 的最小质因子为 \(p=\mathrm{minp}(i)\),第 \(i\) 个质数为 \(\mathrm{pr}_i\)。

对于质数 \(p\),直接用快速幂计算 \(f_p\)。质数共 \(\mathcal O(\frac{n}{\ln n})\) 个,每次快速幂 \(\mathcal O(\log n)\),因此这里复杂度 \(\mathcal O(\frac{n}{\ln n}\log n)=\mathcal O(n)\)。

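对于合数 \(i=pq\)(其中 \(p=\mathrm{minp}(i)\)),有 \(f_i=(pq)^{pq}=f_p^qf_q^p\)。由于 \(p\le \sqrt{n}\),因此我们可以取 \(B=\lfloor\sqrt n\rfloor\),用 BSGS 预处理 \(f_p^{1\cdots B}\) 以及 \((f_p^B)^{1\cdots B}\),从而 \(\mathcal O(1)\) 得到前半部分 \(f_p^q\);预处理的复杂度为 \(\mathcal O(\frac{\sqrt n}{\ln n}\cdot\sqrt n)=\mathcal O(\frac{n}{\ln n})\)。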

考虑如何快速计算后半部分。回顾线性筛的流程,\(i\) 是在外层枚举到 \(q\),内层枚举到 \(p\) 时计算,因此对于 \(q\) 而言,它计算的东西依次为 \(f_q^{\mathrm{pr}_1,\mathrm{pr}_2,\cdots}\),指数增量是 prime gap,即 \(\mathcal O(\ln n)\),因此可以预处理出 \(f_q^{1,2,\cdots,\ln \frac{n}{q}}\),这里复杂度 \(\mathcal O(\frac{n/q}{\ln (n/q)}+\ln \frac{n}{q})\),累加起来 \(\mathcal O(n)\)(实测中,由于枚举到 \(p=\mathrm{minp}(q)\) 就会 break,所以常数极小)。

上面这个 \(1,2,\cdots,\ln \frac{n}{q}\) 也可以用 BSGS 优化到 \(\sqrt{\ln \frac{n}{q}}\),不过没啥影响,说不定还跑不过直接暴力。

时间复杂度 \(\mathcal O(n)\)


下面是一些实验性代码:

\(\mathcal O(n)\) 的实现

// Upper bound (exclusive) on the indices used by the sieve arrays; supports n up to 1e8.
const int N = 100000005;
// Side length of the square power tables: floor(sqrt(N)) + 5 = 10000 + 5.
// Written as a literal because sqrt() is not a constant expression in standard
// C++, so an array bound derived from it only compiles where the compiler
// folds the call. Keep SN > floor(sqrt(N)) if N is ever changed.
const int SN = 10005;
// Prime modulus under which all powers are reduced.
const int mod = 998244353;

// Binary exponentiation: returns a raised to the power b, reduced modulo `mod`.
// The loop body never runs for b <= 0, so the result is 1 in that case.
// Every product is widened to unsigned 64-bit (via 1ull) before the reduction.
int qpow(int a, int b) {
    int acc = 1;
    for (; b > 0; b >>= 1) {
        // Fold the current square into the answer when the low bit of b is set.
        if (b % 2 != 0) {
            acc = 1ull * acc * a % mod;
        }
        a = 1ull * a * a % mod;
    }
    return acc;
}

// Two-level power tables filled by sieve() for each prime p <= B:
//   bsgs1[p][k] = f[p]^k       (small steps)
//   bsgs2[p][k] = f[p]^(B * k) (large steps)
int bsgs1[SN][SN];
int bsgs2[SN][SN];

// vis[x] is set once x has been produced as (i * prime) by the sieve.
bool vis[N];

// f[k] receives k^k modulo `mod`.
int f[N];

// pr[1..len] are the primes found so far; pr[0] stays 0 and is read as the
// "previous prime" when j == 1.
int pr[N / 10];
int len;

// powers[e] caches f[i]^e for the i currently being processed by sieve().
int powers[250];

// Counts how many times sieve() had to extend the powers[] cache.
int S;

void sieve(int n) {
    f[1] = 1;
    const int B = sqrt(n);
    for (int i = 2; i <= n; i++) {
        if (!vis[i]) {
            pr[++len] = i;
            f[i] = qpow(i, i);
            if (i <= B) {
                bsgs1[i][0] = 1;
                for (int j = 1; j <= B; j++)
                    bsgs1[i][j] = 1ull * bsgs1[i][j - 1] * f[i] % mod;
                bsgs2[i][0] = 1;
                for (int j = 1; j <= B; j++)
                    bsgs2[i][j] = 1ull * bsgs2[i][j - 1] * bsgs1[i][B] % mod;
            }
        }
        powers[0] = 1;
        int cur = 1, gap = 0;
        for (int j = 1; j <= len && i * pr[j] <= n; j++) {
            vis[pr[j] * i] = 1;
            int num = i * pr[j], now = pr[j] - pr[j - 1];
            if (now > gap) {
                S++;
                for (int ex = gap + 1; ex <= now; ex++)
                    powers[ex] = 1ull * powers[ex - 1] * f[i] % mod;
                gap = now;
            }
            cur = 1ull * cur * powers[now] % mod;
            f[num] = 1ull * bsgs1[pr[j]][i % B] * bsgs2[pr[j]][i / B] % mod * cur % mod;
            if (i % pr[j] == 0) break;
        }
    }
    fprintf(stderr, "S = %d\n", S);
    fprintf(stderr, "time used = %.10f\n", (clock()) / 1. / CLOCKS_PER_SEC);
}

实验数据:

  • \(n=10^7\):\(\text{0.188s}\)
  • \(n=10^8\):\(\text{1.816s}\)

\(\mathcal O(n \log \ln n)\) 的实现(即 prime gap 每次暴力快速幂计算):

// Upper bound (exclusive) on the indices used by the sieve arrays; supports n up to 1e8.
const int N = 100000005;
// Side length of the square power tables: floor(sqrt(N)) + 5 = 10000 + 5.
// Written as a literal because sqrt() is not a constant expression in standard
// C++, so an array bound derived from it only compiles where the compiler
// folds the call. Keep SN > floor(sqrt(N)) if N is ever changed.
const int SN = 10005;
// Prime modulus under which all powers are reduced.
const int mod = 998244353;

// Square-and-multiply: returns a to the power b, reduced modulo `mod`.
// For b <= 0 the loop is skipped and 1 is returned.
int qpow(int a, int b) {
    // Kept in unsigned 64-bit so each product is formed before it is reduced.
    unsigned long long result = 1;
    while (b > 0) {
        const bool low_bit_set = (b & 1) != 0;
        if (low_bit_set) {
            result = result * a % mod;
        }
        a = (unsigned long long)a * a % mod;
        b >>= 1;
    }
    return result;
}

// Two-level power tables filled by sieve() for each prime p <= B:
//   bsgs1[p][k] = f[p]^k       (small steps)
//   bsgs2[p][k] = f[p]^(B * k) (large steps)
int bsgs1[SN][SN];
int bsgs2[SN][SN];

// vis[x] is set once x has been produced as (i * prime) by the sieve.
bool vis[N];

// f[k] receives k^k modulo `mod`.
int f[N];

// pr[1..len] are the primes found so far; pr[0] stays 0 and is read as the
// "previous prime" when j == 1.
int pr[N / 10];
int len;

void sieve(int n) {
    f[1] = 1;
    const int B = sqrt(n);
    for (int i = 2; i <= n; i++) {
        if (!vis[i]) {
            pr[++len] = i;
            f[i] = qpow(i, i);
            if (i <= B) {
                bsgs1[i][0] = 1;
                for (int j = 1; j <= B; j++)
                    bsgs1[i][j] = 1ull * bsgs1[i][j - 1] * f[i] % mod;
                bsgs2[i][0] = 1;
                for (int j = 1; j <= B; j++)
                    bsgs2[i][j] = 1ull * bsgs2[i][j - 1] * bsgs1[i][B] % mod;
            }
        }
        int cur = 1;
        for (int j = 1; j <= len && i * pr[j] <= n; j++) {
            vis[pr[j] * i] = 1;
            int num = i * pr[j];
            cur = 1ull * cur * qpow(f[i], pr[j] - pr[j - 1]) % mod;
            f[num] = 1ull * bsgs1[pr[j]][i % B] * bsgs2[pr[j]][i / B] % mod * cur % mod;
            if (i % pr[j] == 0) break;
        }
    }
    fprintf(stderr, "time used = %.10f\n", (clock()) / 1. / CLOCKS_PER_SEC);
}

实验数据:

  • \(n=10^7\):\(\text{0.206s}\)
  • \(n=10^8\):\(\text{2.094s}\)

\(\mathcal O(n\log n)\) 的实现(每次暴力计算):

// Upper bound (exclusive) on the indices of f[]; supports n up to 1e8.
const int N = 100000005;
// Modulus under which all powers are reduced.
const int mod = 998244353;

// Binary exponentiation: returns a raised to the power b, reduced modulo `mod`.
// The loop is skipped for b <= 0, so the result is 1 in that case.
int qpow(int a, int b) {
    int acc = 1;
    for (; b > 0; b >>= 1) {
        // Fold the current square into the answer when the low bit of b is set.
        if (b % 2 != 0) {
            acc = 1ull * acc * a % mod;
        }
        // Widen to unsigned 64-bit before squaring, then reduce.
        a = 1ull * a * a % mod;
    }
    return acc;
}

// f[k] receives k^k modulo `mod`.
int f[N];

// Brute-force baseline: sets f[1] = 1 and f[i] = qpow(i, i) for every
// 2 <= i <= n, then prints the clock() reading, converted to seconds, to stderr.
void sieve(int n) {
    f[1] = 1;

    int i = 2;
    while (i <= n) {
        f[i] = qpow(i, i);
        ++i;
    }

    const double seconds = (clock()) / 1. / CLOCKS_PER_SEC;
    fprintf(stderr, "time used = %.10f\n", seconds);
}

实验数据:

  • \(n=10^7\):\(\text{0.801s}\)
  • \(n=10^8\):\(\text{8.547s}\)
posted @ 2022-04-05 21:32  wlzhouzhuan  阅读(951)  评论(2)  编辑  收藏  举报