首先对于\(n<10^5\)的范围,我们可以直接状压\(dp\).
设\(dp[i][s]\)表示\(dp\)到了第\(i\)位,最后\(m\)个状态的二进制压缩为\(s\)的方案数.
\(\therefore dp[i][s]=dp[i-1][\lfloor\frac{s}{2}\rfloor]+dp[i-1][\lfloor\frac{s}{2}\rfloor+2^{m-1}]\).
但是这个题显然线性做法会超时.
于是使用套路:矩阵快速幂!
认真观察这个\(dp\)方程,我们可以发现对于任意一个合法状态的转移总是单一的,并且总是那个合法的\(bool\)矩阵.
于是我们可以对转移矩阵\(T\)求\(n\)次幂(即\(T^n\)),再对于每一个合法的初始状态,算出它的贡献,求和即可.
#pragma GCC optimize(3)
#include <cstdio>
#include <cstring>

// Bitmask DP accelerated with matrix exponentiation.
//
// A state is an m-bit mask with at most k set bits. From a state s the next
// state is obtained by dropping the lowest bit and inserting a new top bit
// (0 always, 1 only when the result still has at most k set bits). The program
// prints, modulo `mod`, the sum over all valid states s of (T^n)[s][s], i.e.
// the number of length-n walks that end in the state they started from.
// NOTE(review): this matches counting circular arrangements of length n in
// which every window of m positions has at most k marked cells -- confirm
// against the problem statement.

typedef long long ll;

const int mod = 1e9 + 7; // every count is kept modulo this value
const int O = 55;        // matrix capacity; all 2^m states must index below it

ll n;
int m, k;
int res; // largest state mask, (1 << m) - 1; inclusive bound of every matrix loop

// Square matrix of residues modulo `mod`. Only the top-left
// (res + 1) x (res + 1) corner takes part in multiplication.
struct Matrix {
    int a[O][O];

    Matrix() { std::memset(a, 0, sizeof a); }

    int *operator[](int x) { return a[x]; }
    const int *operator[](int x) const { return a[x]; }

    // Modular matrix product over indices 0..res.
    // The right operand is taken by const reference to avoid copying the
    // whole array on every multiplication.
    Matrix operator*(const Matrix &rhs) const {
        Matrix c;
        for (int p = 0; p <= res; ++p)
            for (int i = 0; i <= res; ++i) {
                if (a[i][p] == 0) continue; // zero term contributes nothing
                for (int j = 0; j <= res; ++j)
                    c[i][j] = (c[i][j] + 1ll * a[i][p] * rhs[p][j]) % mod;
            }
        return c;
    }
};

// Returns b raised to the power x (x >= 0) by binary exponentiation.
// b^0 is the identity on indices 0..res.
Matrix qpow(Matrix b, ll x) {
    Matrix acc;
    for (int i = 0; i <= res; ++i) acc[i][i] = 1;
    while (x) {
        if (x & 1) acc = acc * b;
        b = b * b;
        x >>= 1;
    }
    return acc;
}

// Reads "n m k" from stdin and prints the answer followed by a newline.
// Returns 1 without printing an answer when the input is malformed or out of
// the range this fixed-size implementation can handle.
int main() {
    if (std::scanf("%lld%d%d", &n, &m, &k) != 3) {
        std::fprintf(stderr, "invalid input\n");
        return 1;
    }
    // m >= 1 keeps `1 << (m - 1)` defined; 2^m <= O keeps every state inside
    // the matrix; n >= 0 guarantees the exponentiation loop terminates.
    if (n < 0 || m < 1 || m > 30 || (1 << m) > O) {
        std::fprintf(stderr, "invalid input\n");
        return 1;
    }

    res = (1 << m) - 1;

    Matrix T;            // T[next][cur] = 1 when cur -> next is an allowed step
    bool valid[O] = {};  // valid[s]: state s has at most k set bits

    for (int s = 0; s <= res; ++s) {
        int tot = 0; // number of set bits in s
        for (int bits = s; bits; bits >>= 1) tot += bits & 1;
        if (tot > k) continue;

        valid[s] = true;
        const int shifted = s >> 1;

        // Inserting a 0 never increases the number of set bits.
        T[shifted][s] = 1;
        // Inserting a 1 is allowed when there is room (tot < k) or when the
        // bit being dropped was itself a 1, so the count does not grow.
        if (tot != k || (s & 1)) T[shifted + (1 << (m - 1))][s] = 1;
    }

    T = qpow(T, n);

    // Sum the diagonal entries of T^n over the valid starting states.
    int ans = 0;
    for (int s = 0; s <= res; ++s)
        if (valid[s]) ans = (ans + T[s][s]) % mod;

    std::printf("%d\n", ans);
    return 0;
}