title: DNA Sequence tags:
It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G, and the length of sequences is a given integer $n$.
给定$m$个长度不超过$10$的 DNA 片段,问所有长度为$n$的 DNA 序列中有几个不包含所有给定的 DNA 片段。
数据范围:$0 \le m \le 10, \; 1 \le n \le 2 \times 10^9$。
对$m$个 DNA 片段建立 AC 自动机。令$f_{i, j}$表示长度为$i$的 DNA 序列在自动机上跑到节点$j$的方案数。转移可以用一个矩阵来表示,因此可以用矩阵快速幂来加速转移。
#include <cstdio>
#include <cstring>
static const int N = 11;
static const int POOL = 105;
char s[N];
int get_id(char c) {
return c == 'A' ? 0 : c == 'C' ? 1 : c == 'T' ? 2 : 3;
}
typedef int Array[POOL][POOL];
int numn = 1, que[POOL];
Array mp, a, tmp;
struct ACAutomaton {
int nxt[4], fail;
bool match;
} node[POOL];
void insert(char s[]) {
int root = 1, len = strlen(s);
for (int i = 0; i < len; ++ i) {
int p = get_id(s[i]);
if (! node[root].nxt[p]) node[root].nxt[p] = ++ numn;
root = node[root].nxt[p];
}
node[root].match = true;
}
void build() {
int qb = 0, qe = 1; que[0] = 1;
while (qb < qe) {
int u = que[qb ++];
for (int i = 0; i < 4; ++ i)
if (node[u].nxt[i]) {
node[node[u].nxt[i]].fail = 1;
if (u > 1) {
int root = node[u].fail;
while (root > 1 && ! node[root].nxt[i]) root = node[root].fail;
if (node[root].nxt[i]) {
node[node[u].nxt[i]].fail = node[root].nxt[i];
if (node[node[root].nxt[i]].match) node[node[u].nxt[i]].match = true;
}
}
que[qe ++] = node[u].nxt[i];
}
}
}
void mul(Array a, Array b) {
memset(tmp, 0, sizeof tmp);
for (int i = 1; i <= numn; ++ i)
for (int j = 1; j <= numn; ++ j)
if (a[i][j]) for (int k = 1; k <= numn; ++ k) (tmp[i][k] += 1ll * a[i][j] * b[j][k] % 100000) %= 100000;
memcpy(a, tmp, sizeof tmp);
}
void power(Array a, int b) {
while (b) {
if (b & 1) mul(a, mp);
mul(mp, mp), b >>= 1;
}
}
int main() {
int m, n; scanf("%d%d", &m, &n);
for (int i = 0; i < m; ++ i) scanf(" %s", s), insert(s);
build();
for (int i = 1; i <= numn; ++ i) if (! node[i].match)
for (int j = 0; j < 4; ++ j) {
int root = i;
while (root > 1 && ! node[root].nxt[j]) root = node[root].fail;
if (node[root].nxt[j]) ++ mp[i][node[root].nxt[j]]; else ++ mp[i][root];
}
for (int i = 1; i <= numn; ++ i) a[i][i] = 1;
power(a, n);
int ans = 0;
for (int i = 1; i <= numn; ++ i) if (! node[i].match) (ans += a[1][i]) %= 100000;
printf("%d\n", ans);
return 0;
}