poj_1204 Trie图
题目大意
给出一个由 R×C 个字符组成的 puzzle,可以沿八个方向(从左到右、从右到左、从上到下、从下到上、从左上到右下、从右下到左上、从左下到右上、从右上到左下)在其中查找字符串。
给出M个字符串,找出他们在puzzle中的位置,返回该字符串在puzzle中的起点横纵坐标以及方向。
字符串长度 <= 1000, R, C <= 1000, M <= 1000
题目分析
多模式串的字符串匹配问题,考虑使用Trie图。将M个待查的字符串作为模式串插入Trie图中,然后设置前缀指针,构造DFA。
查找的时候,在puzzle的四个边上每个点,沿8个方向分别确定最长的串作为母串,用母串在Trie图上进行行走,进行匹配。
题目比较坑的是,special judge没能对有些正确的结果给AC,只能按照“正常”的顺序来查找。
实现(c++)
#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<string.h>
#include<vector>
#include<deque>
using namespace std;

#define MAX_SIZE 1005
#define LETTERS 26
#define MAX_NODES 150000
// Fully parenthesised so the macro is safe inside larger expressions.
#define MIN(a, b) ((a) < (b) ? (a) : (b))

// Puzzle grid, stored 1-based. Row 0, column 0 and everything past the last
// row/column stay '\0' and act as sentinels that terminate every walk.
char gPizza[MAX_SIZE][MAX_SIZE];

// Answer for one word: 0-based start row/column and direction (0..7 -> 'A'..'H').
struct PosInfo{
    int row;
    int col;
    int dir;
    void SetInfo(int r, int c, int d){
        row = r;
        col = c;
        dir = d;
    }
};
PosInfo gPosInfo[MAX_SIZE];

// One state of the Trie graph (Aho-Corasick automaton).
struct Node{
    Node* childs[LETTERS];  // trie edges, one per upper-case letter
    bool danger_node;       // true if some word ends here or at a state on the prev chain
    Node* prev;             // prefix (failure) pointer
    int pattern_index;      // 1-based index of the word ending exactly here, 0 if none
    Node(){
        memset(childs, 0, sizeof(childs));
        prev = NULL;
        danger_node = false;
        pattern_index = 0;
    }
};

// Node pool: gNodes[0] is the super-root (all edges lead to the root),
// gNodes[1] is the trie root, and fresh nodes are handed out from index 2.
Node gNodes[MAX_NODES];
int gNodeCount = 2;

// Inserts `str` into the trie below `root` and tags its final node with
// `pattern_index`. Inserting the same word twice overwrites the earlier index.
// A word containing a character outside 'A'..'Z', or one that would exhaust
// the node pool, is left unregistered instead of writing out of bounds.
void Insert(Node* root, char* str, int pattern_index){
    Node* node = root;
    for (char* p = str; *p != '\0'; p++){
        int index = *p - 'A';
        if (index < 0 || index >= LETTERS){
            return;  // no edge exists for this character
        }
        if (!node->childs[index]){
            if (gNodeCount >= MAX_NODES){
                return;  // pool exhausted
            }
            node->childs[index] = gNodes + gNodeCount++;
        }
        node = node->childs[index];
    }
    node->danger_node = true;
    node->pattern_index = pattern_index;
}

// Computes the prefix pointers breadth-first and propagates danger flags.
// The super-root gNodes[0] owns an edge to the root for every letter, so the
// fallback loop below always stops at a state that has the wanted edge.
void BuildDfa(){
    Node* root = gNodes + 1;
    for (int i = 0; i < LETTERS; i++){
        gNodes[0].childs[i] = root;
    }
    root->prev = gNodes;
    gNodes[0].prev = NULL;

    deque<Node*> Q;
    Q.push_back(root);
    while (!Q.empty()){
        Node* node = Q.front();
        Q.pop_front();
        for (int i = 0; i < LETTERS; i++){
            Node* child = node->childs[i];
            if (!child){
                continue;
            }
            Node* p = node->prev;
            while (p && !p->childs[i]){
                p = p->prev;
            }
            Node* target = p->childs[i];
            child->prev = target;
            // The flag flows one way only: from the suffix state to the longer
            // state. Assigning it unconditionally would clear a flag that
            // Insert already set on the child.
            if (target->danger_node){
                child->danger_node = true;
            }
            Q.push_back(child);
        }
    }
}

bool gPatterFind[MAX_SIZE];      // gPatterFind[i]: word i has already been located
int gPatternFoundNum = 0;        // number of distinct words located so far
// Length of the shortest word. Starts above any possible word length so the
// MIN update in main can lower it; starting at 0 would disable the pruning.
int gMinPatternLen = MAX_SIZE;
int gPatternLen[MAX_SIZE];       // gPatternLen[i]: length of word i
// (row, column) step for directions 0..7, clockwise starting from "up".
int gMoveStep[8][2] = { { -1, 0 }, { -1, 1 }, { 0, 1 }, { 1, 1 }, { 1, 0 }, { 1, -1 }, { 0, -1 }, { -1, -1 } };

// Called when the automaton reaches a danger state at 1-based cell (r, c)
// while moving in direction `dir`. Every state on the prefix-pointer chain may
// itself end a word (in "ABCDFF" with words ABCD and CD, reaching D must report
// both), so the whole chain is walked. Only the first hit of a word is stored.
void FindPatternFromEndPoint(Node* node, int r, int c, int dir){
    const int dr = gMoveStep[dir][0];
    const int dc = gMoveStep[dir][1];
    for (; node; node = node->prev){
        int pattern_index = node->pattern_index;
        if (pattern_index == 0 || gPatterFind[pattern_index]){
            // Nothing ends here, or the word was already reported; a shorter
            // word may still end further along the chain.
            continue;
        }
        gPatterFind[pattern_index] = true;
        gPatternFoundNum++;
        // Step back (length - 1) cells from the end, then convert the 1-based
        // grid coordinate to the 0-based coordinate that is printed.
        int steps = gPatternLen[pattern_index] - 1;
        gPosInfo[pattern_index].SetInfo(r - steps * dr - 1, c - steps * dc - 1, dir);
    }
}

// Feeds the line that starts at 1-based cell (start_x, start_y) and runs in
// direction `dir` through the automaton until a '\0' sentinel cell is reached.
void SearchStr(int start_x, int start_y, int dir){
    Node* root = gNodes + 1;
    Node* node = root;
    const int dr = gMoveStep[dir][0];
    const int dc = gMoveStep[dir][1];
    for (int r = start_x, c = start_y; gPizza[r][c] != '\0'; r += dr, c += dc){
        int index = gPizza[r][c] - 'A';
        if (index < 0 || index >= LETTERS){
            // No word can span a non-letter cell: restart from the root.
            node = root;
            continue;
        }
        while (node && node->childs[index] == NULL){
            node = node->prev;
        }
        node = node ? node->childs[index] : root;
        if (node->danger_node){
            FindPatternFromEndPoint(node, r, c, dir);
        }
    }
}

// Upper bound on the length of the line starting at border cell (r, c) in
// direction `dir`, or -1 when that cell is not a useful start for `dir`.
int MaxLen(int R, int C, int r, int c, int dir){
    if (dir == 0 || dir == 4)
        return R;
    if (dir == 2 || dir == 6)
        return C;
    if (dir == 1){
        if (c == 1)
            return r;
        else if (r == R)
            return C - c + 1;
    }
    if (dir == 5){
        if (r == 1)
            return c;
        else if (c == C)
            return R - r + 1;
    }
    if (dir == 3){
        if (r == 1)
            return C - c + 1;
        if (c == 1)
            return R - r + 1;
    }
    if (dir == 7){
        if (r == R)
            return c;
        if (c == C)
            return r;
    }
    return -1;
}

// Tries all eight directions from border cell (r, c), skipping lines that are
// too short for the shortest word. Returns true once every word is located.
static bool SearchFromBorderCell(int R, int C, int r, int c, int total_word_to_find){
    for (int dir = 0; dir < 8; dir++){
        if (gPatternFoundNum == total_word_to_find){
            return true;
        }
        if (MaxLen(R, C, r, c, dir) >= gMinPatternLen){
            SearchStr(r, c, dir);
        }
    }
    return false;
}

// Scans from every border cell in a fixed order (left column, right column,
// top row, bottom row) and stops early once all words are located. The order
// decides which occurrence of a word is reported, so it must not change.
void SearchPuzzle(int R, int C, int total_word_to_find){
    for (int r = 1; r <= R; r++){
        if (SearchFromBorderCell(R, C, r, 1, total_word_to_find)) return;
    }
    for (int r = 1; r <= R; r++){
        if (SearchFromBorderCell(R, C, r, C, total_word_to_find)) return;
    }
    for (int c = 1; c <= C; c++){
        if (SearchFromBorderCell(R, C, 1, c, total_word_to_find)) return;
    }
    for (int c = 1; c <= C; c++){
        if (SearchFromBorderCell(R, C, R, c, total_word_to_find)) return;
    }
}

// Reads "R C M", R grid rows and M words from stdin, then prints for each word
// its 0-based start row, start column and direction letter ('A'..'H').
int main(){
    int R, C, M;
    if (scanf("%d %d %d", &R, &C, &M) != 3){
        fprintf(stderr, "invalid puzzle header\n");
        return 1;
    }
    // The grid needs one sentinel row/column on each side, and the per-word
    // tables are indexed 1..M.
    if (R < 0 || C < 0 || M < 0 || R > MAX_SIZE - 2 || C > MAX_SIZE - 2 || M > MAX_SIZE - 1){
        fprintf(stderr, "puzzle dimensions out of range\n");
        return 1;
    }

    memset(gPizza, 0, sizeof(gPizza));
    memset(gPatterFind, false, sizeof(gPatterFind));
    gNodeCount = 2;
    gPatternFoundNum = 0;
    gMinPatternLen = MAX_SIZE;

    // Rows are read as whitespace-delimited tokens so CR/LF line endings or
    // trailing blanks cannot shift the grid. 1004 == MAX_SIZE - 1.
    char line[MAX_SIZE];
    for (int r = 1; r <= R; r++){
        if (scanf("%1004s", line) != 1){
            break;
        }
        int len = static_cast<int>(strlen(line));
        int width = MIN(len, C);
        memcpy(&gPizza[r][1], line, width);  // cells past `width` stay '\0'
    }

    char str[MAX_SIZE];
    Node* root = gNodes + 1;
    for (int i = 1; i <= M; i++){
        if (scanf("%1004s", str) != 1){
            M = i - 1;  // truncated input: only report the words actually read
            break;
        }
        Insert(root, str, i);
        gPatternLen[i] = static_cast<int>(strlen(str));
        gMinPatternLen = MIN(gMinPatternLen, gPatternLen[i]);
    }

    BuildDfa();
    SearchPuzzle(R, C, M);

    for (int i = 1; i <= M; i++){
        printf("%d %d %c\n", gPosInfo[i].row, gPosInfo[i].col, gPosInfo[i].dir + 'A');
    }
    return 0;
}