串
串的定义
串(string)是由零个或多个字符组成的有限序列,又叫字符串。
串的逻辑结构和线性表很相似,都是序列,所以相邻元素之间具有前驱和后继关系。不同之处在于串针对的是字符集,串中的元素都是字符。对于串的基本操作更多的是查找子串位置、得到指定位置的子串等操作,而线性表更关注的是单个元素的操作。
串的存储结构
串的顺序存储结构
串的顺序存储结构是用一组地址连续的存储单元来存储串中的字符序列的,按照预定义的大小,为每个定义的串变量分配一个固定长度的存储区。
结构代码
/* Maximum number of characters a fixed-length sequential string can hold. */
#define MAXSIZE 40
/* String buffer: element 0 stores the string length, so one extra cell is reserved. */
typedef char String[MAXSIZE + 1];
顺序结构的代码实现
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;

#define OK 1
#define ERROR 0
#define TRUE 1
#define FALSE 0

typedef int Status;
typedef int ElemType;

#define MAXSIZE 40
// Element 0 stores the string length; characters live in cells 1..length.
typedef char String[MAXSIZE + 1];

// Build string T from the NUL-terminated C string chars.
// Returns ERROR (T untouched) when chars is longer than MAXSIZE, otherwise OK.
Status StrAssign(String T, const char* chars)
{
    size_t len = strlen(chars);
    if (len > MAXSIZE)
        return ERROR;
    T[0] = (char)len;
    for (size_t i = 1; i <= len; i++)
        T[i] = chars[i - 1];
    return OK;
}

// Copy string S (length cell included) into T. Always returns OK.
Status StrCopy(String T, String S)
{
    for (int i = 0; i <= S[0]; i++)
        T[i] = S[i];
    return OK;
}

// Returns TRUE when S is the empty string, FALSE otherwise.
Status StrEmpty(String S)
{
    return S[0] == 0 ? TRUE : FALSE;
}

// Compare S and T: result > 0 if S > T, 0 if S == T, < 0 if S < T.
// The first differing character decides; if one string is a prefix of the
// other, the difference of the lengths decides.
int StrCompare(String S, String T)
{
    // Both bounds are inclusive so the last character of the shorter (or
    // equally long) string takes part in the comparison.
    for (int i = 1; i <= S[0] && i <= T[0]; i++)
        if (S[i] != T[i])
            return S[i] - T[i];
    return S[0] - T[0];
}

// Returns the number of characters in S.
int StrLength(String S)
{
    return S[0];
}

// Reset S to the empty string. Always returns OK.
Status ClearString(String S)
{
    S[0] = 0;
    return OK;
}

// Store the concatenation S1 + S2 in T.
// Returns TRUE when everything fits, FALSE when S2 had to be truncated so
// that the result holds exactly MAXSIZE characters.
Status Concat(String T, String S1, String S2)
{
    if (S1[0] + S2[0] <= MAXSIZE) {
        // Not truncated.
        T[0] = (char)(S1[0] + S2[0]);
        for (int i = 1; i <= S1[0]; i++)
            T[i] = S1[i];
        for (int i = 1; i <= S2[0]; i++)
            T[S1[0] + i] = S2[i];
        return TRUE;
    }

    // Truncated: keep all of S1 and as much of S2 as still fits.
    T[0] = MAXSIZE;
    for (int i = 1; i <= S1[0]; i++)
        T[i] = S1[i];
    for (int i = 1; i <= MAXSIZE - S1[0]; i++)
        T[S1[0] + i] = S2[i];
    return FALSE;
}

// Store in Sub the substring of S that starts at position pos (1-based) and
// is len characters long.
// Returns ERROR (Sub untouched) when pos/len do not describe a range inside
// S, otherwise OK.
Status SubString(String Sub, String S, int pos, int len)
{
    if (pos < 1 || pos > S[0] || len < 0 || len > S[0] - pos + 1)
        return ERROR;
    for (int i = 1; i <= len; i++)
        Sub[i] = S[pos + i - 1];
    Sub[0] = (char)len;
    return OK;
}

// Naive pattern matching: returns the position (1-based) of the first
// occurrence of T in S at or after position pos, or 0 when there is none
// (including when pos < 1).
int Index(String S, String T, int pos)
{
    if (pos < 1)
        return 0; // cell 0 holds the length, it is not a character position
    int i = pos; // cursor in S
    int j = 1;   // cursor in T
    while (i <= S[0] && j <= T[0]) {
        if (S[i] == T[j]) {
            ++i;
            ++j;
        } else {
            // Mismatch: restart one position after the previous start.
            i = i - j + 2;
            j = 1;
        }
    }
    return j > T[0] ? i - T[0] : 0;
}

// Same contract as Index, implemented with SubString and StrCompare:
// each window of S that is as long as T is extracted and compared with T.
int Index2(String S, String T, int pos)
{
    if (pos <= 0)
        return 0;

    String sub;
    int n = StrLength(S);
    int m = StrLength(T);
    for (int i = pos; i <= n - m + 1; i++) {
        // A failed extraction leaves sub unset, so it must not be compared.
        if (SubString(sub, S, i, m) != OK)
            return 0;
        if (StrCompare(sub, T) == 0)
            return i;
    }
    return 0;
}

// Insert T into S before position pos (1-based; pos == length + 1 appends).
// Returns ERROR when pos is out of range, TRUE when all of T and S fit,
// FALSE when the result was truncated to MAXSIZE characters.
Status StrInsert(String S, int pos, String T)
{
    if (pos < 1 || pos > S[0] + 1)
        return ERROR;

    if (S[0] + T[0] <= MAXSIZE) {
        // Complete insertion: shift the tail right, back to front.
        for (int i = S[0]; i >= pos; i--)
            S[i + T[0]] = S[i];
        for (int i = 1; i <= T[0]; i++)
            S[pos + i - 1] = T[i];
        S[0] = (char)(S[0] + T[0]);
        return TRUE;
    }

    // Partial insertion: only destinations at or beyond pos + T[0] receive
    // shifted tail characters, so the source index never drops below pos.
    for (int i = MAXSIZE; i >= pos + T[0]; i--)
        S[i] = S[i - T[0]];
    // Copy as much of T as fits without writing past cell MAXSIZE.
    for (int i = pos; i < pos + T[0] && i <= MAXSIZE; i++)
        S[i] = T[i - pos + 1];
    S[0] = MAXSIZE;
    return FALSE;
}

// Delete len characters from S starting at position pos (1-based).
// Returns ERROR when the range is not inside S, otherwise OK.
Status StrDelete(String S, int pos, int len)
{
    if (len < 0 || pos < 1 || pos > S[0] - len + 1)
        return ERROR;
    for (int i = pos + len; i <= S[0]; i++)
        S[i - len] = S[i];
    S[0] = (char)(S[0] - len);
    return OK;
}

// Replace every non-overlapping occurrence of T in S with V.
// Returns ERROR when T is empty, otherwise OK.
// Written purely in terms of the other operations, so it does not depend on
// the storage layout.
Status Replace(String S, String T, String V)
{
    if (StrEmpty(T))
        return ERROR;

    int at = Index(S, T, 1);
    while (at != 0) {
        StrDelete(S, at, StrLength(T));
        StrInsert(S, at, V);
        // Resume after the inserted text so replacements never overlap.
        at += StrLength(V);
        at = Index(S, T, at);
    }
    return OK;
}

// Print the characters of T, each followed by a space, then a newline.
void StrPrint(String T)
{
    for (int i = 1; i <= T[0]; i++)
        cout << T[i] << " ";
    cout << endl;
}

// Demo driver exercising every operation above on fixed inputs.
int main()
{
    int i, j;
    Status k;
    char s;
    String t, s1, s2;

    cout << "请输入串s1: " << endl;
    k = StrAssign(s1, "abcd");
    if (!k) {
        cout << "串长超过MAXSIZE = " << MAXSIZE << endl;
        exit(0);
    }
    cout << "串长为" << StrLength(s1) << "串空否?(1:是 0:否) " << StrEmpty(s1) << endl;

    StrCopy(s2, s1);
    cout << "拷贝s1生成的串为: ";
    StrPrint(s2);

    cout << "请输入串s2: " << endl;
    k = StrAssign(s2, "efghijk");
    if (!k) {
        cout << "串长超过" << MAXSIZE << endl;
        exit(0);
    }

    i = StrCompare(s1, s2);
    if (i < 0)
        s = '<';
    else if (i == 0)
        s = '=';
    else
        s = '>';
    cout << "串s1 " << s << " 串s2" << endl;

    k = Concat(t, s1, s2);
    cout << "串s1联接串s2得到的串t为: ";
    StrPrint(t);
    if (k == FALSE)
        cout << "串t有截断" << endl;

    ClearString(s1);
    cout << "清为空串后,串s1为: ";
    StrPrint(s1);
    cout << "串长为" << StrLength(s1) << " 串空否?(1:是 0 : 否) " << StrEmpty(s1) << endl;

    cout << "求串t的子串,请输入子串的起始位置,子串长度: ";
    i = 2;
    j = 3;
    cout << i << " " << j << endl;
    k = SubString(s2, t, i, j);
    if (k) {
        cout<<"子串s2为: ";
        StrPrint(s2);
    }

    cout<<"从串t的第pos个字符起,删除len个字符,请输入pos, len: ";
    i = 4;
    j = 2;
    cout << i << " " << j << endl;
    StrDelete(t, i, j);
    cout<<"删除后的串t为: ";
    StrPrint(t);

    i = StrLength(s2) / 2;
    StrInsert(s2, i, t);
    cout << "在串s2的第" << i << "个字符之前插入串t后, 串s2为: ";
    StrPrint(s2);

    i = Index(s2, t, 1);
    cout << "s2的第" << i << "字母起和t第一次匹配" << endl;

    SubString(t, s2, 1, 1);
    cout << "串t为:";
    StrPrint(t);

    Concat(s1, t, t);
    cout << "串s1为:";
    StrPrint(s1);

    Replace(s2, t, s1);
    cout << "用串s1取代串s2中和串t相同的不重叠的串后,串s2为: ";
    StrPrint(s2);

    return 0;
}
动态分配内存的字符串:串值的存储空间也可以在程序执行过程中动态分配获得。
串的链式存储结构
与线性表相似,但是考虑到每个结点存放一个字符会造成很大的空间浪费,因此可以考虑一个结点存放多个字符,最后一个结点如果未被占满,可以用“#”或其他非串值字符补全。
链式存储代码实现
(待补)
模式匹配算法
模式匹配:子串的定位操作。
具体的方法有:朴素的模式匹配算法、KMP模式匹配算法和改进的KMP模式匹配算法。(具体理论参考《大话数据结构》相关部分)
模式匹配代码实现
#include <cstring>
#include <iostream>
#include <string>
using namespace std;

#define OK 1
#define ERROR 0
#define TRUE 1
#define FALSE 0

typedef int Status;
typedef int ElemType;

#define MAXSIZE 100
// Element 0 stores the string length; characters live in cells 1..length.
typedef char String[MAXSIZE + 1];

// Build string T from the NUL-terminated C string chars.
// Returns ERROR (T untouched) when chars is longer than MAXSIZE, otherwise OK.
Status StrAssign(String T, const char* chars)
{
    size_t len = strlen(chars);
    if (len > MAXSIZE)
        return ERROR;
    T[0] = (char)len;
    for (size_t i = 1; i <= len; i++)
        T[i] = chars[i - 1];
    return OK;
}

// Reset S to the empty string. Always returns OK.
Status ClearString(String S)
{
    S[0] = 0;
    return OK;
}

// Print the characters of T, each followed by a space, then a newline.
void StrPrint(String T)
{
    for (int i = 1; i <= T[0]; i++)
        cout << T[i] << " ";
    cout << endl;
}

// Print next[1..length], each followed by a space, then a newline.
void NextPrint(int next[], int length)
{
    for (int i = 1; i <= length; i++)
        cout << next[i] << " ";
    cout << endl;
}

// Returns the number of characters in S.
int StrLength(String S)
{
    return S[0];
}

// Naive pattern matching: returns the position (1-based) of the first
// occurrence of T in S at or after position pos, or 0 when there is none
// (including when pos < 1). Prints one trace line per loop iteration.
int Index(String S, String T, int pos)
{
    if (pos < 1)
        return 0; // cell 0 holds the length, it is not a character position
    int n = 0;   // iteration counter for the trace output
    int i = pos; // cursor in S
    int j = 1;   // cursor in T
    while (i <= S[0] && j <= T[0]) {
        if (S[i] == T[j]) {
            ++i;
            ++j;
        } else {
            // Mismatch: restart one position after the previous start.
            i = i - j + 2;
            j = 1;
        }
        cout << "n = " << ++n << " i = " << i << " j = " << j << endl;
    }
    return j > T[0] ? i - T[0] : 0;
}

// Fill next[1..length of T] with the KMP failure table of pattern T.
// next must have room for indices up to the length of T.
void get_next(String T, int* next)
{
    int i = 1;
    int k = 0;
    next[1] = 0;
    while (i < T[0]) {
        if (k == 0 || T[i] == T[k]) {
            ++i;
            ++k;
            next[i] = k;
        } else {
            k = next[k]; // characters differ: fall back
        }
    }
}

// Fill nextval[1..length of T] with the improved KMP failure table of T:
// when the character at the fallback position equals the current one, the
// fallback's own entry is reused instead.
void get_nextval(String T, int* nextval)
{
    int i = 1;
    int k = 0;
    nextval[1] = 0;
    while (i < T[0]) {
        if (k == 0 || T[i] == T[k]) {
            ++i;
            ++k;
            if (T[i] != T[k])
                nextval[i] = k;          // differs from the prefix character
            else
                nextval[i] = nextval[k]; // same character: inherit its entry
        } else {
            k = nextval[k]; // characters differ: fall back
        }
    }
}

// Shared KMP scan used by Index_KMP and Index_KMP1: searches T in S from
// position pos using the supplied failure table and prints one trace line
// per loop iteration. Returns the match position or 0.
static int kmp_search(String S, String T, int pos, const int* table)
{
    if (pos < 1)
        return 0; // cell 0 holds the length, it is not a character position
    int n = 0;   // iteration counter for the trace output
    int i = pos; // cursor in S, never moves backwards
    int j = 1;   // cursor in T
    while (i <= S[0] && j <= T[0]) {
        if (j == 0 || S[i] == T[j]) {
            // j == 0 means the pattern fell back past its first character.
            ++i;
            ++j;
        } else {
            j = table[j]; // fall back in the pattern, i stays put
        }
        cout << "n = " << ++n << " i = " << i << " j = " << j << endl;
    }
    return j > T[0] ? i - T[0] : 0;
}

// KMP pattern matching: position (1-based) of the first occurrence of T in S
// at or after pos, or 0 when there is none.
int Index_KMP(String S, String T, int pos)
{
    int next[MAXSIZE + 1]; // one entry per possible pattern position
    get_next(T, next);
    return kmp_search(S, T, pos, next);
}

// Same contract as Index_KMP, using the improved table from get_nextval.
int Index_KMP1(String S, String T, int pos)
{
    int next[MAXSIZE + 1]; // one entry per possible pattern position
    get_nextval(T, next);
    return kmp_search(S, T, pos, next);
}

// Demo driver: runs the three matchers on the same text and pattern.
int main()
{
    String s1, s2;

    StrAssign(s1, "000000000200000000020000000002000000000200000000020000000001");
    cout << "主串为: ";
    StrPrint(s1);
    StrAssign(s2, "0000000001");
    cout << "子串为: ";
    StrPrint(s2);
    cout << endl;

    cout << "主串和子串在第" << Index(s1, s2, 1) << "个字符处首次匹配(朴素模式匹配算法)"<< endl;
    cout << "主串和子串在第" << Index_KMP(s1, s2, 1) << "个字符处首次匹配(KMP算法)"<< endl;
    cout << "主串和子串在第" << Index_KMP1(s1, s2, 1) <<"个字符处首次匹配(KMP改良算法)"<< endl;
    return 0;
}