寻找字符最大公共子串
*题目描述:
请编写一个函数,求n个字符串的最长公共子串,n<20,字符长度不超过255.
例如有三个字符串为:
what is local bus?
Name some local bus.
local bus is high speed I/O bus close to the processor.
则最长的公共子串为“local bus”。
*要求实现的函数:
char* findSameSubStr(const char* pIn[],int n)
【输入】pIn:输入的字符串
n :输入的字符串个数,即数组中元素个数
【返回】动态分配的最长公共子串
【注意】只需完成该函数功能算法,中间不需要任何IO输入输出
*示例
输入: pIn[0] "what is local bus?"
pIn[1] "Name some local bus."
pIn[2] "local bus is high speed I/O bus close to the processor."
n 是 3
返回: “local bus”
实现代码如下:
#include <stdlib.h> #include <string.h> #include <stdio.h> bool InStrArray(const char* pIn[],const char** PosArray,size_t n,const char* szSub,bool bMatch); bool InStr(const char* szMain,const char**Pos,const char* szSub,bool bMatch); char* FindSameSubStr(const char *pIn[],size_t n); typedef struct _StrNode { struct _StrNode * next; char* pdate; size_t size; } StrNode; class SameStrList { private: StrNode * Head; StrNode * CurNode; size_t size; size_t maxsize; public: SameStrList(); ~SameStrList(); void AddList(char* szStr); char * FindLargestStr(); size_t GetMaxSize(){return maxsize;} }; SameStrList::SameStrList() { Head = (StrNode*)malloc(sizeof(StrNode)); Head->next = NULL; Head->pdate = NULL; Head->size = 0; CurNode = Head; size = 0; maxsize = 0; } SameStrList::~SameStrList() { while( size >= 1) { StrNode * pNode = Head->next; Head->next = pNode->next; free(pNode->pdate); free(pNode); size --; } free(Head); } void SameStrList::AddList(char *szStr) { size_t NodeSize = strlen(szStr); //只有字符串大于链表中的最大字符串长度时,才插入 if( NodeSize > maxsize) { //复制字符串 char * NodeStr = (char* )malloc(NodeSize + 1); memset(NodeStr,0,NodeSize + 1); for( size_t i = 0 ; i < NodeSize ; i++ ) { *(NodeStr + i) = *(szStr + i); } //生成节点 StrNode* pNode = (StrNode*)malloc(sizeof(StrNode)); pNode->next = NULL; pNode->pdate = NodeStr; pNode->size = NodeSize; //修改链表 size ++; CurNode->next = pNode; CurNode = pNode; maxsize = NodeSize; } } //找到列表中的最长子串 char * SameStrList::FindLargestStr() { CurNode = Head; char * LargestSubStr ; size_t LargestStrSize = 0; //查找 while( CurNode != NULL ) { if( LargestStrSize < CurNode->size ) { LargestStrSize = CurNode->size; LargestSubStr = CurNode->pdate; } CurNode = CurNode->next; } //生成动态字符数组 char * szResult = (char*)malloc(LargestStrSize + 1); memset(szResult,0,LargestStrSize + 1); char * szTemp = szResult; while ( '\0' != * LargestSubStr ) { *(szTemp++) = *(LargestSubStr++); } return szResult; } //pIn 输入的字符串,n为字符串数组的个数 //返回最长公共字串 char* FindSameSubStr(const char *pIn[],size_t n) { //用来记录每个字符串数组中,上一次查到到的子串的位置 const char** PosArray = ( const char**)malloc(sizeof(const char*) * n); SameStrList lSameStrList; //找到最短的字符串数组,作为查询的基础 size_t ShortIndex = 0; size_t ShortestLength = 0; for(size_t i = 0; i < n; i++) { if( ShortestLength > strlen(pIn[i]) ) { ShortestLength = strlen(pIn[i]); ShortIndex = i; } } //定义一个测试字串,判断在其他的字符串中是否存在 const char* TestStr = pIn[ShortIndex]; while( '\0' != *TestStr ) { while( ' ' == *TestStr ) { TestStr ++; //跳过空格 } //构造子串 char * szTestSub = (char * )malloc(strlen(TestStr) + 1); memset(szTestSub,0,strlen(TestStr) + 1); //遍历查找开始 bool bIsExit = true; //标记是否存在 bool bMatch = false; //标记是否匹配成功过 size_t i = 0 ; //标记遍历次数 for(; bIsExit && i < strlen(TestStr); i++) { *(szTestSub + i) = *(TestStr + i); *(szTestSub + i + 1) = '\0'; //继续添加szTestSub字串,直到其长度达到列表中的maxsize位置, //这样提高效率,避免不必要的比较 if( i < lSameStrList.GetMaxSize()) { continue; } //判断在数组中 bIsExit = InStrArray(pIn,PosArray,n,szTestSub,bMatch); //记录是否匹配成功过 if( bIsExit ) bMatch = true; } if( bMatch ) { //因为i-1不存在所以退出的,所以将i-1处的字符截止 *(szTestSub + i-1) = '\0'; //添加链表 lSameStrList.AddList(szTestSub); } free(szTestSub); //对下一个位置进行检查 TestStr ++; } //while char * szRet = lSameStrList.FindLargestStr(); free(PosArray); return szRet; } //判断字串szSub,是否在所有的字符串中 bool InStrArray(const char* pIn[],const char** PosArray,size_t n,const char* szSub,bool bMatch) { bool bResult = true; for(size_t i = 0; i < n; i++ ) { if( !InStr(pIn[i],&PosArray[i],szSub,bMatch) ) { bResult = false; break; } } return bResult; } //判断子串是否在母串中存在 bool InStr(const char* szMain,const char** Pos,const char* szSub,bool bMatch) { size_t uSubSize = strlen(szSub); //子串长度大于1,说明其前uSubSize-1个字符一定刚被搜索过了,且存在,所以只需要比较剩下的字符 if( bMatch ) { if( *(*Pos + 1) == *(szSub + uSubSize - 1) ) { //标记位置 *Pos = *Pos + 1; //返回结果 return true; } else { //从上次结束的位置开始查找 szMain = *Pos; } } size_t uMainSize = strlen(szMain); size_t index = 0; while( index < uMainSize ) { size_t i = index,j = 0; while( szMain[i] == szSub[j] && i < uMainSize && j < uSubSize ) { i++; j++; } if( j == uSubSize ) { //记录最后一次匹配的位置,因为匹配后要+1,所以此次-1 *Pos = szMain + i -1; return true; } index ++; } *Pos = szMain; return false; } int main() { const char *pIn[3] = { "what is local bus?", "Name some local bus", "local bus is high speed I/O bus close to the processor."}; /* //test InStrArray() char * szSub = "local bus"; bool bResult = InStrArray(pIn,3,szSub); */ char * tempstr = FindSameSubStr(pIn,3); printf("%s\n",tempstr); free(tempstr); /* //test InStr() char * szMain = "abcdef"; char * szSub = "cd"; bool bResult = InStr(szMain,szSub); */ system("pause"); return 0; }