/* 我实现的一个正则表达式代码 */

#include "stdafx.h"
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef DEBUG
#define DEBUG 1
#endif
/*enum bool{false=0,true};
typedef enum bool bool;
*/
/* Forward declaration: repeatProcess() below calls match(), and match()
   itself calls repeatProcess(), so match() must be declared up front. */
bool match(char * name2 , char * pre2);
/*
 * Copy the leading field of str into token.
 *
 * Characters are copied from str until a character contained in delimit
 * (or the end of str) is reached; token is always NUL-terminated.
 *
 * str     - input string; may be NULL or empty
 * token   - output buffer; the caller must make it large enough for the field
 * delimit - set of delimiter characters
 *
 * Returns NULL when str is NULL or empty (token is then the empty string),
 * a pointer to the terminating NUL of str when no delimiter was found, and
 * otherwise a pointer to the character just after the delimiter.
 */
char* FPF_StrSpilit(const char* str, char* token, const char* delimit)
{
    const char *cursor = str;
    char *out = token;

    *out = '\0';
    if (cursor == NULL || *cursor == '\0')
        return NULL;

    while (*cursor != '\0' && strchr(delimit, *cursor) == NULL)
    {
        *out = *cursor;
        out++;
        cursor++;
    }
    *out = '\0';

    if (*cursor == '\0')
        return (char *)cursor;
    return (char *)cursor + 1;
}
/*
 * Report whether str begins a repeat specifier: its first character is '{'
 * and a '}' occurs somewhere after it.
 */
bool repeatCheck(char * str)
{
    if (*str != '{')
        return false;
    return strchr(str, '}') != NULL;
}
/*
 * Validate the syntax of a repeat specifier such as "{6}" or "{1,11}".
 *
 * bracket must start with '{' and end with '}', and every character between
 * the opening '{' and the first '}' must be a decimal digit or ','.
 * Spaces are rejected.
 *
 * The text is checked in place (no temporary copy), so specifiers of any
 * length are handled safely; NULL and strings shorter than "{}" are rejected.
 *
 * Returns true when the specifier is well formed, false otherwise.
 */
bool repeatSyntaxCheck(char * bracket)
{
    size_t len;
    const char *p;

    if (bracket == NULL)
        return false;
    len = strlen(bracket);
    if (len < 2 || bracket[0] != '{' || bracket[len - 1] != '}')
        return false;

    /* The last character is '}', so this scan always terminates. */
    for (p = bracket + 1; *p != '}'; p++)
    {
        if (*p != ',' && (*p < '0' || *p > '9'))
            return false;
    }
    return true;
}
/*
 * Match exactly cnt consecutive characters of name against a pattern that
 * describes ONE character position.
 *
 * pre2 is either a single literal character or a bracket expression:
 *   []       any character
 *   [abc]    one of the listed characters
 *   [!abc]   any character except the listed ones
 *   [a-z]    a character in the inclusive range (content must be exactly 3 chars)
 *   [!a-z]   a character outside the range     (content must be exactly 4 chars)
 * Any other bracket content containing '-' is reported as a pattern error.
 * A pattern containing '*', or longer than one character without being of
 * the form "[...]", is rejected.
 *
 * Returns the position in name just after the cnt matched characters, name
 * itself when cnt is 0, or NULL when cnt is negative, the pattern is invalid,
 * or name does not supply cnt matching characters.
 *
 * The bracket content is examined in place (it is parsed once, not copied),
 * so bracket expressions of any length are safe.
 */
char * repeatMatch(char *name,char *pre2,int cnt)
{
    const char *set=NULL;      /* first character inside the brackets */
    size_t setLen=0;           /* number of characters in the set */
    size_t preLen;
    bool isClass;
    bool anyChar=false;        /* "[]": accept every character */
    bool isRange=false;        /* "[a-z]" or "[!a-z]" */
    bool negated=false;        /* leading '!' */
    unsigned char lo=0;
    unsigned char hi=0;
    int i;

    if(DEBUG)
        printf("++repeatMatch(%s,%s,%d)\n",name,pre2,cnt);
    if(cnt<0)
        return NULL;
    if(cnt==0)
        return name;
    if(strchr(pre2,'*'))
        return NULL;
    preLen=strlen(pre2);
    if(preLen>1&&(pre2[0]!='['||pre2[preLen-1]!=']'))
        return NULL;
    /* cnt >= 1 here, so an empty name can never match; checking before the
       pattern is parsed keeps pattern-error messages from being printed
       for an empty name. */
    if(!*name)
        return NULL;

    isClass=(pre2[0]=='[');
    if(isClass)
    {
        const char *close=strchr(pre2,']');
        if(!close)
            return NULL;
        set=pre2+1;
        setLen=(size_t)(close-set);
        if(setLen==0)
        {
            anyChar=true;
        }
        else if(memchr(set,'-',setLen))
        {
            if(setLen==3)
            {
                lo=(unsigned char)set[0];
                hi=(unsigned char)set[2];
            }
            else if(setLen==4&&set[0]=='!')
            {
                negated=true;
                lo=(unsigned char)set[1];
                hi=(unsigned char)set[3];
            }
            else
            {
                if(DEBUG)
                    printf("正则表达式错误\n");
                return NULL;
            }
            isRange=true;
        }
        else if(set[0]=='!')
        {
            /* Skip the '!' so that set/setLen describe only the excluded
               characters. */
            negated=true;
            set++;
            setLen--;
        }
    }

    for(i=0;i<cnt;i++)
    {
        /* Compare as unsigned char so ranges order characters by byte value. */
        unsigned char c=(unsigned char)*name;
        bool ok;

        if(c=='\0')
            return NULL;
        if(!isClass)
            ok=(*name==pre2[0]);
        else if(anyChar)
            ok=true;
        else if(isRange)
            ok=((c>=lo&&c<=hi)!=negated);
        else
            ok=((memchr(set,c,setLen)!=NULL)!=negated);
        if(!ok)
            return NULL;
        name++;
    }
    return name;
}
/*
 * Write the decimal representation of a non-negative integer into ch.
 *
 * value2 - number to convert; negative values are rejected
 * ch     - output buffer
 * len    - size of ch in bytes, including room for the terminating NUL
 *
 * Returns true when the whole number fits (including the case where it
 * fills the buffer exactly). Returns false when ch is NULL, len < 1, the
 * value is negative, or the number needs more than len-1 digits; on such an
 * overflow ch holds the low-order len-1 digits, NUL-terminated.
 */
bool itoa2(int value2,char *ch,int len)
{
    int value=value2;
    int i;
    int j;

    if(ch==NULL||len<1)
        return false;
    ch[len-1]='\0';
    if(value<0)
    {
        ch[0]='\0';
        return false;
    }
    if(len<2)
        return false;      /* no room for even a single digit */

    /* Produce digits right-aligned, least significant first. */
    i=len-2;
    do
    {
        ch[i]=(char)('0'+value%10);
        value/=10;
        i--;
    } while(value>0&&i>=0);

    if(value>0)
        return false;      /* ran out of room before all digits were written */

    /* Shift the digits (and the terminating NUL) to the start of ch. */
    for(j=0;j+i+1<len;j++)
    {
        ch[j]=ch[j+i+1];
    }
    return true;
}
/* Copy the decimal digits found in [from, to) into out and NUL-terminate it;
   every non-digit character (spaces included) is skipped. */
static void getMinMaxDigits(const char *from,const char *to,char *out)
{
    int count=0;

    for(;from<to;from++)
    {
        if(*from>='0'&&*from<='9')
            out[count++]=*from;
    }
    out[count]='\0';
}

/*
 * Extract the lower and upper bound of a repeat specifier as digit strings.
 *
 * pch - the specifier, starting at its opening brace, e.g. "{ 1,3}"
 * min - receives the digits before the comma ("" when omitted)
 * max - receives the digits after the comma  ("" when omitted)
 *
 * Only text up to the first '}' (or the end of the string when there is no
 * '}') is considered. When the specifier has no comma, e.g. "{6}", both min
 * and max receive the same number. A NULL or empty pch yields two empty
 * strings.
 *
 * The caller must supply buffers large enough for every digit in the
 * specifier plus the terminating NUL; no sizes are passed in.
 */
void getMinMax(char *pch,char * min,char *max)
{
    const char *begin;
    const char *end;
    const char *comma;

    if(pch==NULL||*pch=='\0')
    {
        min[0]='\0';
        max[0]='\0';
        return;
    }

    begin=pch+1;                       /* skip the opening '{' */
    end=strchr(begin,'}');
    if(end==NULL)
        end=begin+strlen(begin);
    /* Look for the comma only inside the braces. */
    comma=(const char *)memchr(begin,',',(size_t)(end-begin));

    if(comma==NULL)
    {
        getMinMaxDigits(begin,end,min);
        strcpy(max,min);
        return;
    }
    getMinMaxDigits(begin,comma,min);
    getMinMaxDigits(comma+1,end,max);
}
/*
 * Count the characters of str that are not the space character ' '.
 * Other whitespace (tabs, newlines) is counted like any other character.
 */
int notSpaceStrlen(char * str)
{
    const char *cur;
    int total = 0;

    for (cur = str; *cur != '\0'; cur++)
    {
        if (*cur == ' ')
            continue;
        total++;
    }
    return total;
}
/*
 * Match the first pattern element of *ppch, together with an optional
 * "{...}" repeat specifier, against name.
 *
 * type - 2 when *ppch starts with a bracket expression "[...]",
 *        1 when it starts with a single literal character.
 * ppch - in/out pointer to the pattern. Whether a repeat is present is
 *        decided only by looking at the character right after the element:
 *        it must be '{' and a '}' must follow.
 *
 * a) Repeat with a range of counts ({m,n}, {m,} or {,n}): every count from
 *    the minimum upwards is tried until the rest of the pattern matches the
 *    rest of the name (via match()). On success both name and *ppch are
 *    moved to their terminating NUL, e.g.
 *    repeatProcess("111111IOC","[0-9]{1,7}IOC",2) succeeds with [0-9]
 *    matching six times. With an explicit upper bound n the match fails if
 *    n+1 repetitions would also match.
 * b) No repeat, or a fixed count ({n}): the element is matched that many
 *    times (once when there is no specifier); the remaining name is returned
 *    and *ppch is advanced past the element and its specifier, e.g.
 *    repeatProcess("111111IOC","[0-9]{6}IOC*",2) returns "IOC" with
 *    *ppch == "IOC*".
 *
 * Returns NULL on any failure: bad type, malformed pattern, a bound with
 * more than 3 digits, min > max, or no match.
 *
 * Limits: the element (bracket expression) and the "{...}" specifier must
 * each be shorter than 64 characters; longer ones are rejected.
 */
char * repeatProcess(char *name,char **ppch,int type)
{
    /* e.g. name="111111QC200", *ppch="[0-9]{6}QC*", type=2 */
    char *pch=*ppch;
    /* min/max are as large as bracket so getMinMax() can never overrun
       them, whatever the specifier contains. */
    char min[64]={0};
    char max[64]={0};
    char bracket[64]={0};
    char choice[64]={0};
    char *p;                   /* first character after the element */
    char *closeBrace;
    char *nextPre;             /* pattern text after element and specifier */
    bool bMaxLimit=false;      /* true when an explicit upper bound was given */
    size_t choiceLen;
    int minCnt;
    int maxCnt;
    int k;

    if(DEBUG)
        printf("++repeatProcess(%s,%s,%d)\n",name,*ppch,type);
    if(type!=1&&type!=2)
        return NULL;
    if(type==2)
    {
        char *closeBracket=strchr(pch,']');
        if(closeBracket==NULL)
            return NULL;
        p=closeBracket+1;      /* p points at '{' when a repeat follows */
    }
    else
    {
        if(*pch=='\0')
            return NULL;
        p=pch+1;
    }
    choiceLen=(size_t)(p-pch);
    if(choiceLen>=sizeof(choice))
        return NULL;

    /* Work out min and max. */
    closeBrace=(*p=='{')?strchr(p,'}'):NULL;
    if(closeBrace!=NULL)
    {
        size_t bracketLen=(size_t)(closeBrace-p)+1;

        nextPre=closeBrace+1;
        if(bracketLen>=sizeof(bracket))
        {
            if(DEBUG)
                printf("{}里的数值太大!\n");
            return NULL;
        }
        memcpy(bracket,p,bracketLen);  /* bracket: e.g. "{6}" */
        bracket[bracketLen]='\0';
        printf("bracket:%s\n",bracket);
        if(!repeatSyntaxCheck(bracket))
        {
            printf("{}里语法检查错误!\n");
            return NULL;
        }

        if(!strchr(bracket,','))
        {
            if(notSpaceStrlen(bracket+1)-1>3)
            {
                if(DEBUG)
                    printf("{}里的数值太大!\n");
                return NULL;
            }
            FPF_StrSpilit(bracket+1,min,"}");
            strcpy(max,min);
            if(DEBUG)
                printf("line %d:max:%s,min:%s\n",__LINE__,max,min);
        }
        else
        {
            getMinMax(bracket,min,max);
            /* Apply the same 3-digit limit as the single-number form. */
            if(strlen(min)>3||strlen(max)>3)
            {
                if(DEBUG)
                    printf("{}里的数值太大!\n");
                return NULL;
            }
            if(*min=='\0')
            {
                min[0]='0';
                min[1]='\0';
            }
            if(*max=='\0')
                /* No upper bound: at most strlen(name) repetitions can match. */
                itoa2((int)strlen(name),max,(int)sizeof(max));
            else
                bMaxLimit=true;
        }
    }
    else
    {
        nextPre=p;
        strcpy(min,"1");
        strcpy(max,"1");
    }
    if(DEBUG)
        printf("min:%s,max:%s\n",min,max);
    minCnt=atoi(min);
    maxCnt=atoi(max);
    if(minCnt>maxCnt)
        return NULL;

    /* choice: the element to repeat, e.g. "[0-9]" or a single character. */
    memcpy(choice,pch,choiceLen);
    choice[choiceLen]='\0';

    if(*max=='\0'&&*min=='\0')     /* "{}" carries no count at all */
        return NULL;
    /* e.g. for {1,8}: fail when 9 repetitions would match as well. */
    if(bMaxLimit&&repeatMatch(name,choice,maxCnt+1))
        return NULL;

    for(k=minCnt;k<=maxCnt;k++)
    {
        char *nameAftRepeatMatch;

        if(DEBUG)
            printf("................k:%d\n",k);
        nameAftRepeatMatch=repeatMatch(name,choice,k);
        if(nameAftRepeatMatch==NULL)
            return NULL;

        if(minCnt!=maxCnt)         /* range: look for a count that works */
        {
            if(DEBUG)
                printf("line %d:++match(%s,%s)\n",__LINE__,nameAftRepeatMatch,nextPre);
            if(match(nameAftRepeatMatch,nextPre))
            {
                *ppch=*ppch+strlen(*ppch);
                name+=strlen(name);
                return name;
            }
        }
        else                       /* the count is fixed */
        {
            printf("retvalue:%s,nextPre:%s\n",nameAftRepeatMatch,nextPre);
            *ppch=nextPre;
            return nameAftRepeatMatch;
        }
    }
    return NULL;
}
/***************************************************
File-name matching.

The pattern supports '*', [] character choices and {} repeat counts, e.g.
    char ch[256]="QC20081031000000NIC_20081030591.0001";
    match(ch,"QC[0-9]{14}NIC_[0-9]{1,11}*");   returns true
{14}    repeat exactly 14 times
{1,11}  repeat between 1 and 11 times
{a,}    repeat at least a times
{,b}    repeat at most b times

name2 - the name to test; NULL only matches the empty pattern
pre2  - the pattern; NULL never matches

Returns true when the pattern matches a prefix of the name: once the
pattern is exhausted the remaining characters of the name are not examined.
An exhausted name matches only when what is left of the pattern is empty
or consists solely of '*'.

A trace line is printed for every call. The pattern is used in place (it
is never written to), so patterns of any length are accepted.
***************************************************
*/
bool match(char * name2 , char * pre2)
{
    char *name=name2;
    char *pre=pre2;

    if(pre2==NULL)
        return false;
    printf("match(%s,%s)\n",name2?name2:"(null)",pre2);
    if(*pre=='\0')
        return true;
    if(name2==NULL)
        return false;
    if(*name=='\0'&&*pre!='*')
        return false;

    if(*pre=='[')
    {
        if(!strchr(pre,']'))
            return false;
        /* "[]" matches any single character; name is non-empty here. */
        if(pre[1]==']')
            return match(name+1,pre+2);
        name=repeatProcess(name,&pre,2);
        if(name==NULL)
            return false;
        return match(name,pre);
    }

    if(*pre=='*')
    {
        if(*name=='\0')
        {
            /* '*' may match nothing, but whatever follows it must still be
               satisfiable by an empty name, i.e. be only further '*'. */
            while(*pre=='*')
                pre++;
            return *pre=='\0';
        }
        if(pre[1]=='\0')
            return true;
        /* Let '*' swallow 0, 1, 2, ... characters of the name. */
        while(*name)
        {
            if(match(name,pre+1))
                return true;
            name++;
        }
        return false;
    }

    /* Literal character, possibly followed by a {} repeat specifier. */
    if(!repeatCheck(pre+1))
    {
        if(*name!=*pre)
            return false;
        return match(name+1,pre+1);
    }
    name=repeatProcess(name,&pre,1);
    if(name==NULL)
        return false;
    return match(name,pre);
}
/**
文件名过滤。配置方法仿unix正则表达式。可以是如下值:
1)具体ascii字母,如abc
2)* 表示任意匹配
3)[]表示在[]里的任意值,如[AaBb]表示A|a|B|b (|表示或者)
[]里可以使用-表示一段范围的字符,如[a-z]表示所有小写字母[0-9]表示数字
[]里的第一个字段可以用! (感叹号),表示非操作,如[!abc]表示该字符非a,且非b,且非c;[!a-z]表示非小写字母;
[!0-9]表示非数字
4){} {}里的值必须是数字,且长度小于4位。它表示前面的配置重复。如a{3}表示aaa,[0-9]{3} 表示3个数字
{}里可以使用逗号,表示一个范围,{m,n}表示>=m个且<=n个。m或n可以省略,但不能同时省略。如[0-9]{,8} 表示<=8个
数字

eg. 
NOC* 表示所有NOC为前缀的文件
NOC_[0-9]{8,14}591* 表示NOC_为前缀,后接8~14个数字,再接591,这样的模式为前缀的文件
*/

/*
 * Demo entry point: matches a sample file name against a sample pattern and
 * prints the result of match() as an integer (1 for a match, 0 otherwise).
 * Command-line arguments are ignored.
 */
int main(int argc,char *argv[])
{
    char ch[256]="QC20081031000000NIC_20081030591.0001";
    /* Held in a writable array because match() takes a non-const char *. */
    char pattern[]="QC[0-9]{14}NIC_[0-9]{1,11}.[0-9]{4}";

    (void)argc;
    (void)argv;
    printf("%d\n",match(ch,pattern));

    return 0;
}

/* posted on 2012-08-07 10:50  山本二十八  阅读(1051)  评论(0)  编辑  收藏  举报

导航 */