最初步的正则表达式引擎:将显示的连接符改为了非显示的连接符
由于当前的连接符变为非显示,所以在有些时候需要考虑当前输入指针所指的位置是否缺少连接符,如果缺少,则将连接符入栈。
1 #include <stdio.h> 2 #include <malloc.h> 3 #include <string.h> 4 //这里默认是没有显示的连接运算符,运算符的优先级为括号、闭包*、连接.、并| 5 //在括号后及闭包后可能缺少显示的连接符,因此需要考虑添加连接符,而由于并操作符的优先级比连接符低 6 //所以就不需要在处理并操作符的时候去考虑是否缺少显示的连接符了 7 int token[100]; 8 int token_pointer; 9 char reg_operator[100]; 10 int reg_operator_pointer; 11 int name_number; 12 int input_pointer; 13 char reg_input[20]; 14 int is_operator(char for_in)//判断输入字符是否是操作符 15 { 16 switch(for_in) 17 { 18 case'(': 19 case')': 20 case'*': 21 case'|': 22 return 1; 23 default: return 0; 24 } 25 } 26 void tackle_or()//处理并操作符 27 { 28 if(reg_operator_pointer!=0)//如果操作符栈中已经有操作符了 29 { 30 if(reg_operator[reg_operator_pointer-1]!='(')//括号另外说 31 { 32 name_number++; 33 if(reg_operator[reg_operator_pointer-1]=='.') 34 //如果前面的优先级比当前的高,则处理前面的优先级 35 { 36 printf("name%d is concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 37 } 38 else 39 //这里处理的是相同优先级的情况,其实这里可以与前面的合并的,只不过打印信息不同 40 { 41 printf("name%d is name%d or name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 42 } 43 token[token_pointer-2]=name_number; 44 token_pointer--; 45 reg_operator[reg_operator_pointer-1]='|'; 46 input_pointer++; 47 } 48 else//对于括号,则直接入栈 49 { 50 reg_operator[reg_operator_pointer++]='|'; 51 input_pointer++; 52 } 53 } 54 else//对于空操作符栈,也是直接入栈 55 { 56 reg_operator[reg_operator_pointer++]='|'; 57 input_pointer++; 58 } 59 } 60 void tackle_cat()//处理连接符,事实上跟前面的or操作符讨论的差不多,差异就在优先级那 61 { 62 if(reg_operator_pointer!=0)//如果操作符栈不为空 63 { 64 if(reg_operator[reg_operator_pointer-1]=='(')//如果前面有括号,则直接入栈 65 { 66 reg_operator[reg_operator_pointer++]='.'; 67 } 68 else//对于前面不是括号的情况下 69 { 70 if(reg_operator[reg_operator_pointer-1]=='.')//优先级相同则输出前面的那个 71 { 72 name_number++; 73 printf("name%d is the concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 74 token[token_pointer-1]=0; 75 token[token_pointer-2]=name_number; 76 token_pointer--; 77 } 78 else//否则的话,前面的优先级比当前优先级低,操作符入栈 79 { 80 reg_operator[reg_operator_pointer++]='.'; 81 } 82 } 83 } 84 else//如果操作符栈为空,则入栈 85 { 86 reg_operator[reg_operator_pointer++]='.'; 87 } 88 } 89 void tackle_parenthesis(void)//处理闭括号模式,这里有点复杂 90 { 91 if(reg_operator[reg_operator_pointer-1]=='(')//如果前面那个操作符为开括号,则匹配输出 92 { 93 name_number++; 94 printf("name%d is (name%d)\n",name_number,token[token_pointer-1]); 95 token[token_pointer-1]=name_number; 96 input_pointer++; 97 reg_operator[--reg_operator_pointer]='\0'; 98 //这时候需要考虑后面的是否少了显示的连接符,如果判断缺少连接符,则需要加上去 99 if(!is_operator(reg_input[input_pointer])) 100 { 101 if(reg_input[input_pointer]!='\0') 102 { 103 tackle_cat(); 104 } 105 } 106 else 107 { 108 if(reg_input[input_pointer]=='(') 109 { 110 tackle_cat(); 111 } 112 } 113 } 114 else//如果闭括号前面还有运算符,那么根据他们的优先级输出,这个时候输入指针是不变的,注意 115 { 116 name_number++; 117 if(reg_operator[reg_operator_pointer-1]=='.') 118 { 119 printf("name%d is the concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 120 } 121 else 122 { 123 printf("name%d is name%d or name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 124 } 125 token[token_pointer-1]=0; 126 token[token_pointer-2]=name_number; 127 token_pointer--; 128 reg_operator_pointer--; 129 } 130 } 131 int main(void) 132 { 133 reg_operator_pointer=name_number=token_pointer=0; 134 for(input_pointer=0;input_pointer<100;input_pointer++)//初始化栈 135 { 136 reg_operator[input_pointer]='\0'; 137 token[input_pointer]=0; 138 } 139 input_pointer=0; 140 printf("please type in you regex short phrase\n"); 141 scanf("%s",reg_input); 142 while(reg_input[input_pointer]!='\0') 143 { 144 if(!is_operator(*(reg_input+input_pointer)))//对于操作符和非操作符分开讨论 145 { 146 name_number++; 147 token[token_pointer++]=name_number; 148 printf("name%d is %c\n",name_number,*(reg_input+input_pointer)); 149 input_pointer++; 150 //非操作符直接进字符栈,这个时候需要考虑后面是否缺少显示的连接符 151 if(!is_operator(reg_input[input_pointer])) 152 { 153 if(reg_input[input_pointer]!='\0') 154 { 155 tackle_cat(); 156 } 157 } 158 else 159 { 160 if(reg_input[input_pointer]=='(') 161 { 162 tackle_cat(); 163 } 164 } 165 } 166 else//通过多路选择来处理操作符的情况 167 { 168 switch(reg_input[input_pointer]) 169 { 170 case '('://开括号直接入栈 171 reg_operator[reg_operator_pointer++]='('; 172 input_pointer++; 173 break; 174 case ')'://闭括号专门处理 175 tackle_parenthesis(); 176 break; 177 case '*'://由于*运算符的优先级第二高,只比括号低,所以可以直接输出 178 name_number++; 179 printf("name%d is multiple of name%d\n",name_number,token[token_pointer-1]); 180 token[token_pointer-1]=name_number; 181 input_pointer++; 182 //这个时候仍然需要考虑后面缺少显示连接符的情况 183 if(!is_operator(reg_input[input_pointer])&®_input[input_pointer]!='\0') 184 { 185 tackle_cat(); 186 } 187 else 188 { 189 if(reg_input[input_pointer]=='(') 190 { 191 tackle_cat(); 192 } 193 } 194 break; 195 case '|'://对于并操作符,调用函数处理 196 tackle_or(); 197 break; 198 default: break; 199 } 200 } 201 } 202 while(reg_operator_pointer>=1)//如果全部的输入都弄完了,可是 操作符栈中还有数据,则输出 203 { 204 name_number++; 205 if(reg_operator[reg_operator_pointer-1]=='.') 206 { 207 printf("name%d is concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 208 } 209 else 210 { 211 printf("name%d is name%d or name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]); 212 } 213 token[token_pointer-2]=name_number; 214 token_pointer--; 215 reg_operator_pointer--; 216 } 217 }