c、c++字符串匹配

 

fnmatch(pattern, str, FNM_NOESCAPE)
 
头文件:https://github.com/gcc-mirror/gcc/blob/master/include/fnmatch.h
源文件:https://github.com/gcc-mirror/gcc/blob/master/libiberty/fnmatch.c
 
源码解析,转自:https://www.cnblogs.com/oloroso/p/6861576.html
/*
* Copyright (c) 1989, 1993, 1994
*    The Regents of the University of California.  All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Guido van Rossum.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
*    must display the following acknowledgement:
*    This product includes software developed by the University of
*    California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
*    may be used to endorse or promote products derived from this software
*    without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* From FreeBSD fnmatch.c 1.11
* $Id: fnmatch.c,v 1.3 1997/08/19 02:34:30 jdp Exp $
*/

 40 #ifndef    _FNMATCH_H_
 41 #define    _FNMATCH_H_
 42 
 43 #define    FNM_NOMATCH    1    /* Match failed. */
 44 
 45 #define    FNM_NOESCAPE    0x01    /* 禁用反斜杠进行转义 */
 46 #define    FNM_PATHNAME    0x02    /* 斜杠只能被斜杠匹配(即不能被*或者?匹配) */
 47 #define    FNM_PERIOD    0x04    /* Period must be matched by period. */
 48 /*如果这个标志设置了,string 里的起始点号必须匹配 pattern 里的点号。
 49 一个点号被认为是起始点号,如果它是string 第一个字符,或者如果同时设
 50 置了 FNM_PATHNAME,紧跟在斜杠后面的点号。
 51 */
 52 #define    FNM_LEADING_DIR    0x08    /* Ignore /<tail> after Imatch. */
 53 /*如果这个标志(GNU 扩展)设置了,模式必须匹配跟随在斜杠之后的 string
 54 的初始片断。这个标志主要是给 glibc 内部使用并且只在一定条件下实现。
 55 即只匹配目录路径部分,不匹配到具体文件名
 56 */
 57 #define    FNM_CASEFOLD    0x10    /* 模式匹配忽略大小写. */
 58 #define FNM_PREFIX_DIRS    0x20    /* Directory prefixes of pattern match too. */
 59 
 60 /* Make this compile successfully with "gcc -traditional" */
 61 #ifndef __STDC__
 62 #define const    /* empty */
 63 #endif
 64 
 65 int     fnmatch(const char *, const char *, int);
 66 
 67 #endif /* !_FNMATCH_H_ */
 68 
 69 #if defined(LIBC_SCCS) && !defined(lint)
 70 static char sccsid[] = "@(#)fnmatch.c    8.2 (Berkeley) 4/16/94";
 71 #endif /* LIBC_SCCS and not lint */
 72 
 73 /*
 74 * 函数fnmatch(),如POSIX 1003.2-1992 B.6节所述。
 75 * 将文件名或者目录名与pattern进行比较
 76 */
 77 
 78 #include <ctype.h>
 79 #include <string.h>
 80 #include <stdio.h>
 81 
 82 // 定义字符串结尾标志
 83 #define    EOS    '\0'
 84 
 85 static const char *rangematch(const char *, char, int);
 86 
 87 int
 88 fnmatch(const char *pattern, const char *string, int flags)
 89 {
 90     const char *stringstart;
 91     char c, test;
 92 
 93     for (stringstart = string;;){
 94         // 逐个取匹配字符串中的成分(分为*?和range三种)
 95         switch (c = *pattern++) {
 96         case EOS:   // 没有匹配串的情况
 97             // 如果忽略'/'后面的部分,则匹配成功
 98             if ((flags & FNM_LEADING_DIR) && *string == '/'){
 99                 return (0);
100             }
101             // 如果string也是空串,则匹配成功
102             return (*string == EOS ? 0 : FNM_NOMATCH);
103         case '?':   // 匹配单个任意字符
104             // string为空则不能匹配
105             if (*string == EOS){
106                 return (FNM_NOMATCH);
107             }
108             // 判断'/'是否只能由'/'进行匹配
109             if (*string == '/' && (flags & FNM_PATHNAME)){
110                 return (FNM_NOMATCH);
111             }
112             // 判断是否string中的起始'.'必须匹配pattern中的'.'(即'?'不能匹配'.')
113             if (*string == '.' && (flags & FNM_PERIOD) &&
114                 (string == stringstart ||
115                 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))){
116                 return (FNM_NOMATCH);
117             }
118             // 匹配成功则匹配string的下一个字符
119             ++string;
120             break;
121         case '*':   // 匹配单个或多个任意字符
122             c = *pattern;
123             /* 多个'*'当做一个 */
124             while (c == '*'){
125                 c = *++pattern;
126             }
127             // 判断是否需要对'.'进行处理
128             if (*string == '.' && (flags & FNM_PERIOD) &&
129                 (string == stringstart ||
130                 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))){
131                 return (FNM_NOMATCH);
132             }
133 
134             /* 优化 * 在匹配串结尾或者在 /. 之前的匹配*/
135             if (c == EOS){  // 在结尾
136                 // 判断 * 是否不匹配斜杠
137                 if (flags & FNM_PATHNAME){
138                     // 不匹配斜杠,则判断是否忽略'/'之后部分
139                     return ((flags & FNM_LEADING_DIR) ||
140                         ((strchr(string, '/') == NULL) ? 0 : FNM_NOMATCH));
141                 }else{
142                     return (0);
143                 }
144             }
145             else if (c == '/' && flags & FNM_PATHNAME) { // 在 /. 之前
146                 // 如果string后没有 '/'则匹配失败
147                 if ((string = strchr(string, '/')) == NULL){
148                     return (FNM_NOMATCH);
149                 }
150                 break;
151             }
152 
153             /* 非特殊情况下,递归匹配 */
154             while ((test = *string) != EOS) {
155                 // 不对'.'进行特殊处理,进行匹配(则只需判断'/'匹配情况)
156                 if (!fnmatch(pattern, string, flags & ~FNM_PERIOD)){
157                     return (0); // 匹配成功
158                 }
159                 // 对 '/'进行处理(斜杠只匹配斜杠,则匹配失败)
160                 if (test == '/' && flags & FNM_PATHNAME){
161                     break;
162                 }
163                 ++string;
164             }
165             // 返回匹配失败(即*没有匹配成功,'.'和'/'上的匹配没有成功)
166             return (FNM_NOMATCH);
167         case '[':   // range 范围匹配
168             if (*string == EOS){
169                 return (FNM_NOMATCH);   // 空串匹配失败
170             }
171             if (*string == '/' && flags & FNM_PATHNAME){
172                 return (FNM_NOMATCH);   // '/'匹配失败
173             }
174             if ((pattern =
175                 rangematch(pattern, *string, flags)) == NULL){
176                 return (FNM_NOMATCH);   // 范围匹配失败
177             }
178             ++string;
179             break;
180         case '\\':  // 斜杠匹配(判断是否需要转义)
181             if (!(flags & FNM_NOESCAPE)) {
182                 if ((c = *pattern++) == EOS) {
183                     c = '\\';
184                     --pattern;
185                 }
186             }
187             /* 非上述部分,则直接匹配单个字符 */
188         default:
189             if (c == *string){
190                 ;   // 直接匹配上了
191             }else if ((flags & FNM_CASEFOLD) &&
192                 (tolower((unsigned char)c) ==
193                     tolower((unsigned char)*string))){
194                 ;   // 忽略大小写匹配成功
195             }
196             else if ((flags & FNM_PREFIX_DIRS) && *string == EOS &&
197                 ((c == '/' && string != stringstart) ||
198                 (string == stringstart + 1 && *stringstart == '/'))){
199                 return (0); // 匹配成功
200             }
201             else{
202                 return (FNM_NOMATCH); // 匹配失败
203             }
204             string++;
205             break;
206         }
207     }
208     /* NOTREACHED */
209 }
210 
211 // 字符范围匹配
212 // pattern传入如 [a-x]*** 形式的字符串
213 // 匹配失败或匹配到EOS结束(也是失败),返回NULL
214 // 成功返回匹配串的下一个匹配成分首地址
215 static const char *
216 rangematch(const char *pattern, char test, int flags)
217 {
218     // 此处没有对c进行初始化,可能出问题(栈上变量默认值未定)
219     int negate, ok;
220     char c, c2;
221 
222     /*
223     * A bracket expression starting with an unquoted circumflex
224     * character produces unspecified results
225     * 以无引号 ^ 字符开始的方括号表达式,将产生未指定的结果
226     * (IEEE 1003.2-1992,3.13.2).  此实现将其视为 '!',以与正则表达式语法保持一致.
227     * J.T. Conklin (conklin@ngai.kaleida.com)
228     */
229     // 检测方括号表达式中第一个字符
230     // 如果为!或者^,则对后面匹配的结果取反
231     if ((negate = (*pattern == '!' || *pattern == '^'))){
232         ++pattern;
233     }
234 
235     // 忽略大小写,则转为小写处理
236     if (flags & FNM_CASEFOLD){
237         test = tolower((unsigned char)test);
238     }
239     // 循环到方括号表达式结束
240     for (ok = 0; (c = *pattern++) != ']';) {
241         // 如果没有禁用转义,获取字符
242         if (c == '\\' && !(flags & FNM_NOESCAPE)){
243             c = *pattern++;
244         }
245         // 匹配结束
246         if (c == EOS){
247             return (NULL);
248         }
249         // 忽略大小写,则转为小写
250         if (flags & FNM_CASEFOLD){
251             c = tolower((unsigned char)c);
252         }
253         // 如果当前匹配项c 的下一个是'-',则获取'-'后面的一个字符
254         // 例如,匹配串为 [a-x] 当前c为a,则c2为x,表示匹配a-x之间字符
255         if (*pattern == '-'
256             && (c2 = *(pattern + 1)) != EOS && c2 != ']') {
257             pattern += 2;
258             // 判断是否需要转义
259             if (c2 == '\\' && !(flags & FNM_NOESCAPE)){
260                 c2 = *pattern++;
261             }
262             if (c2 == EOS){
263                 return (NULL);
264             }
265             // 判断是否区分大小写
266             if (flags & FNM_CASEFOLD){
267                 c2 = tolower((unsigned char)c2);
268             }
269             // 判断test是否位于 [c,c2]区间
270             if ((unsigned char)c <= (unsigned char)test &&
271                 (unsigned char)test <= (unsigned char)c2){
272                 ok = 1;
273             }
274         }
275         else if (c == test){
276             ok = 1;
277         }
278     }
279     // 返回匹配结果
280     return (ok == negate ? NULL : pattern);
281 }
View Code

 

posted @ 2023-03-03 17:08  墨尔基阿德斯  阅读(26)  评论(0编辑  收藏  举报