A Simple C++ Template Class that Matches a String to a Wildcard Pattern
This is a recently implemented enhanced wildcard string matcher. Its features include:
- Supporting wildcard character '*' for matching zero or more characters
- Supporting wildcard character '?' for matching exactly one character
- Supporting parentheses '(' and ')' for referencing the matches
- Supporting escape character (back-slash)
C++ features demonstrated by this implementation:
- Functors, designed so that function pointers or user-instantiated functors carrying user data can be supplied instead
- Specialized templates
- Template rebinding
The implementation is maintained as part of quanben's ongoing C++ template library project, qcpplib, which is publicly available on GitHub at https://github.com/lincolnyu/qcpplib/
The current snapshot of the code follows:
//
// qcpplib v1.00
// quanben's C++ template library
//
// Author Lincoln Yu
//
// lincoln.yu@gmail.com
// https://github.com/lincolnyu/qcpplib
//
// The MIT License (MIT)
//
// Copyright (c) <year> <copyright holders>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//

#if !defined (_WILDCARD_H_)
#define _WILDCARD_H_

#include <cstddef>
#include <map>
#include <vector>
#include <cstring>
#include <string>

/// @brief Contains class definitions that deal with wildcard matching
namespace Qtl { namespace String { namespace Wildcard {

    /// @brief A functor that returns the distance between two iterators that support the
    ///        subtraction operator
    /// @param TStringRef The type of the reference to the string the iterators belong to
    /// @param TSubtractableIter The type of the iterators; must support `iterEnd - iterBegin`
    template <class TStringRef, class TSubtractableIter>
    struct CharDistFunctorIndexed
    {
        /// @param str The string the iterators point into (not used by this implementation)
        /// @param iterBegin The iterator on the left hand
        /// @param iterEnd The iterator on the right hand
        /// @return `iterEnd - iterBegin` converted to an unsigned size
        std::size_t operator()(TStringRef /*str*/, TSubtractableIter iterBegin, TSubtractableIter iterEnd) const
        {
            return static_cast<std::size_t>(iterEnd - iterBegin);
        }
    };

    /// @brief A functor that returns the position of the first character of a character-based
    ///        zero-terminated string
    struct StringBeginFunctorPsz
    {
        /// @return The pointer passed in, which is itself the position of the first character
        char * operator()(char *str) const
        {
            return str;
        }
    };

    /// @brief A functor that returns the position of the first character of a std::string
    struct StringBeginFunctorStdStr
    {
        /// @return `str.begin()`
        std::string::const_iterator operator()(const std::string& str) const
        {
            return str.begin();
        }
    };

    /// @brief A functor that determines if a position is at the end of a character-based
    ///        zero-terminated string
    struct StringEndFunctorPsz
    {
        /// @param iter The position to test
        /// @param str The string the position belongs to (not used; the terminator marks the end)
        /// @return true if the character at `iter` is the zero terminator
        bool operator()(char *iter, char * /*str*/) const
        {
            return (*iter == 0);
        }
    };

    /// @brief A functor that determines if a position is at the end of a std::string
    struct StringEndFunctorStdStr
    {
        /// @return true if `iter` equals `str.end()`
        bool operator()(std::string::const_iterator iter, const std::string& str) const
        {
            return (iter == str.end());
        }
    };

    /// @brief A functor that appends a character to a character-based zero-terminated string
    /// @remarks Only the character itself is written; no zero terminator is written after it.
    struct AppendCharFunctorPsz
    {
        /// @param str The buffer being written to (not used)
        /// @param iter The write position; advanced by one after the character is stored
        /// @param ch The character to store
        void operator()(char * /*str*/, char *&iter, char ch) const
        {
            *iter++ = ch;
        }
    };

    /// @brief A functor that appends a character to a std::string
    struct AppendCharFunctorStdStr
    {
        /// @param str The string to append to
        /// @param iter Not used; the character always goes to the back of `str`
        /// @param ch The character to append
        void operator()(std::string &str, std::string::iterator & /*iter*/, char ch) const
        {
            str.push_back(ch);
        }
    };

    /// @brief The default class that provides string functors
    struct DefaultStringFunctorSelector
    {
        /// @brief The generic rebinder
        template <class TStringRef, class TCharIter>
        struct rebind
        {
            // Deliberately empty: using an unsupported combination of types fails to compile
            // because the functor typedefs are missing.
        };
    };

    // NOTE The specialisations are declared at namespace scope rather than inside the class
    //      body, since in-class explicit specialisation is not accepted by all compilers.

    /// @brief The rebinder to the character array based string functors
    template <>
    struct DefaultStringFunctorSelector::rebind<char*, char*>
    {
        typedef StringBeginFunctorPsz StringBeginFunctor;
        typedef StringEndFunctorPsz StringEndFunctor;
        typedef CharDistFunctorIndexed<char*, char*> CharDistFunctor;
    };

    /// @brief The rebinder to the std::string based string functors
    template <>
    struct DefaultStringFunctorSelector::rebind<const std::string&, std::string::const_iterator>
    {
        typedef StringBeginFunctorStdStr StringBeginFunctor;
        typedef StringEndFunctorStdStr StringEndFunctor;
        // The first argument is the string reference type (it receives the pattern string),
        // the second is the iterator type.
        typedef CharDistFunctorIndexed<const std::string&, std::string::const_iterator> CharDistFunctor;
    };

    /// @brief The default class that provides the functor that appends a character to a string
    struct DefaultAppendCharFunctorSelector
    {
        /// @brief The generic rebinder
        template <class TStringRef, class TCharIter, class TChar>
        struct rebind
        {
            // Deliberately empty: using an unsupported combination of types fails to compile
            // because the functor typedef is missing.
        };
    };

    /// @brief The rebinder to the functor that appends a character to a character-based
    ///        zero-terminated string
    template <>
    struct DefaultAppendCharFunctorSelector::rebind<char*, char*, char>
    {
        typedef AppendCharFunctorPsz AppendCharFunctor;
    };

    /// @brief The rebinder to the functor that appends a character to a std::string
    template <>
    struct DefaultAppendCharFunctorSelector::rebind<std::string&, std::string::iterator, char>
    {
        typedef AppendCharFunctorStdStr AppendCharFunctor;
    };

    /// @brief A class that encapsulates a wildcard pattern
    /// @param TString The type the pattern string is stored as
    /// @param TStringRef The type of the reference to the pattern string (for efficient parameter passing)
    /// @param TCharIter The type of the iterator through the characters
    /// @param TStringFunctorSelector The selector whose `rebind<TStringRef, TCharIter>` provides the
    ///        StringBeginFunctor, StringEndFunctor and CharDistFunctor types
    /// @remarks A typical wildcard pattern is like: a*b?C(*)D\)
    ///          where normal characters (alphanumerics, punctuation etc) expect an exact match, asterisks
    ///          match a string of any length, question marks match any single character, parentheses mark
    ///          a quotation to be recorded and an escape character (back-slash) turns the succeeding
    ///          special character into a normal matching character.
    template <class TString=char*, class TStringRef=char*, class TCharIter=char*,
        class TStringFunctorSelector=DefaultStringFunctorSelector>
    class Pattern
    {
    public:
        /// @brief The type of the reference to the pattern string
        typedef TStringRef StringRef;

        /// @brief The type of iterator through the characters in the pattern string
        typedef TCharIter CharIter;

        /// @brief The type of the functor that returns the iterator at the beginning of a string
        typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::StringBeginFunctor StringBeginFunctor;

        /// @brief The type of the functor that determines if the iterator is at the end of a string
        typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::StringEndFunctor StringEndFunctor;

        /// @brief The type of the functor that returns the distance between two characters
        typedef typename TStringFunctorSelector::template rebind<TStringRef, TCharIter>::CharDistFunctor CharDistFunctor;

    private:
        /// @brief The pattern string
        TString _pattern;

        /// @brief The functor that returns the beginning of the string
        StringBeginFunctor _getStringBegin;

        /// @brief The functor that returns if the iterator is at the end of the string
        StringEndFunctor _isStringEnd;

        /// @brief The functor that returns the distance between two characters
        CharDistFunctor _getCharDist;

        /// @brief The look-up table that maps the position of each unescaped parenthesis in
        ///        _pattern to the index of its match result entry (-1 for an unmatched ')')
        std::map<CharIter, int> _mapIterToIndex;

    public:
        /// @brief Instantiates a pattern with the pattern string and the functors
        /// @param pattern The pattern string
        /// @param stringBegin The functor that provides the beginning of the string
        /// @param stringEnd The functor that determines the end of the string
        Pattern(TStringRef pattern, StringBeginFunctor stringBegin, StringEndFunctor stringEnd)
            : _pattern(pattern), _getStringBegin(stringBegin), _isStringEnd(stringEnd)
        {
            PreProcessParentheses();
        }

        /// @brief Instantiates a pattern with the pattern string
        /// @param pattern The pattern string
        Pattern(TStringRef pattern) : _pattern(pattern)
        {
            PreProcessParentheses();
        }

        /// @brief Copies a pattern
        /// @remarks The parenthesis table is rebuilt rather than copied because its keys are
        ///          iterators into the pattern string held by the object they were created for.
        Pattern(const Pattern &other)
            : _pattern(other._pattern), _getStringBegin(other._getStringBegin),
              _isStringEnd(other._isStringEnd), _getCharDist(other._getCharDist)
        {
            PreProcessParentheses();
        }

        /// @brief Assigns a pattern; the parenthesis table is rebuilt for the same reason as
        ///        in the copy constructor
        Pattern &operator=(const Pattern &other)
        {
            if (this != &other)
            {
                _pattern = other._pattern;
                _getStringBegin = other._getStringBegin;
                _isStringEnd = other._isStringEnd;
                _getCharDist = other._getCharDist;
                PreProcessParentheses();
            }
            return *this;
        }

    private:
        /// @brief Creates the mapping from parenthesis position to match entry index
        /// @remarks Opening parentheses are numbered 0, 1, 2... in order of appearance. A closing
        ///          parenthesis receives the index of the innermost opening parenthesis that is
        ///          still open. Escaped characters are skipped.
        void PreProcessParentheses()
        {
            _mapIterToIndex.clear();
            std::vector<int> openIndices;   // indices of the groups that are currently open
            int nextIndex = 0;
            for (CharIter iter = GetBegin(); !IsEnd(iter); ++iter)
            {
                if (*iter == '\\')
                {
                    ++iter; // skip the character that follows
                    if (IsEnd(iter))
                    {
                        // trailing back-slash: stop here so the loop does not step past the end
                        break;
                    }
                }
                else if (*iter == '(')
                {
                    _mapIterToIndex[iter] = nextIndex;
                    openIndices.push_back(nextIndex);
                    ++nextIndex;
                }
                else if (*iter == ')')
                {
                    // NOTE We don't need to differentiate opening and closing parentheses as
                    //      the matcher has the knowledge of the pattern characters
                    if (openIndices.empty())
                    {
                        _mapIterToIndex[iter] = -1; // nothing to close
                    }
                    else
                    {
                        _mapIterToIndex[iter] = openIndices.back();
                        openIndices.pop_back();
                    }
                }
            }
        }

    public:
        /// @brief Returns the beginning of the pattern string
        /// @return The iterator pointing to the beginning of the pattern string
        CharIter GetBegin()
        {
            return _getStringBegin(_pattern);
        }

        /// @brief Determines if the iterator is at the end of the pattern string
        /// @param iter The iterator in question
        /// @return true if the iterator is at the end of the pattern string
        bool IsEnd(CharIter iter)
        {
            return _isStringEnd(iter, _pattern);
        }

        /// @brief Returns the match entry index for the specified parenthesis position
        /// @param patternIter The position of an unescaped parenthesis in the pattern string
        /// @return The match entry index, or -1 if the position is not a recorded parenthesis
        ///         or is a closing parenthesis without a matching opening one
        int PatternIterToIndex(CharIter patternIter)
        {
            // find() rather than operator[] so that a lookup never inserts into the table
            typename std::map<CharIter, int>::iterator found = _mapIterToIndex.find(patternIter);
            if (found == _mapIterToIndex.end())
            {
                return -1;
            }
            return found->second;
        }

        /// @brief Returns the distance between two characters as computed by CharDistFunctor
        /// @param iterBegin The iterator that points to the character on the left hand
        /// @param iterEnd The iterator that points to the character on the right hand
        /// @return The distance
        std::size_t GetQuotedLength(CharIter iterBegin, CharIter iterEnd)
        {
            return _getCharDist(_pattern, iterBegin, iterEnd);
        }
    };

    /// @brief A class that converts a wildcard pattern to its equivalent regular expression
    /// @param TPattern The type of the pattern class
    /// @param TRegexStringRef The type of the reference to the string for regular expression
    /// @param TRegexCharIter The iterator through characters in the string for regular expression
    /// @param TRegexChar The type of the character appended to the regular expression
    /// @param TRegexAppendCharFunctorSelector The append-character functor selector for regular expression
    /// @remarks NOTE TRegexChar has to be compatible with the character type TPattern::CharIter iterates through
    template <class TPattern=Pattern<>, class TRegexStringRef=char*, class TRegexCharIter=char*,
        class TRegexChar=char, class TRegexAppendCharFunctorSelector=DefaultAppendCharFunctorSelector>
    class WildCardToRegex
    {
    public:
        /// @brief The type of the reference to regular expression string
        typedef TRegexStringRef RegexStringRef;
        /// @brief The type of the iterator through the characters in the regular expression string
        typedef TRegexCharIter RegexCharIter;
        /// @brief The type of the character that can be appended to the regular expression string
        typedef TRegexChar RegexChar;

        /// @brief The type of the reference to the wildcard string
        typedef typename TPattern::StringRef PatternStringRef;
        /// @brief The type of the iterator through the characters in the wildcard string
        typedef typename TPattern::CharIter PatternStringIter;

        /// @brief The type of the functor that appends a character to the regular expression string
        typedef typename TRegexAppendCharFunctorSelector::template rebind<RegexStringRef, RegexCharIter, RegexChar>::AppendCharFunctor
            RegexAppendCharFunctor;

    private:
        /// @brief The functor that appends character to the regular expression string
        RegexAppendCharFunctor _regexAppendChar;

    public:
        /// @brief Initialises a WildCardToRegex with the specified functor instance
        /// @param regexAppendChar The functor that appends character to the regular expression string
        WildCardToRegex(const RegexAppendCharFunctor &regexAppendChar)
            : _regexAppendChar(regexAppendChar)
        {
        }

        /// @brief Initialises a WildCardToRegex with the default settings
        WildCardToRegex()
        {
        }

    public:
        /// @brief Converts a wildcard string to its equivalent regular expression
        /// @param pattern The wildcard pattern to convert
        /// @param regex The string that receives the regular expression
        /// @param iterRegex The position in `regex` at which writing starts
        /// @remarks This is supposed to comply with the rules set by the regex implementation in QSharp
        ///          See https://qsharp.codeplex.com/SourceControl/latest#QSharp/QSharp.String.Rex/Creator.cs
        ///          for more detail. It has yet to be tested though.
        ///          Translation: `\x` is copied as `\x` (a trailing lone back-slash becomes `\\`),
        ///          `*` becomes `.*`, `?` becomes `.`, parentheses are copied as they are, the
        ///          characters `[ ] { } ^ . - +` are escaped and everything else is copied.
        ///          Nothing is appended after the last character, so with the zero-terminated
        ///          string functor the caller is responsible for the terminator.
        void Convert(TPattern &pattern, TRegexStringRef regex, TRegexCharIter iterRegex)
        {
            for (PatternStringIter iter = pattern.GetBegin(); !pattern.IsEnd(iter); ++iter)
            {
                switch (*iter)
                {
                case '\\':
                    _regexAppendChar(regex, iterRegex, *iter);
                    ++iter;
                    if (pattern.IsEnd(iter))
                    {
                        // trailing back-slash: emit an escaped back-slash and stop, otherwise
                        // the loop increment would step past the end of the pattern
                        _regexAppendChar(regex, iterRegex, '\\');
                        return;
                    }
                    _regexAppendChar(regex, iterRegex, *iter);
                    break;
                case '*':
                    _regexAppendChar(regex, iterRegex, '.');
                    _regexAppendChar(regex, iterRegex, *iter);
                    break;
                case '?':
                    _regexAppendChar(regex, iterRegex, '.');
                    break;
                case '(': case ')':
                    _regexAppendChar(regex, iterRegex, *iter);
                    break;
                case '[': case ']': case '{': case '}': case '^': case '.': case '-': case '+':
                    _regexAppendChar(regex, iterRegex, '\\');
                    _regexAppendChar(regex, iterRegex, *iter);
                    break;
                default:
                    _regexAppendChar(regex, iterRegex, *iter);
                    break;
                }
            }
        }
    };

    /// @brief A class that represents a match of quotation enclosed by a pair of parentheses in the pattern
    /// @param TCharIter The type of iterator through the source string
    /// @param TDiff The type of an integer number that indicates the length of string or the distance between characters
    template <class TCharIter=char*, class TDiff=std::size_t>
    class MatchQuote
    {
    public:
        /// @brief The type of iterator through the source string
        typedef TCharIter CharIter;

        /// @brief The type of an integer number that indicates the length of string or the distance between characters
        typedef TDiff Diff;

    public:
        /// @brief The beginning of the substring that matches
        CharIter Begin;

        /// @brief The end of the substring that matches
        CharIter End;
    };

    /// @brief A class that contains all the matched quotations
    /// @param TCharIter The iterator through the source string
    /// @param TDiff The type of the integer that indicates a string length or a character distance
    template <class TCharIter=char*, class TDiff=std::size_t>
    class MatchResult
    {
    public:
        /// @brief The iterator through the source string
        typedef TCharIter CharIter;
        /// @brief The type of the integer that indicates a string length or a character distance
        typedef TDiff Diff;
        /// @brief The type of match entries listed in this object
        typedef MatchQuote<CharIter, Diff> MatchType;

    private:
        typedef typename std::vector<MatchType>::size_type SizeType;

    public:
        /// @brief A list of matched quotation entries
        std::vector<MatchType> Matches;

    public:
        /// @brief Records the beginning of a quotation encountered
        /// @param index The index of the match entry; a negative index is ignored
        /// @param iterChar The pointer to the source string where the quotation starts
        /// @remarks Matches grows with value-initialised entries as needed to hold `index`.
        void Open(int index, CharIter iterChar)
        {
            if (index < 0)
            {
                return;
            }
            const SizeType slot = static_cast<SizeType>(index);
            if (slot >= Matches.size())
            {
                Matches.resize(slot + 1);
            }
            Matches[slot].Begin = iterChar;
        }

        /// @brief Records the end of a quotation encountered
        /// @param index The index of the match entry; ignored unless an entry with this
        ///        index has already been allocated by Open
        /// @param iterChar The pointer to the source string where the quotation ends
        void Close(int index, CharIter iterChar)
        {
            if (index < 0 || static_cast<SizeType>(index) >= Matches.size())
            {
                return;
            }
            Matches[static_cast<SizeType>(index)].End = iterChar;
        }
    };

    /// @brief A default trait class that provides types needed by Matcher
    /// @param TStringRef The type of the reference to the source string
    /// @param TCharIter The type of iterator through the characters in the source string
    /// @param TDiff The type of the integer that indicates a string length or a character distance
    /// @param TStringFunctorSelector The selector that provides the string functors
    template <class TStringRef=char*, class TCharIter=char*, class TDiff=std::size_t,
        class TStringFunctorSelector=DefaultStringFunctorSelector>
    struct MatcherTraits
    {
        /// @brief The type of the reference to the source string
        typedef TStringRef StringRef;
        /// @brief The type of iterator through the characters in the source string
        typedef TCharIter CharIter;

        /// @brief The type of the match result (matched quotation entry container)
        typedef MatchResult<TCharIter, TDiff> MatchResultType;
        /// @brief The type of the reference to the match result
        typedef MatchResultType & MatchResultRef;

        /// @brief The type of the functor that returns the beginning of a string
        typedef typename TStringFunctorSelector::template rebind<StringRef, CharIter>::StringBeginFunctor StringBeginFunctor;
        /// @brief The type of the functor that determines if an iterator is at the end of a string
        typedef typename TStringFunctorSelector::template rebind<StringRef, CharIter>::StringEndFunctor StringEndFunctor;
    };

    /// @brief A wildcard string matcher
    /// @param Traits The class that provides the string, iterator, result and functor types
    template <class Traits = MatcherTraits<> >
    class Matcher
    {
    public:
        /// @brief The type of the reference to the source string
        typedef typename Traits::StringRef StringRef;
        /// @brief The type of iterator through the characters in the source string
        typedef typename Traits::CharIter CharIter;

        /// @brief The type of the reference to the match result
        typedef typename Traits::MatchResultRef MatchResultRef;

        /// @brief The type of the functor that returns the beginning of a string
        typedef typename Traits::StringBeginFunctor StringBeginFunctor;
        /// @brief The type of the functor that determines if an iterator is at the end of a string
        typedef typename Traits::StringEndFunctor StringEndFunctor;

    private:
        /// @brief The functor that returns the beginning of a string
        StringBeginFunctor _stringBegin;

        /// @brief The functor that determines if an iterator is at the end of a string
        StringEndFunctor _stringEnd;

    public:
        /// @brief Instantiates a Matcher with the specified string functor instances
        /// @param stringBegin The functor that returns the beginning of a string
        /// @param stringEnd The functor that determines if an iterator is at the end of a string
        Matcher(const StringBeginFunctor &stringBegin, const StringEndFunctor &stringEnd)
            : _stringBegin(stringBegin), _stringEnd(stringEnd)
        {
        }

        /// @brief Instantiates a Matcher with default settings
        Matcher()
        {
        }

    public:
        /// @brief Matches the source to the pattern, starting at the beginning of both
        /// @param source The source string to match
        /// @param pattern The pattern to match against
        /// @param matchResult The container of matched quotation entries
        /// @return true if the matching is successful (the pattern is completely consumed);
        ///         the source does not have to be consumed completely
        template <class TPattern>
        bool Match(StringRef source, TPattern &pattern, MatchResultRef matchResult)
        {
            CharIter iterSource = _stringBegin(source);
            typename TPattern::CharIter iterPattern = pattern.GetBegin();
            return Match(source, iterSource, pattern, iterPattern, matchResult);
        }

        /// @brief Matches the source to the pattern from the given positions (recursive)
        /// @param source The source string to match
        /// @param iterSource The iterator through the source string at its current position;
        ///        on success it is left just past the last source character consumed
        /// @param pattern The pattern to match against
        /// @param iterPattern The iterator through the pattern string at its current position;
        ///        on success it is left at the end of the pattern
        /// @param matchResult The container of matched quotation entries
        /// @return true if the matching is successful (the pattern is completely consumed);
        ///         the source does not have to be consumed completely. On failure the positions
        ///         of both iterators and the content of matchResult are not meaningful.
        template <class TPattern>
        bool Match(StringRef source, CharIter &iterSource, TPattern &pattern, typename TPattern::CharIter &iterPattern,
            MatchResultRef matchResult)
        {
            while (!pattern.IsEnd(iterPattern))
            {
                if (*iterPattern == '\\')
                {
                    // Step onto the escaped character so that it is compared literally below.
                    // A trailing back-slash has nothing to escape and is compared as itself.
                    typename TPattern::CharIter escaped = iterPattern;
                    ++escaped;
                    if (!pattern.IsEnd(escaped))
                    {
                        iterPattern = escaped;
                    }
                }
                else if (*iterPattern == '*')
                {
                    const CharIter starSource = iterSource;
                    const typename TPattern::CharIter starPattern = iterPattern;
                    // greedy strategy: first let the asterisk swallow one more character
                    // and retry with the asterisk still pending
                    if (!_stringEnd(iterSource, source))
                    {
                        ++iterSource;
                        if (Match(source, iterSource, pattern, iterPattern, matchResult))
                        {
                            return true;
                        }
                    }
                    // otherwise the asterisk ends here; continue with the rest of the pattern
                    iterSource = starSource;
                    iterPattern = starPattern;
                    ++iterPattern;
                    return Match(source, iterSource, pattern, iterPattern, matchResult);
                }
                else if (*iterPattern == '?')
                {
                    if (_stringEnd(iterSource, source))
                    {
                        return false;
                    }
                    ++iterPattern;
                    ++iterSource;
                    continue;
                }
                else if (*iterPattern == '(')
                {
                    const int index = pattern.PatternIterToIndex(iterPattern);
                    matchResult.Open(index, iterSource);
                    ++iterPattern;
                    continue;
                }
                else if (*iterPattern == ')')
                {
                    const int index = pattern.PatternIterToIndex(iterPattern);
                    matchResult.Close(index, iterSource);
                    ++iterPattern;
                    continue;
                }

                // literal comparison (also reached for an escaped character)
                if (!_stringEnd(iterSource, source) && *iterPattern == *iterSource)
                {
                    ++iterPattern;
                    ++iterSource;
                }
                else
                {
                    return false;
                }
            }
            return true;
        }
    };
}}}

#endif
enjoy every minute of an appless, googless and oracless life