Linux C下的正则表达式



/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;

/* Location of one match reported by regexec().  Both members are byte
   offsets measured from the start of the searched string; rm_eo is the
   offset just past the last matched byte, so the match occupies the
   half-open range [rm_so, rm_eo).  */
typedef struct
{
  regoff_t rm_so;   /* Byte offset from string's start to substring's start. */
  regoff_t rm_eo;   /* Byte offset from string's start to substring's end. */
} regmatch_t;

# define __RE_TRANSLATE_TYPE unsigned char *
# ifdef __USE_GNU
# endif

#ifdef __USE_GNU
# define __REPB_PREFIX(name) name
# define __REPB_PREFIX(name) __##name

struct re_pattern_buffer
/* Space that holds the compiled pattern. It is declared as
`unsigned char *' because its elements are sometimes used as
array indexes. */
  unsigned char *__REPB_PREFIX(buffer);

/* Number of bytes to which `buffer' points. */
  unsigned long int __REPB_PREFIX(allocated);

/* Number of bytes actually used in `buffer'. */
  unsigned long int __REPB_PREFIX(used);

/* Syntax setting with which the pattern was compiled. */
  reg_syntax_t __REPB_PREFIX(syntax);

/* Pointer to a fastmap, if any, otherwise zero. re_search uses the
fastmap, if there is one, to skip over impossible starting points
for matches. */
  char *__REPB_PREFIX(fastmap);

/* Either a translate table to apply to all characters before
comparing them, or zero for no translation. The translation is
applied to a pattern when it is compiled and to a string when it
is matched. */

/* Number of subexpressions found by the compiler. */
  size_t re_nsub;

/* Zero if this pattern cannot match the empty string, one else.
Well, in truth it's used only in `re_search_2', to see whether or
not we should use the fastmap, so we don't set this absolutely
perfectly; see `re_compile_fastmap' (the `duplicate' case). */
  unsigned __REPB_PREFIX(can_be_null) : 1;

/* If REGS_UNALLOCATED, allocate space in the `regs' structure
for `max (RE_NREGS, re_nsub + 1)' groups.
If REGS_REALLOCATE, reallocate space if necessary.
If REGS_FIXED, use what's there. */
  #ifdef __USE_GNU
  # define REGS_REALLOCATE 1
  # define REGS_FIXED 2
  unsigned __REPB_PREFIX(regs_allocated) : 2;

/* Set to zero when `regex_compile' compiles a pattern; set to one
by `re_compile_fastmap' if it updates the fastmap. */
  unsigned __REPB_PREFIX(fastmap_accurate) : 1;

/* If set, `re_match_2' does not return information about
subexpressions. */
  unsigned __REPB_PREFIX(no_sub) : 1;

/* If set, a beginning-of-line anchor doesn't match at the beginning
of the string. */
  unsigned __REPB_PREFIX(not_bol) : 1;

/* Similarly for an end-of-line anchor. */
  unsigned __REPB_PREFIX(not_eol) : 1;

/* If true, an anchor at a newline matches. */
  unsigned __REPB_PREFIX(newline_anchor) : 1;

typedef struct re_pattern_buffer regex_t;

Linux C 使用正则表达式(regex)的一般步骤:

编译  regcomp()
匹配  regexec()
释放  regfree()


/* Compile: translate the text `pattern` into the compiled form stored in
   `preg`; `cflags` carries the compile-time option bits. */
int regcomp(regex_t *preg,
            const char *pattern,
            int cflags);

/* Match: run the compiled `preg` against `string`, recording up to
   `nmatch` match locations in `pmatch`; `eflags` carries the
   match-time option bits. */
int regexec(const regex_t *preg,
            const char *string,
            size_t nmatch,
            regmatch_t pmatch[],
            int eflags);

/* Describe the error code `errcode` as text in `errbuf`, which holds
   `errbuf_size` bytes. */
size_t regerror(int errcode,
                const regex_t *preg,
                char *errbuf,
                size_t errbuf_size);

/* Release: free what is held by the compiled `preg`. */
void regfree(regex_t *preg);

/* Compiled pattern.  The POSIX type is regex_t; there is no type named
   `regex`. */
regex_t regHead;


/* Compile the extended-syntax pattern: one optional character, captured
   as group 1, followed by the literal text "xml". */
regcomp(&regHead, "(.?)xml", REG_EXTENDED);

/* Subject string to search.  NOTE(review): CHAR and MAX_STR_LINE are not
   declared in this snippet -- presumably project definitions; confirm. */
static CHAR str[MAX_STR_LINE];

/* Two slots: pmatch[0] for the whole match, pmatch[1] for group 1. */
regmatch_t pmatch[2];


/* The comparison is true when str matched the pattern. */
regexec(&regHead, str, 2, pmatch, 0) == 0

regmatch_t 是一个结构体类型,在 regex.h 中定义:成员 rm_so 存放匹配文本串在目标串中的起始偏移,rm_eo 存放匹配文本串最后一个字符之后的偏移,即匹配范围是左闭右开区间 [rm_so, rm_eo)。




pmatch[0].rm_so 和 pmatch[0].rm_eo 表示整个正则表达式匹配结果的起止位置(从 x 的前一个字符到字符 l 的后一个字符),pmatch[1].rm_so 和 pmatch[1].rm_eo 表示第一个子表达式(即括号分组)匹配结果的起止位置(从 x 的前一个字符到字符 x)。


/* Free the storage that regcomp() attached to *compiled.  After this call
   the regex_t must be compiled again before it is passed to regexec(). */
void regfree (regex_t *compiled);


 * regexec() returns zero if the regular expression matches; otherwise it returns a nonzero value.
 * MSGDEF regular one preChar, otherwise regular afterStr in "".
 * pmatch[0].rm_so and pmatch[0].rm_eo give the start and end of the whole matched substring as a half-open interval [rm_so, rm_eo), not counting the leading blank space.
 * For n greater than zero, pmatch[n].rm_so and pmatch[n].rm_eo give the start and end of the n-th subexpression's match, using the same half-open interval rule.

注意:正则匹配的是满足条件的最后一个 str;在使用 strstr 实现时应注意,strstr 匹配的是第一个 str。(此现象取决于所用的模式:regexec 本身返回的是最左侧的匹配,只有当模式前面带有贪婪的通配前缀时,捕获到的才会是最后一个 str。)


1. C语言用regcomp、regexec、regfree和regerror函数实现正则表达式校验 

2. C语言正则表达式详解 regcomp() regexec() regfree()用法详解


posted @ 2021-03-25 14:23  Lunais  阅读(550)  评论(0)  编辑  收藏  举报