# The re module
# Table of common matching patterns
# Detailed walkthrough
import re
# Predefined character classes.
#
# Every pattern is written as a raw string (r'...') so that the backslash
# reaches the regex engine untouched; a plain '\w' is an invalid string escape
# and triggers a SyntaxWarning on recent Python versions.
# The subject strings stay non-raw on purpose: their '\r', '\t', '\n' and '\f'
# must be real control characters.

# \w matches a single word character: letter, digit or underscore
# (for str patterns this is Unicode-aware unless re.ASCII is given).
print(re.findall(r'\w', 'aAbc123_*()-='))
# ['a', 'A', 'b', 'c', '1', '2', '3', '_']

# \W matches a single character that is NOT a word character.
print(re.findall(r'\W', 'aAbc123_*()-= '))
# ['*', '(', ')', '-', '=', ' ']

# \s matches a single whitespace character; this includes [ \t\n\r\f\v].
print(re.findall(r'\s', 'aA\rbc\t\n12\f3_*()-= '))
# ['\r', '\t', '\n', '\x0c', ' ']

# \S matches a single non-whitespace character.
print(re.findall(r'\S', 'aA\rbc\t\n12\f3_*()-= '))
# ['a', 'A', 'b', 'c', '1', '2', '3', '_', '*', '(', ')', '-', '=']

# \d matches a single decimal digit ([0-9] for ASCII input).
print(re.findall(r'\d', 'aA\rbc\t\n12\f3_*()-= '))
# ['1', '2', '3']

# \D matches a single character that is NOT a digit.
print(re.findall(r'\D', 'aA\rbc\t\n12\f3_*()-= '))
# ['a', 'A', '\r', 'b', 'c', '\t', '\n', '\x0c', '_', '*', '(', ')', '-', '=', ' ']
# Anchors: \A, \Z, ^ and $.
#
# Patterns are raw strings so that '\A' and '\Z' are passed to the regex
# engine verbatim instead of being treated as (invalid) string escapes.

# \A matches only at the very start of the string.
# The subject begins with a space, so 'alex' is not at the start.
print(re.findall(r'\Aalex', ' alexis alex sb'))
# []

# \Z matches only at the very end of the string.  Unlike $, it does NOT
# match just before a trailing newline.
print(re.findall(r'sb\Z', ' alexis alexsb sb'))
# ['sb']
# Here the subject ends with '\n', so 'sb' is not at the very end.
print(re.findall(r'sb\Z', 'alex\nalexis\nalex\nsb\n'))
# []

# ^ matches at the start of the string.
print(re.findall(r'^alex', 'alexis alex sb'))
# ['alex']

# $ matches at the end of the string, or just before a trailing newline.
print(re.findall(r'sb$', 'alexis alex sb'))
# ['sb']
print(re.findall(r'sb$', 'alex\nalexis\nalex\nsb\n'))
# ['sb']

# ^...$ together require the whole string to match the pattern.
print(re.findall(r'^alex$', 'alexis alex sb'))  # []
print(re.findall(r'^alex$', 'al ex'))  # []
print(re.findall(r'^alex$', 'alex'))  # ['alex']
# Repetition: .  *  +  ?  {n,m}  (and the combinations .* and .*?)
#
# Patterns are raw strings so that regex escapes such as '\d' and '\.' are
# not misread as (invalid) string escapes.

# 1. '.' matches any single character except a newline; with re.DOTALL it
#    matches a newline as well.
print(re.findall(r'a.b', 'a1b a2b a b abbbb a\nb a\tb a*b'))
# ['a1b', 'a2b', 'a b', 'abb', 'a\tb', 'a*b']
print(re.findall(r'a.b', 'a1b a2b a b abbbb a\nb a\tb a*b', re.DOTALL))
# ['a1b', 'a2b', 'a b', 'abb', 'a\nb', 'a\tb', 'a*b']

# 2. '*' repeats the preceding item zero or more times (greedy).
print(re.findall(r'ab*', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['a', 'ab', 'abb', 'abbbbbbbb']

# 3. '+' repeats the preceding item one or more times (greedy).
print(re.findall(r'ab+', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['ab', 'abb', 'abbbbbbbb']

# 4. '?' repeats the preceding item zero or one time (greedy).
print(re.findall(r'ab?', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['a', 'ab', 'ab', 'ab']

# 5. '{n,m}' repeats the preceding item between n and m times (greedy).
#    {0,}  is equivalent to *
#    {1,}  is equivalent to +
#    {0,1} is equivalent to ?
#    {n}   means exactly n repetitions, no more and no fewer.
print(re.findall(r'ab{2,5}', 'a ab abb abbb abbbb abbbbbbbb bbbbbbbb'))
# ['abb', 'abbb', 'abbbb', 'abbbbb']

# Integers and decimals: digits, an optional literal dot, then optional digits.
# Compare with r'\d+\.?\d+', which needs at least two digits and would
# therefore miss the single-digit numbers '1' and '3'.
print(re.findall(r'\d+\.?\d*', "asdfasdf123as1111111.123dfa12adsf1asdf3"))
# ['123', '1111111.123', '12', '1', '3']
# Character sets: [...] matches exactly one character out of the listed set.
#
# Patterns are raw strings so that '\d' is not misread as an (invalid) string
# escape.  The re.DOTALL flag is kept from the original calls, but it only
# changes the meaning of '.', so it has no effect on these patterns.

# Baseline for comparison: \d is a single digit between 'a' and 'b'.
print(re.findall(r'a\db', 'a1111111b a3b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a4b', 'a9b']

# An explicit list of allowed characters.
print(re.findall(r'a[501234]b', 'a1111111b a3b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a4b']

# A range of characters.
print(re.findall(r'a[0-5]b', 'a1111111b a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a1b', 'a0b', 'a4b']

# Several ranges combined in one set.
print(re.findall(r'a[0-9a-zA-Z]b', 'a1111111b axb a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['axb', 'a3b', 'a1b', 'a0b', 'a4b', 'a9b', 'aXb']

# A leading ^ negates the set: any character NOT listed (newline included).
print(re.findall(r'a[^0-9a-zA-Z]b', 'a1111111b axb a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a b', 'a\nb']

# Outside a set, '-' is an ordinary literal character.
print(re.findall(r'a-b', 'a-b aXb a b a\nb', re.DOTALL))
# ['a-b']

# Inside a set, '-' is literal when it comes first (or last); '\n' in the set
# matches a newline.
print(re.findall(r'a[-0-9\n]b', 'a-b a0b a1b a8b aXb a b a\nb', re.DOTALL))
# ['a-b', 'a0b', 'a1b', 'a8b', 'a\nb']