re模块
re模块
一、导入方式
import re
二、作用
从字符串里找特定的字符串
三、基本语法
^
匹配开头
s = 'yjyyjyijksodh'
print(re.findall('^yjy',s))
----------------------------------------------------------
['yjy']
$
匹配结尾
s = 'yjyyjyijksodh'
print(re.findall('dh$',s))
----------------------------------------------------------
['dh']
[]
匹配[]中列出的任意一个字符
s = 'acefghjkacefsdfsdf'
print(re.findall('[acef]', s)) # 只要单个字符
-------------------------------------------------------------
['a', 'c', 'e', 'f', 'a', 'c', 'e', 'f', 'f', 'f']
[^]
^对[]里面的元素取反
s = 'acefghjkacefsdfsdf'
print(re.findall('[^acef]', s))
------------------------------------------------------
['g', 'h', 'j', 'k', 's', 'd', 's', 'd']
.
任意字符(除了\n)
s = 'acefghjkacefsdfsdf'
print(re.findall('a..', s))
------------------------------------------------------
['ace', 'ace']
*
前面的字符0-无穷个
s = 'abaacaaaaa'
print(re.findall('a*', s))
------------------------------------------------------
['a', '', 'aa', '', 'aaaaa', '']
+
前面的字符1-无穷个
s = 'abaacaaaaa'
print(re.findall('a+', s))
------------------------------------------------------
['a', 'aa', 'aaaaa']
?
前面的字符0-1个
s = 'abaacaaaaa'
print(re.findall('a?', s))
------------------------------------------------------
['a', '', 'a', 'a', '', 'a', 'a', 'a', 'a', 'a', '']
{m}
前面的字符m个
s = 'abaacaaaaa'
print(re.findall('a{5}', s))
------------------------------------------------------
['aaaaa']
{m,n}
前面的字符m-n个
s = 'abaacaaaaa'
print(re.findall('a{2,5}', s))
------------------------------------------------------
['aa', 'aaaaa']
\d
数字
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\d', s))
------------------------------------------------------
['1', '2', '3']
\D
非数字
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\D', s))
------------------------------------------------------
['s', ' ', ' ', 's', '+', '\n', '=', '$', '\t', '_', 's', ' ']
\w
数字/字母/下划线
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\w', s))
------------------------------------------------------
['s', '1', 's', '2', '_', 's', '3']
\W
非数字/字母/下划线
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\W', s))
------------------------------------------------------
[' ', ' ', '+', '\n', '=', '$', '\t', ' ']
\s
空格/\t/\n
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\s', s))
------------------------------------------------------
[' ', ' ', '\n', '\t', ' ']
\S
非空格/\t/\n
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\S', s))
------------------------------------------------------
['s', '1', 's', '+', '=', '$', '2', '_', 's', '3']
\
转义:取消元字符的特殊意义
s = 'aba\d'
print(re.findall(r'a\\d', s))
------------------------------------------------------
['a\\d']
.*
贪婪模式(最大化),找到继续找,让结果最大化
s = 'abbbcabc'
print(re.findall('a.*c', s))
------------------------------------------------------
['abbbcabc']
.*?
非贪婪模式(最小化),找到就马上停止
s = 'abbbcabc'
print(re.findall('a.*?c', s))
------------------------------------------------------
['abbbc', 'abc']
()
只要括号内的
s = 'abacad'
print(re.findall('a(.)', s))
------------------------------------------------------
['b', 'c', 'd']
A|B
匹配A或B(满足其中一个即可)
s = 'abacad'
print(re.findall('a|b', s))
------------------------------------------------------
['a', 'b', 'a', 'a']
四、模块方法
re.match(): 从开头搜索,搜索到了就返回匹配对象,没搜索到就返回None
s = 'abc123\ndef456'
res = re.match('\d+', s) #从开头搜索数字,搜索到了就返回匹配对象,没搜索到就返回None
print(res)
----------------------------------------------
None
s = '123abc123\ndef456'
res = re.match('\d+', s)
print(res) #返回的是一个对象
print(res.group()) #对象必须用group()返回
-----------------------------------------------------
<re.Match object; span=(0, 3), match='123'>
123
re.search(): 搜索第一个匹配结果,找到了就不找了
s = '123abc123\ndef456'
res = re.search('\d+', s)
print(res)
print(res.group())
------------------------------------------------------
<re.Match object; span=(0, 3), match='123'>
123
re.split(): 按照匹配规则切割
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.split('\d+', s1))
-----------------------------------------------
['abc', 'asdfk', 'lkjsf', 'lkj']
re.sub(): 按照匹配规则替换(重点)
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.sub('\d+', '***', s1))
-----------------------------------------------
abc***asdfk***lkjsf***lkj
re.subn(): 按照匹配规则替换,并计数
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.subn('\d+', '***', s1))
-----------------------------------------------
('abc***asdfk***lkjsf***lkj', 3)