正则表达式的使用

1.正则表达式介绍

# Literal (exact-text) matching examples
import re

string = '1. A small sentence. - 2. Another tiny sentence. '

# Searching: findall returns every occurrence, search returns the first
# occurrence anywhere, and match only succeeds at the start of the string.
re.findall('sentence', string)
re.search('sentence', string)
re.match('1. A small sentence', string)
# Replacing: swap one word for another, or delete it by substituting ''.
re.sub('small', 'large', string)
re.sub('small', '', string)

In [ ]:

# The subject string is deliberately NOT a raw string: "\n" and "\t" must be
# a real newline and a real tab so the examples below can show how they match.
string = 'small smell smll smsmll sm3ll sm.ll sm?ll sm\nll sm\tll'

# `.` matches any single character except the newline "\n".
re.findall('sm.ll', string)
# `[...]` is a character class: it matches any one of the characters listed
# inside the brackets (ranges such as a-z are allowed).
re.findall('sm[asdbf]ll', string)
re.findall('sm[a-zA-Z0-9]ll', string)
# `\` escapes a metacharacter so that it is matched literally. Patterns that
# contain a backslash are written as raw strings (r'...') so Python does not
# treat `\.`, `\-` or `\w` as (invalid) string escape sequences.
re.findall(r'sm\.ll', string)
re.findall(r'sm[.\- ?]ll', string)
# `|` is alternation (OR): match either the left or the right alternative.
re.findall('small|smell', string)
# `\w` matches one word character (letter, digit or underscore).
re.findall(r'sm\wll', string)

In [ ]:

# Two consecutive `.` match exactly two arbitrary (non-newline) characters.
re.findall('sm..ll', string)
# Quantifiers: how many times the preceding element may repeat.
re.findall('sm.{2}ll', string)    # exactly 2
re.findall('sm.{1,2}ll', string)  # between 1 and 2
re.findall('sm.{1,}ll', string)   # 1 or more
re.findall('sm.?ll', string)  # `?` is shorthand for {0,1}
print(re.findall('sm.+ll', string))  # `+` is shorthand for {1,}
print(re.findall('sm.*ll', string))  # `*` is shorthand for {0,}
# A literal question mark must be escaped; the raw string keeps `\?` from
# being read as an (invalid) Python string escape.
re.findall(r'sm\?ll', string)

In [ ]:

# Extract names and phone numbers from unstructured text with regular expressions
import pandas as pd
rawdata = '555-1239Moe Szyslak(636) 555-0113Burns, C.Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson,Homer5553642Dr. Julius Hibbert'
# A name starts with a capital letter and runs through any following letters,
# commas, periods and spaces (so it stops at the next digit or parenthesis).
names = re.findall('[A-Z][A-Za-z,. ]*', rawdata)
# A phone number is: an optional area code of up to 3 digits (optionally in
# parentheses), an optional space/hyphen, 3 digits, an optional space/hyphen,
# then 4 digits. Written as a raw string so `\(`, `\)` and `\-` reach the
# regex engine unchanged instead of being invalid Python string escapes.
tels = re.findall(r'\(?[0-9]{0,3}\)?[ \-]?[0-9]{3}[ \-]?[0-9]{4}', rawdata)
# Both lists have one entry per person, so they line up row by row.
pd.DataFrame({'Names': names, 'TelPhone': tels})

2.参考文章

【创作不易,望点赞收藏,若有疑问,请留言,谢谢】

posted @ 2022-07-01 15:29  东血  阅读(62)  评论(0)  编辑  收藏  举报

载入天数...载入时分秒...