Python 正则表达式

 

# coding: utf-8
__author__ = 'similarface'
import sys
# Substring selection via slicing
print('HK usa china'[5:10])                         # a chi
# Strings can be concatenated with +
print('similar'+'face')                             # similarface
# printf-style (%) interpolation
print('Python is %s %s'%('good','language'))        # Python is good language
# str.format interpolation
print('Gril {} {}'.format('come','back'))           # Gril come back
# Custom format specs: left-justify to width 5; signed, zero-padded to width 6
print('boy = "%-5s",%+06d' % ('age',18))            # boy = "age  ",+00018
print('gril = "{0:<5}", {1:+06}'.format('lucy', 18))# gril = "lucy ", +00018

# Named `digits` rather than `str` so the built-in str type is not shadowed.
digits = '0123456789'
print(digits.find('2'))             # 2 (index of the first occurrence)
print(digits.replace('0', '!'))     # !123456789
print(digits.rjust(13, '*'))        # ***0123456789
print(digits.isdigit())             # True
# Disabled example: would read stdin and replace every tab with four dots.
#sys.stdout.write(('.'*4).join(sys.stdin.read().split('\t')))
print('-'.join(digits.split('0')))  # -123456789

import re
text1 = 'Hello boy...World'
text2 = 'Hello gril...You'
# re.match anchors at the start of the string and returns a match object, e.g.
# <re.Match object; span=(0, 17), match='Hello boy...World'>
# (older Python 3 releases show it as <_sre.SRE_Match object; ...>).
print(repr(re.match('Hello(.*)World',text1)))
print(re.match('Hello(.*)World',text1).group(1))    # " boy..." (with the leading space)
# Compile the pattern once up front and reuse the compiled object.
# The original literal was misspelled 'Hello(.*)Word' and could never match text1.
pattobj=re.compile('Hello(.*)World')
pattobj.match(text1)                                 # matches; the result is not used here
print(pattobj.match(text2))                          # None

# Section separator
print('----------------')
# Optional leading blanks/tabs, "China", one or more spaces, then a greedy
# capture that extends up to the last "Arab"/"arab" in the line.
patt = '[ \t]*China[ ]+(.*)[Aa]rab'
line = "  China is willing to cooperate with okArab "
mobj = re.compile(patt).match(line)
# group(0) is the whole matched text; group(1) is the first parenthesised capture.
print(mobj.group(0), mobj.group(1), sep='\n')

strdemo = '111--222==333'
# A single literal delimiter
dashes = re.compile('--')
print(dashes.split(strdemo))                            # ['111', '222==333']
print(dashes.sub('...', strdemo))                       # 111...222==333
# Alternation: either delimiter
either = re.compile('--|==')
print(either.split(strdemo))                            # ['111', '222', '333']
print(either.sub('...', strdemo))                       # 111...222...333
# A character class as the delimiter
print(re.compile('[+=]').split('1+2=3'))                # ['1', '2', '3']
# Capturing groups make split() return the delimiters as well; the group
# that did not take part in a given match is reported as None.
print(re.compile('(--)|(==)').split(strdemo))           # ['111', '--', None, '222', None, '==', '333']
# (?:...) groups without capturing, so only the pieces are returned.
print(re.compile('(?:--)|(?:==)').split('aaa--bbb==ccc'))  # ['aaa', 'bbb', 'ccc']

# NOTE: the return values below are neither printed nor stored; the comment
# above each call shows the value it evaluates to.
# Patterns are raw strings so that \w, \- and \/ reach the regex engine
# without relying on invalid string escapes.

# search() returns only the first match -> ('spam', 'ham')
re.search(r'<(.*?)>/?<(.*?)>', 'todays menu: <spam>/<ham>...<eggs><s>').groups()
# findall() returns every non-overlapping match -> [('spam', 'ham'), ('eggs', 'cheese')]
re.findall(r'<(.*?)>/?<(.*?)>', '<spam>/<ham> ... <eggs><cheese>')
# Without DOTALL, "." does not match a newline, so nothing matches -> []
re.findall(r'<(.*?)>.*<(.*?)>', '<spam> \n <ham>\n<eggs>')
# (?s) lets "." match newlines; the greedy ".*" runs to the last tag -> [('spam', 'eggs')]
re.findall(r'(?s)<(.*?)>.*<(.*?)>', '<spam> \n <ham>\n<eggs>')
# The non-greedy ".*?" stops at the nearest following tag -> [('spam', 'ham')]
re.findall(r'(?s)<(.*?)>.*?<(.*?)>', '<spam> \n <ham>\n<eggs>')
# Named groups -> {'part1': 'aaa', 'part2': 'bbb', 'part3': 'ccc'}
re.search(r'(?P<part1>\w*)/(?P<part2>\w*)/(?P<part3>\w*)', '...aaa/bbb/ccc]').groupdict()

# Runs of characters other than space, ".", "-" and "/"
# -> ['aaa', 'bbb', 'ccc', 'ddd', 'e&e*e']
line = 'aaa...bbb-ccc / ddd.-/e&e*e'
re.findall(r'[^ .\-\/]+', line)

# Section separator
print('-----xx------xx------')
pattern = "A.C"
string = "xxABCDxx"
# search() scans the whole string; start() is the index where the match begins.
matchobj = re.search(pattern, string)
if matchobj is not None:
    print(matchobj.start())    # 2

# The same kind of lookup through a precompiled pattern object.
pattobj = re.compile("A.*C.*")
matchobj = pattobj.search(string)
if matchobj is not None:
    print(matchobj.start())    # 2

# Character classes, ranges, a negated class and a lazy "+?" quantifier.
print(re.compile(" *A.C[DE][D-F][^G-ZE]G\t+?").search("..ABCDEFG\t..").start())    # 2
# Alternation: the first alternative that matches wins.
print(re.compile("A|XB|YC|ZD").search("..AYCD..").start())    # 2
# \b is a word boundary, placed before and then after the literal text.
print(re.compile(r"\bABCD").search("..ABCD ").start())    # 2
print(re.compile(r"ABCD\b").search("..ABCD ").start())    # 2

### GROUPS ###
# Every parenthesised sub-pattern is a group; groups are numbered from 1.
patt = re.compile("A(.)B(.)C(.)")
mobj = patt.match("A0BsCd")
print(*mobj.group(1, 2, 3))    # 0 s d

# With "*" each group can capture any number of characters.
patt = re.compile("A(.*)B(.*)C(.*)")
mobj = patt.match("AOOBxCssaa")
print(*mobj.group(1, 2, 3))    # OO x ssaa

# groups() returns all captures as a tuple; groupdict() maps the names of
# (?P<name>...) groups to their captures.
positional = re.compile("(A|X)(B|Y)(C|Z)D")
print(positional.search("..AYCD..").groups())    # ('A', 'Y', 'C')
named = re.compile("(?P<a>A|X)(?P<b>B|Y)(?P<c>C|Z)D")
print(named.search("..AYCD..").groupdict())    # {'a': 'A', 'b': 'Y', 'c': 'C'}

# Pull the macro name and its body out of a C-style "#define" line.
patt = re.compile(r"[\t ]*#\s*define\s*([a-z0-9_]*)\s*(.*)")
mobj = patt.search(" # define spam 1 + 2 + 3")
print(mobj.groups())    # ('spam', '1 + 2 + 3')

### SUBSTITUTION ###
# Replace every single A, B or C.
print(re.sub(pattern='[ABC]', repl='*', string='DAKCNBEJICD'))    # D*K*N*EJI*D
# Replace only an A, B or C that is immediately followed by "_".
print(re.sub(pattern='[ABC]_', repl='?', string='Da_JNF_FBB_CSQ'))    # Da_JNF_FB?CSQ
# \\1 in the replacement inserts the text captured by group 1.
print(re.sub(pattern='(.) spam', repl='spam\\1', string='x spam, y spam'))    # spamx, spamy
# Same substitution as the 'spam\\1' template, written as a callable replacement.
def mapper(matchobj):
    """Return the replacement text for one match: 'spam' followed by group 1.

    Intended to be passed as the ``repl`` callable of ``re.sub``.
    """
    captured = matchobj.group(1)
    return 'spam' + captured
# re.sub calls mapper once per match and inserts whatever it returns.
print(re.compile('(.) spam').sub(mapper, 'x spam, y spam'))    # spamx, spamy

 

posted @ 2016-01-15 11:08  similarface  阅读(432)  评论(0)  编辑  收藏  举报