Python 模块学习 —— 正则表达式
import re

# Earlier experiment, kept for reference: split every line of data.txt on
# runs of two or more whitespace characters.
# f = open('data.txt', 'r')
# for eachline in f.readlines():
#     print(re.split(r'\s\s+', eachline))

# Generate a handful of random records for regular-expression practice.
# Each record looks like:
#     <ctime string>::<login>@<domain>.<tld>::<timestamp>-<len(login)>-<len(domain)>
from random import choice, randint
from string import ascii_lowercase
from time import ctime

# Top-level domains a record may end with.
doms = ('com', 'edu', 'net', 'org', 'gov')

# Upper bound for generated timestamps.  sys.maxint is 2**63 - 1 on 64-bit
# builds (and does not exist on Python 3); timestamps that large cannot be
# converted by ctime(), so the 32-bit limit is spelled out explicitly.
MAX_TIMESTAMP = 2 ** 31 - 1


def random_word(length):
    """Return a string of ``length`` random ASCII lowercase letters."""
    # ascii_lowercase (unlike Python 2's locale-dependent string.lowercase)
    # always yields plain a-z, and it exists on both Python 2 and 3.
    return ''.join(choice(ascii_lowercase) for _ in range(length))


def make_record():
    """Build and return one random record string.

    The login name is 4-7 letters long; the domain name is at least as long
    as the login and at most 12 letters.  Both lengths and the integer
    timestamp are appended after the address so that they can be recovered
    with a regular expression.
    """
    dtint = randint(0, MAX_TIMESTAMP - 1)
    dtstr = ctime(dtint)
    shorter = randint(4, 7)
    em = random_word(shorter)
    # The domain is never shorter than the login name.
    longer = randint(shorter, 12)
    dn = random_word(longer)
    return "%s::%s@%s.%s::%d-%d-%d" % (
        dtstr, em, dn, choice(doms), dtint, shorter, longer)


# Emit between 5 and 10 records, one per line.
for i in range(randint(5, 10)):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(make_record())
运行结果:
Sat Mar 13 01:58:52 2032::cdrnpl@spvxjivzlzux.net::1962727132-6-12 Wed May 17 00:02:49 2034::cvyyd@nrfry.net::2031408169-5-5 Mon Oct 04 00:23:55 2004::zvkxrhv@fwbdncrl.org::1096820635-7-8 Sat Feb 16 01:46:48 2013::lgkzuz@fvratahza.edu::1360950408-6-9 Fri Aug 13 02:20:00 1976::dvsptdy@szsfeub.edu::208722000-7-7 Sun Dec 01 04:54:59 1991::oqdoh@dcuqagr.gov::691534499-5-7 Thu Oct 11 01:32:08 1990::qaoevt@kvbjqd.edu::655579928-6-6 Tue Aug 04 04:33:58 2009::jnejg@jmzwb.gov::1249331638-5-5 Sun Jul 20 08:11:48 2003::nuohl@svtgnn.net::1058659908-5-6 Tue May 29 12:09:07 2001::pgfzf@segsnmusti.net::991109347-5-10
import re

# One sample record in the format
# <ctime string>::<login>@<domain>.<tld>::<timestamp>-<len(login)>-<len(domain)>
data = 'Mon May 18 01:40:03 2020::mkxvej@pbhhwbxzgk.edu::1589737203-6-10'

# Matches a weekday abbreviation at the start of the record
# (defined for reference; not used by the searches below).
patt1 = r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun)'

# search() scans the whole string: finds the single digit between two dashes.
s_patt = r'-(\d)-'
s_search = re.search(s_patt, data)
print(s_search.group())

# Greedy ".+" consumes as much as it can, leaving the group only the minimum
# it needs: a single trailing digit of the timestamp plus the two lengths.
m_patt_0 = r'.+(\d+-\d+-\d+)'
m_match_0 = re.match(m_patt_0, data)
print(m_match_0.group(1))

# Non-greedy ".+?" consumes as little as it can, so the group captures the
# complete "<timestamp>-<n>-<m>" field.
m_patt = r'.+?(\d+-\d+-\d+)'
m_match = re.match(m_patt, data)
print(m_match.group(1))
“非贪婪”操作符“?”可以用在“*”、“+”、“?”的后面，使正则表达式引擎匹配尽可能少的字符。例如上面贪婪的“.+”会尽量多吞字符，只给分组留下“3-6-10”；而非贪婪的“.+?”尽量少吞字符，分组因此能捕获完整的“1589737203-6-10”。
运行结果:
-6- 3-6-10 1589737203-6-10
日行一善, 日写一撰