Long Way To Go 之 Python 5
模块
time&datetime 模块 时间
random 模块 随机数
shutil 模块 文件、文件夹、压缩包
json & pickle 模块 字符串 \ python特有的类型 与 python数据类型间进行转换
shelve 模块 以key,value将内存数据通过文件持久化
xml 模块 不同语言或程序之间进行数据交换,跟json差不多,但json使用起来更简单
configparser 模块 生成和修改常见配置文档
hashlib 模块 加密
logging 模块 记录日志
re 模块 处理字符串
time & datetime 模块
time 模块
# Tour of the `time` module: clocks, struct_time objects and string conversions.
import time

print(" time applications ".center(80, "*"))
print(time.process_time())  # CPU time used by this process; time spent in sleep is excluded
print(time.time())  # timestamp: seconds elapsed since 1970-01-01 (the epoch)
print(time.gmtime())  # current UTC (Greenwich) time as a struct_time

print("--------- 本地时间 -----------")
print(time.altzone)  # offset of the local DST timezone from UTC, in seconds
print(time.localtime())  # current local time as a struct_time
print(time.localtime(time.time()))  # same thing, from an explicit timestamp
print(time.localtime(time.time() + 3600 * 3))  # local time three hours from now
t1 = time.localtime(time.time())
print(t1.tm_year, t1.tm_yday)  # year and day-of-year

print("----------- 时间格式 ------------")
# All three print "weekday month day H:M:S year" for the current local time.
print(time.asctime())
print(time.asctime(time.localtime()))
print(time.ctime())

print("---------- 日期字符串 转 时间对象 转 时间戳 -----------")
# Date string -> struct_time. (With a two-digit year the format would use %y instead of %Y.)
struct_time = time.strptime("2016-11-11 23:30", "%Y-%m-%d %H:%M")
print(struct_time)
# A struct_time cannot be used in arithmetic; convert it to a timestamp first.
struct_time_stamp = time.mktime(struct_time)
print(struct_time_stamp)

print("----------- 时间戳 转 时间对象 转 字符串 -----------")
# Timestamp -> struct_time.
struct_time2 = time.localtime(struct_time_stamp)
print(struct_time2)
# struct_time -> formatted string.
string_time = time.strftime("%Y_%m_%d_%H_%M.log", struct_time2)
print(string_time)
***************************** time applications ****************************** 0.156001 1494746244.919801 time.struct_time(tm_year=2017, tm_mon=5, tm_mday=14, tm_hour=7, tm_min=17, tm_sec=24, tm_wday=6, tm_yday=134, tm_isdst=0) --------- 本地时间 ----------- -3600 time.struct_time(tm_year=2017, tm_mon=5, tm_mday=14, tm_hour=8, tm_min=17, tm_sec=24, tm_wday=6, tm_yday=134, tm_isdst=1) time.struct_time(tm_year=2017, tm_mon=5, tm_mday=14, tm_hour=8, tm_min=17, tm_sec=24, tm_wday=6, tm_yday=134, tm_isdst=1) time.struct_time(tm_year=2017, tm_mon=5, tm_mday=14, tm_hour=11, tm_min=17, tm_sec=24, tm_wday=6, tm_yday=134, tm_isdst=1) 2017 134 ---------- 时间格式 ------------ Sun May 14 08:17:24 2017 Sun May 14 08:17:24 2017 Sun May 14 08:17:24 2017 ---------- 日期字符串 转 时间对象 转 时间戳 ----------- time.struct_time(tm_year=2016, tm_mon=11, tm_mday=11, tm_hour=23, tm_min=30, tm_sec=0, tm_wday=4, tm_yday=316, tm_isdst=-1) 1478907000.0 ---------- 时间戳 转 时间对象 转 字符串 ----------- time.struct_time(tm_year=2016, tm_mon=11, tm_mday=11, tm_hour=23, tm_min=30, tm_sec=0, tm_wday=4, tm_yday=316, tm_isdst=0) 2016_11_11_23_30.log
datetime 模块
# Tour of the `datetime` module: current time, arithmetic and field replacement.
import datetime
import time  # needed for time.time() below; the snippet did not import it itself

print(" datetime applications ".center(80, "*"))

print("--------------- 本地时间 ----------------")
print(datetime.datetime.now())  # current local date and time
print(datetime.datetime.fromtimestamp(time.time()))  # timestamp -> datetime in one step

print("--------------- 时间运算 ----------------")
# Arithmetic is done by adding or subtracting a timedelta.
print(datetime.datetime.now() + datetime.timedelta(days=3))  # now + 3 days
print(datetime.datetime.now() - datetime.timedelta(days=3))  # now - 3 days
print(datetime.datetime.now() + datetime.timedelta(hours=3))  # now + 3 hours
print(datetime.datetime.now() + datetime.timedelta(minutes=30))  # now + 30 minutes

print("--------------- 时间替换 ----------------")
# replace() returns a new datetime with the given fields swapped out.
now = datetime.datetime.now()
print(now.replace(month=1, day=3))
*************************** datetime applications **************************** --------------- 本地时间 ---------------- 2017-05-14 08:17:24.970803 2017-05-14 08:17:24.970804 --------------- 时间运算 ---------------- 2017-05-17 08:17:24.970803 2017-05-11 08:17:24.970803 2017-05-14 11:17:24.970803 2017-05-14 08:47:24.970803 --------------- 时间替换 ---------------- 2017-01-03 08:17:24.970803
字符串 & 时间戳 的转换:
字符串 ---------------------------------------> 时间对象(struct_time) ---------------> 时间戳(struct_time_stamp)
time.strptime("日期字符串内容","想转的日期格式") time.mktime(struct_time)
时间戳 --------------------------------------> 时间对象(struct_time) ----------------> 字符串(string_time)
time.gmtime(struct_time_stamp) time.strftime("想转的日期格式", struct_time)
or
time.localtime(stuct_time_stamp)
random 模块
生成随机数:
# Basic random-number helpers.
import random

print(random.random())  # float in [0.0, 1.0)
print(random.randint(1, 5))  # integer from 1 to 5, both ends included
print(random.randrange(1, 10))  # integer from 1 to 9; the stop value is excluded
print(random.sample(range(100), 5))  # 5 distinct values out of 0..99
print(random.sample("abcde", 2))  # 2 distinct characters out of "abcde"
string 模块:
# Ready-made character sets from the `string` module.
import string

print(string.ascii_letters)  # lower-case followed by upper-case ASCII letters
print(string.digits)  # "0123456789"
print(string.hexdigits)  # digits plus a-f and A-F
随机验证码:
# Two ways to build a random verification code.
# NOTE: `random` is not cryptographically secure; for codes that protect
# anything sensitive, the `secrets` module is the appropriate tool.
import random
import string

# Method 1: sample 6 distinct characters from letters + digits.
str_source = string.ascii_letters + string.digits
print(''.join(random.sample(str_source, 6)))

# Method 2: a 4-character code mixing upper-case letters and digits.
chars = []
for position in range(4):
    # A position becomes a digit only when the random draw equals its index,
    # so letters are the more common outcome.
    if random.randrange(0, 4) != position:
        chars.append(chr(random.randint(65, 90)))  # chr(65) -> "A", chr(90) -> "Z"
    else:
        chars.append(str(random.randint(0, 9)))
checkcode = ''.join(chars)
print(checkcode)
shutil 模块
a) 文件、文件夹的处理
# File and directory helpers from `shutil`.
import shutil

# Copy the contents of one open file object into another; the `with` block
# guarantees both handles are closed afterwards.
with open("random mod.py") as f1, open("random new.py", "w") as f2:
    shutil.copyfileobj(f1, f2)

# Copy data and mode bits without opening the file yourself; the destination
# may be a directory.
shutil.copy(r"C:\Users\apple\PycharmProjects\s14\day3\haproxy", "test")

# Quick reference. `src`, `dst`, `fsrc`, `fdst` and `path` are placeholders,
# so these calls are listed as comments rather than executed:
#   shutil.copyfileobj(fsrc, fdst[, length])  copy between file objects; `length` is the buffer size
#   shutil.copyfile(src, dst)   copy file contents from path src to path dst (dst must be writable)
#   shutil.copymode(src, dst)   copy only the permission bits; contents, owner and group are untouched
#   shutil.copystat(src, dst)   copy all stat info (mode bits, atime, mtime, flags)
#   shutil.copytree(src, dst)   copy a whole directory tree
#   shutil.copy2(src, dst)      copy data and all stat info; dst may be a directory
#   shutil.rmtree(path)         recursively delete a directory tree
#   shutil.move(src, dst)       move a file or directory
b)压缩包处理:
shutil.make_archive(base_name, format,...)
创建压缩包并返回文件路径,例如:zip、tar
- base_name: 压缩包的文件名,也可以是压缩包的路径。只是文件名时,则保存至当前目录,否则保存至指定路径,
如:www =>保存至当前路径
如:/Users/wupeiqi/www =>保存至/Users/wupeiqi/
- format: 压缩包种类,“zip”, “tar”, “bztar”,“gztar”
- root_dir: 要压缩的文件夹路径(默认当前目录)
- owner: 用户,默认当前用户
- group: 组,默认当前组
- logger: 用于记录日志,通常是logging.Logger对象
# shutil.make_archive(base_name, format, ...) builds an archive and returns its path.
import shutil

# Pack everything under /Users/wupeiqi/Downloads/test; a bare base name puts
# the archive in the current working directory.
ret = shutil.make_archive(
    "wwwwwwwwww",
    format='gztar',
    root_dir='/Users/wupeiqi/Downloads/test',
)

# Same content, but the base name carries a path, so the archive is written
# to /Users/wupeiqi/.
ret = shutil.make_archive(
    "/Users/wupeiqi/wwwwwwwwww",
    format='gztar',
    root_dir='/Users/wupeiqi/Downloads/test',
)
c) shutil 对压缩包的处理是调用 ZipFile 和 TarFile 两个模块来进行的
ZipFile 模块:
# Creating and extracting a zip archive.
import zipfile

# Create the archive; the `with` block closes it even if a write fails.
with zipfile.ZipFile(r"c:\zip_test.zip", "w") as zip_obj:
    zip_obj.write("test")  # add a file to the archive
    # Writing a directory path stores only the directory entry, not its files.
    zip_obj.write(r"C:\Users\Adminisrator\PycharmProjects\s14\day5")

# Extract everything into the current working directory.
with zipfile.ZipFile(r"c:\zip_test.zip", "r") as zip_obj:
    zip_obj.extractall()
TarFile 模块:
# Creating and extracting a tar archive.
import tarfile

# One path for both steps, so extraction reads the archive that was just written.
archive_path = r"c:\your.tar"

# Create the archive.
with tarfile.open(archive_path, "w") as tar:
    # Raw string: in a normal literal "\U" starts a unicode escape and the
    # file would not even compile. Adding a directory includes its contents.
    tar.add(r"C:\Users\Adminisrator\PycharmProjects\s14\day5", arcname="day5")
    tar.add(r"c:\zip_test.zip")

# Extract; extractall() accepts a target directory as its `path` argument.
# NOTE: only extract archives you trust; members can carry unsafe paths.
with tarfile.open(archive_path, "r") as tar:
    tar.extractall()
json & pickle 模块
用于序列化的两个模块
- json,用于字符串 和 python数据类型间进行转换
- pickle,用于python特有的类型 和 python的数据类型间进行转换(eg. 函数)
Json模块提供了四个功能:dumps、dump、loads、load
pickle模块提供了四个功能:dumps、dump、loads、load
PS :
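json.load / json.loads 一次只能解析一个完整的 JSON 文档:如果往同一个文件里连续 dumps 多次,内容会拼接在一起,再用一次 loads 读取就会报错。所以最简单的用法是 dump 一次、load 一次。
如果想存好几个状态,可以存成好几个文件、每行写一个 JSON 文档后逐行 loads(pickle 则可以 dump 多次、再按同样次数依次 load),或者使用 shelve 模块来处理多次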
json(反)序列化:
# Serialize a dict to a JSON file and read it back.
import json

info = {
    "name": "alex",
    "age": 22,
}

# dict -> str, written to disk; equivalent to json.dump(info, f).
with open("test.test", "w") as f:
    f.write(json.dumps(info))
print(type(json.dumps(info)))  # str

# str -> dict. The `with` block closes the reader; the original closed the
# writer a second time and left this handle open.
with open("test.test", "r") as f2:
    data = json.loads(f2.read())
print(type(data))  # dict
print(data["age"])
json(反)序列化 之 多dumps/loads:
# Storing several states in one file with JSON.
import json

info = {
    "name": "alex",
    "age": 22,
}

# Writing several documents back to back produces text that a single
# json.loads() call cannot parse. Putting each document on its own line
# keeps them separable.
with open("test.test", "w") as f:
    f.write(json.dumps(info) + "\n")
    info["age"] = 21
    f.write(json.dumps(info) + "\n")

# Read the states back one line (one document) at a time.
with open("test.test", "r") as f2:
    states = [json.loads(line) for line in f2 if line.strip()]

data = states[-1]  # most recently saved state
print(data["age"])
pickle(反)序列化:
# Serialize a dict that contains a function with pickle and read it back.
import pickle


def sayhi(name):
    """Print a greeting for *name*."""
    print("hello,", name)


info = {
    "name": "alex",
    "age": 22,
    "func": sayhi,
}

# dict -> bytes, so the file is opened in binary mode; equivalent to
# pickle.dump(info, f). The bytes are a binary format, not corrupted text.
with open("test.test", "wb") as f:
    f.write(pickle.dumps(info))
print(type(pickle.dumps(info)))  # bytes

# bytes -> dict. A function is pickled by reference (module + name), so the
# loading side must define `sayhi` itself; whatever definition exists at load
# time is the one that runs.
# SECURITY: never call pickle.loads on data from an untrusted source.
with open("test.test", "rb") as f2:
    data = pickle.loads(f2.read())
print(type(data))  # dict
data["func"]("AAA")  # sayhi prints and returns None, so its result is not printed
shelve 模块
shelve模块是一个简单的 key, value 将内存数据通过文件持久化的模块,可以持久化任何pickle可支持的python数据格式
# Persist several Python objects under string keys with shelve.
import shelve


def stu_data(name, age):
    """Print a registration line for a student."""
    print("register stu", name, age)


name = ["alex", "roger", "vivian"]
info = {
    "name": "alex",
    "age": 22,
}

# The context manager closes (and flushes) the shelf even if a write fails.
with shelve.open("shelve_test") as d:
    d["test"] = name  # persist a list
    d["info"] = info  # persist a dict
    d["func"] = stu_data  # persist a function
# Read back the objects stored in the "shelve_test" shelf.
import shelve


def stu_data(name, age):
    """Print a registration line; this is the definition the loaded entry calls."""
    print("Yoo! register stu", name, age)


# The context manager closes the shelf when the block ends.
with shelve.open("shelve_test") as f:
    print(f["test"])
    print(f["info"])
    f["func"]("david", 23)  # stu_data prints and returns None, so its result is not printed
xml 模块
xml是实现不同语言或程序之间进行数据交换的协议(xml协议在各个语言里都支持),跟json差不多,但json使用起来更简单
至今很多传统公司如金融行业的很多系统的接口还主要是xml
xml的格式:(通过<>节点来区别数据结构)
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data>
创建/增:
# Build an XML document from scratch and write it to disk.
import xml.etree.ElementTree as ET

namelist = ET.Element("NameList")  # root node

# First <name> entry, with two attributes and two children.
name = ET.SubElement(namelist, "name", attrib={"whatever": "yes", "name": "Alex"})
age = ET.SubElement(name, "age")
age.text = "22"
role = ET.SubElement(name, "role")
role.text = "teacher"

# Second <name> entry.
name2 = ET.SubElement(namelist, "name", attrib={"name": "SanPao"})
age = ET.SubElement(name2, "age")
age.text = "22"
role = ET.SubElement(name2, "role")
role.text = "teacher"

et = ET.ElementTree(namelist)  # wrap the root in a document object
et.write("text3.xml", encoding="utf-8", xml_declaration=True)

ET.dump(namelist)  # print the generated markup
删除:
# Remove every <country> whose rank is above 50 and save the result.
import xml.etree.ElementTree as ET

# The original snippet used `tree` and `root` without defining them.
# NOTE(review): the input file name is an assumption ("test.xml" is what the
# lookup example parses) - confirm.
tree = ET.parse("test.xml")
root = tree.getroot()

# findall() returns a list, so removing nodes inside the loop is safe.
for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)  # drop this <country> element from the document

tree.write('output.xml')
修改:
# Increment every <year> by one, tag it as checked, and save the document.
import xml.etree.ElementTree as ET

# The original snippet used `tree` and `root` without defining them.
# NOTE(review): assumes the document is edited in place, i.e. read from the
# same "xmltest.xml" it is written to - confirm.
tree = ET.parse("xmltest.xml")
root = tree.getroot()

for node in root.iter("year"):
    new_year = int(node.text) + 1
    node.text = str(new_year)  # change the element text
    node.set("check", "yes")  # add or update an attribute

tree.write("xmltest.xml")
查找:
# Walk an XML document and print its nodes.
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()  # root node of the document
print(root.tag)

# Walk the whole document: each child of the root, then its children.
for child in root:
    print(child.tag, child.attrib)
    for i in child:
        print("\t", i.tag, i.attrib, i.text)

# Visit only the <year> nodes, wherever they are.
for node in root.iter('year'):
    print(node.tag, node.attrib, node.text)

# Alternative: visit the <year> nodes grouped under their parent.
for child in root:
    print(child.tag, child.attrib)
    for i in child.iter("year"):
        print(i.tag, i.attrib, i.text)
configparser 模块
用于生成和修改常见配置文档
软件的常见文档格式:
[DEFAULT] ServerAliveInterval = 45 Compression = yes CompressionLevel = 9 ForwardX11 = yes [bitbucket.org] User = hg [topsecret.server.com] Port = 50022 ForwardX11 = no
创建/增:
# Build a configuration in memory and write it to config.ini.
import configparser

config = configparser.ConfigParser()

# DEFAULT holds values that every other section inherits.
config["DEFAULT"] = {
    'ServerAliveInterval': '45',
    'Compression': 'yes',
    'CompressionLevel': '9',
}
config['DEFAULT']['ForwardX11'] = 'yes'  # add one more key afterwards

config['bitbucket.org'] = {}
config['bitbucket.org']['User'] = 'hg'

config['topsecret.server.com'] = {}
topsecret = config['topsecret.server.com']
topsecret['Host Port'] = '50022'  # the section proxy writes through to the parser
topsecret['ForwardX11'] = 'no'  # same here

# add_section() returns None, so its result is not kept.
config.add_section('wupeiqi')

with open('config.ini', 'w') as f:
    config.write(f)
删除:
# Remove one option from a section and save the result to config2.ini.
import configparser

# The original snippet used `config` and `section_name` without defining them.
config = configparser.ConfigParser()
config.read("config.ini")
# NOTE(review): picking the second section mirrors the read example - confirm.
section_name = config.sections()[1]

config.remove_option(section_name, "forwardx11")

# `with` closes the output file; the original left the handle open.
with open("config2.ini", "w") as f:
    config.write(f)
修改:
# Change one option in a section and save the result to config2.ini.
import configparser

# The original snippet used `config` and `section_name` without defining them.
config = configparser.ConfigParser()
config.read("config.ini")
# NOTE(review): picking the second section mirrors the read example - confirm.
section_name = config.sections()[1]

config.set(section_name, "host port", "3000")

# `with` closes the output file; the original left the handle open.
with open("config2.ini", "w") as f:
    config.write(f)
查找:
# Read config.ini and look values up in several ways.
import configparser

config = configparser.ConfigParser()
config.read("config.ini")

print(config.sections())  # names of the sections
# config.defaults() would return the DEFAULT values instead.
print(config.has_section('wupeiqi'))

# Dictionary-style access: section first, then key.
print(config['bitbucket.org']['User'])

# Access through a section name picked from the list.
print(config.sections()[1])
section_name = config.sections()[1]
print(config[section_name]["host port"])
print(config.options(section_name))  # the section's own keys plus the DEFAULT ones

# Iterating a section yields its own keys plus the DEFAULT ones.
for i in config["bitbucket.org"]:
    print(i)

# Keys together with values, DEFAULT entries included.
for i, v in config["bitbucket.org"].items():
    print(i, v)

# The same key/value pairs as a list of tuples.
print(config.items(section_name))
hashlib 模块
用于哈希(消息摘要)相关的操作;哈希是单向运算,严格来说并不是可以还原的“加密”
主要提供 SHA1, SHA224, SHA256, SHA384, SHA512 ,MD5 算法:
# Hash the same input with the common hashlib algorithms.
import hashlib

# Hex digest of b'admin' per algorithm name, filled in below.
digests = {}

# Order: md5, sha1, sha256 (widely used, stronger than md5), sha384,
# sha512 (the slowest of the group).
for make_hasher in (
    hashlib.md5,
    hashlib.sha1,
    hashlib.sha256,
    hashlib.sha384,
    hashlib.sha512,
):
    hasher = make_hasher()  # named `hasher` so the builtin hash() is not shadowed
    hasher.update(b'admin')
    digests[hasher.name] = hasher.hexdigest()
    print(digests[hasher.name])
ps:
# update() is cumulative: feeding data in pieces hashes the concatenation.
import hashlib

m = hashlib.md5()
m.update(b"alex")
print(m.hexdigest())  # the original transcript shows 534b44a19bf18d20b71ecc4eb77c572f

m.update(b"li")
print(m.hexdigest())  # the original transcript shows 5f48164ebf9ea14d675ff31bce71c7da

m2 = hashlib.md5()
m2.update(b"alexli")
print(m2.hexdigest())  # same digest as the two separate updates above
hmac模块:
散列消息鉴别码,简称HMAC,用于验证网络消息的完整性和来源(它做的是认证,不是对内容加密)。双方先要约定好key,然后P1用key对消息计算出HMAC值,并把消息和HMAC值一起发送;P2收到后用同一个key对收到的消息明文重新计算HMAC值,再与P1发来的HMAC值对比,来验证消息的真实性
# Compute a keyed message digest (HMAC).
import hashlib
import hmac

# Arguments are key, msg, digestmod. Current Python versions require
# digestmod to be given explicitly; the implicit MD5 default is gone,
# so SHA-256 is named here.
h = hmac.new(b'salt', b'hello', digestmod=hashlib.sha256)
print(h.hexdigest())
logging 模块
提供了标准的日志接口,存储各种格式的日志。
logging的日志可以分为 debug()、info()、warning()、error()、critical() 5个级别
| Level | When it’s used |
|---|---|
| DEBUG | Detailed information, typically of interest only when diagnosing problems. |
| INFO | Confirmation that things are working as expected. |
| WARNING | An indication that something unexpected happened, or indicative of some problem in the near future (e.g. ‘disk space low’). The software is still working as expected. |
| ERROR | Due to a more serious problem, the software has not been able to perform some function. |
| CRITICAL | A serious error, indicating that the program itself may be unable to continue running. |
把日志写到文件:
# Send log records to a file with the module-level logging functions.
import logging

# basicConfig() does nothing once the root logger has a handler, so only the
# first call counts. The original called it three times, which left the
# WARNING level and the format below unused. Configure once, with the
# settings actually wanted. Simpler variants:
#   logging.basicConfig(filename="app.log", level=logging.DEBUG)
#   logging.basicConfig(filename="app.log", level=logging.WARNING)
# Records are appended to the file; with level=WARNING only WARNING and
# above are written.
logging.basicConfig(
    filename="app.log",
    level=logging.WARNING,
    format="%(asctime)s %(filename)s: %(lineno)d %(funcName)s %(levelname)s: %(message)s",
    datefmt='%m/%d/%Y %I:%M:%S %p',
)

logging.debug("test debug")  # below the configured level, not written
logging.info("test")  # below the configured level, not written
logging.warning("user [alex] attempted wrong password more than 3 times")
logging.error("test error")
logging.critical("server is down")


def app_run():
    """Log a warning from inside a function so %(funcName)s shows its name."""
    logging.warning("app has been run too long")


app_run()
关于日志格式:
| 格式 | 含义 |
|---|---|
| %(name)s | Logger的名字 |
| %(levelno)s | 数字形式的日志级别 |
| %(levelname)s | 文本形式的日志级别 |
| %(pathname)s | 调用日志输出函数的模块的完整路径名,可能没有 |
| %(filename)s | 调用日志输出函数的模块的文件名 |
| %(module)s | 调用日志输出函数的模块名 |
| %(funcName)s | 调用日志输出函数的函数名 |
| %(lineno)d | 调用日志输出函数的语句所在的代码行 |
| %(created)f | 当前时间,用UNIX标准的表示时间的浮点数表示 |
| %(relativeCreated)d | 输出日志信息时,自Logger创建以来的毫秒数 |
| %(asctime)s | 字符串形式的当前时间。默认格式是 “2003-07-08 16:49:45,896”。逗号后面的是毫秒 |
| %(thread)d | 线程ID。可能没有 |
| %(threadName)s | 线程名。可能没有 |
| %(process)d | 进程ID。可能没有 |
| %(message)s | 用户输出的消息 |
既写入文件,又在屏幕上打印:
# Log to the console and to a file at the same time, each with its own
# level and format.
import logging

# Create the logger; its level is the lowest level any handler can receive.
logger = logging.getLogger('TEST-LOG')
logger.setLevel(logging.DEBUG)

# Console handler: WARNING and above.
ch = logging.StreamHandler()
ch.setLevel(logging.WARNING)

# File handler: ERROR and above.
fh = logging.FileHandler("access.log", encoding="utf-8")
fh.setLevel(logging.ERROR)

# One formatter per destination.
ch_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')  # console layout
fh_formatter = logging.Formatter("%(asctime)s %(filename)s: %(lineno)d -- %(levelname)s: %(message)s")  # file layout
ch.setFormatter(ch_formatter)
fh.setFormatter(fh_formatter)

# Attach both handlers so the logger emits to both destinations.
logger.addHandler(ch)
logger.addHandler(fh)

logger.warning("warning commit")  # reaches the console only
logger.error("error happend")  # reaches the console and the file
文件自动截断:
# Demo of automatic log-file rotation: a logger writes to "timelog.log"
# through a rotating handler while the script pauses between messages.
import logging
from logging import handlers
logger = logging.getLogger("TEST") # logger named "TEST"; no level is set on it here
log_file = "timelog.log"
#fh = handlers.RotatingFileHandler(filename=log_file,maxBytes=10,backupCount=3,encoding="utf-8") # alternative: rotate by file size
fh = handlers.TimedRotatingFileHandler(filename=log_file,when="S",interval=5,backupCount=3,encoding="utf-8") # rotate by time
# interval is the length of the rotation period.
# when is a string giving the unit of that period; it is not case-sensitive.
# Accepted values, as listed by the original author:
# S seconds
# M minutes
# H hours
# D days
# W weekly (the author notes interval==0 means Monday)
#   NOTE(review): the logging documentation spells weekday rollover as 'W0'-'W6' - confirm
# midnight roll over at midnight each day
formatter = logging.Formatter('%(asctime)s %(module)s:%(lineno)d %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)
import time
# Emit messages with pauses in between; the sleeps add up to 6 seconds,
# which is longer than the 5-second rotation interval configured above.
logger.warning("test1")
time.sleep(2)
logger.warning("test12")
time.sleep(2)
logger.warning("test13")
logger.warning("test14")
time.sleep(2)
logger.warning("test15")
logger.warning("test16")
re(正则表达式) 模块
经常用来处理字符串
常用正则表达式符号:
# Common regular-expression symbols, one example each.
# Patterns containing backslashes are raw strings, so sequences like \d
# reach the regex engine untouched and Python does not warn about invalid
# string escapes.
import re

data = "c3R1c2FABCkabbbbb͸abaackjkjdb89y2abca"

print(re.search("^c3", data))  # ^ anchors at the start of the string
print(re.search("ca$", data))  # $ anchors at the end of the string
print(re.findall("ab*", data))  # * : previous character zero or more times
print(re.findall("ab+", data))  # + : previous character one or more times
print(re.findall("ab?", data))  # ? : previous character zero or one time
print(re.findall("ab{2}", data))  # {m} : previous character exactly m times
print(re.findall("ab{1,5}", data))  # {n,m} : previous character n to m times
print(re.findall("abc|ABC", data))  # | : either the left or the right alternative
print(re.search("abc|ABC", "ABCBCDabc").group())  # whichever occurs first in the string wins
print(re.search("(abc){2}a(123|456)c", "abcabca456c").group())  # (...) groups
print("----------------------------------------------------")

print(re.search(r"\Aalex", "alexabc").group())  # \A : start of string, like ^
print(re.search(r"abc\Z", "alexabc").group())  # \Z : end of string, like $
print(re.findall(".", "a7b4c5jhh6986"))  # . : any single character except \n
print(re.findall(".", "a7b4c5jh\nh69\n86", flags=re.S))  # with re.S, . also matches \n
print(re.findall(r"\d", "a7b4c5jhh6986"))  # \d : a single digit
print(re.findall(r"\d+", "a7b4c5jhh6986"))  # \d+ : runs of digits
print(re.findall(r"\D", "a7b4c5jhh6986"))  # \D : a non-digit
print(re.findall(r"\w", "#a7b4c5jhh6986%"))  # \w : a word character (letters, digits, underscore)
print(re.findall(r"\W", "#a7b4c5jhh6986%"))  # \W : a non-word character
print(re.findall(r"\s", "ab\tc1\n3"))  # \s : whitespace such as \t, \n, \r
print(re.search(r"\s+", "ab\tc1\n3").group())
print("----------------------------------------------------\n")

# Named groups: (?P<name>...) labels each captured part.
id_match = re.search(
    r"(?P<province>\d{4})(?P<city>\d{2})(?P<birthday>\d{4})",
    "371481199306143242",
)
print(id_match.groups())  # captured parts as a tuple
# groupdict()'s argument is only a default for groups that did not
# participate, not a key selector, so it is called without one here.
print(id_match.groupdict())  # captured parts keyed by group name
<_sre.SRE_Match object; span=(0, 2), match='c3'> <_sre.SRE_Match object; span=(39, 41), match='ca'> ['abbbbb', 'ab', 'a', 'a', 'ab', 'a'] ['abbbbb', 'ab', 'ab'] ['ab', 'ab', 'a', 'a', 'ab', 'a'] ['abb'] ['abbbbb', 'ab', 'ab'] ['ABC', 'abc'] ABC abcabca456c ---------------------------------------------------- alex abc ['a', '7', 'b', '4', 'c', '5', 'j', 'h', 'h', '6', '9', '8', '6'] ['a', '7', 'b', '4', 'c', '5', 'j', 'h', '\n', 'h', '6', '9', '\n', '8', '6'] ['7', '4', '5', '6', '9', '8', '6'] ['7', '4', '5', '6986'] ['a', 'b', 'c', 'j', 'h', 'h'] ['a', '7', 'b', '4', 'c', '5', 'j', 'h', 'h', '6', '9', '8', '6'] ['#', '%'] ['\t', '\n'] ---------------------------------------------------- ('3714', '81', '1993') {'province': '3714', 'city': '81', 'birthday': '1993'}
常用的匹配语法:
# The most frequently used functions of the `re` module.
# Patterns containing backslashes are raw strings, so sequences like \d
# reach the regex engine untouched and Python does not warn about invalid
# string escapes.
import re

# findall: every non-overlapping match, returned as a list.
print(re.findall(r"\d+", "c3R1c2FkamZoYWpza2ZuYXNkZndoZWlxa253ZXdpNzI5Mzg3NDkyNHdoZTkyM28yandlZgo"))
tt = "Tina is a good girl, she is cool, clever, and so on..."
print(re.findall(r'\w*oo\w*', tt))

# match: anchors at the start of the string. It returns None when the start
# does not match, so calling .group() would then raise AttributeError.
print(re.match("com", "comwww.runcomoob").group())

# search: first match anywhere in the string; also returns None when
# nothing matches.
print(re.search(r'\dcom', 'www.4comrunoob.5com').group())
print(re.search(r"(\d{1,3}\.){3}\d{1,3}", "inet 地址: 192.168.12.55 广播:192.168.12.255").group())

# split: cut the string wherever the pattern matches.
print(re.split(r'\d+', 'one1two22three3four4five5'))

# sub: re.sub(pattern, repl, string, count). `repl` is the replacement text
# ('-' here); `count` limits the number of replacements, and the default 0
# replaces every match.
text = "Jason is a handsome boy."
print(re.sub(r'\s+', '-', text))
print(re.sub(r'\s+', '-', text, count=2))

# Backslash: the regex for one literal backslash is two backslashes, which
# takes four in a normal (non-raw) string literal.
print(re.split("\\\\", r"c:\user\apple\PycharmProjects"))

# Flags.
print(re.split('a', 'ABC', flags=re.I))  # re.I ignores case
print(re.search(r"^a", "\nabc\neee", flags=re.MULTILINE))  # ^ also matches after each newline
['3', '1', '2', '2', '253', '5', '3', '28'] ['good', 'cool'] com 4com 192.168.12.55 ['one', 'two', 'three', 'four', 'five', ''] Jason-is-a-handsome-boy. Jason-is-a handsome boy. ['c:', 'user', 'apple', 'PycharmProjects'] ['', 'BC'] <_sre.SRE_Match object; span=(1, 2), match='a'>