Python笔记（七）：内置模块

time & datetime 模块

在python中，与时间处理有关的模块有：time ，datetime ，calendar

一、在python中，通常有这几种方式来表示时间：

1、时间戳

2、格式化的时间字符串

3、元组（struct_time）共9个元素。python的time模块的实现主要调用C库

二、几个定义

UTC（Coordinated Universal Time，世界协调时）亦即格林威治天文时间，世界标准时间。在中国为UTC+8。

DST（Daylight Saving Time）夏令时

时间戳（timestamp）表示从1970年1月1日00：00：00开始按秒计算的偏移量。运行time.time()返回的是float类型

元组（struct_time）的方式：struct_time元组共有9个元素

　　time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=4, tm_min=57, tm_sec=32, tm_wday=2, tm_yday=58, tm_isdst=0)

　　（年，月，日，时，分，秒，星期，一年中的第几天，是否夏令时）

time模块的方法

 1 import time
 2 
 3 print(time.localtime())     # 把一个时间戳转换为当前时区的struct_time。参数未提供，以当前时间为准
 4 # time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=12, tm_min=56, tm_sec=7, tm_wday=2, tm_yday=58, tm_isdst=0)
 5 
 6 print(time.gmtime())    # 把一个时间戳转换为UTC时区（0时区）的struct_time
 7 # time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=4, tm_min=57, tm_sec=32, tm_wday=2, tm_yday=58, tm_isdst=0)
 8 
 9 print(time.time())      # 返回当前时间戳
10 # 1551243500.973885
11 
12 print(time.mktime((2019, 2, 27, 4, 57, 32, 2, 58, 0)))  # 把一个struct_time转换为时间戳
13 # 1551214652.0
14 print(time.mktime(time.gmtime()))
15 
16 time.sleep(1)   # 线程推迟指定的时间运行。单位是秒
17 
18 
19 print(time.asctime((2019, 2, 27, 4, 57, 32, 2, 58, 0)))     # 把一个表示时间的元组或struct_time 表示为‘Wed Feb 27 04:57:32 2019’格式。如果没参数，传入当前时间
20 # Wed Feb 27 04:57:32 2019
21 print(time.asctime())
22 # Wed Feb 27 13:07:27 2019
23 
24 print(time.ctime(1551244046.412985))    # 把一个时间戳转化为time.asctime()形式。参数未给或为None时，默认time.time()为参数
25 # Wed Feb 27 13:07:26 2019

time.strftime(format[,t]) 　　# 把一个代表时间的元组或者struct_time转化为格式化的时间字符串。如果t未指定，将传入time.localtime()
time.strptime(string[,format]) 　　 # strftime() 的逆操作

>>> struct_time
time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=13, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=0)
>>> time.mktime(struct_time)
1551331463.0
>>> struct_time2 = time.gmtime(1551331463.0)
>>> struct_time2
time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=5, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=0)
>>> format_time = time.strftime('%Y-%m-%d %H:%M:%S',struct_time2)
>>> format_time
'2019-02-28 05:24:23'
>>> time.strptime(format_time, '%Y-%m-%d %H:%M:%S')
time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=5, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=-1)
>>>

Commonly used format codes:
    
    %Y  Year with century as a decimal number.
    %m  Month as a decimal number [01,12].
    %d  Day of the month as a decimal number [01,31].
    %H  Hour (24-hour clock) as a decimal number [00,23].
    %M  Minute as a decimal number [00,59].
    %S  Second as a decimal number [00,61].
    %z  Time zone offset from UTC.
    %a  Locale's abbreviated weekday name.
    %A  Locale's full weekday name.
    %b  Locale's abbreviated month name.
    %B  Locale's full month name.
    %c  Locale's appropriate date and time representation.
    %I  Hour (12-hour clock) as a decimal number [01,12].
    %p  Locale's equivalent of either AM or PM.

datetime 模块

datetime模块定义的类：

　　datetime.date：表示日期的类。常用的属性有year，month，day；

　　datetime.time：表示时间的类。常用属性有hour，minute，second，microsecond；

　　datetime.datetime：表示日期和时间的类（date 与 time 的组合）。

　　datetime.timedelta：表示时间间隔，即两个时间点之间的长度。

　　datetime.tzinfo：与时区有关的相关信息

>>> import datetime    
>>> d = datetime.datetime.now()　　# 返回当前的datetime日期类型
>>> d
datetime.datetime(2019, 2, 28, 23, 35, 11, 504424)
>>> d.timestamp()
1551368111.504424
>>> d.today()
datetime.datetime(2019, 2, 28, 23, 35, 56, 315996)
>>> d.year
2019
>>> d.timetuple()
time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=23, tm_min=35, tm_sec=11, tm_wday=3, tm_yday=59, tm_isdst=-1)

>>> datetime.date.fromtimestamp(3222222)　　# 把一个时间戳转化成datetime日期类型
datetime.date(1970, 2, 7)

时间运算

datetime.datetime(2019, 2, 28, 23, 35, 11, 504424)
>>> datetime.datetime.now()
datetime.datetime(2019, 2, 28, 23, 46, 20, 742818)
>>> datetime.datetime.now() + datetime.timedelta(4)　　# 当前时间 + 4天
datetime.datetime(2019, 3, 4, 23, 47, 11, 259892)
>>> datetime.datetime.now() + datetime.timedelta(hours=4)　　# 当前时间 + 4小时
datetime.datetime(2019, 3, 1, 3, 47, 30, 426140)

时间替换

>>> d.replace(year=2020,month=3,day=15)
datetime.datetime(2020, 3, 15, 23, 35, 11, 504424)

random模块

>>> import random
>>> 
>>> random.randrange(1,10)  # 返回1-10之间的一个随机数，不包括10  
8
>>> random.randint(1,10)    # 返回1-10之间的一个随机数，包括10  
4
>>> random.randrange(0,100,2)    # 随机选取0到100之间的偶数
4
>>> random.random()    # 返回一个随机浮点数
0.4898701529400532
>>> random.choice('jdhfdfjh483687w%^%#')    # 返回一个给定数据集合中的随机字符
'4'
>>> random.sample('asdfghwertyu',3)    # 从多个字符中选取特定数量的随机字符
['h', 'g', 'r']
>>> 
# 生成随机字符串
>>> import string
>>> ''.join(random.sample(string.ascii_lowercase + string.digits, 6))
'0b2he5'
>>> 
# 洗牌
>>> a = list(range(10))
>>> a
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> random.shuffle(a)
>>> a
[1, 7, 0, 9, 6, 5, 3, 2, 8, 4]
>>>

os模块

>>> import os
>>>
>>> os.getcwd()    # 返回当前目录
'/home/wxg'
>>> os.listdir()    # 返回指定目录下的所有文件和目录名
['.mozilla', 'examples.desktop', '图片', '.profile', '公共的', '.macromedia', '视频', '音乐', '.bashrc', 'b.db', '.xinputrc', '.local', '下载', '.sudo_as_admin_successful', 'a', '.ICEauthority', '.rpmdb', '.cache', '.viminfo', 'b', '.PyCharm2018.3', '.python_history', '文档', '.gnupg', '桌面', '.bash_history', '.bash_logout', 'a.txt', '.java', '模板', '.config']
>>>
>>> os.remove('a.txt')    # 删除文件
>>>
>>> os.path.isfile('a')    # 检测给出的路径是否是一个文件
False
>>> os.path.isdir('a')    # 检测给出的路径是否是一个目录
True
>>> os.path.isdir('/home/wxg/a')
True
>>> os.path.isabs('/home/wxg/a')    # 检测是否绝对路径
True
>>> os.path.exists('a')    # 检测路径是否存在
True
>>> os.path.split('b.db')    # 返回一个路径的目录名和文件名
('', 'b.db')
>>> os.path.split('/home/wxg/b.db')
('/home/wxg', 'b.db')
>>> os.path.splitext('b.db')    # 分离扩展名
('b', '.db')
>>> os.path.dirname('a')    # 获取路径名
''
>>> os.path.dirname('/home/wxg/a')
'/home/wxg'
>>> os.path.abspath('a')    # 获取绝对路径
'/home/wxg/a'
>>> os.path.basename('/home/wxg/a') # 获取文件名
'a'
>>> os.system('ls') # 运行shell命令
a    b        examples.desktop  test   公共的  视频  文档  音乐
abc  b.txt  r              test1  模板    图片  下载  桌面
0
>>> os.getenv('HOME')   # 读取操作系统环境变量'HOME'的值
'/home/wxg'
>>> os.environ  # 返回操作系统所有的环境变量
environ({'CLUTTER_IM_MODULE': 'xim', 'LS_COLORS': 'rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:', 'LC_MEASUREMENT': 'zh_CN.UTF-8', 'LESSCLOSE': '/usr/bin/lesspipe %s %s', 'LC_PAPER': 'zh_CN.UTF-8', 'LC_MONETARY': 'zh_CN.UTF-8', 'XDG_MENU_PREFIX': 'gnome-', 'LANG': 'zh_CN.UTF-8', 'DISPLAY': ':0', 'GNOME_SHELL_SESSION_MODE': 'ubuntu', 'COLORTERM': 'truecolor', 'USERNAME': 'wxg', 'XDG_VTNR': '2', 'SSH_AUTH_SOCK': '/run/user/1000/keyring/ssh', 'LC_NAME': 'zh_CN.UTF-8', 'XDG_SESSION_ID': '2', 'USER': 'wxg', 'DESKTOP_SESSION': 'ubuntu', 
'QT4_IM_MODULE': 'xim', 'TEXTDOMAINDIR': '/usr/share/locale/', 'GNOME_TERMINAL_SCREEN': '/org/gnome/Terminal/screen/d5ea3c15_b4c1_4f6c_a84e_dc444c172bee', 'PWD': '/home/wxg', 'HOME': '/home/wxg', 'TEXTDOMAIN': 'im-config', 'SSH_AGENT_PID': '1714', 'QT_ACCESSIBILITY': '1', 'XDG_SESSION_TYPE': 'x11', 'XDG_DATA_DIRS': '/usr/share/ubuntu:/usr/local/share:/usr/share:/var/lib/snapd/desktop', 'XDG_SESSION_DESKTOP': 'ubuntu', 'LC_ADDRESS': 'zh_CN.UTF-8', 'GJS_DEBUG_OUTPUT': 'stderr', 'LC_NUMERIC': 'zh_CN.UTF-8', 'GTK_MODULES': 'gail:atk-bridge', 'WINDOWPATH': '2', 'TERM': 'xterm-256color', 'SHELL': '/bin/bash', 'VTE_VERSION': '5202', 'QT_IM_MODULE': 'xim', 'XMODIFIERS': '@im=ibus', 'IM_CONFIG_PHASE': '2', 'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME', 'GPG_AGENT_INFO': '/run/user/1000/gnupg/S.gpg-agent:0:1', 'GNOME_TERMINAL_SERVICE': ':1.85', 'XDG_SEAT': 'seat0', 'SHLVL': '1', 'LANGUAGE': 'zh_CN:zh:en_US:en', 'LC_TELEPHONE': 'zh_CN.UTF-8', 'GDMSESSION': 'ubuntu', 'GNOME_DESKTOP_SESSION_ID': 'this-is-deprecated', 'LOGNAME': 'wxg', 'DBUS_SESSION_BUS_ADDRESS': 'unix:path=/run/user/1000/bus', 'XDG_RUNTIME_DIR': '/run/user/1000', 'XAUTHORITY': '/run/user/1000/gdm/Xauthority', 'XDG_CONFIG_DIRS': '/etc/xdg/xdg-ubuntu:/etc/xdg', 'PATH': '/home/wxg/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin', 'LC_IDENTIFICATION': 'zh_CN.UTF-8', 'GJS_DEBUG_TOPICS': 'JS ERROR;JS LOG', 'SESSION_MANAGER': 'local/wxg-Lenovo-B4400:@/tmp/.ICE-unix/1630,unix/wxg-Lenovo-B4400:/tmp/.ICE-unix/1630', 'LESSOPEN': '| /usr/bin/lesspipe %s', 'GTK_IM_MODULE': 'ibus', 'LC_TIME': 'zh_CN.UTF-8', '_': '/usr/bin/python3'})
>>>
>>> os.environ.setdefault('HOME','/home/alex')  # 仅当环境变量不存在时才设置并返回新值；HOME 已存在，所以返回原值且不做修改。对环境变量的修改仅在程序运行时有效
'/home/wxg'
>>> os.getenv('HOME')
'/home/wxg'
>>> os.linesep  # 给出当前平台使用的行终止符
'\n'
>>> os.name     # 指示正在使用的平台
'posix'
>>> os.rename('b.db','b.txt')   # 重命名（old，new）
>>> os.mkdir('abc')     # 创建单个目录
>>> os.makedirs(r'/home/wxg/test')  # 创建多级目录
>>> os.makedirs(r'/home/wxg/test1')
>>> os.removedirs(r'/home/wxg/test1')   # 删除多级目录
>>> os.stat('b.txt')    # 获取文件属性
os.stat_result(st_mode=33188, st_ino=17572170, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=0, st_atime=1551540591, st_mtime=1551540591, st_ctime=1551544136)
>>> os.chmod('b.txt',777)   # 修改文件权限。注意：权限位应写成八进制（如 0o777）；这里的十进制 777 实际等于 0o1411，对应下方的 st_mode=33545
>>> os.stat('b.txt')
os.stat_result(st_mode=33545, st_ino=17572170, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=0, st_atime=1551540591, st_mtime=1551540591, st_ctime=1551545398)
>>> os.path.getsize('b.txt')    # 获取文件大小
4
>>> os.path.join('/home/wxg','b.txt')   # 结合目录名与文件名
'/home/wxg/b.txt'
>>> os.chdir('a')   # 改变工作目录到指定位置
>>> os.getcwd()
'/home/wxg/a'
>>> os.get_terminal_size()  # 获取当前终端的大小
os.terminal_size(columns=80, lines=24)
>>>
>>> import signal
>>> os.kill(4864,signal.SIGKILL)    # 杀死进程
>>>

sys模块

>>> import sys
>>>
>>> sys.argv    # 命令行参数List，第一个元素是程序本身路径
['']
>>> sys.exit()  # 退出程序，正常退出时exit(0)
wxg@wxg-Lenovo-B4400:~$ python3
Python 3.6.7 (default, Oct 22 2018, 11:32:17)
[GCC 8.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import sys
>>>
>>> sys.version     # 获取python解释器的版本信息
'3.6.7 (default, Oct 22 2018, 11:32:17) \n[GCC 8.2.0]'
>>> sys.maxsize     # 最大的int值
9223372036854775807
>>> sys.path    # 返回模块的搜索路径，初始化时使用PYTHONPATH环境变量的值
['', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/home/wxg/.local/lib/python3.6/site-packages', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages']
>>> sys.platform    # 返回操作系统平台名称
'linux'
>>> sys.stdout.write('please:')     # 标准输出
please:7
>>> val = sys.stdin.readline()[:-1]     # 标准输入

>>> sys.getrecursionlimit()     # 获取最大递归层数
1000
>>> sys.setrecursionlimit(1200)     #设置最大递归层数
>>> sys.getdefaultencoding()    # 获取解释器默认编码
'utf-8'
>>> sys.getfilesystemencoding()     # 获取文件系统编码，即文件名在Unicode与字节之间转换时使用的编码
'utf-8'
>>>

shutil模块

>>> import shutil
>>>
>>> shutil.copyfileobj(open('a.txt','r'),open('a_new.txt','w')) # 将文件内容拷贝到另一个文件中
>>> shutil.copyfileobj(open('a.txt','r'),open('b.txt','w'))
>>> shutil.copyfile('a.txt','a.txt.bak')    # 拷贝文件  目标文件无需存在
'a.txt.bak'
>>>
>>> shutil.copymode('a.txt','a.txt.bak')    # 仅拷贝权限。内容、组、用户均不变  目标文件必须存在
>>> shutil.copystat('a.txt','a.txt.bak')    # 仅拷贝状态的信息，包括：mode bits，atime，mtime，flags 目标文件必须存在
>>> shutil.copy('a.txt','a_cp.txt')     # 拷贝文件和权限
'a_cp.txt'
>>> shutil.copy2('a.txt','a_cp2.txt')   # 拷贝文件和状态信息
'a_cp2.txt'
>>>
>>> shutil.ignore_patterns('*.txt')
<function ignore_patterns.<locals>._ignore_patterns at 0x7fd8c8469c80>
>>>shutil.copytree('test01','test02',ignore=shutil.ignore_patterns('*.pyc','tmp*'))    # 递归拷贝文件夹
'test02'        # 目标目录不能存在，且对目标目录的父级目录要有写的权限，ignore的意思是排除，即不会复制
>>>
>>> shutil.rmtree('test03')     # 递归的去删除文件
>>> shutil.move('test02','test02_new')  # 递归的去移动文件，类似mv命令，就是重命名
'test02_new'
>>>

shutil.make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, dry_run=0, owner=None, group=None, logger=None)
# 创建压缩包并返回文件路径
base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，保存在当前目录，否则保存至指定路径
format：压缩包种类，zip、tar、bztar、gztar
root_dir：要压缩的文件夹路径（默认当前目录）
owner：用户，默认当前用户
group：组，默认当前组
logger：用于记录日志，通常是logging.Logger对象

# 把 /home/wxg/test02_new 下的文件打包放置到当前目录
>>> shutil.make_archive('test02_new_bak','gztar',root_dir='/home/wxg/test02_new')
'/home/wxg/test02_new_bak.tar.gz'
# 把 /home/wxg/test02_new 下的文件打包放置到 /home/wxg/test/目录
>>> shutil.make_archive('/home/wxg/test/test02_new_bak','gztar',root_dir='/home/wxg/test02_new')
'/home/wxg/test/test02_new_bak.tar.gz'
>>>

json&pickle模块

序列化：序列化是指把内存里的数据类型转换成字符串，以使其能存储到硬盘或通过网络传输到远程，因为硬盘或网络传输时只能接受bytes

为什么要序列化：

　　是要把程序的状态，或临时数据保存下来，再次启动程序时读取上次的数据，继续运行；这些数据可能有各种类型，我们最好有一种方式能够把这些数据直接写到硬盘上，下次读取时再从硬盘上读回来，还是原来的格式类型

json：用于字符串和python数据类型间进行转换
pickle：用于python特有的类型和python的数据类型间进行转换

>>> import pickle
>>> import json
>>>
>>> data = {'k1':123,'k2':'hello'}
>>>
>>> p_str = pickle.dumps(data)  # 将数据通过特殊的形式转换为只有python语言认识的字符串
>>> p_str
b'\x80\x03}q\x00(X\x02\x00\x00\x00k1q\x01K{X\x02\x00\x00\x00k2q\x02X\x05\x00\x00\x00helloq\x03u.'
>>>
>>> with open('result.pk','wb') as fp:  # 将数据通过特殊的形式转换为只有python语言认识的字符串，并写入文件
...     pickle.dump(data,fp)
...
>>>
>>> j_str = json.dumps(data)    # 将数据通过特殊的形式转换为所有程序语言都认识的字符串
>>> j_str
'{"k1": 123, "k2": "hello"}'
>>>
>>> with open('result.json','w') as fp: # 将数据通过特殊的形式转换为所有程序语言都认识的字符串，并写入文件
...     json.dump(data,fp)
...
>>>

json：
　　优点：跨语言，体积小
　　缺点：只能支持int、float、str、bool、None、list、tuple、dict等基本类型（tuple会被转换成list）
pickle：
　　优点：专为python设计，支持python所有数据类型
　　缺点：只能在python中使用，存储数据占空间大

shelve模块

shelve模块是一个简单的k，v将内存数据通过文件持久化的模块，可以持久化任何pickle可支持的python数据格式

# Serialization: persist Python objects into a shelf file

import shelve

names = ['alex', 'rain', 'test']
info = {'name': 'alex', 'age': 22}

# The context manager closes (and flushes) the shelf even if a write fails.
with shelve.open('test001') as f:   # open (or create) the shelf file
    f['names'] = names  # persist the list
    f['info_dic'] = info    # persist the dict

# Deserialization: read the objects back from the same shelf file

import shelve

# The name must match the one used for writing ('test001'); opening a
# different name yields an empty shelf and every lookup raises KeyError.
with shelve.open('test001') as d:
    print(d['names'])
    print(d['info_dic'])

    del d['names']  # remove the persisted list from the shelf

xml模块

xml模块是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单

xml的格式如下，它是通过<>节点来区别数据结构的：

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

在python中可以通过以下模块操作xml

import xml.etree.ElementTree as ET

# Parse the file once and take the document's root element.
tree = ET.parse('xmltest.xml')
root = tree.getroot()
print(root.tag)     # data


# Walk the whole document: every direct child of the root, then that
# child's own children.
for country in root:
    print(country.tag, country.attrib)
    for field in country:
        print(field.tag, field.text)

# Expected output (empty elements such as <neighbor/> have text None):
# country {'name': 'Liechtenstein'}
# rank 2
# year 2008
# gdppc 141100
# neighbor None
# neighbor None
# country {'name': 'Singapore'}
# rank 5
# year 2011
# gdppc 59900
# neighbor None
# country {'name': 'Panama'}
# rank 69
# year 2011
# gdppc 13600
# neighbor None
# neighbor None

# Visit only the <year> elements
for year in root.iter('year'):
    print(year.tag, year.text)

# year 2008
# year 2011
# year 2011

# year 2008
# year 2011
# year 2011

修改和删除xml文档内容

import xml.etree.ElementTree as ET

tree = ET.parse('xmltest.xml')
root = tree.getroot()

# Modify: bump every <year> by one and mark it with updated="yes"

for year_node in root.iter('year'):
    year_node.text = str(int(year_node.text) + 1)
    year_node.set('updated', 'yes')

# Changes exist only in memory until the tree is written back.
tree.write('xmltest.xml')

# Delete: drop every <country> whose <rank> is greater than 50

for country in root.findall('country'):
    if int(country.find('rank').text) > 50:
        root.remove(country)

tree.write('output.xml')

创建xml文档

import xml.etree.ElementTree as ET


# Root element <namelist>
new_xml = ET.Element('namelist')

# First <name> entry: an empty <age checked="no"/> plus a <sex> child.
name = ET.SubElement(new_xml, 'name', enrolled='yes')
age = ET.SubElement(name, 'age', checked='no')
sex = ET.SubElement(name, 'sex')
sex.text = '33'

# Second <name> entry; ``age`` is rebound to this entry's <age> element.
name2 = ET.SubElement(new_xml, 'name', enrolled='no')
age = ET.SubElement(name2, 'age')
age.text = '19'

# Wrap the root in a document object and save it with an XML declaration.
ET.ElementTree(new_xml).write('text.xml', encoding='utf-8', xml_declaration=True)

ET.dump(new_xml)    # print the generated markup
# <namelist><name enrolled="yes"><age checked="no" /><sex>33</sex></name><name enrolled="no"><age>19</age></name></namelist>

configparser模块

配置文件example.ini内容如下：

[DEFAULT]
ServerAliveInterval = 45   
Compression = yes
CompressionLevel = 9
ForwardX11 = yes

[bitbucket.org]
User = hg

[topsecret.server.com]
Port = 50022
ForwardX11 = no

解析配置文件

>>> import configparser     # 导入模块
>>>
>>> config = configparser.ConfigParser()    # 实例化(生成对象)
>>> config.sections()   # 调用sections方法
[]
>>> config.read('example.ini')  # 读配置文件(注意文件路径)
['example.ini']
>>> config.sections()   # 调用sections方法(默认不会读取default)
['bitbucket.org', 'topsecret.server.com']
>>> 'bitbucket.org' in config   # 判断元素是否在sections列表内
True
>>> 'bytebong.com' in config
False
>>> config['bitbucket.org']['User']     # 通过字典的形式取值
'hg'
>>> config['DEFAULT']['Compression']
'yes'
>>> topsecret = config['topsecret.server.com']
>>> topsecret['ForwardX11']
'no'
>>> topsecret['Port']
'50022'
# for循环 bitbucket.org 字典的 key
>>> for key in config['bitbucket.org']: print(key)
...
user
serveraliveinterval
compression
compressionlevel
forwardx11
>>> config['bitbucket.org']['ForwardX11']
'yes'
>>>

其它增删改查语法

# i.cfg 内容
# 支持的两种分隔符“=”, “:”
[group1] 
k1 = v1
k2:123

[group2]
k1 = v1

import configparser


def _save(parser, path='i.cfg'):
    """Write *parser* back to *path*, closing the file handle promptly."""
    with open(path, 'w') as cfg_file:
        parser.write(cfg_file)


config = configparser.ConfigParser()
config.read('i.cfg')

# Read
secs = config.sections()
print(secs)     # ['group1', 'group2']

options = config.options('group2')  # keys of the given section
print(options)  # ['k1']

item_list = config.items('group2')  # (key, value) tuples of the given section
print(item_list)    # [('k1', 'v1')]

val = config.get('group1', 'k1')    # value of one key, returned as str
print(val)  # v1
# getint() converts the stored string to int and raises ValueError when it is
# not an integer literal, so group1/k2 must hold a number such as 123.
val = config.getint('group1', 'k2')
print(val)  # 123

# Modify
# Every change lives only in memory until the parser is written back to the file.

sec = config.remove_section('group1')   # returns True if the section existed
print(sec)
_save(config)

# add_section() raises DuplicateSectionError for an existing section, so
# only add it when it is missing (keeps the script re-runnable).
sec = config.has_section('wupeiqi')
if not sec:
    config.add_section('wupeiqi')
_save(config)

config.set('group2', 'k1', '11111')
_save(config)

config.remove_option('group2', 'age')   # returns False when the option is absent
_save(config)

hashlib模块

加密算法介绍

hash,一般翻译成"散列",或音译为"哈希",就是把任意长度的输入(又叫做预映射,pre-image),通过散列算法,变换成固定长度的输出,该输出就是散列值.
这种转换是一种压缩映射,也就是,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出,而不可能从散列值来唯一的确定输入值.
简单的说就是一种将任意长度的消息压缩到某一固定长度的消息摘要的函数
hash主要用于信息安全领域中的加密算法,他把一些不同长度的信息转化成杂乱的128位的编码里,叫做hash值.也可以说,hash就是找到一种数据内容和数据存放地址之间的映射关系.

MD5,讯息摘要演算法(MD5 Message-Digest Algorithm),一种被广泛使用的密码杂凑函数,可以产生出一个128位的散列值(hash value), (用于确保信息传输完整一致)
MD5功能:
    输入任意长度的信息,经过处理,输出为128位的信息(数字指纹);
    不同的输入得到不同的结果(唯一性);
MD5算法特点:
    1.压缩性:任意长度的数据,算出的MD5值的长度都是固定的
    2.容易计算:从原数据计算出MD5值很容易
    3.抗修改性:对原数据进行任何改动,修改一个字节生成的MD5值区别也会很大
    4.强抗碰撞:已知原数据和MD5,想找到一个具有相同MD5值的数据(即伪造数据)是非常困难的(注意:目前已能在实际中构造出MD5碰撞,因此MD5不应再用于数字签名等安全场景)
MD5不可逆的原因是 其是一种散列函数,使用的是hash算法,在计算过程中原文的部分信息是丢失了的
MD5用途:
    1.防止被篡改
    2.防止直接看到明文
    3.防止抵赖(数字签名)

SHA-1:安全哈希算法(Secure Hash Algorithm)主要适用于数字签名标准(Digital Signature Standard DSS)里面定义的数字签名算法(Digital Signature Algorithm DSA).
对于长度小于2^64位的消息,SHA1会产生一个160位的消息摘要.当接收到消息的时候,这个消息摘要可以用来验证数据的完整性.

hashlib模块主要提供SHA1，SHA224，SHA256，SHA384，SHA512，MD5算法

import hashlib

# update() may be called repeatedly; the digest always covers the
# concatenation of everything fed in so far.
m = hashlib.md5()
m.update(b'hello')
m.update(b"It's me")
print(m.digest())   # raw-bytes digest of b"helloIt's me"
# b"d\xf6\x9d\x95\x13[\xc1=H'\xf8q\xb3\x7fx\x0f"

m.update(b"It's been a long time since last time we ...")
print(m.digest())   # digest in raw-bytes form
print(len(m.hexdigest()))   # hexdigest() is the hexadecimal text form; its length is printed
# b';\xee2VSR\x13\x0c\x1aC\xb2\xf0\xbf8\x15\xda'
# 32

# The same create / update / hexdigest steps work for every algorithm.
# Printed in order: md5, sha1, sha256, sha384, sha512.
# The object is named ``hasher`` so the builtin hash() is not shadowed.
for make_hasher in (hashlib.md5, hashlib.sha1, hashlib.sha256,
                    hashlib.sha384, hashlib.sha512):
    hasher = make_hasher()
    hasher.update(b'rain')
    print(hasher.hexdigest())

###
23678db5efde9ab46bce8c23a6d91b50
fbec17cb2fcbbd1c659b252230b48826fc563788
319b44c570a417ff3444896cd4aa77f052b6781773fc2f9aa1f1180ac745005c
04657bf3a9ece15806d0326150fdff482a0cb6ca008b89701f5f262b771497532a2bb131f9fd5b64af558a06836a2eec
07fb03b192b2bbd906461de2cd99fa088f96af2f344d7b65db1964f8118cdc51a0d28825cba3968b23396134f98d5d7e4d4eb3538c1b76c7647c08b09876954b

subprocess模块

python官方推出的模块，目的是提供统一的模块来实现对系统命令或脚本的调用

三种执行命令方法

subprocess.run(*popenargs, input=None, timeout=None, check=False, **kwargs)
　　# Run command with arguments and return a CompletedProcess instance.官方推荐
subprocess.call = call(*popenargs, timeout=None, **kwargs)　　# 跟run实现的内容差不多，另一种写法
　　# Run command with arguments. Wait for command to complete or timeout, then return the returncode attribute.
subprocess.Popen(object)　　# 上面各种方法的底层封装
标准写法

>>> subprocess.run(['df','-h'], stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True)
CompletedProcess(args=['df', '-h'], returncode=0, stdout=b'\xe6\x96\x87\xe4\xbb\xb6\xe7\xb3\xbb\xe7\xbb\x9f        \xe5\xae\xb9\xe9\x87\x8f  \xe5\xb7\xb2\xe7\x94\xa8  \xe5\x8f\xaf\xe7\x94\xa8 \xe5\xb7\xb2\xe7\x94\xa8% \xe6\x8c\x82\xe8\xbd\xbd\xe7\x82\xb9\nudev            1.8G     0  1.8G    0% /dev\ntmpfs           369M  2.0M  367M    1% /run\n/dev/sda1       293G   11G  268G    4% /\ntmpfs           1.9G   26M  1.8G    2% /dev/shm\ntmpfs           5.0M  4.0K  5.0M    1% /run/lock\ntmpfs           1.9G     0  1.9G    0% /sys/fs/cgroup\n/dev/loop1       35M   35M     0  100% /snap/gtk-common-themes/818\n/dev/loop2       13M   13M     0  100% /snap/gnome-characters/139\n/dev/loop3      2.3M  2.3M     0  100% /snap/gnome-calculator/260\n/dev/loop4       15M   15M     0  100% /snap/gnome-logs/37\n/dev/loop7      3.8M  3.8M     0  100% /snap/gnome-system-monitor/51\n/dev/loop8       87M   87M     0  100% /snap/core/4917\n/dev/loop9      2.4M  2.4M     0  100% /snap/gnome-calculator/180\n/dev/loop10     3.8M  3.8M     0  100% /snap/gnome-system-monitor/57\n/dev/loop11      13M   13M     0  100% /snap/gnome-characters/103\n/dev/loop12      91M   91M     0  100% /snap/core/6405\n/dev/loop13      35M   35M     0  100% /snap/gtk-common-themes/1122\n/dev/loop15      91M   91M     0  100% /snap/core/6350\n/dev/loop16     141M  141M     0  100% /snap/gnome-3-26-1604/78\n/dev/loop0       35M   35M     0  100% /snap/gtk-common-themes/319\n/dev/loop5       15M   15M     0  100% /snap/gnome-logs/45\n/dev/loop6      141M  141M     0  100% /snap/gnome-3-26-1604/70\n/dev/loop14     141M  141M     0  100% /snap/gnome-3-26-1604/74\ntmpfs           369M   16K  369M    1% /run/user/121\ntmpfs           369M   28K  369M    1% /run/user/1000\n', stderr=b'')
>>>

涉及到管道 | 的命令写法

>>> subprocess.run('df -h | grep disk1', shell=True)
CompletedProcess(args='df -h | grep disk1', returncode=1)
>>> 
# shell = True的意思是这条命令直接交给系统去执行，不需要python负责解析

call（）方法

# 执行命令，返回命令执行状态，0 or 非0
>>> subprocess.call(["ls", "-l"])
总用量 72
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:15 a
drwxr-xr-x 2 wxg wxg 4096 3月   3 00:34 abc
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:16 b
-rwxrwxrwx 1 wxg wxg    4 3月   3 00:57 b.txt
-rw-r--r-- 1 wxg wxg 8980 2月   9 07:35 examples.desktop
drwxr-xr-x 3 wxg wxg 4096 3月   3 00:37 r
drwxr-xr-x 2 wxg wxg 4096 3月   3 00:40 test
drwxr-xr-x 3 wxg wxg 4096 3月   3 00:44 test1
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 公共的
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 模板
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 视频
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:04 图片
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 文档
drwxr-xr-x 4 wxg wxg 4096 2月  21 21:48 下载
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 音乐
drwxr-xr-x 2 wxg wxg 4096 2月   9 16:35 桌面
0

# 执行命令，如果命令结果为0，就正常返回，否则抛出异常
>>> subprocess.check_call(["ls", "-l"])
总用量 72
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:15 a
drwxr-xr-x 2 wxg wxg 4096 3月   3 00:34 abc
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:16 b
-rwxrwxrwx 1 wxg wxg    4 3月   3 00:57 b.txt
-rw-r--r-- 1 wxg wxg 8980 2月   9 07:35 examples.desktop
drwxr-xr-x 3 wxg wxg 4096 3月   3 00:37 r
drwxr-xr-x 2 wxg wxg 4096 3月   3 00:40 test
drwxr-xr-x 3 wxg wxg 4096 3月   3 00:44 test1
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 公共的
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 模板
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 视频
drwxr-xr-x 2 wxg wxg 4096 3月   2 23:04 图片
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 文档
drwxr-xr-x 4 wxg wxg 4096 2月  21 21:48 下载
drwxr-xr-x 2 wxg wxg 4096 2月   9 07:44 音乐
drwxr-xr-x 2 wxg wxg 4096 2月   9 16:35 桌面
0

# 接收字符串格式命令，返回元组形式，第1个元素是执行状态，第2个是命令结果
>>> subprocess.getstatusoutput('ls /bin/ls')
(0, '/bin/ls')

# 接收字符串格式命令，并返回结果
>>> subprocess.getoutput('ls /bin/ls')
'/bin/ls'

# 执行命令，并返回结果，（返回结果并不是打印）
>>> res = subprocess.check_output(["ls", "-l"])
>>> res
b'\xe6\x80\xbb\xe7\x94\xa8\xe9\x87\x8f 72\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88   2 23:15 a\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88   3 00:34 abc\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88   2 23:16 b\n-rwxrwxrwx 1 wxg wxg    4 3\xe6\x9c\x88   3 00:57 b.txt\n-rw-r--r-- 1 wxg wxg 8980 2\xe6\x9c\x88   9 07:35 examples.desktop\ndrwxr-xr-x 3 wxg wxg 4096 3\xe6\x9c\x88   3 00:37 r\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88   3 00:40 test\ndrwxr-xr-x 3 wxg wxg 4096 3\xe6\x9c\x88   3 00:44 test1\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 07:44 \xe5\x85\xac\xe5\x85\xb1\xe7\x9a\x84\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 07:44 \xe6\xa8\xa1\xe6\x9d\xbf\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 07:44 \xe8\xa7\x86\xe9\xa2\x91\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88   2 23:04 \xe5\x9b\xbe\xe7\x89\x87\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 07:44 \xe6\x96\x87\xe6\xa1\xa3\ndrwxr-xr-x 4 wxg wxg 4096 2\xe6\x9c\x88  21 21:48 \xe4\xb8\x8b\xe8\xbd\xbd\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 07:44 \xe9\x9f\xb3\xe4\xb9\x90\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88   9 16:35 \xe6\xa1\x8c\xe9\x9d\xa2\n'
>>>

Popen()方法

subprocess.Popen = class Popen(builtins.object)
 |  Execute a child program in a new process.
 |  
 |  For a complete description of the arguments see the Python documentation.
 |  
 |  Arguments:
 |    args: A string, or a sequence of program arguments.
 |  
 |    bufsize: supplied as the buffering argument to the open() function when
 |        creating the stdin/stdout/stderr pipe file objects
 |  
 |    executable: A replacement program to execute.
 |  
 |    stdin, stdout and stderr: These specify the executed programs' standard
 |        input, standard output and standard error file handles, respectively.
 |  
 |    preexec_fn: (POSIX only) An object to be called in the child process
 |        just before the child is executed.
 |  
 |    close_fds: Controls closing or inheriting of file descriptors.
 |  
 |    shell: If true, the command will be executed through the shell.
 |  
 |    cwd: Sets the current directory before the child is executed.
 |  
 |    env: Defines the environment variables for the new process.
 |  
 |    universal_newlines: If true, use universal line endings for file
 |        objects stdin, stdout and stderr.
 |  
 |    startupinfo and creationflags (Windows only)
 |  
 |    restore_signals (POSIX only)
 |  
 |    start_new_session (POSIX only)
 |  
 |    pass_fds (POSIX only)
 |  
 |    encoding and errors: Text mode encoding and error handling to use for
 |        file objects stdin, stdout and stderr.
 |  
 |  Attributes:
 |      stdin, stdout, stderr, pid, returncode
 |  
 |  Methods defined here:
 |  
 |  __del__(self, _maxsize=9223372036854775807, _warn=<built-in function warn>)
 |  
 |  __enter__(self)
 |  
 |  __exit__(self, type, value, traceback)
 |  
 |  __init__(self, args, bufsize=-1, executable=None, stdin=None, stdout=None, stderr=None, preexec_fn=None, close_fds=<object object at 0x7f0e58bdb130>, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0, restore_signals=True, start_new_session=False, pass_fds=(), *, encoding=None, errors=None)
 |      Create new Popen instance.
 |  
 |  communicate(self, input=None, timeout=None)
 |      Interact with process: Send data to stdin.  Read data from
 |      stdout and stderr, until end-of-file is reached.  Wait for
 |      process to terminate.
 |      
 |      The optional "input" argument should be data to be sent to the
 |      child process (if self.universal_newlines is True, this should
 |      be a string; if it is False, "input" should be bytes), or
 |      None, if no data should be sent to the child.
 |      
 |      communicate() returns a tuple (stdout, stderr).  These will be
 |      bytes or, if self.universal_newlines was True, a string.
 |  
 |  kill(self)
 |      Kill the process with SIGKILL
 |  
 |  poll(self)
 |      Check if child process has terminated. Set and return returncode
 |      attribute.
 |  
 |  send_signal(self, sig)
 |      Send a signal to the process.
 |  
 |  terminate(self)
 |      Terminate the process with SIGTERM
 |  
 |  wait(self, timeout=None, endtime=None)
 |      Wait for child process to terminate.  Returns returncode
 |      attribute.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
(END)

View Code

常用参数：

args：shell命令，可以是字符串或者序列类型（如：list， tuple）

stdin， stdout， stderr：分别表示程序的标准输入、输出、错误句柄

preexec_fn：只在Unix平台下有效，用于指定一个可执行对象（callable object），它将在子进程运行之前被调用

shell：同上

cwd：用于设置子进程的当前目录

env：用于指定子进程的环境变量。如果env = None，子进程的环境变量将从父进程中继承

执行下面两条语句

>>> a = subprocess.run('sleep 10', shell=True, stdout=subprocess.PIPE)
>>> a = subprocess.Popen('sleep 10', shell=True, stdout=subprocess.PIPE)
>>>

区别是Popen会在发起命令后立刻返回，而不必等命令执行结果。

这样的好处是：如果你调用的命令或脚本需要执行10分钟，你的主程序不需卡在这里10分钟，可以继续往下走，干别的事情，每过一会儿，通过一个什么方法来检测一下命令是否执行完就好了

Popen调用后会返回一个对象，可以通过这个对象拿到命令执行结果或状态等，该对象有以下方法

poll()：

wait()：

terminate()：

kill()：

communicate()：

>>> a = subprocess.Popen('python3 guess_age.py', stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True)
>>> a.communicate(b'22')
(b'>>>:', b'Traceback (most recent call last):\n  File "guess_age.py", line 6, in <module>\n    if age > 22:\nTypeError: \'>\' not supported between instances of \'str\' and \'int\'\n')
>>>

logging模块

很多程序都有记录日志的需求，并且日志中包含的信息既有正常的程序访问日志，还可能有错误、警告等信息输出

python的 logging模块提供了标准的日志接口，可以通过它存储各种格式的日志，分为debug(), info(), warning(), error(), critical() 5个级别

>>> logging.
logging.BASIC_FORMAT          logging.currentframe(
logging.BufferingFormatter(   logging.debug(
logging.CRITICAL              logging.disable(
logging.DEBUG                 logging.error(
logging.ERROR                 logging.exception(
logging.FATAL                 logging.fatal(
logging.FileHandler(          logging.getLevelName(
logging.Filter(               logging.getLogRecordFactory(
logging.Filterer(             logging.getLogger(
logging.Formatter(            logging.getLoggerClass(
logging.Handler(              logging.info(
logging.INFO                  logging.io
logging.LogRecord(            logging.lastResort
logging.Logger(               logging.log(
logging.LoggerAdapter(        logging.logMultiprocessing
logging.Manager(              logging.logProcesses
logging.NOTSET                logging.logThreads
logging.NullHandler(          logging.makeLogRecord(
logging.PercentStyle(         logging.os
logging.PlaceHolder(          logging.raiseExceptions
logging.RootLogger(           logging.root
logging.StrFormatStyle(       logging.setLogRecordFactory(
logging.StreamHandler(        logging.setLoggerClass(
logging.StringTemplateStyle(  logging.shutdown(
logging.Template(             logging.sys
logging.WARN                  logging.threading
logging.WARNING               logging.time
logging.addLevelName(         logging.traceback
logging.atexit                logging.warn(
logging.basicConfig(          logging.warning(
logging.captureWarnings(      logging.warnings
logging.collections           logging.weakref
logging.critical(

View Code

最简单用法

>>> import logging      
>>>
>>> logging.warning('user [alex] attempted wrong password more than 3 times')
WARNING:root:user [alex] attempted wrong password more than 3 times
>>> logging.critical('server is down')
CRITICAL:root:server is down

写到文件里

import logging

# Configure the root logger once: records at DEBUG level or above are written
# to example.log rather than to the console.
logging.basicConfig(
    level=logging.DEBUG,
    filename='example.log',
)

# All three calls are at or above DEBUG, so each one reaches the file.
logging.debug('this message should go to the log file')
logging.info('So should this')
logging.warning('And this, too')

level = logging.DEBUG的意思是把日志记录级别设置为DEBUG，也就是说，只有级别为DEBUG或者比DEBUG级别更高的日志才会被记录到文件中

自定义日志格式

添加时间

import logging

# Configure the root logger to write to example.log, keep only INFO and above,
# and prefix every message with a 12-hour timestamp (e.g. "2019-03-04 11:42:44 PM").
logging.basicConfig(
    datefmt='%Y-%m-%d %I:%M:%S %p',
    format='%(asctime)s %(message)s',
    level=logging.INFO,
    filename='example.log',
)

# DEBUG is below the configured INFO threshold, so this first call is dropped.
logging.debug('this message should go to the log file')
# INFO and WARNING both pass the threshold and are written with a timestamp.
logging.info('So should this')
logging.warning('And this, too')

# 输出
2019-03-04 11:42:44 PM So should this
2019-03-04 11:42:44 PM And this, too

所有支持的格式

%(levelno)s：数字形式的日志级别

%(levelname)s：文本形式的日志级别

%(pathname)s：调用日志输出函数的模块的完整路径名，可能没有

%(filename)s：调用日志输出函数的模块文件名

%(module)s：调用日志输出函数的模块名

%(funcName)s：调用日志输出函数的函数名

%(lineno)s：调用日志输出函数的语句所在的代码行

%(created)s：当前时间，用UNIX标准的表示时间的浮点数表示

%(relativeCreated)s：输出日志信息的时间，以毫秒表示，相对于logging模块被加载的时刻（而不是某个Logger创建的时刻）

%(asctime)s：字符串形式的当前时间；默认格式形如“2019-03-04 22:22:22,234”，逗号后面的是毫秒

%(thread)s：线程ID，可能没有

%(threadName)s：线程名，可能没有

%(process)s：进程ID。可能没有

%(message)s：用户输出的消息

日志同时输出到屏幕和文件

import logging


class IgnoreBackupLogFilter(logging.Filter):
    """Filter that discards any record whose message contains "db backup"."""

    def filter(self, record):
        """Return False to drop *record*, True to let it through.

        ``filter`` is the hook name the logging framework calls, so the
        method name and signature must stay exactly as they are.
        """
        rendered = record.getMessage()  # message with its %-args already applied
        if "db backup" in rendered:
            return False
        return True


# Step 1: fetch the logger named 'web' and open its own threshold fully
# (DEBUG). Switching this to logging.INFO would raise the logger-wide floor.
logger = logging.getLogger('web')
logger.setLevel(logging.DEBUG)

# Step 1.1: install the filter on the logger itself, so messages containing
# "db backup" are rejected before any handler sees them.
logger.addFilter(IgnoreBackupLogFilter())

# Step 2: console handler -- accepts DEBUG and above, and its layout also
# shows the line number of the logging call.
console_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(lineno)s - %(message)s')
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(console_formatter)

# Step 3: file handler -- writes to web.log, but only WARNING and above.
file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('web.log')
fh.setLevel(logging.WARNING)
fh.setFormatter(file_formatter)

# Step 4: bind both handlers to the logger (console first, then file).
logger.addHandler(ch)
logger.addHandler(fh)


# Demo records: the handler thresholds decide where each one ends up.
logger.debug('test log')                # console only (below the file's WARNING)
logger.info('test log 2')               # console only
logger.error('test log 3')              # console and web.log
logger.warning('test log 4')            # console and web.log
logger.debug('test log db backup')      # rejected by IgnoreBackupLogFilter



console：DEBUG
global：DEBUG   （即上面代码中logger.setLevel设置的级别；不设置时默认（default level）是WARNING）
file：WARNING

全局设置为DEBUG后，console handler设置为INFO，如果输出的日志级别是DEBUG，那就不会在屏幕输出

文件自动切割

import logging
from logging import handlers

# Logger named 'chat'. No level is set on it here (a logger.setLevel(logging.INFO)
# call is intentionally left disabled), so it falls back to the level inherited
# from its ancestors -- WARNING unless the root logger was configured elsewhere.
logger = logging.getLogger('chat')

# Console handler; its layout also shows the line number of the logging call.
console_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(lineno)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(console_formatter)

# File handler that rotates by time: chat_t.log is rolled over every 5 seconds
# and at most 3 old files are kept.
# Size-based alternative:
#   handlers.RotatingFileHandler('chat.log', maxBytes=50, backupCount=3)
file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = handlers.TimedRotatingFileHandler(
    'chat_t.log',
    when='S',
    interval=5,
    backupCount=3,
)
fh.setFormatter(file_formatter)

# Bind both handlers to the logger (console first, then file).
logger.addHandler(ch)
logger.addHandler(fh)


# Demo records sent through both handlers.
logger.debug('test log')
logger.info('test log 2')
logger.error('test log 3')
logger.warning('test log 4')
logger.debug('test log db backup')

re模块

正则表达式就是字符串的匹配规则，python中对应的模块是re模块

常用的表达式规则

'.'　　默认匹配除\n之外的任意一个字符，若指定flag DOTALL（re.S)，则匹配任意字符，包括换行

'^'　　匹配字符开头，若指定flag MULTILINE (re.M)，re.search('^a', '\nabc', re.M)可以匹配

'$'　　匹配字符结尾，指定flag 同上

'*'　　匹配*号前的字符0次或多次

'+'　　匹配+前一个字符1次或多次

'?'　　匹配？前一个字符1次或0次

'{m}'　　匹配{m}前一个字符m次

'{n,m}'　　匹配{n，m}前一个字符n到m次

'|'　　匹配|左或|右的字符

'(...)(...)'　　分组匹配

'\A'　　只从字符开头匹配,同^

'\Z'　　只匹配字符串的结尾；与$类似，但$还会匹配字符串末尾换行符之前的位置

'\d'　　匹配数字0-9

'\D'　　匹配非数字

'\w'　　匹配[A-Za-z0-9_]（字母、数字和下划线）

'\W'　　匹配非[A-Za-z0-9_]（即\w的补集）

'\s'　　匹配空白字符、\n、\t、\r

'(?P<name>...)'　　命名分组匹配，匹配结果可以通过groupdict()按组名取出

re的匹配语法

re.match(pattern, string, flags=0)　　从头开始匹配

re.fullmatch(pattern, string, flags=0)　　完全匹配，整个字符串从头到尾都符合规则才算匹配成功

re.search(pattern, string, flags=0)　　匹配包含，找到即返回

re.sub(pattern, repl, string, count=0, flags=0)　　匹配字符并替换，count设置次数

re.split(pattern, string, maxsplit=0, flags=0)　　以匹配到的字符当作分隔符来分割，maxsplit设置次数

re.findall(pattern, string, flags=0)　　全部匹配，找到所有符合规则的字符，以列表形式返回

re.compile(pattern, flags=0)　　提前写好规则，提高匹配效率

　　pattern　　正则表达式

　　string　　要匹配的字符串

　　flags　　标志位，用于控制正则表达式的匹配方式

Flags标识符

re.I（IGNORECASE）：忽略大小写

re.M（MULTILINE）：多行模式，改变'^'和'$'的行为

re.S（DOTALL）：改变'.'的行为

re.X（VERBOSE）：可以给表达式写注释，使其更可读

# -*- coding:utf-8 -*-
''''''
# st = """况咏蜜     北京    171    48    13651054608
# 王心颜     上海    169    46    13813234424
# 马纤羽     深圳    173    50    13744234523
# 乔亦菲     广州    172    52    15823423525
# 罗梦竹     北京    175    49    18623423421
# 刘诺涵     北京    170    48    18623423765
# 岳妮妮     深圳    177    54    18835324553
# 贺婉萱     深圳    174    52    18933434452
# 叶梓萱    上海    171    49    18042432324
# 杜姗姗   北京    167    49       13324523342
# """

# f = open("兼职白领学生空姐模特护士联系方式.txt", 'w', encoding="utf-8")
# f.write(st)
# f.close()

''''''
# 取出所有手机号
"""
f = open("兼职白领学生空姐模特护士联系方式.txt", 'r', encoding="utf-8")

phones = []

for line in f:
    name, city, height, weight, phone = line.split()
    if phone.startswith('1') and len(phone) == 11:
        phones.append(phone)

print(phones)
"""

"""
import re

f = open("兼职白领学生空姐模特护士联系方式.txt", 'r', encoding="utf-8")

data = f.read()
phone = re.findall("[0-9]{11}", data)

print(phone)
# ['13651054608', '13813234424', '13744234523', '15823423525', '18623423421', '18623423765', '18835324553', '18933434452', '18042432324', '13324523342']
"""

# 正则表达式就是字符串的匹配规则

"""
The special characters are:
    "."      Matches any character except a newline.
    "^"      Matches the start of the string.
    "$"      Matches the end of the string or just before the newline at
             the end of the string.
    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
             Greedy means that it will match as many repetitions as possible.
    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
    "?"      Matches 0 or 1 (greedy) of the preceding RE.
    *?,+?,?? Non-greedy versions of the previous three special characters.
    {m,n}    Matches from m to n repetitions of the preceding RE.
    {m,n}?   Non-greedy version of the above.
    "\\"     Either escapes special characters or signals a special sequence.
    []       Indicates a set of characters.
             A "^" as the first character indicates a complementing set.
    "|"      A|B, creates an RE that will match either A or B.
    (...)    Matches the RE inside the parentheses.
             The contents can be retrieved or matched later in the string.
    (?aiLmsux) Set the A, I, L, M, S, U, or X flag for the RE (see below).
    (?:...)  Non-grouping version of regular parentheses.
    (?P<name>...) The substring matched by the group is accessible by name.
    (?P=name)     Matches the text matched earlier by the group named name.
    (?#...)  A comment; ignored.
    (?=...)  Matches if ... matches next, but doesn't consume the string.
    (?!...)  Matches if ... doesn't match next.
    (?<=...) Matches if preceded by ... (must be fixed length).
    (?<!...) Matches if not preceded by ... (must be fixed length).
    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
                       the (optional) no pattern otherwise.

The special sequences consist of "\\" and a character from the list
below.  If the ordinary character is not on the list, then the
resulting RE will match the second character.
    \number  Matches the contents of the group of the same number.
    \A       Matches only at the start of the string.
    \Z       Matches only at the end of the string.
    \b       Matches the empty string, but only at the start or end of a word.
    \B       Matches the empty string, but not at the start or end of a word.
    \d       Matches any decimal digit; equivalent to the set [0-9] in
             bytes patterns or string patterns with the ASCII flag.
             In string patterns without the ASCII flag, it will match the whole
             range of Unicode digits.
    \D       Matches any non-digit character; equivalent to [^\d].
    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v] in
             bytes patterns or string patterns with the ASCII flag.
             In string patterns without the ASCII flag, it will match the whole
             range of Unicode whitespace characters.
    \S       Matches any non-whitespace character; equivalent to [^\s].
    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]
             in bytes patterns or string patterns with the ASCII flag.
             In string patterns without the ASCII flag, it will match the
             range of Unicode alphanumeric characters (letters plus digits
             plus underscore).
             With LOCALE, it will match the set [0-9_] plus characters defined
             as letters for the current locale.
    \W       Matches the complement of \w.
    \\       Matches a literal backslash.
"""

"""
This module exports the following functions:
    match     Match a regular expression pattern to the beginning of a string.
    fullmatch Match a regular expression pattern to all of a string.
    search    Search a string for the presence of a pattern.
    sub       Substitute occurrences of a pattern found in a string.
    subn      Same as sub, but also return the number of substitutions made.
    split     Split a string by the occurrences of a pattern.
    findall   Find all occurrences of a pattern in a string.
    finditer  Return an iterator yielding a match object for each match.
    compile   Compile a pattern into a RegexObject.
    purge     Clear the regular expression cache.
    escape    Backslash all non-alphanumerics in a string.
"""

"""
Some of the functions in this module takes flags as optional parameters:
    A  ASCII       For string patterns, make \w, \W, \b, \B, \d, \D
                   match the corresponding ASCII character categories
                   (rather than the whole Unicode categories, which is the
                   default).
                   For bytes patterns, this flag is the only available
                   behaviour and needn't be specified.
    I  IGNORECASE  Perform case-insensitive matching.
    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
    M  MULTILINE   "^" matches the beginning of lines (after a newline)
                   as well as the string.
                   "$" matches the end of lines (before a newline) as well
                   as the end of the string.
    S  DOTALL      "." matches any character at all, including the newline.
    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
    U  UNICODE     For compatibility only. Ignored for string patterns (it
                   is the default), and forbidden for bytes patterns.
"""

import re

# re.match()  # 从头开始匹配
# re.search() # 匹配包含
# re.findall()    # 把所有匹配到的字符以列表形式返回
# re.split()  # 以匹配到的字符当作列表分隔符
# re.sub()    # 匹配字符并替换
# re.fullmatch()  # 全部匹配

"""
s = 'dfft45egzdt y ^**^%8JHSyG'

s1 = re.match('[0-9]', s)
s11 = re.match('[0-9]', '2ds3f')
print(s1, s11)

s2 = re.search('[0-9]', s)
print(s2)

s3 = re.findall('[0-9]', s)
print(s3)

match_res = re.search('[0-9]', '2dds4d4f')
if match_res:
    print(match_res.group())
"""


s = 'abc2d4f*d5-m'

# print(re.search('.', s))
# print(re.search('..', s))



"""
>>> import re
>>> re.
re.A            re.M            re.UNICODE      re.findall(     re.sre_compile
re.ASCII        re.MULTILINE    re.VERBOSE      re.finditer(    re.sre_parse
re.DEBUG        re.RegexFlag(   re.X            re.fullmatch(   re.sub(
re.DOTALL       re.S            re.compile(     re.functools    re.subn(
re.I            re.Scanner(     re.copyreg      re.match(       re.template(
re.IGNORECASE   re.T            re.enum         re.purge(       
re.L            re.TEMPLATE     re.error(       re.search(      
re.LOCALE       re.U            re.escape(      re.split(       
>>> s = 'abc1d2f*g&m'
>>> 
>>> re.search('.', s)
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('..', s)
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> re.search('..', '*asd')
<_sre.SRE_Match object; span=(0, 2), match='*a'>
>>> re.search('^', '*asd')
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> re.search('^*', '*asd')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python3.6/re.py", line 182, in search
    return _compile(pattern, flags).search(string)
  File "/usr/lib/python3.6/re.py", line 301, in _compile
    p = sre_compile.compile(pattern, flags)
  File "/usr/lib/python3.6/sre_compile.py", line 562, in compile
    p = sre_parse.parse(p, flags)
  File "/usr/lib/python3.6/sre_parse.py", line 855, in parse
    p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
  File "/usr/lib/python3.6/sre_parse.py", line 416, in _parse_sub
    not nested and not items))
  File "/usr/lib/python3.6/sre_parse.py", line 616, in _parse
    source.tell() - here + len(this))
sre_constants.error: nothing to repeat at position 1
>>> re.search('^a', 'asd')
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('^as', 'asd')
<_sre.SRE_Match object; span=(0, 2), match='as'>
>>> re.search('^s', 'asd')
>>> re.match('as', 'asd')
<_sre.SRE_Match object; span=(0, 2), match='as'>
>>> re.match('$s', 'asd')
>>> re.match('$d', 'asd')
>>> re.search('$s', 'asd')
>>> re.search('$d', 'asd')
>>> re.search('s$', 'asd')
>>> re.search('d$', 'asd')
<_sre.SRE_Match object; span=(2, 3), match='d'>
>>> re.match('a.d$', 'asd')
<_sre.SRE_Match object; span=(0, 3), match='asd'>
>>> 
"""

"""
>>> 
>>> 
>>> re.search('a*', 'Alex')
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> re.search('a*', 'Alex').group()
''
>>> re.search('a*', 'alex').group()
'a'
>>> re.search('a*', 'aaaaalex').group()
'aaaaa'
>>> 
>>> re.search('ab*', 'aaaaalex').group()
'a'
>>> re.search('ab*', 'aaaaalex')
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('ab*', 'abaaaalex')
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> re.search('ab*', 'abbaaaalex')
<_sre.SRE_Match object; span=(0, 3), match='abb'>
>>> re.search('ab*', 'ababaaaalex')
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> 
>>> 
>>> re.search('a+', 'ababaaaalex')
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('a+', 'lex')
>>> re.search('a+', 'aaalex')
<_sre.SRE_Match object; span=(0, 3), match='aaa'>
>>> re.search('.+', 'aaalex')
<_sre.SRE_Match object; span=(0, 6), match='aaalex'>
>>> re.search('al+', 'aaalex')
<_sre.SRE_Match object; span=(2, 4), match='al'>
>>> re.search('ab+', 'aaalex')
>>> re.search('al+', 'aaalllex')
<_sre.SRE_Match object; span=(2, 6), match='alll'>
>>> 
>>> 
>>> re.search('a?', 'aaalllex')
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('b?', 'aaalllex')
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> 
>>> 
>>> 
>>> re.search('a{2}', 'aaalllex')
<_sre.SRE_Match object; span=(0, 2), match='aa'>
>>> re.search('a{2}', 'acaxalllex')
>>> re.search('.{2}', 'acaxalllex')
<_sre.SRE_Match object; span=(0, 2), match='ac'>
>>> re.search('[0-9]{2}', 'aaalllex')
>>> re.search('[0-9]{2}', 'aaalllex1')
>>> re.search('[0-9]{2}', 'aaalllex12')
<_sre.SRE_Match object; span=(8, 10), match='12'>
>>> 
>>> 
>>> 
>>> re.search('[0-9]{2,5}', 'aaalllex12')
<_sre.SRE_Match object; span=(8, 10), match='12'>
>>> re.search('[a-z]{2,5}', 'aaalllex12')
<_sre.SRE_Match object; span=(0, 5), match='aaall'>
>>> re.search('[a-z]{2,5}', 'a2aalllex12')
<_sre.SRE_Match object; span=(2, 7), match='aalll'>
>>> re.search('[a-z]{2,5}', 'aa2alllex12')
<_sre.SRE_Match object; span=(0, 2), match='aa'>
>>> re.search('[a-z]{2,5}', 'aaa2lllex12')
<_sre.SRE_Match object; span=(0, 3), match='aaa'>
>>> 
>>> 
>>> 
>>> 
>>> re.search('alex|Alex', 'Alex')
<_sre.SRE_Match object; span=(0, 4), match='Alex'>
>>> re.search('alex|Alex', 'alex')
<_sre.SRE_Match object; span=(0, 4), match='alex'>
>>> re.search('a|Alex', 'alex')
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.search('[a|A]lex', 'Alex')
<_sre.SRE_Match object; span=(0, 4), match='Alex'>
>>> 
>>> 
>>> 
>>> 
>>> re.search('[a-z]+[0-9]+', 'alex123')
<_sre.SRE_Match object; span=(0, 7), match='alex123'>
>>> re.search('([a-z]+)([0-9]+)', 'alex123')
<_sre.SRE_Match object; span=(0, 7), match='alex123'>
>>> re.search('([a-z]+)([0-9]+)', 'alex123').group()
'alex123'
>>> re.search('([a-z]+)([0-9]+)', 'alex123').groups()
('alex', '123')
>>> 
>>> 
>>> 
"""

# Three equivalent ways to anchor a pattern at the very start of the string:
# search() with '^', match() (which only ever tries position 0), and search()
# with '\A'.
# The matched text (.group()) is compared rather than the Match objects
# themselves: each call returns a separate Match object, and Match objects
# compare by identity, so comparing them directly always yields False.
# The '\A' pattern is written as a raw string so the backslash reaches the
# regex engine untouched; in a plain literal '\A' is an invalid escape
# sequence and makes Python emit a warning.
re.search('^ab', 'abd').group() == re.match('ab', 'abd').group() == re.search(r'\Aab', 'abd').group()

"""
>>> re.search('\Aalex', 'alex')
<_sre.SRE_Match object; span=(0, 4), match='alex'>
>>>
>>>

>>>
>>> re.search('[0-9]', 'alex2')
<_sre.SRE_Match object; span=(4, 5), match='2'>
>>> re.search('\d', 'alex2')
<_sre.SRE_Match object; span=(4, 5), match='2'>
>>> re.search('\d+', 'alex2')
<_sre.SRE_Match object; span=(4, 5), match='2'>
>>> re.search('\d+', 'alex')
>>> re.search('\d+', 'alex12345')   # 贪婪匹配
<_sre.SRE_Match object; span=(4, 9), match='12345'>
>>> re.search('\d+', 'alex12345sd456')
<_sre.SRE_Match object; span=(4, 9), match='12345'>
>>>


>>>
>>>
>>> re.search('\D+', 'alex12345sd456')
<_sre.SRE_Match object; span=(0, 4), match='alex'>
>>> re.search('\D+', 'al%*ex12345sd456')
<_sre.SRE_Match object; span=(0, 6), match='al%*ex'>
>>> re.search('\w+', 'al%*ex12345sd456')
<_sre.SRE_Match object; span=(0, 2), match='al'>
>>> re.search('\w+', 'alex12345sd456')
<_sre.SRE_Match object; span=(0, 14), match='alex12345sd456'>
>>> re.search('\w+', 'alex12345sd*456')
<_sre.SRE_Match object; span=(0, 11), match='alex12345sd'>
>>> re.search('\W+', 'alex12345sd*456')
<_sre.SRE_Match object; span=(11, 12), match='*'>
>>> re.search('\W+', 'alex12%^&*345sd*456')
<_sre.SRE_Match object; span=(6, 10), match='%^&*'>
>>>
>>>
>>> s = 'alex\nrain'
>>> s
'alex\nrain'
>>> print(s)
alex
rain
>>> re.search('\s', s)
<_sre.SRE_Match object; span=(4, 5), match='\n'>
>>>
>>> re.findall('\s', 'alex\njack\train\rjay')
['\n', '\t', '\r']
>>>
>>>
>>>



>>>
>>>
>>> s = '130123199909094321'
>>> s
'130123199909094321'
>>> re.search('(?P<province>\d{2})(?P<city>\d{4})(?P<born_year>\d{4})', s)
<_sre.SRE_Match object; span=(0, 10), match='1301231999'>
>>> re.search('(?P<province>\d{2})(?P<city>\d{4})(?P<born_year>\d{4})', s).groups()
('13', '0123', '1999')
>>> res = re.search('(?P<province>\d{2})(?P<city>\d{4})(?P<born_year>\d{4})', s)
>>> res.
res.end(        res.group(      res.lastgroup   res.re          res.start(
res.endpos      res.groupdict(  res.lastindex   res.regs        res.string
res.expand(     res.groups(     res.pos         res.span(
>>> res.groupdict()
{'province': '13', 'city': '0123', 'born_year': '1999'}
>>>
>>>

"""


"""

>>>
>>> s = 'alex22jack33rain26jinxing50'
>>> s.split()
['alex22jack33rain26jinxing50']
>>> s.split('[0-9]')
['alex22jack33rain26jinxing50']
>>>
>>>
>>> re.split('\d', s)
['alex', '', 'jack', '', 'rain', '', 'jinxing', '', '']
>>>
>>> re.split('\d+', s)
['alex', 'jack', 'rain', 'jinxing', '']
>>>
>>>
>>> re.findall('\d+', s)
['22', '33', '26', '50']
>>>
>>> s = 'alex22jack33rain26jinxing50|mack-oldboy'
>>> s = 'alex22jack33rain26jinxing50#mack-oldboy'
>>> re.split('\d+|#|-', s)
['alex', 'jack', 'rain', 'jinxing', '', 'mack', 'oldboy']
>>>
>>>
>>> s = '9-3*4/3+44*234+45/34-5*6/9'
>>> s
'9-3*4/3+44*234+45/34-5*6/9'
>>> re.split('[-\*/+]', s)
['9', '3', '4', '3', '44', '234', '45', '34', '5', '6', '9']
>>> re.split('[-\*/+]', s, maxsplit=2)
['9', '3', '4/3+44*234+45/34-5*6/9']
>>> 


"""


# 转义
"""
>>> s = 'alex22jack33rain26jinxing50|mack-oldboy'
>>> re.split('|', s)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python3.6/re.py", line 212, in split
    return _compile(pattern, flags).split(string, maxsplit)
ValueError: split() requires a non-empty pattern match.
>>> re.split('\|', s)
['alex22jack33rain26jinxing50', 'mack-oldboy']
>>> 



>>> 
>>> s = 'alex22jack33rain26\jinxing50|mack-oldboy'
>>> s
'alex22jack33rain26\\jinxing50|mack-oldboy'
>>> 
>>> re.split('\', s)
  File "<stdin>", line 1
    re.split('\', s)
                   ^
SyntaxError: EOL while scanning string literal
>>> re.split('\\', s)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python3.6/re.py", line 212, in split
    return _compile(pattern, flags).split(string, maxsplit)
  File "/usr/lib/python3.6/re.py", line 301, in _compile
    p = sre_compile.compile(pattern, flags)
  File "/usr/lib/python3.6/sre_compile.py", line 562, in compile
    p = sre_parse.parse(p, flags)
  File "/usr/lib/python3.6/sre_parse.py", line 847, in parse
    source = Tokenizer(str)
  File "/usr/lib/python3.6/sre_parse.py", line 231, in __init__
    self.__next()
  File "/usr/lib/python3.6/sre_parse.py", line 245, in __next
    self.string, len(self.string) - 1) from None
sre_constants.error: bad escape (end of pattern) at position 0
>>> re.split('\\\', s)
  File "<stdin>", line 1
    re.split('\\\', s)
                     ^
SyntaxError: EOL while scanning string literal
>>> re.split('\\\\', s)
['alex22jack33rain26', 'jinxing50|mack-oldboy']
>>> 

"""



"""

>>> s = 'alex22jack33rain26\jinxing50|mack-oldboy'
>>> s
'alex22jack33rain26\\jinxing50|mack-oldboy'
>>>
>>> re.sub('\d+', '=', s)
'alex=jack=rain=\\jinxing=|mack-oldboy'
>>> re.sub('\d+', '=', s, count=2)
'alex=jack=rain26\\jinxing50|mack-oldboy'
>>>
"""

"""
>>> 
>>> re.fullmatch('alex', 'alex123')
>>> re.fullmatch('alex124', 'alex123')
>>> re.fullmatch('alex123', 'alex123')
<_sre.SRE_Match object; span=(0, 7), match='alex123'>
>>> 
>>> 
>>> 
>>> re.fullmatch('\w+@\w+.com|cn|edu','alex@oldboyedu.com')
<_sre.SRE_Match object; span=(0, 18), match='alex@oldboyedu.com'>
>>> re.fullmatch('\w+@\w+.com|cn|edu','alex@oldboyedu.cn')
>>> re.fullmatch('\w+@\w+.(com|cn|edu)','alex@oldboyedu.cn')
<_sre.SRE_Match object; span=(0, 17), match='alex@oldboyedu.cn'>
>>> 
>>> 
"""


# 提前把匹配规则编译好（re.compile），重复使用时可以提高效率
"""
>>> pattern = re.compile('\w+@\w+\.(com|cn|edu)')
>>> pattern
re.compile('\\w+@\\w+\\.(com|cn|edu)')
>>> pattern.fullmatch('alex@oldboyedu.cn')
<_sre.SRE_Match object; span=(0, 17), match='alex@oldboyedu.cn'>
>>>

"""

# 标识符
"""
>>> re.search('a', 'Alex', re.I)    # 忽略大小写
<_sre.SRE_Match object; span=(0, 1), match='A'>
>>>


>>> re.search('foo.$', 'foo1\nfoo2\n')
<_sre.SRE_Match object; span=(5, 9), match='foo2'>
>>> re.search('foo.$', 'foo1\nfoo2\n', re.M)
<_sre.SRE_Match object; span=(0, 4), match='foo1'>
>>>


>>>
>>> re.search('.', '\n')
>>> re.search('.', '\n', re.S)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>>


>>> re.search('. #test', 'alex')
>>> re.search('. #test', 'alex', re.X)
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> 


"""


# a = re.compile(r"""\d+  # the integral part
# \.  # the decimal point
# \d *    # some fractional digits""",
#                re.X)



>>>
>>> re.search('\([^()]+\)',s)

posted @ 2019-03-20 13:20 无影连剑决阅读(708) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

无影连剑决