16. 常见函数与模块介绍

内置函数介绍

print(abs(-1))  # 取绝对值
print(all([1,'aaa','1']))  # 全为真返回真
print(all([]))  # 有一个为假返回假；注意：空的可迭代对象返回True
print(any([0,None,1]))  # 有一个为真返回真
print(any([]))  # 全为假返回假
# 传入的都是可迭代对象
# 进制转换
print(bin(11))  # 十进制转换成二进制
print(oct(11))  # 十进制转换成八进制
print(hex(11))  # 十进制转换成十六进制
print(bool(''))  # 转换成布尔值

def func():
    pass
class Foo:
    pass
print(callable(Foo))  # 判断是否可以被调用，返回true或者false

print(chr(65))  # 将数字转换成ASCII码
print(ord('A'))  # 将ASCII码转换成数字

s=frozenset({1,2,3})  # 不可变集合

# hash(不可变类型)

print(round(1.5))  # 四舍五入（严格来说是“四舍六入五成双”：round(1.5)为2，round(2.5)也为2）

# 10 ** 2 % 3
print(pow(10,2,3))

s=slice(1,4,2)  # 切片，括号里面分别是（开始，结束，步长）
l1=['a','b','c','d','e']
l2=['aaa','bbb','ccc','ddd',444]
print(l1[1:4:2])  # l1[s] 
print(l2[1:4:2])  # l2[s]

v1='hello'
v2=[111,222,333,444,5555,6666]
res=zip(v1,v2)  # 拉链函数
print(list(res))  # 返回元组组成的列表[('h',111),('e',222)......]，两边长度不一致时不会报错，而是以最短的可迭代对象为准，多余的元素会被丢弃

print(divmod(10000,33))  # 返回元组（商，余数）

class Foo:
    pass
obj=Foo()
obj.xxx=1111
print(dir(obj))  # 查看obj对象具有哪些属性

for i,v in enumerate(['a','b','c']):  # 返回元组：（索引，元素）
    print(i,v)

res=eval('{"a":1}')  # 执行字符串中的表达式
print(res,type(res))

class Foo:
    pass
obj=Foo()
print(isinstance(obj,Foo))  # 判断一个对象是不是一个类的实例（就是判断对象的类型），和type的用法差不多
print(isinstance([],list))  # 类型判断推荐使用isinstance
print(type([]) is list) # 不推荐使用type进行类型判断

# import 'time' # 错误，不能导入字符串
time=__import__('time')
time.sleep(3)

一些常用的模块介绍

时间模块

time模块

# Time comes in three representations.
# 1. Timestamp: seconds elapsed since the epoch (1970); mainly used to compute intervals.
import time
print(time.time())  # call the function; printing the bare name only shows the function object
# 2. Formatted string: mainly used to display a time.
print(time.strftime('%Y.%m.%d %H:%M:%S'))  # year.month.day hour:minute:second
# %p is the locale's AM/PM marker (morning / afternoon).
# %X can replace %H:%M:%S to show the time part directly.
# 3. Structured time (struct_time): mainly used to read one field of a time.
res = time.localtime()  # must be called: the bare function object has no tm_* attributes
print(res)
res.tm_year  # year
res.tm_mon  # month
res.tm_mday  # day of the month
res.tm_hour  # hour
res.tm_min  # minute
res.tm_sec  # second
res.tm_wday  # day of the week
res.tm_yday  # day of the year
res.tm_isdst  # daylight-saving flag: 0 means not in effect, 1 means in effect

# 其他的了解即可
%a    Locale’s abbreviated weekday name.     
%A    Locale’s full weekday name.     
%b    Locale’s abbreviated month name.     
%B    Locale’s full month name.     
%c    Locale’s appropriate date and time representation.     
%d    Day of the month as a decimal number [01,31].     
%H    Hour (24-hour clock) as a decimal number [00,23].     
%I    Hour (12-hour clock) as a decimal number [01,12].     
%j    Day of the year as a decimal number [001,366].     
%m    Month as a decimal number [01,12].     
%M    Minute as a decimal number [00,59].     
%p    Locale’s equivalent of either AM or PM.    (1)
%S    Second as a decimal number [00,61].    (2)
%U    Week number of the year (Sunday as the first day of the week) as a decimal number [00,53]. All days in a new year preceding the first Sunday are considered to be in week 0.    (3)
%w    Weekday as a decimal number [0(Sunday),6].     
%W    Week number of the year (Monday as the first day of the week) as a decimal number [00,53]. All days in a new year preceding the first Monday are considered to be in week 0.    (3)
%x    Locale’s appropriate date representation.     
%X    Locale’s appropriate time representation.     
%y    Year without century as a decimal number [00,99].     
%Y    Year with century as a decimal number.     
%z    Time zone offset indicating a positive or negative time difference from UTC/GMT of the form +HHMM or -HHMM, where H represents decimal hour digits and M represents decimal minute digits [-23:59, +23:59].     
%Z    Time zone name (no characters if no time zone exists).     
%%    A literal '%' character.

datetime模块

import datetime
datetime.datetime.now()  # current local time: year, month, day, hour, minute, second, microsecond
datetime.datetime.now() + datetime.timedelta(days=-3)  # date arithmetic; the keyword is `days` (`day` raises TypeError)

时间模式的转换（就是上面的三种时间格式）

# 1.1 时间戳——》struct_time（secs参数未提供，则以当前时间为准）：localtime([secs])
time.localtime()
time.localtime(1473525444.037215)
gmtime([secs]) # 和localtime()方法类似，gmtime()方法是将一个时间戳转换为UTC时区（0时区，格林尼治时间）的struct_time
# 1.2 struct_time——》为时间戳：mktime(t)
print(time.mktime(time.localtime()))  # 1473525749.0

# 2.1 struct_time——》format string
strftime(format,string)   # 把一个代表时间的元组或者struct_time转化为格式化的时间字符串。如果t未指定，将传入time.localtime()。如果元组中任何一个元素越界，ValueError的错误将会被抛出。
print(time.strftime("%Y-%m-%d %X", time.localtime()))  # 2016-09-11 00:49:56
# 2.2 format string——》struct_time
time.strptime(string,format)  # 把一个格式化时间字符串转化为struct_time，实际上它和strftime()是逆操作
print(time.strptime('2011-05-05 16:37:06', '%Y-%m-%d %X'))

time.struct_time(tm_year=2011, tm_mon=5, tm_mday=5, tm_hour=16, tm_min=37, tm_sec=6,tm_wday=3, tm_yday=125, tm_isdst=-1)
# 在这个函数中，format默认为："%a %b %d %H:%M:%S %Y"。

format string是字符串格式，一般我们获得的都是字符串类型的时间，时间戳的优点是能进行时间的加减操作，实际上struct_time是一个中转，能够将时间戳和format string相互联系起来，完成转换

补充

import time
time.sleep(3)  # 睡三秒

time.asctime()  # 把一个表示时间的元组或者struct_time转化为下面格式的字符串：星期 月份 日期 时分秒 年份（Mon Mar 21 10:20:20 2021）这种时间格式在Linux系统上比较常见
# 如果没有参数，将会将time.localtime()作为参数传入

time.ctime(1024)  # 将时间戳（按秒计算的浮点数）转化为time.asctime()的形式
# 如果参数未给或者为None的时候，将会默认time.time()为参数，效果等同于time.asctime(time.localtime(secs))
print(time.ctime())  # Sun Sep 11 00:46:38 2016
print(time.ctime(time.time()))  # Sun Sep 11 00:46:38 2016

import datetime
datetime.datetime.now()  # current local time
# Current UTC (Greenwich) time as a timezone-aware value.
# The legacy naive spelling is datetime.datetime.utcnow(); `utnow` does not exist.
datetime.datetime.now(datetime.timezone.utc)
# Timestamp -> datetime: the moment 333333 seconds after 1970-01-01 (epoch),
# expressed in local time and displayed as year-month-day hour:minute:second.
datetime.datetime.fromtimestamp(333333)

案例：模拟进度条

# 模拟进度条
import time
def progress(percent):
    """Redraw a one-line text progress bar for a completion ratio.

    percent is the fraction of work done; any value above 1 is shown as 100%.
    Nothing is returned and no trailing newline is printed.
    """
    ratio = min(percent, 1)  # cap the ratio so the bar never exceeds its width
    bar = '#' * int(50 * ratio)
    # '\r' returns the cursor to the start of the line so the bar is drawn in place;
    # '%-50s' left-justifies the bar inside a 50-character field.
    line = '\r[%-50s] %d%%' % (bar, int(100 * ratio))
    print(line, end='')
recv_size=0
total_size=1025011
while recv_size < total_size:
    time.sleep(0.01) # 下载了1024个字节的数据
    recv_size+=1024 # recv_size=2048
    # 打印进度条
    # print(recv_size)
    percent = recv_size / total_size  # 1024 / 333333
    progress(percent)

random模块

import random
print(random.random())  # 在0到1之间的一个浮点数（0，1）
print(random.randint(1,3))  # [1,3]    大于等于1且小于等于3之间的整数 
print(random.randrange(1,3))  # [1,3)    大于等于1且小于3之间的整数
print(random.choice([1,'23',[4,5]]))  # 1或者23或者[4,5]
print(random.sample([1,'23',[4,5]],2))  # 列表元素任意2个组合，第二个参数必须有
print(random.uniform(1,3))  # 大于1小于3的小数，如1.927109612082716 
item=[1,3,5,7,9]
random.shuffle(item)  # 打乱item的顺序,shuffle是"洗牌"的意思
print(item)

# 模拟生成验证码
import random
def make_code(n):
    """Return a random verification code that is n characters long.

    Every character is either an uppercase ASCII letter (code points 65-90)
    or a decimal digit, chosen at random.
    """
    def _one_char():
        # Draw one letter and one digit, then keep one of the two.
        letter = chr(random.randint(65, 90))
        digit = str(random.randint(0, 9))
        return random.choice([letter, digit])

    return ''.join(_one_char() for _ in range(n))

print(make_code(9))

os模块

# os模块是与操作系统相关的模块，与操作系统相关的还有sys模块
os.getcwd()  # 获取当前工作目录，即当前python脚本工作的目录路径
os.chdir("dirname")   # 改变当前脚本工作目录；相当于shell下cd
os.curdir   #  返回当前目录: ('.')
os.pardir   #  获取当前目录的父目录字符串名：('..')
os.makedirs('dirname1/dirname2')   # 可生成多层递归目录
os.removedirs('dirname1')   # 若目录为空，则删除，并递归到上一级目录，如若也为空，则删除，依此类推
os.mkdir('dirname')  # 生成单级目录；相当于shell中mkdir dirname
os.rmdir('dirname')  # 删除单级空目录，若目录不为空则无法删除，报错；相当于shell中rmdir dirname
os.listdir('dirname')  # 列出指定目录下的所有文件和子目录，包括隐藏文件，并以列表方式打印，传入路径
os.remove()   # 删除一个文件
os.rename("oldname","newname")   # 重命名文件/目录
os.stat('path/filename')   # 获取文件/目录信息

os.sep     # 输出操作系统特定的路径分隔符，win下为"\\",Linux下为"/"
os.linesep     # 输出当前平台使用的行终止符，win下为"\r\n",Linux下为"\n"
os.pathsep     # 输出用于分割文件路径的字符串 win下为;,Linux下为:
os.name    # 输出字符串指示当前使用平台。win->'nt'; Linux->'posix'

os.system("bash command")   # 运行shell命令，直接显示
os.environ    # 获取系统环境变量，返回一个字典，键和值必须都是字符串
### os.path系列 ###
os.path.abspath(path)   # 返回path规范化的绝对路径
os.path.split(path)   # 将path分割成目录和文件名二元组返回
os.path.dirname(path)   # 返回path的目录。其实就是os.path.split(path)的第一个元素
os.path.basename(path)   # 返回path最后的文件名。如果path以／或\结尾，那么就会返回空值。即os.path.split(path)的第二个元素
os.path.exists(path)   # 如果path存在，返回True；如果path不存在，返回False
os.path.isabs(path)   # 如果path是绝对路径，返回True
os.path.isfile(path)    # 如果path是一个存在的文件，返回True。否则返回False
os.path.isdir(path)    # 如果path是一个存在的目录，则返回True。否则返回False
os.path.join(path1[, path2[, ...]])   # 将多个路径组合后返回，第一个绝对路径之前的参数将被忽略
os.path.getatime(path)    # 返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path)   # 返回path所指向的文件或者目录的最后修改时间
os.path.getsize(path)   # 返回path的大小

# 在Linux和Mac平台上，该函数会原样返回path，在windows平台上会将路径中所有字符转换为小写，并将所有斜杠转换为反斜杠。
os.path.normcase('c:/windows\\system32\\')   
# 'c:\\windows\\system32\\'   
# 规范化路径，如..和/
os.path.normpath('c://windows\\System32\\../Temp/')   
# 'c:\\windows\\Temp'   
a='/Users/jieli/test1/\\\a1/\\\\aa.py/../..'
print(os.path.normpath(a))
# /Users/jieli/test1

# 路径处理
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# python3.5之后推出了一个新的模块叫pathlib
from pathlib import Path
root = Path(__file__)  # the class is `Path` (capital P); lowercase `path` is an undefined name
res = root.parent.parent  # two levels up from this file, as a Path object

sys模块

sys.path           # 返回模块的搜索路径，初始化时使用PYTHONPATH环境变量的值
sys.argv           # 命令行参数List，第一个元素是程序本身路径，获取的是解释器后的参数值
# 下面的内容了解
sys.exit(n)        # 退出程序，正常退出时exit(0)
sys.version        # 获取Python解释程序的版本信息
sys.maxsize        # 最大的Int值（Python 3已移除sys.maxint，改用sys.maxsize）
sys.platform       # 返回操作系统平台名称

shutil模块（了解）

高级的文件、文件夹、压缩包处理模块

shutil.copyfileobj(fsrc, fdst[, length])
将文件内容拷贝到另一个文件中

import shutil
# Open both files in one with-statement so the handles are closed (and the
# destination flushed) as soon as the copy finishes.
with open('old.xml', 'r') as fsrc, open('new.xml', 'w') as fdst:
    shutil.copyfileobj(fsrc, fdst)

shutil.copyfile(src, dst)
拷贝文件

shutil.copyfile('f1.log', 'f2.log')  # 目标文件无需存在

shutil.copymode(src, dst)
仅拷贝权限，内容、组、用户均不变

shutil.copymode('f1.log', 'f2.log')  # 目标文件必须存在

shutil.copystat(src, dst)
仅拷贝状态的信息，包括：mode bits, atime, mtime, flags

shutil.copystat('f1.log', 'f2.log')  # 目标文件必须存在

shutil.copy(src, dst)
拷贝文件和权限

import shutil
shutil.copy('f1.log', 'f2.log')

shutil.copy2(src, dst)
拷贝文件和状态信息

import shutil
shutil.copy2('f1.log', 'f2.log')

shutil.ignore_patterns(*patterns)
shutil.copytree(src, dst, symlinks=False, ignore=None)
递归的去拷贝文件夹

import shutil
shutil.copytree('folder1', 'folder2', ignore=shutil.ignore_patterns('*.pyc', 'tmp*')) # 目标目录不能存在，注意对folder2目录父级目录要有可写权限，ignore的意思是排除以.pyc结尾和以tmp开头的文件

import shutil
shutil.copytree('f1', 'f2', symlinks=True, ignore=shutil.ignore_patterns('*.pyc', 'tmp*'))
'''
通常的拷贝都把软连接拷贝成硬链接，即对待软连接来说，创建新的文件
'''

shutil.rmtree(path[, ignore_errors[, onerror]])
递归的去删除文件

import shutil
shutil.rmtree('folder1')

shutil.move(src, dst)
递归的去移动文件，它类似mv命令，其实就是移动和重命名

import shutil
shutil.move('folder1', 'folder3')

shutil.make_archive(base_name, format,...)

创建压缩包并返回文件路径，例如：zip、tar

base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，则保存至当前目录，否则保存至指定路径，
如 data_bak =>保存至当前路径
如：/tmp/data_bak =>保存至/tmp/
format：压缩包种类，“zip”, “tar”, “bztar”，“gztar”
root_dir：要压缩的文件夹路径（默认当前目录）
owner：用户，默认当前用户
group：组，默认当前组
logger：用于记录日志，通常是logging.Logger对象

# 将 /data 下的文件打包放置当前程序目录
import shutil
ret = shutil.make_archive("data_bak", 'gztar', root_dir='/data')
# 将 /data下的文件打包放置 /tmp/目录
import shutil
ret = shutil.make_archive("/tmp/data_bak", 'gztar', root_dir='/data')

shutil 对压缩包的处理是调用 ZipFile 和 TarFile 两个模块来进行的，详细：

import zipfile
# 压缩
z = zipfile.ZipFile('laxi.zip', 'w')
z.write('a.log')
z.write('data.data')
z.close()

# 解压
z = zipfile.ZipFile('laxi.zip', 'r')
z.extractall(path='.')
z.close()

import tarfile
# 压缩
t=tarfile.open('/tmp/egon.tar','w')
t.add('/test1/a.py',arcname='a.bak')
t.add('/test1/b.py',arcname='b.bak')
t.close()

# 解压
t=tarfile.open('/tmp/egon.tar','r')
t.extractall('/egon')
t.close()

序列与反序列化模块

序列化：将内存中的数据类型转化成一个特定格式的内容（json格式或者pickle格式）

反序列化：将json格式或者pickle格式的内容转换回内存中的数据类型

dumps与dump之间的区别就是，dumps进行数据类型的转换（将一个python类型的数据转换成json字符串数据）dump函数能够直接操作文件句柄

序列化的格式有两种用途：

可用于存储——用于存档（存到硬盘，一般不涉及其他的语言，所以pickle更合适）
可传输给其他平台使用——跨平台数据交互（这就需要适用各种语言——》json格式的数据，pickle只适用于python）

import json
res1 = json.dumps([1, 2, 3, 'nihao'])  # serialize: Python list -> JSON text
res2 = json.loads(res1)  # deserialize: JSON text -> Python list

dic = {'k1':'v1','k2':'v2','k3':'v3'}
str_dic = json.dumps(dic)  # 序列化：将一个字典转换成一个字符串
print(type(str_dic),str_dic)  #<class 'str'> {"k3": "v3", "k1": "v1", "k2": "v2"}
# 注意，json转换完的字符串类型的字典中的字符串是由""表示的

dic2 = json.loads(str_dic)  # 反序列化：将一个字符串格式的字典转换成一个字典
# 注意，要用json的loads功能处理的字符串类型的字典中的字符串必须由""表示
print(type(dic2),dic2)  # <class 'dict'> {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}


# The long way to write a serialized result to a file: dumps, then write the text.
json_res = json.dumps([1, 'aaa', True, False])
print(json_res, type(json_res))  # [1, "aaa", true, false] <class 'str'>
with open('test.json', mode='wt', encoding='utf-8') as f:
    f.write(json_res)
# The long way to deserialize from a file: read the text, then loads.
with open('test.json', mode='rt', encoding='utf-8') as f:
    json_res = f.read()
    l = json.loads(json_res)
    print(l, type(l))

# 将序列化的结果写入文件的简单方法
with open('test.json',mode='wt',encoding='utf-8') as f:
    json.dump([1,'aaa',True,False],f)  # 直接将序列化的结果写到相应的文件句柄中
# 从文件读取json格式的字符串进行反序列化操作的简单方法
with open('test.json',mode='rt',encoding='utf-8') as f:
    l=json.load(f)
    print(l,type(l))


# json验证: json格式兼容的是所有语言通用的数据类型，不能识别某一语言的所独有的类型（json序列化只支持部分Python数据结构：dict,list, tuple,str,int, float,True,False,None）
json.dumps({1,2,3,4,5})  # 集合是python独有的数据类型，json的话会报错

# Emphasis: JSON is its own format; do not confuse it with Python literal syntax.
# Wrap the JSON text in single quotes so the double-quoted JSON string inside stays intact.
l = json.loads('[1, "aaa", true, false]')
# JSON strings must use double quotes, so the following call raises json.JSONDecodeError:
# l = json.loads("[1,1.3,true,'aaa', true, false]")
print(l[0])

# 在python解释器2.7与3.6之后都可以json.loads(bytes类型)，但唯独3.5不可以，下面是3.5中返回的结果
import json
json.loads(b'{"a":111}')
'''
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/linhaifeng/anaconda3/lib/python3.5/json/__init__.py", line 312, in loads
    s.__class__.__name__))
TypeError: the JSON object must be str, not 'bytes'
'''

import json
# dct="{'1':111}" # json 不认单引号
# dct=str({"1":111})#报错,因为生成的数据还是单引号:{'one': 1}
dct='{"1":"111"}'
print(json.loads(dct))
# conclusion:无论数据是怎样创建的，只要满足json格式，就可以json.loads出来,不一定非要dumps的数据才能loads

# 猴子补丁
# 一.什么是猴子补丁?
      属性在运行时的动态替换，叫做猴子补丁（Monkey Patch）。
      猴子补丁的核心就是用自己的代码替换所用模块的源代码，详细地如下
　　1，这个词原来为Guerrilla Patch，杂牌军、游击队，说明这部分不是原装的，在英文里guerrilla发音和gorilla(大猩猩)相似，再后来就写了monkey(猴子)。
　　2，还有一种解释是说由于这种方式将原来的代码弄乱了(messing with it)，在英文里叫monkeying about(顽皮的)，所以叫做Monkey Patch。
# 在入口处打一个猴子补丁
import json
import ujson  # ujson的效率更高
def monkey_patch_json():
    """Replace json's dumps/loads with the ujson implementations.

    The json module object is also relabelled so its __name__ reads 'ujson'.
    """
    setattr(json, '__name__', 'ujson')
    for attr in ('dumps', 'loads'):
        setattr(json, attr, getattr(ujson, attr))
monkey_patch_json() # 在入口文件处运行
# 二. 猴子补丁的功能(一切皆对象)
# 拥有在模块运行时替换的功能, 例如: 一个函数对象赋值给另外一个函数对象(把函数原本的执行的功能给替换了)
class Monkey:
    # Two plain methods, each printing its own name; they are the patch targets below.
    def hello(self):
        print('hello')
    def world(self):
        print('world')
def other_func():
    # Module-level function that takes no arguments; stored on the instance below.
    print("from other_func")
monkey = Monkey()
# Rebind `hello` on this one instance to the bound method `world`.
monkey.hello = monkey.world
monkey.hello()  # prints 'world'
# Store a plain function as an instance attribute; it is called without self.
monkey.world = other_func
monkey.world()  # prints 'from other_func'

# 三.monkey patch的应用场景
如果我们的程序中已经基于json模块编写了大量代码了，发现有一个模块ujson比它性能更高，但用法一样，我们肯定不会想所有的代码都换成ujson.dumps或者ujson.loads,那我们可能会想到这么做import ujson as json，但是这么做的需要每个文件都重新导入一下，维护成本依然很高此时我们就可以用到猴子补丁了只需要在入口处加上:

import json
import ujson
def monkey_patch_json():
    """Point json.dumps and json.loads at ujson and rename the module to 'ujson'."""
    json.__name__ = 'ujson'
    for func_name in ('dumps', 'loads'):
        replacement = getattr(ujson, func_name)
        setattr(json, func_name, replacement)
monkey_patch_json() # 之所以在入口处加，是因为模块在导入一次后，后续的导入便直接引用第一次的成果
# 使用场景
# 其实这种场景也比较多, 比如我们引用团队通用库里的一个模块, 又想丰富模块的功能, 除了继承之外也可以考虑用Monkey Patch.采用猴子补丁之后，如果发现ujson不符合预期，那也可以快速撤掉补丁。个人感觉Monkey Patch带来了便利的同时也有搞乱源代码的风险!

# pickle模块，与json的用法一样
'''
pickle模块是将Python所有的数据结构以及对象等转化成bytes类型，也可以反序列化还原回去

    刚才也提到了pickle模块，pickle模块是只能Python语言识别的序列化模块，如果把序列化模块比喻成全世界公认的一种交流语言，也就是标准的话，json就是像是英语，全世界（python，java，php，C，等等）都遵循这个标准，而pickle就是中文，只有中国人（python）作为第一交流语言

    既然只是Python语言使用，那么它支持Python所有的数据类型包括后面我们要讲的实例化对象等，它能将这些所有的数据结构序列化成特殊的bytes，然后还可以反序列化还原。使用上与json几乎差不多，也是两对四个方法
'''
import pickle
# res=pickle.dumps({1,2,3,4,5})
# print(res,type(res))

# s=pickle.loads(res)
# print(s,type(s))

# python2与python3的兼容问题  
# coding:utf-8
import pickle
with open('a.pkl',mode='wb') as f:
    # 一：在python3中执行的序列化操作如何兼容python2
    # python2不支持protocol>2，python3中默认protocol=4
    # 所以在python3中dump操作应该指定protocol=2
    pickle.dump('你好啊',f,protocol=2)
with open('a.pkl', mode='rb') as f:
    # 二：python2中反序列化才能正常使用
    res=pickle.load(f)
    print(res)

json序列化存储多个数据到同一个文件中

对于json序列化，存储多个数据到一个文件中是有问题的，默认一个json文件只能存储一个json数据，但是也可以解决，举例说明：

对于json 存储多个数据到文件中
dic1 = {'name':'oldboy1'}
dic2 = {'name':'oldboy2'}
dic3 = {'name':'oldboy3'}
f = open('序列化',encoding='utf-8',mode='a')
json.dump(dic1,f)
json.dump(dic2,f)
json.dump(dic3,f)
f.close()

f = open('序列化',encoding='utf-8')
ret = json.load(f)
ret1 = json.load(f)
ret2 = json.load(f)
print(ret)

# The approach above fails on load; the workaround is one JSON document per line.
dic1 = {'name':'oldboy1'}
dic2 = {'name':'oldboy2'}
dic3 = {'name':'oldboy3'}
# Append mode: every run adds three more lines to the file.
with open('序列化', encoding='utf-8', mode='a') as f:
    for dic in (dic1, dic2, dic3):
        f.write(json.dumps(dic) + '\n')

# Read back line by line; each line holds one complete JSON document.
# The with-block closes the handle, which the bare open() call never did.
with open('序列化', encoding='utf-8') as f:
    for line in f:
        print(json.loads(line))

为了实现与其他语言的互通，推荐使用json模块

xml模块（了解）

xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，古时候，在json还没诞生的黑暗年代，大家只能选择用xml，至今很多传统公司如金融行业的很多系统的接口还主要是xml。

xml的格式如下，就是通过<>节点来区别数据结构的:

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

xml协议在各个语言里的都是支持的，在python中可以用以下模块操作xml：

import xml.etree.ElementTree as ET
 
tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root.tag)
 
#遍历xml文档
for child in root:
    print('========>',child.tag,child.attrib,child.attrib['name'])
    for i in child:
        print(i.tag,i.attrib,i.text)
 
#只遍历year 节点
for node in root.iter('year'):
    print(node.tag,node.text)
#---------------------------------------

import xml.etree.ElementTree as ET
 
tree = ET.parse("xmltest.xml")
root = tree.getroot()
 
#修改
for node in root.iter('year'):
    new_year=int(node.text)+1
    node.text=str(new_year)
    node.set('updated','yes')
    node.set('version','1.0')
tree.write('test.xml')
 
 
#删除node
for country in root.findall('country'):
   rank = int(country.find('rank').text)
   if rank > 50:
     root.remove(country)
 
tree.write('output.xml')

#在country内添加（append）节点year2
import xml.etree.ElementTree as ET
tree = ET.parse("a.xml")
root=tree.getroot()
for country in root.findall('country'):
    for year in country.findall('year'):
        if int(year.text) > 2000:
            year2=ET.Element('year2')
            year2.text='新年'
            year2.attrib={'update':'yes'}
            country.append(year2) #往country节点下添加子节点

tree.write('a.xml.swap')

import xml.etree.ElementTree as ET
 
# 自己创建xml文件
new_xml = ET.Element("namelist")
name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
age = ET.SubElement(name,"age",attrib={"checked":"no"})
sex = ET.SubElement(name,"sex")
sex.text = '33'
name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
age = ET.SubElement(name2,"age")
age.text = '19'
 
et = ET.ElementTree(new_xml) #生成文档对象
et.write("test.xml", encoding="utf-8",xml_declaration=True)
 
ET.dump(new_xml) #打印生成的格式

shelve模块（了解）

shelve模块比pickle模块简单，只有一个open函数，返回类似字典的对象，可读可写;key必须为字符串，而值可以是python所支持的数据类型

import shelve
f=shelve.open(r'sheve.txt')
# f['stu1_info']={'name':'egon','age':18,'hobby':['piao','smoking','drinking']}
# f['stu2_info']={'name':'gangdan','age':53}
# f['school_info']={'website':'http://www.pypy.org','city':'beijing'}
print(f['stu1_info']['hobby'])
f.close()

configparser模块

配置文件如下：

# 注释1
; 注释2

[section1]  # 这个是不同的部分，查找的时候顺序是section——option——value
k1 = v1  # 前面的k1就是option项，后面的v1是value项
k2:v2
user=egon
age=18
is_admin=true
salary=31
[section2]
k1 = v1

读取

import configparser

config=configparser.ConfigParser()
config.read('a.cfg')

# 查看所有的标题
res=config.sections() #['section1', 'section2']
print(res)

# 查看标题section1下所有key=value的key
options=config.options('section1')
print(options) #['k1', 'k2', 'user', 'age', 'is_admin', 'salary']

# 查看标题section1下所有key=value的(key,value)格式
item_list=config.items('section1')
print(item_list) #[('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]

# 查看标题section1下user的值=>字符串格式
val=config.get('section1','user')
print(val) #egon

# 查看标题section1下age的值=>整数格式
val1=config.getint('section1','age')
print(val1) #18

# 查看标题section1下is_admin的值=>布尔值格式
val2=config.getboolean('section1','is_admin')
print(val2) #True

# 查看标题section1下salary的值=>浮点型格式
val3=config.getfloat('section1','salary')
print(val3)  # 31.0

改写

import configparser

config = configparser.ConfigParser()
config.read('a.cfg', encoding='utf-8')


# Remove the whole section2
config.remove_section('section2')

# Remove options k1 and k2 from section1
config.remove_option('section1', 'k1')
config.remove_option('section1', 'k2')

# Check whether a section exists
print(config.has_section('section1'))

# Check whether section1 has a `user` option
print(config.has_option('section1', 'user'))


# Add a section
config.add_section('egon')

# Under section egon, add name=egon and age=18
config.set('egon', 'name', 'egon')
config.set('egon', 'age', '18')  # option values must be strings; passing the int 18 raises TypeError


# Finally write the changes back to the file to make them permanent.
# The with-block closes the handle; the encoding matches the one used by read().
with open('a.cfg', 'w', encoding='utf-8') as f:
    config.write(f)

import configparser
  
config = configparser.ConfigParser()
config["DEFAULT"] = {'ServerAliveInterval': '45',
                      'Compression': 'yes',
                     'CompressionLevel': '9'}
  
config['bitbucket.org'] = {}
config['bitbucket.org']['User'] = 'hg'
config['topsecret.server.com'] = {}
topsecret = config['topsecret.server.com']
topsecret['Host Port'] = '50022'     # mutates the parser
topsecret['ForwardX11'] = 'no'  # same here
config['DEFAULT']['ForwardX11'] = 'yes'
with open('example.ini', 'w') as configfile:
   config.write(configfile)

hashlib模块（重要）

# hash（哈希）:hash是一种算法（3.x里代替了md5模块和sha模块，主要提供 SHA1, SHA224, SHA256, SHA384, SHA512 ，MD5 算法），该算法接受传入的内容，经过运算得到一串hash值
# hash值的特点：
# 1 传入的内容对应的hash值必然一样=====>输入的密码与文件中的密码进行校验
# 2 hash值无法返解成原来的内容=======>密文的传输与验证
# 3 只要使用的hash算法不变，无论校验的内容有多大，得到的hash值长度是固定的
# 1和3结合起来完成文件的完整性校验

用法：密码的加密

import hashlib
m=hashlib.md5()  # m=hashlib.sha256() 这里的加密方法不一样
m.update('hello'.encode('utf8'))
print(m.hexdigest())  #5d41402abc4b2a76b9719d911017c592
m.update('alvin'.encode('utf8'))
print(m.hexdigest())  #92a7e713c30abbb0319fa07da2a5c4af 
m2=hashlib.md5()
m2.update('helloalvin'.encode('utf8'))
print(m2.hexdigest()) #92a7e713c30abbb0319fa07da2a5c4af
'''
注意：推荐将一个文件分多次update，因为文件过大时一次性update容易造成内存溢出。
把一段很长的数据update多次，与一次update这段长数据，得到的结果是一样的，但是update多次为校验大文件提供了可能
但是相对的，每一次都要进行加密，所以多次update的效率会比较低，下面说一下解决方法：
'''
# Hash only a sampled chunk of a large file instead of feeding the whole file.
m1 = hashlib.md5()  # the hash object must exist before update() is called
offset = 0  # byte position to sample from; seek() requires an explicit position
with open('a.txt', mode='rb') as f:
    f.seek(offset)  # jump to the chosen position in the file
    res = f.read(2000)  # take a fixed amount of content to verify
m1.update(res)
m1.hexdigest()

以上加密算法虽然依然非常厉害，但是存在缺陷，即通过撞库可以反解，这时就需要对密码进行加盐

# 模拟撞库
cryptograph='aee949757a2e698417463d47acac93df'  # 截取密文内容
import hashlib
passwds=[
    'alex3714',
    'alex1313',
    'alex94139413',
    'alex123456',
    '123456alex',
    'a123lex',
    ]
def make_passwd_dic(passwds):
    """Build a lookup table from each candidate password to its MD5 hex digest.

    Every password is encoded as UTF-8 before hashing.
    """
    return {
        plain: hashlib.md5(plain.encode('utf-8')).hexdigest()
        for plain in passwds
    }
def break_code(cryptograph,passwd_dic):
    """Print every password in passwd_dic whose digest equals cryptograph.

    passwd_dic maps plaintext passwords to digests; nothing is returned.
    """
    hits = (plain for plain, digest in passwd_dic.items() if digest == cryptograph)
    for plain in hits:
        print('密码是===>\033[46m%s\033[0m' %plain)
cryptograph='aee949757a2e698417463d47acac93df'
break_code(cryptograph,make_passwd_dic(passwds))
# 提升撞库的成本=>密码加盐
import hashlib
m=hashlib.md5()
m.update('天王'.encode('utf-8'))
m.update('alex3714'.encode('utf-8'))
m.update('盖地虎'.encode('utf-8'))
print(m.hexdigest())

python 还有一个hmac模块，它内部对我们创建 key和内容进行进一步的处理然后再加密:

import hmac
h1=hmac.new('hello'.encode('utf-8'),digestmod='md5')
h1.update('world'.encode('utf-8'))
print(h1.hexdigest())
# 要想保证hmac最终结果一致，必须保证：
# 1:hmac.new括号内指定的初始key一样
# 2:无论update多少次，校验的内容累加到一起是一样的内容

# 操作一
import hmac
h1=hmac.new('hello'.encode('utf-8'),digestmod='md5')
h1.update('world'.encode('utf-8'))

print(h1.hexdigest()) # 0e2564b7e100f034341ea477c23f283b

# 操作二
import hmac
h2 = hmac.new('hello'.encode('utf-8'), digestmod='md5')
# Feed the message in two pieces; the accumulated content is still 'world'.
h2.update('w'.encode('utf-8'))
h2.update('orld'.encode('utf-8'))

# Print h2 (not h1) so the output actually shows the split-update digest,
# which equals the digest of a single update with 'world'.
print(h2.hexdigest())

subprocess模块（子进程模块）

import  subprocess
# Mac下：
res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,stdout=subprocess.PIPE)
# 解释一下上面的代码：ls /Users/jieli/Desktop是命令，shell=True是声明使用shell输入命令（就是cmd），stdout是标准输出，也就是将输出的结果放到管道中（subprocess.PIPE）还可以加上一个参数是stderr=subprocess.PIPE，是命令出错时将出错的结果传入到这个管道
print(res.stdout.read().decode('utf-8'))  # 读取执行后的结果，解码是按照操作系统的编码来进行的，因为是在shell上执行的，所以是操作系统的默认编码
# 下面的命令等同于上面,但是上面的优势在于,一个数据流可以和另外一个数据流交互,可以通过爬虫得到结果然后交给grep，下面的命令就是将上面的两条命令结合到了一起
res1=subprocess.Popen('ls /Users/jieli/Desktop | grep txt$',shell=True,stdout=subprocess.PIPE)
print(res1.stdout.read().decode('utf-8'))

# windows下:
dir | findstr 'test*'
dir | findstr 'txt$'
import subprocess
res1=subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,stdout=subprocess.PIPE)
print(res.stdout.read().decode('gbk')) # subprocess使用当前系统默认编码，得到结果为bytes类型，在windows下需要用gbk解码

详细参考官网

logging模块（日志模块）

# 日志的级别
import logging
logging.debug('调试信息')
logging.info('信息')
logging.warning('警告信息')
logging.error('错误信息')
logging.critical('严重错误，需要处理')
# 默认只处理warning以及以上级别的（error、critical）的日志，剩下的不作处理

# 日志的配置
import logging
logging.basicConfig(filename='access.log',format='%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',datefmt='%Y-%m-%d %H:%M:%S %p',level=10)  # 默认的话level是30，也就是说默认的情况下执行warning以及以上级别的日志

logging.debug('调试debug')  # level=10
logging.info('消息info')  # level=20
logging.warning('警告warn')  # level=30
logging.error('错误error')  # level=40
logging.critical('严重critical')  # level=50

# logging.basicConfig()介绍
可在logging.basicConfig()函数中可通过具体参数来更改logging模块默认行为，可用参数有：
filename：用指定的文件名创建FileHandler（后边会具体讲解handler的概念），这样日志会被存储在指定的文件中，可以指定文件存储位置filename='access.log'，不指定默认打印到终端
filemode：文件打开方式，在指定了filename时使用这个参数，默认值为“a”还可指定为“w”
format：指定handler使用的日志显示格式。
datefmt：指定日期时间格式：datefmt='%Y-%m-%d %H:%M:%S %p'
level：设置rootlogger（后边会讲解具体概念）的日志级别
stream：用指定的stream创建StreamHandler。可以指定输出到sys.stderr,sys.stdout或者文件，默认为sys.stderr。注意：stream与filename不能同时指定，在Python 3中同时传入这两个参数会抛出ValueError。


# format参数中可能用到的格式化串：
%(name)s Logger的名字
%(levelno)s 数字形式的日志级别
%(levelname)s 文本形式的日志级别
%(pathname)s 调用日志输出函数的模块的完整路径名，可能没有
%(filename)s 调用日志输出函数的模块的文件名
%(module)s 调用日志输出函数的模块名
%(funcName)s 调用日志输出函数的函数名
%(lineno)d 调用日志输出函数的语句所在的代码行
%(created)f 当前时间，用UNIX标准的表示时间的浮 点数表示
%(relativeCreated)d 输出日志信息时的，自Logger创建以 来的毫秒数
%(asctime)s 字符串形式的当前时间。默认格式是 “2003-07-08 16:49:45,896”。逗号后面的是毫秒
%(thread)d 线程ID。可能没有
%(threadName)s 线程名。可能没有
%(process)d 进程ID。可能没有
%(message)s 用户输出的消息


# Note: %(name)s is the name that was passed to getLogger().
# The long format is split over two adjacent literals, which Python joins
# into one string; the stray backslash in the middle of the line was a syntax error.
standard_format = (
    '%(asctime)s - %(threadName)s:%(thread)d - 日志名字:%(name)s - %(filename)s:%(lineno)d -'
    '%(levelname)s - %(message)s'
)
simple_format = '[%(levelname)s][%(asctime)s][%(filename)s:%(lineno)d]%(message)s'
test_format = '%(asctime)s] %(message)s'

# 日志字典的使用
# Dictionary-style logging configuration: three formatters, three handlers
# (console, default, other) and three loggers ('kkk', '终端提示', and '').
LOGGING_DIC = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'standard': {
            'format': standard_format
        },
        'simple': {
            'format': simple_format
        },
        'test': {
            'format': test_format
        },
    },
    'filters': {},
    # Handlers receive log records; each handler sends them to a different destination.
    'handlers': {
        # Handler that prints to the terminal
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',  # print to the screen
            'formatter': 'simple'
        },
        'default': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',  # save to a file, with rotation
            # 'maxBytes': 1024*1024*5,  # alternative size limit: 5 MB
            'maxBytes': 1000,
            'backupCount': 5,  # maximum number of backup files
            'filename': 'a1.log',  # os.path.join(os.path.dirname(os.path.dirname(__file__)),'log','a2.log')
            'encoding': 'utf-8',
            'formatter': 'standard',

        },
        # Handler that writes to a plain file; its level here is DEBUG
        'other': {
            'level': 'DEBUG',
            'class': 'logging.FileHandler',  # save to a file
            'filename': 'a2.log', # os.path.join(os.path.dirname(os.path.dirname(__file__)),'log','a2.log')
            'encoding': 'utf-8',
            'formatter': 'test',

        },
    },
    # Loggers produce log records and hand them to their handlers, which control the output.
    # The keys below are logger names; several loggers may be configured.
    'loggers': {
        # Configuration used by logging.getLogger('kkk')
        'kkk': {
            'handlers': ['console','other'],  # both the console and the 'other' file handler: printed and written to a2.log
            'level': 'DEBUG', # logger level is the first filter, handler level the second
            'propagate': False,  # do not pass records up to parent loggers
        },
        '终端提示': {
            'handlers': ['console',],  # console handler only: records are just printed to the screen
            'level': 'DEBUG',  # logger level is the first filter, handler level the second
            'propagate': False,  # do not pass records up to parent loggers
        },
        '': {
            'handlers': ['default', ],  # 'default' handler only: records go to the rotating file a1.log
            'level': 'DEBUG',  # logger level is the first filter, handler level the second
            'propagate': False,  # do not pass records up to parent loggers
        },
    },
}
# 上面的日志的命名不够正式，在命名的时候应该有一定的提示性，能够表示日志的主要作用
# 如何使用上面的日志字典
import logging.config
import settings  # the module holding LOGGING_DIC; it must be imported before it is referenced
logging.config.dictConfig(settings.LOGGING_DIC)
print(logging.getLogger)
# 接下来要做的是：拿到日志的产生者即loggers来产生日志
# 第一个日志的产生者：kkk
# 第二个日志的产生者：bbb
# 先导入日志配置字典LOGGING_DIC
import settings
from logging import config,getLogger
config.dictConfig(settings.LOGGING_DIC)
# 再生成日志
logger1=getLogger('kkk')
logger1.info('这是一条info日志')
logger2=getLogger('终端提示')
logger2.info('logger2产生的info日志')
logger3=getLogger('用户交易')
logger3.info('logger3产生的info日志')
logger4=getLogger('用户常规')  # 对于没有名称的日志生产者，默认使用空的（''）日志生产者
logger4.info('logger4产生的info日志')

# 用法:拿到俩个logger
logger = logging.getLogger(__name__) # 线上正常的日志
collect_logger = logging.getLogger("collect") # 需要为领导们单独定制领导们看的日志

# 补充的知识点：
# 1. 日志轮转：日志记录着程序员运行程序的过程中产生的关键信息，所以不能轻易的删除，但是内存是一定的，所以定期对日志文件进行复制，将其中的内容传入到另一个文件中，当做备份，这个功能的实现依赖于日志字典中的handlers中的default，将 'class'设置为 'logging.handlers.RotatingFileHandler'（rotating是轮转的意思）

re模块

一：正则

　正则就是用一些具有特殊含义的符号组合到一起（称为正则表达式）来描述字符或者字符串的方法。

它内嵌在Python中，并通过 re 模块实现。正则表达式模式被编译成一系列的字节码，然后由用 C 编写的匹配引擎执行

二：常用匹配模式(元字符)

http://blog.csdn.net/yufenghyc/article/details/51078107

# =================================匹配模式=================================
# 一对一的匹配（原始方法）
# 'hello'.replace(old,new)
# 'hello'.find('pattern')

# 正则匹配
import re
# \w与\W（word，数字字母下划线和非数字字母下划线）
print(re.findall('\w','hello egon 123')) # ['h', 'e', 'l', 'l', 'o', 'e', 'g', 'o', 'n', '1', '2', '3']  # 就是将字符串分解成一个个字符，然后将符合条件的返回
print(re.findall('\W','hello egon 123')) #[' ', ' ']

#\s与\S（space）空格
print(re.findall('\s','hello  egon  123')) #[' ', ' ', ' ', ' ']
print(re.findall('\S','hello  egon  123')) #['h', 'e', 'l', 'l', 'o', 'e', 'g', 'o', 'n', '1', '2', '3']

# \n \t都是空,都可以被\s匹配
print(re.findall('\s','hello \n egon \t 123')) # [' ', '\n', ' ', ' ', '\t', ' ']

# \n与\t
print(re.findall(r'\n','hello egon \n123')) # ['\n']
print(re.findall(r'\t','hello egon\t123')) # ['\t']

# \d与\D（digital）数字与非数字
print(re.findall('\d','hello egon 123')) # ['1', '2', '3']
print(re.findall('\D','hello egon 123')) # ['h', 'e', 'l', 'l', 'o', ' ', 'e', 'g', 'o', 'n', ' ']

# \A与\Z（以什么开始，以什么结束）只返回匹配的第一个结果
print(re.findall('\Ahe','hello egon 123')) #['he'],\A==>^
print(re.findall('123\Z','hello egon 123')) #['123'],\Z==>$
# 一般不使用A和Z，而是使用^和$,用法是一样的
# ^ 指定匹配必须出现在字符串的开头或行的开头
# \A 指定匹配必须出现在字符串的开头（忽略 Multiline 选项）
# $ 指定匹配必须出现在以下位置：字符串结尾、字符串结尾的 \n 之前或行的结尾
# \Z 指定匹配必须出现在字符串的结尾（Python中\Z只匹配整个字符串的末尾，不匹配结尾 \n 之前的位置，且忽略 Multiline 选项）

# 重复匹配：| . | * | ? | .* | .*? | + | {n,m} |
#. 匹配除了换行符（\n）之外的任意字符，re.findall('a.b','a1b',re.DOTALL)  re.DOTALL参数能够将换行符匹配出来，参数re.S是一样的作用
print(re.findall('a.b','a1b')) #['a1b']
print(re.findall('a.b','a1b a*b a b aaab')) #['a1b', 'a*b', 'a b', 'aab']
print(re.findall('a.b','a\nb')) #[]
print(re.findall('a.b','a\nb',re.S)) #['a\nb']
print(re.findall('a.b','a\nb',re.DOTALL)) #['a\nb']同上一条意思一样

#* 左侧字符重复0次或者无穷次出现（大于等于0），贪婪匹配
print(re.findall('ab*','bbbbbbb')) #[]
print(re.findall('ab*','a')) #['a']
print(re.findall('ab*','abbbb')) #['abbbb']

#+ 出现一次或者无穷次（大于等于1），贪婪匹配
print(re.findall('ab+','a')) #[]
print(re.findall('ab+','abbb')) #['abbb']

#? 出现0次或者一次，贪婪匹配
print(re.findall('ab?','a')) #['a']
print(re.findall('ab?','abbb')) #['ab']

# 匹配所有包含小数在内的数字
print(re.findall('\d+\.?\d*',"asdfasdf123as1.13dfa12adsf1asdf3")) #['123', '1.13', '12', '1', '3'] 但是这种方法并不是很好，一些非法输入也能搜索到，比如01，0001，541.这种，后面会对其进行优化

# {n,m} 左侧字符出现大于等于n次小于等于m次，贪婪匹配
# {0,}相当于*
# {1,}相当于+
# {0,1}相当于?
print(re.findall('ab{2}','abbb')) #['abb']
print(re.findall('ab{2,4}','abbb')) #['abbb']
print(re.findall('ab{1,}','abbb')) #'ab{1,}' ===> 'ab+'
print(re.findall('ab{0,}','abbb')) #'ab{0,}' ===> 'ab*'

#.*默认为贪婪匹配
print(re.findall('a.*b','a1b22222222b')) #['a1b22222222b']

#.*?为非贪婪匹配：推荐使用
print(re.findall('a.*?b','a1b22222222b')) #['a1b']

# [] 匹配指定的字符
print(re.findall('a[1*-]b','a1b a*b a-b')) #[]内的都为普通字符了，且如果-没有被转义的话，应该放到[]的开头或结尾，像[-a]这种
print(re.findall('a[^1*-]b','a1b a*b a-b a=b')) #[]内的^代表的意思是取反，所以结果为['a=b']
print(re.findall('a[0-9]b','a1b a*b a-b a=b')) #[]内的-代表的意思是范围，所以结果为['a1b']
print(re.findall('a[a-z]b','a1b a*b a-b a=b aeb')) 
print(re.findall('a[a-zA-Z]b','a1b a*b a-b a=b aeb aEb')) 

# \# 
print(re.findall('a\\c','a\c')) # 对于正则来说a\\c确实可以匹配到a\c,但是在python解释器读取a\\c时，会发生转义，然后交给re去执行，所以抛出异常
print(re.findall(r'a\\c','a\c')) #r代表告诉解释器使用rawstring，即原生字符串，把我们正则内的所有符号都当普通字符处理，不要转义
print(re.findall('a\\\\c','a\c')) #同上面的意思一样，和上面的结果一样都是['a\\c']

# ():分组
print(re.findall('ab+','ababab123')) #['ab', 'ab', 'ab']
print(re.findall('(ab)+123','ababab123')) #['ab']，匹配到末尾的ab123中的ab
print(re.findall('(?:ab)+123','ababab123')) #findall的结果不是匹配的全部内容，而是组内的内容,?:可以让结果为匹配的全部内容
print(re.findall('href="(.*?)"','<a href="http://www.baidu.com">点击</a>'))#['http://www.baidu.com']
print(re.findall('href="(?:.*?)"','<a href="http://www.baidu.com">点击</a>'))#['href="http://www.baidu.com"']

#|
print(re.findall('compan(?:y|ies)','Too many companies have gone bankrupt, and the next one is my company'))

# ===========================re模块提供的方法介绍===========================
import re
#1
print(re.findall('e','alex make love') )   #['e', 'e', 'e'],返回所有满足匹配条件的结果,放在列表里
#2
print(re.search('e','alex make love').group()) #e,直到找到第一个匹配然后返回一个包含匹配信息的对象,该对象可以通过调用group()方法得到匹配的字符串,如果字符串没有匹配，则返回None。

#3
print(re.match('e','alex make love'))    #None,同search,不过在字符串开始处进行匹配,完全可以用search+^代替match

#4
print(re.split('[ab]','abcd'))     #['', '', 'cd']，先按'a'分割得到''和'bcd',再对''和'bcd'分别按'b'分割

#5
print('===>',re.sub('a','A','alex make love')) #===> Alex mAke love，不指定n，默认替换所有
print('===>',re.sub('a','A','alex make love',1)) #===> Alex make love
print('===>',re.sub('a','A','alex make love',2)) #===> Alex mAke love
print('===>',re.sub('^(\w+)(.*?\s)(\w+)(.*?\s)(\w+)(.*?)$',r'\5\2\3\4\1','alex make love')) #===> love make alex

print('===>',re.subn('a','A','alex make love')) #===> ('Alex mAke love', 2),结果带有总共替换的个数


#6
# Raw string: in a normal literal '\d' is an invalid escape sequence.
obj = re.compile(r'\d{2}')

print(obj.search('abc123eeee').group()) #12
print(obj.findall('abc123eeee')) #['12'], the compiled pattern obj is reused

posted @ 2021-11-05 21:52 奇点^ 阅读(142) 评论(0) 收藏举报

刷新页面返回顶部

奇点^

想走上写代码的不归路