(七)python语法之常用扩展

1.正则表达式

import re

# Sample text shared by the regular-expression examples below.
string = 'Hello123World456Hello'

# re.match only matches at the very start of the string.
print(re.match('Hello', string).span())
# (0, 5)
print(re.match('World', string))
# None

# re.search scans the whole string and returns the first match.
print(re.search('Hello', string).span())
# (0, 5)
print(re.search('World', string).span())
# (8, 13)

# group(0) is the whole match; group(1) and group(2) are the two sub-groups.
result = re.search(r'([A-Za-z]+)(\d+)', string)
print(result.group(0)) # Hello123
print(result.group(1)) # Hello
print(result.group(2)) # 123

# findall returns every match in the string.
pattern = re.compile(r'\d+')
result = pattern.findall(string)
print(result)
# ['123', '456']

# With two groups in the pattern, findall returns a list of tuples.
pattern = re.compile(r'([A-Za-z]+)(\d+)')
result = pattern.findall(string)
print(result)
# [('Hello', '123'), ('World', '456')]

# Replace every matched substring (here: remove all runs of letters).
result = re.sub(r'[A-Za-z]+', '', string)
print(result)
# 123456

# Double the number captured by the named group "value".
def double(matched):
    """Return the integer in the ``value`` group of *matched*, doubled, as a string."""
    return str(2 * int(matched.group('value')))
# The replacement may be a function: re.sub calls it with each match object.
# The pattern is a raw string so that \d reaches the regex engine untouched;
# in a plain string literal '\d' is an invalid escape sequence and Python
# emits a warning for it.
print(re.sub(r'(?P<value>\d+)', double, string))
# Hello246World912Hello

# Split the string on every run of letters.
result = re.split(r'[A-Za-z]+', string)
print(result)
# ['', '123', '456', '']

2.日期时间

time

import time

# Get the current time as a timestamp (a float).
t = time.time()
print(t)  # 1594974068.2558458

# Break the timestamp down into a detailed local-time struct_time.
lt = time.localtime(t)
print(lt)
# time.struct_time(tm_year=2020, tm_mon=7, tm_mday=17, tm_hour=16, tm_min=22, tm_sec=2, tm_wday=4, tm_yday=199, tm_isdst=0)

# Human-readable string for a struct_time.
at = time.asctime(lt)
print(at)
# Fri Jul 17 16:23:03 2020

# Custom formats via strftime.
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
# 2020-07-17 16:26:02
print(time.strftime("%a %b %d %H:%M:%S %Y", time.localtime()))
# Fri Jul 17 16:26:02 2020

# Parse a formatted string (strptime) and convert it back to a timestamp (mktime).
a = "Fri Jul 17 16:26:02 2020"
print(time.mktime(time.strptime(a,"%a %b %d %H:%M:%S %Y")))
# 1594974362.0

datetime

from datetime import datetime, timedelta, date

# Build datetime objects: the current moment, or an explicit date and time.
print(datetime.now())
# 2020-07-17 16:35:17.112810
print(datetime(2020, 7, 17, 16, 35))
# 2020-07-17 16:35:00

# Convert between datetime and timestamp.
# NOTE(review): these datetimes carry no tzinfo, so the sample values below
# presumably reflect the author's local time zone - confirm before relying on them.
print(datetime(2020, 7, 17, 16, 35).timestamp())
# 1594974900.0
print(datetime.fromtimestamp(1594974900.0))
# 2020-07-17 16:35:00

# Convert between datetime and formatted strings.
print(datetime.strptime('2020-7-17 16:35:59', '%Y-%m-%d %H:%M:%S'))
# str->datetime 2020-07-17 16:35:59
print(datetime.now().strftime('%a, %b %d %H:%M'))
# datetime->str Fri, Jul 17 16:38

# datetime arithmetic with timedelta.
now = datetime.now()
print(now)
# 2020-07-17 16:40:40.539739
now += timedelta(days=2, hours=12)
now -= timedelta(days=1)
print(now)
# 2020-07-19 04:40:40.539739  (net shift: +1 day 12 hours)

# Formatted output: date objects accept strftime codes as a format spec.
d = date(2020, 7, 17)
print(format(d, '%A, %B %d, %Y'))
# Friday, July 17, 2020
print('Today is {:%d %b %Y}'.format(d))
# Today is 17 Jul 2020

3.序列化

pickle

import pickle

# Serialize an object to bytes and restore it.
d1 = dict(name='Tom', age=20)
s  = pickle.dumps(d1)   # serialize
d2 = pickle.loads(s)    # deserialize
print(d2)
# {'name': 'Tom', 'age': 20}

# Save the object to a file (opened in binary write mode).
with open('dump.txt', 'wb') as f:
    pickle.dump(d1, f)

# Load the object back from the file (opened in binary read mode).
with open('dump.txt', 'rb') as f:
    d3 = pickle.load(f)
    print(d3)
    # {'name': 'Tom', 'age': 20}

json

import json

# Serialize a dict to a JSON string.
d = dict(name='Tom', age=20)
json_str = json.dumps(d)
print(json_str)
# {"name": "Tom", "age": 20}

# Deserialize a JSON string back into a dict.
json_str = '{"age": 20, "name": "Tom"}'
d = json.loads(json_str)
print(d)
# {'age': 20, 'name': 'Tom'}

# 对象实例的序列化和反序列化
class Student:
    """Simple record holding a student's ``name`` and ``age``."""

    def __init__(self, name, age):
        # Store both constructor arguments unchanged as instance attributes.
        self.name, self.age = name, age
        
def student2dict(std):
    """Return a dict with the ``name`` and ``age`` attributes of *std*.

    Used in this file as the ``default=`` callback of ``json.dumps``.
    """
    return dict(name=std.name, age=std.age)

def dict2student(d):
    """Build a ``Student`` from the ``'name'`` and ``'age'`` entries of *d*.

    Used in this file as the ``object_hook=`` callback of ``json.loads``.
    A missing key raises ``KeyError``.
    """
    name, age = d['name'], d['age']
    return Student(name, age)
        
# Serialize an instance: student2dict is passed as the default= callback,
# turning the Student into a dict that json.dumps can encode.
s = Student('Tom', 20)
json_str = json.dumps(s, default=student2dict)
print(json_str)
# {"name": "Tom", "age": 20}

# Deserialize into an instance: dict2student is passed as the object_hook=
# callback, turning the decoded dict into a Student.
json_str = '{"age": 20, "name": "Tom"}'
s = json.loads(json_str, object_hook=dict2student)
print(s.name, s.age)
# Tom 20

4.哈希函数

#1 Using hashlib
import hashlib
# MD5 of the whole message fed in a single update() call.
md5 = hashlib.md5()
md5.update('how to use md5 in python hashlib?'.encode('utf-8'))
print(md5.hexdigest())
# d26a53750bc40b38b65a520292f69306

# The same message fed in two pieces.
md5 = hashlib.md5()
md5.update('how to use md5 in '.encode('utf-8'))
md5.update('python hashlib?'.encode('utf-8'))
print(md5.hexdigest())
# Calling update() several times gives the same result.

# SHA-1 is used the same way.
sha1 = hashlib.sha1()
sha1.update('how to use sha1 in '.encode('utf-8'))
sha1.update('python hashlib?'.encode('utf-8'))
print(sha1.hexdigest())
# 2c76b57293ce30acef38d98f6046927161b46a44

#2 Keyed hashing with hmac
import hmac
message = b'Hello, world!'
key = b'secret'
h = hmac.new(key, message, digestmod='MD5')
# Print the digest explicitly: as a bare expression statement its value is
# discarded when this snippet runs as a script instead of in the REPL.
print(h.hexdigest())
# fa4ee7d173f2d97ee79022d1a7355bcf

5.二进制编码

base64

# Base64 represents binary data using 64 printable characters.
import base64
# The results are printed explicitly; as bare expression statements they
# would be discarded when this snippet runs as a script.

# bytes -> Base64
print(base64.b64encode(b'binary\x00string'))      # b'YmluYXJ5AHN0cmluZw=='
# Base64 -> bytes
print(base64.b64decode(b'YmluYXJ5AHN0cmluZw=='))  # b'binary\x00string'

# The URL-safe variant replaces '+' and '/' with '-' and '_'.
print(base64.b64encode(b'i\xb7\x1d\xfb\xef\xff'))          # b'abcd++//'
print(base64.urlsafe_b64encode(b'i\xb7\x1d\xfb\xef\xff'))  # b'abcd--__'
print(base64.urlsafe_b64decode('abcd--__'))                # b'i\xb7\x1d\xfb\xef\xff'

struct

# The struct module converts between Python values and C structs represented
# as Python bytes objects. Typical uses are binary file formats and binary
# data sent over the network.
#
# C layout mirrored by the format string below:
#
#     struct s_data {
#         unsigned short id;       /* H  */
#         unsigned int   length;   /* I  */
#         char           data[5];  /* 5s */
#     };

from struct import Struct
p_id = 0
p_length = 5
p_data = b'hello'
# '>' = big-endian, '<' = little-endian, '!' = network order (= big-endian).
# With an explicit byte order the fields are packed back to back, so the
# packed size is 2 + 4 + 5 = 11 bytes.
c_struct = Struct('>HI5s')

# Python values -> packed C-struct bytes
packed = c_struct.pack(p_id, p_length, p_data)
print(packed)    # b'\x00\x00\x00\x00\x00\x05hello'

# Packed C-struct bytes -> Python values
unpacked = c_struct.unpack(b'\x00\x00\x00\x00\x00\x05hello')
print(unpacked)  # (0, 5, b'hello')

BytesIO

# BytesIO and StringIO perform the I/O operations in memory.

# BytesIO: an in-memory buffer in binary mode that can be used like a file object.
from io import BytesIO
with BytesIO() as bio:
    print(bio.readable(), bio.writable(), bio.seekable())
    bio.write(b'hello\nPython')
    bio.seek(0)
    print(bio.readline())
    print(bio.getvalue())  # ignores the stream position and returns the whole content
# Leaving the with-block closes the buffer and releases it.

StringIO

# StringIO: the text-mode counterpart, also used like a file object.
from io import StringIO
with StringIO() as sio:
    print(sio.readable(), sio.writable(), sio.seekable())
    sio.write("hello\nPython")
    sio.seek(0)
    print(sio.readline())
    print(sio.getvalue())  # ignores the stream position and returns the whole content
# Leaving the with-block closes the buffer and releases it.

6.日志

#1 Basic usage
import logging

# Severity order: DEBUG < INFO < WARNING < ERROR
# The threshold is DEBUG, the lowest level listed above, so every message
# below is recorded. Output goes to the file log1.txt.
logging.basicConfig(
    level = logging.DEBUG,
    filename = "log1.txt",
    format = '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

# Logger named after the current module; the name fills %(name)s in the format.
logger = logging.getLogger(__name__)
logger.info("Start wirte log")
logger.warning("Something maybe wrong")
logger.debug("Try to fix bug")
logger.info("Finish")

#2 多进程轮转,用于多进程写同一日志文件
import os, datetime
import logging
import logging.handlers
from cloghandler import ConcurrentRotatingFileHandler

def console_out(errorInfo):
    """Append *errorInfo* at INFO level to today's log file under ``./logs``.

    The file is named ``YYYYMMDD.txt`` after the current date and is written
    through a ``ConcurrentRotatingFileHandler`` opened in append mode.
    The handler is attached to the root logger only for the duration of the
    call.

    Side effect: the root logger's level is set to INFO on every call.

    Args:
        errorInfo: The message to log.
    """
    path = './logs'
    # exist_ok avoids the check-then-create race when several processes log
    # for the first time simultaneously.
    os.makedirs(path, exist_ok=True)
    today = datetime.date.today().strftime('%Y%m%d') + '.txt'
    logFile = os.path.join(path, today)

    # NOTE(review): the last two arguments are presumably maxBytes (20 MiB)
    # and backupCount (10), as in logging's RotatingFileHandler - confirm
    # against the cloghandler documentation.
    handler = ConcurrentRotatingFileHandler(logFile, "a", 20 * 1024 * 1024, 10)
    fmt = '%(asctime)s - %(levelname)s - %(message)s'
    handler.setFormatter(logging.Formatter(fmt))

    logger = logging.getLogger()
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    try:
        logger.info(errorInfo)
    finally:
        # Always detach and close the handler so repeated calls neither
        # stack handlers on the root logger nor leak open file handles.
        logger.removeHandler(handler)
        handler.close()

7.表格

# pip install openpyxl

import openpyxl

#1 Create a new workbook and write to it
wb = openpyxl.Workbook()
# Create a worksheet at position 0.
# NOTE(review): the workbook's initial default sheet presumably remains as a
# second, empty sheet - confirm against the openpyxl documentation.
ws = wb.create_sheet(index=0)

# Fill rows 1 to 4 with the same three values.
for i in range(1,5):
    # row i, column 1
    ws.cell(i, 1).value = "NAME"
    # row i, column 2
    ws.cell(i, 2).value = "AGE"
    # row i, column 3
    ws.cell(i, 3).value = "BIRTH"

wb.save("test.xlsx")

#2 Load the workbook and read it
wb = openpyxl.load_workbook('test.xlsx')
ws = wb.active

# Iterate over all rows; each row is indexed by column position.
for row in ws.rows:
    name = row[0].value
    age = row[1].value
    birth = row[2].value
    print(name, age, birth)
posted @ 2020-08-20 17:11  qxcheng  阅读(129)  评论(0)  编辑  收藏  举报