Python复习
最近一直在搞Rust,时间一长,Python许多东西反倒不那么熟悉了。不过这正好是个把Python系统复习一遍的机会。
Python中一切皆对象
一切皆对象是好多面向对象的语言都提的一句话,不过Python中是真一切皆对象。包括常见的函数和类,也是对象,作为Python中的一等公民,可以被赋值给一个变量、添加到集合对象中、作为函数的参数或返回值。
type、object、class的关系
- 所有类都继承object,包括type
- type实例化出了所有对象,包括object和它自己
魔法函数
class A:
    """List wrapper that is iterable through ``__getitem__`` alone."""

    def __init__(self, names: list):
        self.names = names

    def __getitem__(self, item):
        # Delegates to the wrapped list; the IndexError raised past the end
        # is what terminates the fallback iteration below.
        return self.names[item]


# A defines no __iter__, so the for loop falls back to calling __getitem__
# with 0, 1, 2, ... until IndexError is raised.
for i in A([1, 2, 3, 4, 5, 6]):
    print(i)
常见魔法函数
-
字符串相关
# __repr__
# __str__
class A:
    """List wrapper with distinct ``str()`` and ``repr()`` renderings."""

    def __init__(self, names: list):
        self.names = names

    def __getitem__(self, item):
        return self.names[item]

    def __str__(self):
        return ', '.join(str(n) for n in self.names)

    def __repr__(self):
        return f'from __repr__ {self.__str__()}'


a = A([1, 2, 3, 4, 5, 6])
b = A([7, 8, 9, 10])
a.names.extend(b)  # list.extend iterates b through A.__getitem__
print(a)  # print() renders with __str__
# A bare ``a`` only echoes at the interactive prompt; in a script the repr
# has to be printed explicitly.
print(repr(a))  # __repr__
-
集合、序列相关
# __len__ # __getitem__ # __setitem__ # __delitem__ # __contains__
-
迭代相关
# __iter__ # __next__
-
可调用
# __call__
-
with上下文管理器
# __enter__ # __exit__
-
数值相关
# __abs__ # __bool__ # __int__ # __float__ # __hash__ # __index__
-
元类相关
# __new__ # __init__
-
属性相关
# __getattr__, __setattr__, __delattr__ # __getattribute__ # __dir__
-
属性描述符
# __get__ # __set__ # __delete__
-
协程
# __await__ # __aiter__ # __anext__ # __aenter__ # __aexit__
Python中的类和对象
鸭子类型
即在分类时更关注对象的行为。Golang中的面向接口编程应该也借鉴了这种思想(Golang的错误处理太丑了)。我觉得这一点是克服面向对象编程中某些冗杂问题而保持灵活性的很关键的一点,但是我更倾向于直接移除继承这个特性,继承真的是多数混乱的开始,它带来的问题简直比它带来的便捷更多。
抽象基类
import abc
import random
class Payment(metaclass=abc.ABCMeta):
    """Abstract payment channel; it cannot be instantiated until ``pay`` is implemented."""

    @abc.abstractmethod
    def pay(self):
        """Carry out the payment."""
        ...


class WeChatPay(Payment):
    """Payment channel that reports WeChatPay."""

    def pay(self):
        print('use WeChatPay')


class AliPay(Payment):
    """Payment channel that reports AliPay."""

    def pay(self):
        print('use AliPay')


def get_payment():
    """Return a randomly chosen concrete ``Payment`` instance."""
    x = random.randint(0, 1)
    if x:
        return WeChatPay()
    return AliPay()


# The caller only relies on the abstract interface, not on the concrete class.
a = get_payment()
a.pay()
类变量和对象变量
class A:
    """Shows how class attributes and instance attributes are looked up."""

    class_var = 'A'

    def __init__(self):
        self.inst_var = 'self'


a = A()
A.class_var = 'class_var_changed_by_class'
print(a.inst_var, a.class_var, A.class_var)
# Reading a name the instance does not own falls back to the class (or a
# parent class). Assigning through the instance never touches the class:
# it creates a new instance attribute that shadows the class attribute.
a.class_var = 'self'
print(a.inst_var, a.class_var, A.class_var)
# Plain depth-first and breadth-first orders both break down on some
# hierarchies, so Python computes the MRO with the C3 algorithm.
class D:
    ...


class C(D):
    ...


class B(D):
    ...


class A(B, C):
    ...


# Diamond inheritance: D is visited once, after both B and C.
print(A.__mro__)
数据封装
python中没有其他语言中那么多或者那么细分的权限修饰关键词,但是仍可以通过语法约定私有属性。
class A:
    """Holds a name-mangled ("private") attribute."""

    def __init__(self, x):
        # Inside the class body ``__x`` is rewritten to ``_A__x``.
        self.__x = x

    def get_x(self):
        """Return the value stored in the mangled attribute."""
        return self.__x


a = A(99)
print(a.get_x())
# A double-underscore attribute is not strictly hidden: the mangled name is
# still reachable from outside, so privacy is a convention.
print(a._A__x)
自省
python可以通过一定方式获取对象当前的内部结构
class A:
    x = 'A'


class B(A):
    def __init__(self, x):
        self.x = x


b = B('self')
print(b.__dict__)  # only the attributes stored on the instance itself
print(A.__dict__)  # the class namespace, including the class attribute x
# dir() also lists names reachable through the class and its bases. The
# result is printed because a bare expression is discarded in a script.
print(dir(b))
super
class A:
    def __init__(self):
        print('A')
        self.x = 'A'


class B(A):
    def __init__(self):
        print('B')
        self.x = 'B'
        # A.__init__ runs after the assignment above and overwrites x.
        super().__init__()


b = B()
print(b.x)
super()其实并非直接调用父类的方法,而是在实例所属类的MRO列表中,从给定类之后的下一个类开始查找要调用的方法
class D:
    def __init__(self):
        print('D')


class B(D):
    def __init__(self):
        print('B')
        # For an instance of A the next class after B in the MRO is C, not D.
        super().__init__()


class C(D):
    def __init__(self):
        print('C')
        super().__init__()


class A(B, C):
    def __init__(self):
        print('A')
        super().__init__()


# Prints A, B, C, D: each super() call continues along A.__mro__.
a = A()
mixin模式
这种模式和新兴语言中的去继承、使用组合或者嵌入这类思想有异曲同工之妙。每个mixin类只实现单一功能,摆脱和其他类的强关联。这样,使用者可以自由组合各种功能而无需担心多继承带来的属性混乱。
with上下文管理器协议
class A:
    """Minimal implementation of the context manager protocol."""

    def __enter__(self):
        print('__enter__')
        if not hasattr(self, 'x'):
            # Demo only: instance attributes should normally be defined in __init__.
            self.x = 'x'
        return self

    def __exit__(self, *_args):
        # Returns None, so an exception raised in the with-body propagates.
        print('__exit__')

    def say_x(self):
        print(self.x)


with A() as a:
    a.say_x()
还可以通过contextmanager来聚合上下文管理器协议的实现
from contextlib import contextmanager
@contextmanager
def open_source(source):
    """Announce the opening and closing of *source* around a with-block.

    Yields *source*, so ``with open_source(x) as obj`` binds ``x`` instead
    of ``None``.
    """
    print(f'{source} open')
    try:
        yield source
    finally:
        # Runs even when the with-body raises, so the close step is never skipped.
        print(f'{source} close')


with open_source('source') as obj:
    print('process with source')
元类编程
property动态属性
class A:
    """Exposes ``_x`` through a read/write property named ``x``."""

    def __init__(self, x):
        self._x = x

    @property
    def x(self):
        return self._x

    @x.setter
    def x(self, val):
        self._x = val


a = A(123)
print(a.x)
a.x = 456  # goes through the setter
print(a.x)
属性相关魔法函数
class A:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    # Called only after normal attribute lookup has failed.
    def __getattr__(self, item):
        # Falls off the end, so every unknown attribute evaluates to None
        # instead of raising AttributeError.
        print(item)
        print('attribute not found')

    # __getattribute__ would be entered unconditionally on every attribute
    # access; in most cases it should not be customized.


a = A(123, 456)
a.z  # no such attribute, so __getattr__ runs
属性描述符
class CheckStr:
    """Data descriptor that only accepts ``int`` values.

    The value is stored in the owning instance's ``__dict__`` rather than
    on the descriptor, because one descriptor object is shared by every
    instance of the class it is attached to.

    NOTE(review): despite the name, the check is for ``int``, not ``str``;
    the name is kept so existing references keep working.
    """

    # Fallback storage key for the case where __set_name__ never runs
    # (descriptor attached after the class was created).
    _name = '_checkstr_value'

    def __set_name__(self, owner, name):
        self._name = name

    def __get__(self, instance, owner):
        if instance is None:
            # Accessed on the class itself: hand back the descriptor.
            return self
        try:
            return instance.__dict__[self._name]
        except KeyError:
            raise AttributeError(self._name) from None

    def __set__(self, instance, value):
        if not isinstance(value, int):
            raise ValueError(f'expected int, got {type(value).__name__}')
        instance.__dict__[self._name] = value

    def __delete__(self, instance):
        instance.__dict__.pop(self._name, None)


class A:
    to_check = CheckStr()


a = A()
# Attribute lookup order for ``obj.attr``:
# 1. attr is in the __dict__ of the class (or a parent) and is a data
#    descriptor: its __get__ wins.
# 2. attr is in obj.__dict__: return obj.__dict__[attr].
# 3. attr is in the __dict__ of the class (or a parent) and is a non-data
#    descriptor: call its __get__.
# 4. attr is in the __dict__ of the class (or a parent) and is not a
#    descriptor: return cls.__dict__[attr].
# 5. if the class defines __getattr__, call it.
# 6. raise AttributeError.
a.to_check = 123
print(a.to_check)
自定义元类
class Field:
    """Base descriptor for model columns.

    Values are kept in the model instance's ``__dict__``. A descriptor
    object lives on the class and is shared by all instances, so storing
    the value on the descriptor would make every instance see the last
    value assigned anywhere.
    """

    _name = None  # attribute name on the owning class, set by __set_name__

    def __set_name__(self, owner, name):
        self._name = name

    def _slot(self):
        """Return the key under which the value is stored on an instance."""
        if self._name is not None:
            return self._name
        # Descriptor attached without __set_name__: use a per-descriptor key.
        return f'_field_{id(self)}'

    def _store(self, instance, value):
        instance.__dict__[self._slot()] = value

    def __get__(self, instance, owner):
        if instance is None:
            return self
        # An unset field reads as None.
        return instance.__dict__.get(self._slot())


class CHarField(Field):
    """String column limited to ``max_length`` characters."""

    def __init__(self, db_column='', max_length=10):
        if not isinstance(max_length, int):
            raise TypeError('max_length should be int')
        if max_length <= 0:
            raise ValueError('max_length should be positive')
        self.db_column = db_column
        self.max_length = max_length

    def __set__(self, instance, value):
        if not isinstance(value, str):
            raise TypeError(f'expected str instance, {type(value)} found')
        if len(value) > self.max_length:
            raise ValueError('exceed maximum length')
        self._store(instance, value)


class IntField(Field):
    """Integer column restricted to ``[min_value, max_value]``."""

    def __init__(self, db_column='', min_value=0, max_value=100):
        if not (isinstance(min_value, int) and isinstance(max_value, int)):
            raise TypeError('min_value and max_value should be both int')
        if max_value < min_value:
            raise ValueError('max_value < min_value')
        self.db_column = db_column
        self.min_value = min_value
        self.max_value = max_value

    def __set__(self, instance, value):
        if not isinstance(value, int):
            raise TypeError(f'expected int instance, {type(value)} found')
        if value < self.min_value or value > self.max_value:
            raise ValueError(f'value should between {self.min_value} and {self.max_value}')
        self._store(instance, value)
class ModelMetaClass(type):
    """Metaclass that collects ``Field`` attributes and table metadata.

    Every class created with it, except the one named ``BaseModel``, gets:

    * ``field``: a dict mapping attribute name to its ``Field`` object,
      including fields inherited from base models;
    * ``_meta``: a dict whose ``'db_table'`` entry is the inner
      ``Meta.db_table`` if given, else the lower-cased class name.
    """

    def __new__(cls, *args, **kwargs):
        name, bases, attrs = args
        if name == 'BaseModel':
            # The abstract base carries no columns of its own.
            return super().__new__(cls, *args, **kwargs)
        field = dict()
        # Start from the fields of the base models so that a subclass of a
        # model keeps its parent's columns; the leftmost base wins.
        for base in reversed(bases):
            field.update(getattr(base, 'field', None) or {})
        for k, v in attrs.items():
            if isinstance(v, Field):
                field[k] = v
            else:
                # A plain attribute overriding an inherited field removes it.
                field.pop(k, None)
        _meta = dict()
        db_table = name.lower()
        if (attrs_meta := attrs.get('Meta', None)) and (table := getattr(attrs_meta, 'db_table', None)):
            db_table = table
        _meta['db_table'] = db_table
        attrs['_meta'] = _meta
        attrs['field'] = field
        return super().__new__(cls, name, bases, attrs, **kwargs)
class BaseModel(metaclass=ModelMetaClass):
    """Base class for models; keyword arguments become field values."""

    def __init__(self, *args, **kwargs):
        # Positional arguments are accepted but ignored.
        for k, v in kwargs.items():
            # setattr goes through the field descriptors, so values are validated.
            setattr(self, k, v)
        super().__init__()

    def save(self):
        """Print the INSERT statement for this instance and its parameters.

        Values are reported separately from the statement, using ``%s``
        placeholders, instead of being pasted into the SQL text unquoted.
        """
        fields = list()
        values = list()
        for k, v in self.field.items():
            # Fall back to the attribute name when no column name was given.
            col = getattr(v, 'db_column', None) or k.lower()
            fields.append(col)
            values.append(getattr(self, k))
        placeholders = ', '.join(['%s'] * len(fields))
        sql = f'insert {self._meta["db_table"]}({", ".join(fields)}) value({placeholders})'
        sql = f'"{sql}"'
        print(sql, values)
class User(BaseModel):
    """Example model with a string column and an integer column."""

    name = CHarField(db_column='', max_length=10)
    age = IntField(db_column='', min_value=0, max_value=100)

    class Meta:
        # Read by the metaclass to name the table.
        db_table = 'user'


if __name__ == '__main__':
    user = User(name='abc', age=18)
    user.save()
迭代器和生成器
可迭代对象和迭代器
# Stub-style declarations of the two protocols, shown for illustration
# only: Protocol, abstractmethod and _T_co are not defined here, so this
# excerpt is not meant to be executed.

# Iterable implements __iter__
class Iterable(Protocol[_T_co]):
    @abstractmethod
    def __iter__(self) -> Iterator[_T_co]: ...


# Iterator additionally implements __next__
class Iterator(Iterable[_T_co], Protocol[_T_co]):
    @abstractmethod
    def __next__(self) -> _T_co: ...

    def __iter__(self) -> Iterator[_T_co]: ...
from collections.abc import Iterable, Iterator
class IteratorA:
    """Iterator over an indexable sequence ``li``."""

    def __init__(self, li):
        self.li = li
        self.index = 0

    def __iter__(self):
        # An iterator must itself be iterable; without this method it could
        # not be used directly in a for loop or passed to list().
        return self

    def __next__(self):
        try:
            res = self.li[self.index]
        except IndexError:
            # Past the end: signal exhaustion without chaining the IndexError.
            raise StopIteration from None
        self.index += 1
        return res
class A:
    """Iterable that hands out a fresh ``IteratorA`` on every ``iter()`` call."""

    def __init__(self, a):
        self.a = a

    def __iter__(self):
        return IteratorA(self.a)


if __name__ == '__main__':
    a = A([1, 2, 3])
    iter_a = iter(a)
    # Spelled out by hand: this is what a for loop does under the hood.
    while True:
        try:
            print(next(iter_a))
        except StopIteration:
            break
生成器
函数中出现yield关键字即称该函数为生成器函数。
python中调用函数创建的栈帧对象分配在堆内存上,即栈帧对象不依赖于调用行为,在回收前可以独立存在。
def gen():
    """Yield 1, 2 and 3; the return value 3 travels in ``StopIteration.value``."""
    a = 1
    yield a
    a += 1
    yield a
    a += 1
    yield a
    return a
if __name__ == '__main__':
    import dis

    g = gen()
    # dis.dis() prints the listing itself and returns None, so its result
    # is not printed.
    dis.dis(g)
    # Listing recorded when these notes were written; opcodes and offsets
    # differ between CPython versions:
    #   2           0 LOAD_CONST               1 (1)
    #               2 STORE_FAST               0 (a)
    #   3           4 LOAD_FAST                0 (a)
    #               6 YIELD_VALUE
    #               8 POP_TOP
    #   4          10 LOAD_FAST                0 (a)
    #              12 LOAD_CONST               1 (1)
    #              14 INPLACE_ADD
    #              16 STORE_FAST               0 (a)
    #   5          18 LOAD_FAST                0 (a)
    #              20 YIELD_VALUE
    #              22 POP_TOP
    #   6          24 LOAD_FAST                0 (a)
    #              26 LOAD_CONST               1 (1)
    #              28 INPLACE_ADD
    #              30 STORE_FAST               0 (a)
    #   7          32 LOAD_FAST                0 (a)
    #              34 YIELD_VALUE
    #              36 POP_TOP
    #   8          38 LOAD_FAST                0 (a)
    #              40 RETURN_VALUE

    # The generator keeps its frame between calls: f_lasti is the offset of
    # the last executed instruction and f_locals the current local
    # variables. The f_lasti values in the comments match the listing above.
    print(g.gi_frame.f_lasti)  # -1
    print(g.gi_frame.f_locals)  # {}
    next(g)
    print(g.gi_frame.f_lasti)  # 6
    print(g.gi_frame.f_locals)  # {'a': 1}
    next(g)
    print(g.gi_frame.f_lasti)  # 20
    print(g.gi_frame.f_locals)  # {'a': 2}
    next(g)
    print(g.gi_frame.f_lasti)  # 34
    print(g.gi_frame.f_locals)  # {'a': 3}
def read_file(f, sep, length):
    """Lazily split the text read from *f* on *sep*.

    The file is consumed ``length`` characters at a time, so a large input
    is never held in memory as a whole.

    Args:
        f: object with a ``read(n)`` method returning ``str``.
        sep: record separator; must not be empty.
        length: number of characters requested per ``read`` call.

    Yields:
        Each record without its separator. The text after the last
        separator is always yielded, even when it is empty.

    Raises:
        ValueError: if *sep* is empty (raised on the first ``next()``).
    """
    if not sep:
        # An empty separator would match everywhere and never make progress.
        raise ValueError('sep must not be empty')
    buf = ''
    while chunk := f.read(length):
        # Everything before the last separator is complete; the tail may
        # still be continued by the next chunk.
        *records, buf = (buf + chunk).split(sep)
        yield from records
    yield buf
多线程、多进程
多线程通信
import time
import threading
import wx
# Shared flag: the GUI clears it to ask the worker to stop.
keep_going = True
# Guards reads and writes of keep_going across the two threads.
lock = threading.Lock()


class A(threading.Thread):
    """Daemon worker that processes ``tasks`` until told to stop."""

    def __init__(self, name, tasks):
        # daemon=True replaces the deprecated setDaemon(True).
        super().__init__(name=name, daemon=True)
        self.tasks = tasks

    @staticmethod
    def handle_task(task):
        print(f'开始处理任务{task}')
        time.sleep(1)
        print(f'任务{task}处理完成')

    def run(self) -> None:
        task = None
        for task in self.tasks:
            # The with-block releases the lock on every path, including break.
            with lock:
                if not keep_going:
                    break
            self.handle_task(task)
        print(f'任务处理已停止/结束,当前任务{task}')
class TestFrame(wx.Frame):
    """Frame that drives an abortable progress dialog while a worker thread runs."""

    def __init__(self, parent, max_seconds, task_handler: A):
        wx.Frame.__init__(self, parent, -1)
        # The dialog advances every 250 ms, i.e. four ticks per second.
        self.max_seconds = max_seconds * 4
        self.task_handler = task_handler
        self.run()

    def run(self):
        """Start the worker and tick the dialog until it is aborted or time is up."""
        global keep_going
        dlg = wx.ProgressDialog("实例", "内容", maximum=self.max_seconds, parent=self,
                                style=0 | wx.PD_APP_MODAL | wx.PD_CAN_ABORT | wx.PD_ESTIMATED_TIME |
                                wx.PD_REMAINING_TIME)
        count = 0
        self.task_handler.start()
        while keep_going and count < self.max_seconds:
            count += 1
            wx.MilliSleep(250)
            wx.Yield()
            # Update() reports whether to continue; the worker polls
            # keep_going under the same lock. The with-block releases the
            # lock even if Update() raises.
            with lock:
                if count >= self.max_seconds / 2:
                    (keep_going, skip) = dlg.Update(count, "时间过半")
                else:
                    (keep_going, skip) = dlg.Update(count)
        dlg.Destroy()
        print(f'当前剩余时间{(self.max_seconds-count) // 4}秒')
        self.Destroy()
class App(wx.App):
    """Application that wires a 20-task worker to the progress frame."""

    def OnInit(self):
        task_handler = A('task_handler', list(range(1, 21)))
        frame = TestFrame(None, 20, task_handler)
        frame.Show()
        return True


if __name__ == '__main__':
    app = App()
    app.MainLoop()
线程同步
存在数据竞争的场景或者脏读敏感的场景应该加锁,对于典型的生产者消费者模型,可以用Queue实现线程间安全通信。
在同一个线程中有多次acquire的需求,可以用RLock。
Condition
复杂的线程间(尤其是线程交替逻辑比较多的)通讯可以使用Condition,注意start顺序。
Condition的锁:wait()时会先创建一把临时锁并放入内部的deque,然后释放Condition层的锁并阻塞在这把临时锁上;其他拿到Condition层锁的线程可以继续执行,并在调用notify()时从deque中取出临时锁并释放,从而唤醒等待的线程,被唤醒的线程会重新获取Condition层的锁后再从wait()返回。
import threading
cond = threading.Condition()
# Name of the thread allowed to print next; only touched while holding the
# condition's lock.
_turn = 'A'


def _print_in_turn(thread, peer_name):
    """Print ``thread.rang`` one value at a time, alternating with the peer.

    Waiting on the ``_turn`` predicate instead of a bare ``wait()`` means a
    notification sent before the peer started waiting cannot be lost, so
    the result no longer depends on which thread is started first.
    """
    global _turn
    with thread.cond:
        for i in thread.rang:
            thread.cond.wait_for(lambda: _turn == thread.name)
            print(f'{thread.name}: {i}')
            _turn = peer_name
            thread.cond.notify()


class A(threading.Thread):
    """Prints the even numbers 0..8, taking turns with thread B."""

    def __init__(self, cond: threading.Condition):
        super().__init__(name='A')
        self.cond = cond
        self.rang = iter(range(0, 10, 2))

    def run(self) -> None:
        _print_in_turn(self, 'B')


class B(threading.Thread):
    """Prints the odd numbers 1..9, taking turns with thread A."""

    def __init__(self, cond):
        super().__init__(name='B')
        self.cond = cond
        self.rang = iter(range(1, 10, 2))

    def run(self) -> None:
        _print_in_turn(self, 'A')
if __name__ == '__main__':
    a = A(cond)
    b = B(cond)
    # B is started first so that it is already waiting when A notifies.
    b.start()
    a.start()
    a.join()
    b.join()
Semaphore
import threading
import time
cond = threading.Condition()  # not used by this example


class A(threading.Thread):
    """Worker that handles one task and then frees its semaphore slot.

    Args:
        task: label of the task, used in the printed messages.
        semaphore: slot counter acquired by the creator and released here.
        delay: seconds of simulated work (default 2).
    """

    def __init__(self, task, semaphore: threading.Semaphore, delay: float = 2):
        super().__init__()
        self.task = task
        self.semaphore = semaphore
        self.delay = delay

    def run(self) -> None:
        print(f'开始处理任务{self.task}')
        try:
            time.sleep(self.delay)
        finally:
            # Always hand the slot back, otherwise a failing task would
            # shrink the pool of permits for good.
            self.semaphore.release()
        print(f'任务{self.task}处理结束')
class B(threading.Thread):
    """Dispatcher that starts one ``A`` worker per task.

    A semaphore slot is acquired before each worker is started, so the
    number of workers running at once is bounded by the semaphore's value.
    """

    def __init__(self, semaphore: threading.Semaphore):
        super().__init__()
        self.task = range(0, 12)
        self.semaphore = semaphore

    def run(self) -> None:
        tmp_li = list()
        for i in self.task:
            # Blocks while all slots are taken; each worker releases its slot.
            self.semaphore.acquire()
            tmp_thread = A(i, self.semaphore)
            tmp_thread.start()
            tmp_li.append(tmp_thread)
        # join() returns at once for a thread that has already finished.
        for t in tmp_li:
            t.join()
        print('全部结束')


if __name__ == '__main__':
    semaphore = threading.Semaphore(4)
    b = B(semaphore)
    b.start()
线程池
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
def handle_task(task, delay=2):
    """Simulate handling *task* and return the completion message.

    Args:
        task: label of the task, used in the messages.
        delay: seconds of simulated work (default 2).
    """
    print(f'开始处理任务{task}')
    time.sleep(delay)
    print(f'任务{task}处理结束')
    return f'任务{task}处理结束'
if __name__ == '__main__':
    tasks = range(10)
    # The with-block shuts the pool down and joins its workers on exit.
    with ThreadPoolExecutor(max_workers=3) as executor:
        handle_threads = [executor.submit(handle_task, task) for task in tasks]
        # Alternative: [f.result() for f in handle_threads] collects the
        # results in submission order instead of completion order.
        # as_completed returns a generator that yields each future as soon
        # as its task has finished.
        for handle_thread in as_completed(handle_threads):
            print(f'全局已知:{handle_thread.result()}')
    print('全局退出')
多进程
对于IO操作(即压力在硬盘、内存、网络等外部瓶颈上的情况)更密集的场景,多进程是没有优势的,反而可能因为切换或者调度的成本而显劣势,此时使用多线程会更合适。而对于CPU计算密集的任务,多进程会有优势。
import time
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
def fib(n):
    """Return the n-th Fibonacci number (``fib(0) == 0``, ``fib(1) == fib(2) == 1``).

    The naive double recursion is deliberate: it serves as the CPU-bound
    workload for the thread/process comparison.

    Raises:
        ValueError: if *n* is negative.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    if n == 0:
        return 0
    if n <= 2:
        return 1
    return fib(n-2) + fib(n-1)
if __name__ == '__main__':
    tasks = range(30, 40)
    # perf_counter is monotonic, so the measured intervals are not affected
    # by system clock adjustments.
    start_time = time.perf_counter()
    with ThreadPoolExecutor() as executor:
        handle_threads = [executor.submit(fib, task) for task in tasks]
        for handle_thread in as_completed(handle_threads):
            print(f'thread:{handle_thread.result()}')
    thread_time = time.perf_counter()
    print(f'ThreadPoolExecutor time: {thread_time - start_time}')
    with ProcessPoolExecutor() as executor:
        handle_processes = [executor.submit(fib, task) for task in tasks]
        for handle_process in as_completed(handle_processes):
            print(f'process:{handle_process.result()}')
    print(f'ProcessPoolExecutor time: {time.perf_counter() - thread_time}')
multiprocessing
import multiprocessing
def fib(n):
    """Return the n-th Fibonacci number (``fib(0) == 0``, ``fib(1) == fib(2) == 1``).

    The naive double recursion is deliberate: it gives the worker
    processes a CPU-bound job.

    Raises:
        ValueError: if *n* is negative.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    if n == 0:
        return 0
    if n <= 2:
        return 1
    return fib(n-2) + fib(n-1)
if __name__ == '__main__':
    # Leave one core free, but never ask for zero workers: Pool(0) raises
    # ValueError on a single-core machine.
    workers = max(1, multiprocessing.cpu_count() - 1)
    # The with-block terminates the pool once the results are consumed.
    with multiprocessing.Pool(workers) as pool:
        # imap_unordered yields results in completion order, not input order.
        for result in pool.imap_unordered(fib, [36, 22, 40, 16]):
            print(result)
    print('全局退出')
Manager
multiprocessing里实现了可以进程间通信的Queue(不能用于pool进程池),可以使用Manager().Queue()来用于进程池通信。Manager中也维护了Python中常见的值和容器结构,可以用于进程间的安全通信。
import time
from multiprocessing import Pool, Manager
def put(q, delay=2):
    """Put ``123`` on *q*, then stay busy for *delay* seconds (default 2)."""
    q.put(123)
    time.sleep(delay)
def get(q, delay=2):
    """Wait *delay* seconds (default 2), then print the next item taken from *q*."""
    time.sleep(delay)
    print(q.get())
if __name__ == '__main__':
    # The manager's server process is shut down when the with-block exits,
    # after the pool has been cleaned up.
    with Manager() as manager, Pool(2) as pool:
        q = manager.Queue(6)
        results = [
            pool.apply_async(put, (q, )),
            pool.apply_async(get, (q, )),
        ]
        pool.close()
        pool.join()
        # apply_async swallows worker exceptions until get() is called.
        for result in results:
            result.get()
    print('全局退出')
对于两个进程间的简单通信可以考虑下性能更高的Pipe。
协程和异步IO
概念
并发即通过一定调度策略使一段时间内多个程序在一个CPU上运行,来模拟并行的效果,但某一瞬间,只有一个程序在运行。
并行是真同时多个程序运行在多个CPU上。
同步即调用IO操作时,等待IO操作完成再返回的方式。
异步即调用IO操作时,不等待IO操作完成就返回的方式。
阻塞即调用函数时当前线程被挂起。
非阻塞即调用函数时当前线程不被挂起,立即返回。
IO多路复用
select、poll、epoll都是IO多路复用的机制,本质上都是同步IO。
在并发高、连接活跃度不高的情况下,epoll比select更合适,比如web网站。
在并发不高、连接活跃度高的情况下,select比epoll更合适,比如游戏服务。
from selectors import DefaultSelector
这种select代码不好维护,DEBUG也不好做。
协程
为了可以以同步方式编写更加高效的代码,可以使用函数切换代替线程切换。而实现函数执行中的暂停、切换、传值需要生成器的某些特性。
def gen():
    """Yield '123', print whatever is sent in, then yield '456'."""
    x = yield '123'
    print(x)
    yield '456'


if __name__ == '__main__':
    g = gen()
    # After producing a value the generator pauses, waiting for a value to
    # be sent in.
    print(next(g))
    # send() passes the value in, resumes the generator and returns the
    # next yielded value.
    print(g.send('x'))
yield from
data = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}


def gen():
    """Accumulate the numbers sent in until ``None`` is sent.

    Returns:
        ``(total, values)``: the sum and the list of everything received.
    """
    li = list()
    s = 0
    while 1:
        tmp = yield
        # Only None ends the stream; a plain truthiness test would also
        # stop on a legitimate 0.
        if tmp is None:
            break
        li.append(tmp)
        s += tmp
    return s, li


def shell(data_new, k):
    """Delegate to ``gen()`` and store its return value in ``data_new[k]``."""
    # The loop starts a fresh gen() after each result, so the send(None)
    # that finishes one accumulation pauses at the next yield instead of
    # raising StopIteration in the caller.
    while True:
        data_new[k] = yield from gen()
if __name__ == '__main__':
    data_new = dict()
    for k, v in data.items():
        sh = shell(data_new, k)
        sh.send(None)  # prime the generator up to its first yield
        for num in v:
            sh.send(num)
        sh.send(None)  # end of data: the result is stored in data_new[k]
        sh.close()  # the delegating generator loops forever, so stop it
    print(data_new)
asyncio
import asyncio
import time
async def handle(task, delay=2):
    """Simulate handling *task* with a non-blocking wait of *delay* seconds (default 2)."""
    print(f'处理任务{task}开始')
    await asyncio.sleep(delay)
    print(f'处理任务{task}结束')
async def main():
    """Run tasks 1..19 concurrently and wait for all of them."""
    handles = [handle(task) for task in range(1, 20)]
    # asyncio.wait is the alternative; gather accepts grouped tasks and a
    # group can be cancelled, so prefer gather for that kind of use.
    await asyncio.gather(*handles)


if __name__ == '__main__':
    start_time = time.time()
    # asyncio.run creates and closes the event loop; calling
    # get_event_loop() with no running loop is deprecated.
    asyncio.run(main())
    print(f'用时:{time.time() - start_time}秒')
asyncio内容有点多,这个笔记字数有点多了,还是在另外一篇笔记中体现吧。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】