面向对象补充,进程,数据共享,锁,进程池,模块(requests,bs4(beautifulsoup)),协程
一丶面向对象补充
""" class Foo(object): def __init__(self): self.info = {} def __setitem__(self, key, value): self.info[key] = value def __getitem__(self, item): return self.info.get(item) obj = Foo() obj['x'] = 123 print(obj['x']) """ from flask import globals class Foo(object): def __init__(self): object.__setattr__(self, 'info', {}) # 在对象中设置值的本质 def __setattr__(self, key, value): self.info[key] = value def __getattr__(self, item): print(item) return self.info[item] obj = Foo() obj.name = 'alex' print(obj.name) v = [] for i in range(10000): v.append(i) print(v)
二丶进程
进程间数据不共享
data_list = [] def task(arg): data_list.append(arg) print(data_list) def run(): for i in range(10): p = multiprocessing.Process(target=task,args=(i,)) # p = threading.Thread(target=task,args=(i,)) p.start() if __name__ == '__main__': run()
常用功能:
- join
- deamon
- name
- multiprocessing.current_process()
- multiprocessing.current_process().ident/pid
类继承方式创建进程
class MyProcess(multiprocessing.Process): def run(self): print('当前进程',multiprocessing.current_process()) def run(): p1 = MyProcess() p1.start() p2 = MyProcess() p2.start() if __name__ == '__main__': run()
进程间数据共享
Queue:
q = multiprocessing.Queue() def task(arg,q): q.put(arg) def run(): for i in range(10): p = multiprocessing.Process(target=task, args=(i, q,)) p.start() while True: v = q.get() print(v) run()
def task(arg,q): q.put(arg) if __name__ == '__main__': q = multiprocessing.Queue() for i in range(10): p = multiprocessing.Process(target=task,args=(i,q,)) p.start() while True: v = q.get() print(v)
Manager:(*)
m = multiprocessing.Manager() dic = m.dict() def task(arg): dic[arg] = 100 def run(): for i in range(10): p = multiprocessing.Process(target=task, args=(i,)) p.start() input('>>>') print(dic.values()) if __name__ == '__main__': run()
def task(arg,dic): time.sleep(2) dic[arg] = 100 if __name__ == '__main__': m = multiprocessing.Manager() dic = m.dict() process_list = [] for i in range(10): p = multiprocessing.Process(target=task, args=(i,dic,)) p.start() process_list.append(p) while True: count = 0 for p in process_list: if not p.is_alive(): count += 1 if count == len(process_list): break print(dic)
三丶进程锁
四丶进程池
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor (官方推荐方式)
import time from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor def task(arg): time.sleep(2) print(arg) if __name__ == '__main__': pool = ProcessPoolExecutor(5) for i in range(10): pool.submit(task,i)
五丶初识爬虫
安装:
pip3 install requests
pip3 intall beautifulsoup4
问题:
找不到命令?
方式一:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
方式二:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
实例:
import requests from bs4 import BeautifulSoup from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor # 模拟浏览器发送请求 # 内部创建 sk = socket.socket() # 和抽屉进行socket连接 sk.connect(...) # sk.sendall('...') # sk.recv(...) def task(url): print(url) r1 = requests.get( url=url, headers={ 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36' } ) # 查看下载下来的文本信息 soup = BeautifulSoup(r1.text,'html.parser') print(soup.text) # content_list = soup.find('div',attrs={'id':'content-list'}) # for item in content_list.find_all('div',attrs={'class':'item'}): # title = item.find('a').text.strip() # target_url = item.find('a').get('href') # print(title,target_url) def run(): pool = ThreadPoolExecutor(5) for i in range(1,50): pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i) if __name__ == '__main__': run()
相关:
a. 以上示例进程和线程那个好?
- 线程好
b. requests模块模拟浏览器发送请求
- 本质 requests.get(...):
- 创建socket客户端
- 连接 【阻塞】
- 发送请求
- 接收请求【阻塞】
- 断开连接
c. 线程和进程池