多进程
-
进程和程序
进程:正在执行的程序
程序:还没有执行的代码,处于静态
一、进程的状态
使用进程实现多任务
二、进程和线程之间的对比
import threading
import time
import multiprocessing
def demo1():
    """Announce itself once per second; loops forever and never returns."""
    interval = 1  # seconds to wait between two messages
    message = "我是demo1"
    while True:
        print(message)
        time.sleep(interval)
def demo2():
    """Announce itself once per second; loops forever and never returns."""
    interval = 1  # seconds to wait between two messages
    message = "我是demo2"
    while True:
        print(message)
        time.sleep(interval)
if __name__ == '__main__':
    # Run both demo loops concurrently as threads of this single process.
    workers = [threading.Thread(target=task) for task in (demo1, demo2)]
    for worker in workers:
        worker.start()
    # Process-based variant of the same experiment, kept for comparison:
    # p1 = multiprocessing.Process(target=demo1)
    # p2 = multiprocessing.Process(target=demo2)
    # p1.start()
    # p2.start()
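- 线程:同一进程内的多个线程共享该进程的内存空间,创建和切换的开销较小;在 CPython 中受 GIL 限制,同一时刻只有一个线程执行 Python 字节码,适合 I/O 密集型任务。
- 进程:每个进程拥有独立的内存空间,创建开销较大,进程之间需要通过 Queue 等方式通信;多个进程可以同时运行在多个 CPU 核心上,适合 CPU 密集型任务。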
三、进程之间的通信-Queue
Queue---先进先出队列
import multiprocessing
def down_load(queue):
    """Put the numbers 1..9 followed by 0 on *queue*, then report completion.

    queue: any object exposing ``put(item)``.
    """
    for item in (1, 2, 3, 4, 5, 6, 7, 8, 9, 0):
        queue.put(item)
    # Printed once, after every item has been handed to the queue.
    print("数据添加完成!")
def parse_data(queue, timeout=1.0):
    """Drain *queue* into a list and print that list.

    The original loop stopped as soon as ``queue.empty()`` was true. With a
    producer that is still running, the queue can be empty for a moment
    between two ``put`` calls, so the consumer could stop early and print a
    truncated list. This version instead stops once no new item has arrived
    for *timeout* seconds.

    queue:   object exposing ``get()`` and ``get(timeout=...)`` that raises
             ``queue.Empty`` on timeout (``multiprocessing.Queue`` and
             ``queue.Queue`` both do).
    timeout: seconds to wait for a further item before treating the stream
             as finished.
    """
    # Imported locally: the parameter name shadows the ``queue`` module here,
    # so only the exception class is brought into scope.
    from queue import Empty

    # Wait without a deadline for the first item, exactly like the original,
    # so a slow-starting producer does not yield an empty result.
    list_data = [queue.get()]
    while True:
        try:
            list_data.append(queue.get(timeout=timeout))
        except Empty:
            # Nothing arrived within the grace period: the producer is done.
            break
    print(list_data)
def main():
    """Start down_load and parse_data in two child processes sharing one queue."""
    channel = multiprocessing.Queue()
    producer = multiprocessing.Process(target=down_load, args=(channel,))
    consumer = multiprocessing.Process(target=parse_data, args=(channel,))
    # Producer first, then consumer, as in the original start order.
    for proc in (producer, consumer):
        proc.start()


if __name__ == '__main__':
    main()
-----------------------------------------
数据添加完成!
[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
四、进程池
当需要创建的子进程数量不多时,可以直接利用 multiprocessing 中的 Process 动态生成多个进程;但如果需要上千个进程,逐个手动创建的工作量巨大,这时可以创建一个进程池,由池中固定数量的进程来执行所有任务。
- 进程池实例
from concurrent.futures import ProcessPoolExecutor
import requests
import json
class Vegtable_Data(object):
    """Fetch price records from the getPriceData endpoint and print them.

    Each call to ``parse_data`` sends one POST request and prints one line per
    record found under the ``"list"`` key of the JSON response.
    """

    # Record fields printed for every entry, in output order.
    FIELDS = (
        "prodCat",
        "prodName",
        "lowPrice",
        "avgPrice",
        "highPrice",
        "place",
        "unitInfo",
    )

    def __init__(self, timeout=10):
        """Store the endpoint, request headers and network timeout.

        timeout: seconds passed to ``requests.post``; without one, a stalled
                 connection would block the calling worker forever.
        """
        self.url = "http://www.xinfadi.com.cn/getPriceData.html"
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52"
            )
        }
        self.timeout = timeout

    def parse_data(self, num):
        """Request one batch of records and print each record's fields.

        num: value sent as the ``current`` form field
             (presumably the page number; confirm against the site).

        Raises ``requests.HTTPError`` on a 4xx/5xx response and ``KeyError``
        if the response or a record lacks an expected key.
        """
        # NOTE(review): "limit" looks like the page size; the empty filters
        # are sent as-is because the original request included them.
        payload = {
            "limit": "20",
            "current": num,
            "pubDateStartTime": "",
            "pubDateEndTime": "",
            "prodPcatid": "",
            "prodCatid": "",
            "prodName": "",
        }
        resp = requests.post(
            self.url,
            data=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        # Fail with a clear HTTP error instead of a confusing JSON/Key error
        # when the server answers with an error page.
        resp.raise_for_status()
        for record in resp.json()["list"]:
            print(*(record[field] for field in self.FIELDS))
if __name__ == '__main__':
    spider = Vegtable_Data()
    # Submit num = 1..20 to a pool of 10 worker processes. The futures are
    # kept because an exception raised inside a worker is stored on its
    # future and would otherwise be lost without any trace.
    with ProcessPoolExecutor(10) as pe:
        futures = {num: pe.submit(spider.parse_data, num) for num in range(1, 21)}
    # Leaving the with-block waits for all tasks, so every future is done here.
    for num, future in futures.items():
        error = future.exception()
        if error is not None:
            # Report and continue: one failed request should not hide the rest.
            print("parse_data({}) failed: {!r}".format(num, error))