进程
1.进程:数据是不共享的
- 1.进程之间数据是不共享的
import multiprocessing

# Module-level list. The example demonstrates that processes do not share
# data: each child process appends to its own copy of this list.
lst = []


def task(arg):
    """Append ``arg`` to the module-level ``lst`` and print the list.

    Args:
        arg: Value to append.
    """
    lst.append(arg)
    print(lst)


def fun():
    """Start ten child processes, each running ``task`` with its loop index."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()


if __name__ == '__main__':
    fun()
-2.通过继承的方式来创建进程
import multiprocessing


class Myprosess(multiprocessing.Process):
    """Process subclass that defines its work by overriding ``run``."""

    def run(self):
        """Print a fixed message; this is the body executed by the process."""
        print("当前的线程是multiprocessing.process")


def run():
    """Create one ``Myprosess`` instance and start it."""
    p1 = Myprosess()
    p1.start()


if __name__ == '__main__':
    run()
name属性:获取当前进程的名字
import multiprocessing
import time


def task(arg):
    """Sleep two seconds, then print the current process's name and ``arg``.

    Args:
        arg: Value printed after the process name.
    """
    # Read ``.name`` so the printed value is the name itself rather than the
    # repr of the whole Process object.
    name = multiprocessing.current_process().name
    time.sleep(2)
    print(name, arg)


def run():
    """Start one child process with a custom name and print progress markers."""
    print(123)
    p = multiprocessing.Process(target=task, args=(0,))
    # The name is assigned before start() so the child sees it.
    p.name = "Jave Yang"
    p.start()
    print(11212313)


if __name__ == '__main__':
    run()
ident/pid属性:查看进程ID
import multiprocessing
import time


def task(arg):
    """Sleep two seconds, then print the current process's ident and ``arg``.

    Args:
        arg: Value printed after the process identifier.
    """
    pid = multiprocessing.current_process().ident
    time.sleep(2)
    print(pid, arg)


def run():
    """Start one named child process running ``task`` and print markers."""
    print(123)
    p = multiprocessing.Process(target=task, args=(0,))
    p.name = "Jave Yang"
    p.start()
    print(11212313)


if __name__ == '__main__':
    run()
daemon属性:设为True时,主进程停,子进程必须停
import multiprocessing
import time


def func(arg):
    """Sleep two seconds, then print ``arg``.

    Args:
        arg: Value to print.
    """
    time.sleep(2)
    print(arg)


def run():
    """Start one child process running ``func`` with an explicit daemon flag."""
    t = multiprocessing.Process(target=func, args=(20,))
    # daemon must be set before start(). False means the child is not a
    # daemon, so it is not terminated when the main process finishes; set it
    # to True to see the child killed together with the main process.
    t.daemon = False
    t.start()


if __name__ == '__main__':
    run()
join方法:人为地控制让主进程等待子进程
import multiprocessing
import time


def func(arg):
    """Sleep two seconds, then print ``arg`` followed by ``123``.

    Args:
        arg: Value to print.
    """
    time.sleep(2)
    print(arg)
    # NOTE(review): the original paste lost its indentation; this print is
    # assumed to belong to func rather than to module level - confirm.
    print(123)


def run():
    """Start one child process and wait for it with a timeout."""
    t = multiprocessing.Process(target=func, args=(20,))
    t.start()
    # Wait at most 3 seconds for the child; func sleeps for 2, so the child
    # normally finishes before the timeout expires.
    t.join(3)


if __name__ == '__main__':
    run()
常用功能:
join, daemon, name, multiprocessing.current_process().ident/pid
2.进程的数据共享
linux(Queue)
import multiprocessing

# Created at import time. The workers reach this same queue only when they
# inherit the parent's module state (the "linux" case this example targets);
# a worker that re-imports the module would build its own separate queue.
q = multiprocessing.Queue()


def task(arg):
    """Put ``arg`` on the module-level queue ``q``.

    Args:
        arg: Value to enqueue.
    """
    q.put(arg)


def fun():
    """Start ten workers and print the ten values they put on ``q``."""
    worker_count = 10
    workers = []
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        workers.append(p)
    # Every worker enqueues exactly one item, so read exactly that many. An
    # unbounded ``while 1: q.get()`` blocks forever after the last item.
    for _ in range(worker_count):
        print(q.get())
    for p in workers:
        p.join()


if __name__ == '__main__':
    fun()
linux(Manager)
import multiprocessing

# Manager and its shared dict are created at import time. As with the other
# "linux" example, workers are expected to inherit these module-level objects
# from the parent process.
m = multiprocessing.Manager()
dic = m.dict()


def task(arg):
    """Store ``100`` under key ``arg`` in the shared dict ``dic``.

    Args:
        arg: Key to write.
    """
    dic[arg] = 100


def run():
    """Start ten workers, wait for all of them, then print the dict's values."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        workers.append(p)
    # Wait for the workers explicitly instead of pausing on input() until the
    # user decides they have probably finished.
    for p in workers:
        p.join()
    print(dic.values())


if __name__ == '__main__':
    run()
windows(Queue)
import multiprocessing


def task(arg, q):
    """Put ``arg`` on the queue ``q`` that was passed in.

    Args:
        arg: Value to enqueue.
        q: Queue object providing ``put``.
    """
    q.put(arg)


if __name__ == '__main__':
    # The queue is created under the main guard and handed to each worker as
    # an argument, so nothing depends on module-level state being inherited.
    q = multiprocessing.Queue()
    worker_count = 10
    workers = []
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
        workers.append(p)
    # One item per worker: read exactly that many instead of looping forever
    # on q.get(), which would block once the queue is drained.
    for _ in range(worker_count):
        print(q.get())
    for p in workers:
        p.join()
windows(Manager)
import multiprocessing
import time


def task(arg, dic):
    """Sleep two seconds, then store ``100`` under key ``arg`` in ``dic``.

    Args:
        arg: Key to write.
        dic: Mapping to write into; the script passes a Manager dict.
    """
    time.sleep(2)
    dic[arg] = 100


if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        process_list.append(p)
    # join() blocks until each worker has exited. The original polled
    # is_alive() in a tight loop, which keeps a CPU core busy while waiting.
    for p in process_list:
        p.join()
    print(dic)
3.锁
import multiprocessing
import time

# Module-level lock used by every call to task.
# NOTE(review): workers only contend on this same lock if they inherit the
# parent's module state; confirm the intended start method.
lock = multiprocessing.RLock()


def task(arg):
    """Print a banner, then print ``arg`` while holding the shared lock.

    Args:
        arg: Value printed inside the locked section.
    """
    print('鬼子来了')
    # The with-statement releases the lock even if the body raises.
    with lock:
        time.sleep(2)
        print(arg)


if __name__ == '__main__':
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.start()
    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.start()
4.进程池
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def task(arg):
    """Sleep two seconds, then print ``arg``.

    Args:
        arg: Value to print.
    """
    time.sleep(2)
    print(arg)


if __name__ == '__main__':
    # Pool of five worker processes. Leaving the with-block waits for all
    # submitted tasks and shuts the pool down.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
5.模块(爬虫)
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def task(url):
    """Download one listing page and print each item's title and link.

    Args:
        url: Address of the page to fetch.
    """
    print(url)
    # Send a browser User-Agent header with the request. The timeout keeps a
    # stalled connection from blocking this worker indefinitely.
    r1 = requests.get(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0'},
        timeout=10,
    )
    soup = BeautifulSoup(r1.text, 'html.parser')
    # Locate the container that holds the list of entries.
    content_list = soup.find('div', attrs={'id': 'content-list'})
    if content_list is None:
        # The page has no listing container, so there is nothing to print.
        return
    # Each entry is a <div class="item"> inside the container.
    for item in content_list.find_all('div', attrs={'class': 'item'}):
        link = item.find('a')
        if link is None:
            continue
        title = link.text.strip()
        print(title)
        title_url = link.get('href')
        print(title_url)


def run():
    """Submit pages 1 through 120 of the listing to a pool of workers."""
    # 20 worker processes; leaving the with-block waits for every task.
    with ProcessPoolExecutor(20) as pool:
        for i in range(1, 121):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()