进程

1.进程:数据是不共享的

     - 1.进程之间数据是不共享的
import multiprocessing
# Module-level list: each child process gets its OWN copy of this,
# which is exactly what the "data is not shared" demo shows.
lst = []


def task(arg):
    """Append *arg* to the module-level list and print the list so far."""
    lst.append(arg)
    print(lst)
def fun():
    """Spawn ten child processes, each running task() with its index.

    Because process memory is not shared, every child prints a list
    containing only its own value — the point of this demo.
    """
    workers = [multiprocessing.Process(target=task, args=(i,)) for i in range(10)]
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    fun()
        -2.通过继承的方式来创建进程
import multiprocessing
class Myprosess(multiprocessing.Process):
    """Process created by subclassing: start() runs run() in a child.

    NOTE(review): the printed message says "thread" but this is a
    process; the string is runtime output and is kept verbatim.
    """

    def run(self):
        print("当前的线程是multiprocessing.process")
def run():
    """Create a Myprosess instance and launch it as a child process."""
    worker = Myprosess()
    worker.start()


if __name__ == '__main__':
    run()
name属性:获取当前进程的名字
# Fix: the original "import multiprocessing" line carried stray leading
# whitespace (a blog-paste artifact), which raises IndentationError at
# module level.  Imports belong flush-left at the top of the file.
import multiprocessing
import time
def task(arg):
    """Print this process's Process object (shows its name) plus *arg*.

    The 2-second sleep keeps the child alive long enough to observe it.
    """
    proc = multiprocessing.current_process()
    time.sleep(2)
    print(proc, arg)
def run():
    """Start one named child process; start() returns immediately,
    so the second print appears before the child's output."""
    print(123)
    child = multiprocessing.Process(target=task, args=(0,))
    child.name = "Jave Yang"  # custom name, read back by task()
    child.start()
    print(11212313)


if __name__ == '__main__':
    run()
ident/pid属性:查看进程ID
import time
def task(arg):
    """Print this process's numeric id (``ident``) together with *arg*."""
    proc_id = multiprocessing.current_process().ident
    time.sleep(2)
    print(proc_id, arg)
def run():
    """Start a named child running task(0); its ident is assigned on start()."""
    print(123)
    child = multiprocessing.Process(target=task, args=(0,))
    child.name = "Jave Yang"
    child.start()
    print(11212313)


if __name__ == '__main__':
    run()
 
daemon属性:主进程停,子进程必须停
import multiprocessing
def func(arg):
    """Sleep two seconds, then print *arg* (simulated slow work)."""
    time.sleep(2)
    print(arg)
def run():
    """Launch func(20) in a non-daemon child process.

    Fix: in the original, ``t.start()`` had been dedented to module
    level, where ``t`` does not exist (NameError) and the call would
    run on import instead of inside run().  With daemon=False the
    parent waits for the child to finish before exiting.
    """
    t = multiprocessing.Process(target=func, args=(20,))
    t.daemon = False
    t.start()


if __name__ == '__main__':
    run()
join方法:人为地控制让主进程等待子进程
import multiprocessing
def func(arg):
    """Sleep two seconds, then print *arg*."""
    time.sleep(2)
    print(arg)


# Module-level print: under spawn this also runs in every child import.
print(123)
def run():
    """Start func(20) in a child and wait for it — at most 3 seconds."""
    child = multiprocessing.Process(target=func, args=(20,))
    child.start()
    # join(3) blocks the parent until the child exits or 3 s elapse,
    # whichever comes first.
    child.join(3)


if __name__ == '__main__':
    run()
 

 

常用功能:

              join, daemon, name, multiprocessing.current_process().ident/pid

           

 

2.进程的数据共享

linux(Queue)
import multiprocessing
# Queue shared via inheritance: children forked on Linux see the same
# queue object created here at module level.
q = multiprocessing.Queue()


def task(arg):
    """Producer: push *arg* onto the shared queue."""
    q.put(arg)
def fun():
    """Start ten producers, then print the ten values they queued.

    Fix: the original consumer loop was ``while 1: v = q.get()``, so
    after the ten results arrived the parent blocked forever on an
    empty queue.  A counted loop drains exactly what was produced and
    lets the program terminate.
    """
    for i in range(10):
        multiprocessing.Process(target=task, args=(i,)).start()
    for _ in range(10):
        print(q.get())


if __name__ == '__main__':
    fun()
Linux(Manager)
# A Manager server process hosts the dict; the proxy makes writes
# visible across processes.
m = multiprocessing.Manager()
dic = m.dict()


def task(arg):
    """Record arg -> 100 in the shared managed dict."""
    dic[arg] = 100
def run():
    """Start ten writers, pause for user input, then dump the dict values.

    The blocking input() simply gives the children time to finish
    writing before the parent reads the shared dict.
    """
    for i in range(10):
        multiprocessing.Process(target=task, args=(i,)).start()
    input('>>>')
    print(dic.values())


if __name__ == '__main__':
    run()
windows(Queue)
def task(arg, q):
    """Producer: put *arg* onto the queue *q* passed in explicitly
    (the Windows-compatible pattern — no reliance on fork)."""
    q.put(arg)

if __name__ == '__main__':
    # Windows-friendly pattern: the Queue is created under the main
    # guard and handed to every child as an argument.
    q = multiprocessing.Queue()
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
    # Fix: "while True" blocked forever once the ten results were
    # drained; read exactly the number of values produced instead.
    for _ in range(10):
        print(q.get())
windows(Manager)
def task(arg, dic):
    """After a 2-second pause, record arg -> 100 in the shared dict *dic*."""
    time.sleep(2)
    dic[arg] = 100
if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        process_list.append(p)
    # Fix 1: the original busy-waited in a tight is_alive() polling
    # loop (spinning at 100% CPU); join() blocks without spinning and
    # waits for every child exactly once.
    for p in process_list:
        p.join()
    # Fix 2: print(dic) had been dedented to module level, so it ran
    # at import time — NameError in spawned children, and before the
    # guard created dic.  It belongs inside the guard, after the joins.
    print(dic)
 

 

     

3.进程锁(RLock)

import multiprocessing

# Re-entrant lock shared (via fork) by the children below.
lock = multiprocessing.RLock()


def task(arg):
    """Print, then hold the lock for two seconds while printing *arg*.

    Fix: acquire()/release() replaced by a ``with`` block so the lock
    is released even if the body raises.
    """
    print('鬼子来了')
    with lock:
        time.sleep(2)
        print(arg)

if __name__ == '__main__':
    # Two children contend for the same RLock: the second must wait
    # until the first releases it, serialising the printed output.
    for n in (1, 2):
        multiprocessing.Process(target=task, args=(n,)).start()

 

4.进程池

import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(arg):
    """Simulated work: pause two seconds, then print *arg*."""
    time.sleep(2)
    print(arg)
if __name__ == '__main__':
    # Improvement: the context manager guarantees pool.shutdown(wait=True),
    # so the parent explicitly waits for all ten tasks instead of
    # relying on interpreter-exit behaviour.  With 5 workers the ten
    # 2-second tasks finish in two waves.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)

5.模块(爬虫)

 

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(url):
    """Fetch one listing page and print every item's title and link."""
    print(url)
    # Spoof a browser User-Agent so the site serves the normal page.
    response = requests.get(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0'})
    # Parse the HTML and locate the div that holds the article list.
    soup = BeautifulSoup(response.text, 'html.parser')
    content_list = soup.find('div', attrs={'id': 'content-list'})
    for item in content_list.find_all('div', attrs={'class': 'item'}):
        # Each class="item" div is one article entry; its first <a>
        # carries both the visible title and the link.
        anchor = item.find('a')
        title = anchor.text.strip()  # title text, whitespace trimmed
        print(title)
        title_url = anchor.get('href')  # the article's URL
        print(title_url)
def run():
    """Fan pages 1..120 out to a pool of 20 worker processes.

    NOTE(review): despite the original comment, ProcessPoolExecutor
    creates processes, not threads.
    """
    pool = ProcessPoolExecutor(20)
    for page in range(1, 121):
        pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % page)


if __name__ == '__main__':
    run()

 

posted @ 2018-09-12 21:54  魏三斗  阅读(168)  评论(0编辑  收藏  举报