面向对象补充,进程,数据共享,锁,进程池,模块(requests,bs4(beautifulsoup)),协程

一丶面向对象补充

"""
class Foo(object):
    def __init__(self):
        self.info = {}

    def __setitem__(self, key, value):
        self.info[key] = value

    def __getitem__(self, item):
        return self.info.get(item)


obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
from flask import globals
class Foo(object):
    """Keep every attribute assigned on the instance in an internal ``info`` dict.

    ``__setattr__`` routes all attribute assignments into ``info`` and
    ``__getattr__`` (invoked only when normal lookup fails) reads them back,
    so ``obj.name = 'alex'`` is stored as ``obj.info['name'] = 'alex'``.
    """

    def __init__(self):
        # Call object.__setattr__ directly: this is what setting a value on an
        # object fundamentally is, and it bypasses the override below, which
        # needs ``self.info`` to exist already.
        object.__setattr__(self, 'info', {})

    def __setattr__(self, key, value):
        """Record ``value`` under ``key`` in ``info``."""
        self.info[key] = value

    def __getattr__(self, item):
        """Return the value stored for ``item``.

        Raises:
            AttributeError: if ``item`` was never assigned. Raising
                AttributeError (not KeyError) is what ``hasattr`` and
                ``getattr(obj, name, default)`` rely on.
        """
        print(item)  # trace which attribute fell through to __getattr__
        if item == 'info':
            # ``info`` itself is missing (instance built without __init__);
            # reading ``self.info`` here would recurse without end.
            raise AttributeError(item)
        try:
            return self.info[item]
        except KeyError:
            raise AttributeError(item) from None

# Demo: the assignment goes through Foo.__setattr__ and the read falls back to
# Foo.__getattr__, which prints the attribute name before returning the value.
obj = Foo()
obj.name = 'alex'
print(obj.name)

# Build 0..9999 straight from the range instead of appending in a loop.
v = list(range(10000))

print(v)
小补充...

 

 

二丶进程

进程间数据不共享

import multiprocessing

data_list = []


def task(arg):
    """Append ``arg`` to the module-level ``data_list`` and print the list."""
    # Each child process appends to its own copy of ``data_list``; the
    # parent's list is never modified (processes do not share data).
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten child processes, each running ``task`` with its index."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # Swap in the line below to compare with threads, which all append to
        # the same list:
        # p = threading.Thread(target=task, args=(i,))
        p.start()


if __name__ == '__main__':
    run()
...

 

常用功能:
- join
- daemon
- name
- multiprocessing.current_process()
- multiprocessing.current_process().ident/pid

类继承方式创建进程

import multiprocessing


class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the current process; runs in the child after ``start()``."""
        print('当前进程', multiprocessing.current_process())


# Module-level entry point. It must sit outside the class body: defined at
# method indentation it would replace MyProcess.run above.
def run():
    """Start two ``MyProcess`` children."""
    p1 = MyProcess()
    p1.start()

    p2 = MyProcess()
    p2.start()


if __name__ == '__main__':
    run()
...

 

进程间数据共享

Queue:

                import multiprocessing

                # Created at import time; the same queue object is handed to
                # every worker through ``args``.
                q = multiprocessing.Queue()


                def task(arg, q):
                    """Put ``arg`` on the queue ``q``."""
                    q.put(arg)


                def run():
                    """Start ten workers and print the value each one puts on ``q``."""
                    workers = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, q,))
                        p.start()
                        workers.append(p)

                    # Read exactly one result per worker: an unbounded
                    # ``while True: q.get()`` blocks forever after the last one.
                    for _ in workers:
                        v = q.get()
                        print(v)

                    # Join only after the queue has been drained; joining a
                    # process that still has queued data unflushed can deadlock.
                    for p in workers:
                        p.join()


                # Without this guard, start methods that re-import the module in
                # the child (spawn / forkserver) would call run() again there.
                if __name__ == '__main__':
                    run()
linux:

 

                import multiprocessing


                def task(arg, q):
                    """Put ``arg`` on the queue ``q``."""
                    q.put(arg)


                if __name__ == '__main__':
                    # The queue is created under the guard and passed to each
                    # worker explicitly through ``args``.
                    q = multiprocessing.Queue()
                    workers = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, q,))
                        p.start()
                        workers.append(p)

                    # Read exactly one result per worker: an unbounded
                    # ``while True: q.get()`` blocks forever after the last one.
                    for _ in workers:
                        v = q.get()
                        print(v)

                    # Join only after the queue has been drained.
                    for p in workers:
                        p.join()
windows:

 

Manager:(*)

                import multiprocessing

                # NOTE(review): task() reaches ``dic`` as a module global, so
                # this variant presumably only works when children inherit the
                # parent's module state (fork start method) - confirm before
                # running it on another platform.
                m = multiprocessing.Manager()
                dic = m.dict()


                def task(arg):
                    """Store 100 under ``arg`` in the manager dict ``dic``."""
                    dic[arg] = 100


                def run():
                    """Start ten workers, wait for them, then print the stored values."""
                    workers = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,))
                        p.start()
                        workers.append(p)

                    # Wait for the workers themselves rather than pausing on
                    # input('>>>'): pressing Enter early printed a partial result.
                    for p in workers:
                        p.join()
                    print(dic.values())


                if __name__ == '__main__':
                    run()
Linux:

 

                import multiprocessing
                import time


                def task(arg, dic):
                    """Sleep two seconds (simulated work), then store 100 under ``arg``."""
                    time.sleep(2)
                    dic[arg] = 100


                if __name__ == '__main__':
                    # The manager dict is passed to each worker explicitly
                    # through ``args``.
                    m = multiprocessing.Manager()
                    dic = m.dict()

                    process_list = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, dic,))
                        p.start()

                        process_list.append(p)

                    # join() blocks until each worker has exited; this replaces
                    # a loop that polled is_alive() continuously without sleeping.
                    for p in process_list:
                        p.join()
                    print(dic)
    
windows:

 

 

三丶进程锁

  

 

 

 

四丶进程池

from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor (官方推荐方式)

import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def task(arg):
    """Sleep two seconds (simulated work), then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == '__main__':
    # A pool of five worker processes handles the ten submitted jobs. Leaving
    # the ``with`` block waits for the pending jobs and releases the pool.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
进程池

 

 

五丶初识爬虫

  安装:

      pip3 install requests

      pip3 install beautifulsoup4

    问题:

      找不到命令?

        方式一:

        C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
        方式二:
        C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests

    实例:

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


# requests simulates a browser sending a request. Internally it:
#   creates a socket:                 sk = socket.socket()
#   connects to the Chouti site:      sk.connect(...)
#   sends the request:                sk.sendall('...')
#   reads the response:               sk.recv(...)

def task(url):
    """Download ``url`` with a browser User-Agent and print the page text."""
    print(url)
    try:
        r1 = requests.get(
            url=url,
            headers={
                'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
            },
            # Without a timeout a stalled server blocks this worker forever.
            timeout=10,
        )
    except requests.RequestException as exc:
        # The future returned by submit() is discarded, so an exception raised
        # here would otherwise disappear silently; report it instead.
        print('request failed: %s (%s)' % (url, exc))
        return

    # Show the text of the downloaded page.
    soup = BeautifulSoup(r1.text, 'html.parser')
    print(soup.text)
    # Alternative: print the title and link of every item in the list.
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)


def run():
    """Fetch pages 1-49 of the hot list using a pool of five threads."""
    # Leaving the ``with`` block waits for all downloads and frees the threads.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
实例

 

  相关:
  a. 以上示例进程和线程哪个好?
  - 线程好
  b. requests模块模拟浏览器发送请求
  - 本质 requests.get(...):
  - 创建socket客户端
  - 连接 【阻塞】
  - 发送请求
  - 接收响应【阻塞】
  - 断开连接

  c. 线程和进程池

 

posted @ 2018-09-12 17:51  心跳+  阅读(298)  评论(0编辑  收藏  举报
-->