Python3 多线程真实案例

工作中遇到了处理大批量数据的问题:大概有 8 万条左右的 Excel 数据需要取出来,逐条去调用 API,得出的结果还需要进行数据过滤,然后再写回 Excel。单线程大概跑了 2 个多小时,实属麻烦,万一代码中有什么 bug,快结束的时候才报错的话……于是换成多线程,15 个线程大概跑了 10 多分钟,太棒了。需要注意的是线程数不能设置太多,不然 server 有可能没那么快回应,导致 timeout。

import requests
from openpyxl import load_workbook,workbook
from concurrent.futures import ThreadPoolExecutor
import threading
import time
import json


# Shared result store: maps an Excel row number to the raw response body text
# fetched for that row. sendGet writes into it from the worker threads.
info = {}


def sendGet(location, timeout=30):
    """Call the API for one spreadsheet row and record the raw response.

    The response body is stored as text in the module-level ``info`` dict
    under the row number; nothing is returned.

    Args:
        location: Indexable ``[row, origins, destination]`` triple. Element 0
            is used as the key in ``info``; elements 1 and 2 are sent as the
            ``origins`` and ``destination`` query parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its worker thread
            indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out. In that case no entry is written to ``info`` for this row.
    """
    url = "https://xxxxxxx"
    # Progress output only: other threads may be updating ``info`` at the same
    # time, so the printed count is approximate.
    print(f"========================================第{len(info)}条========================================")
    params = {
        "origins": location[1],
        "destination": location[2],
        "type": "1",
        "key": "xxxxx",
    }
    res = requests.get(url, params=params, timeout=timeout)
    info[location[0]] = res.text

def getLocation(filepath, sheet_name="评测数据+计算", first_row=2, last_row=76058):
    """Read the two location columns (B and D) from the workbook.

    Args:
        filepath: Path of the ``.xlsx`` workbook to open.
        sheet_name: Name of the worksheet to read.
        first_row: First Excel row (1-based) to read.
        last_row: Last Excel row (inclusive) to read.

    Returns:
        A list with one ``[row, b_value, d_value]`` entry per row in the
        requested range, where ``row`` is the Excel row number and the other
        two items are the cell values from columns B and D.
    """
    wb = load_workbook(filepath)
    sheet = wb[sheet_name]
    # The two locations that are compared against each other.
    origin_cells = sheet[f"B{first_row}":f"B{last_row}"]
    destination_cells = sheet[f"D{first_row}":f"D{last_row}"]
    # Each item of a range is itself a tuple of cells; with a single-column
    # range the cell of interest is element 0.
    return [
        [row, origin[0].value, destination[0].value]
        for row, (origin, destination) in enumerate(
            zip(origin_cells, destination_cells), start=first_row
        )
    ]
def filter_data(info):
    """Lazily reduce raw API responses to the fields written back to Excel.

    Args:
        info: Mapping of row number to a JSON document given as text.

    Yields:
        One dict per entry with the keys ``row``, ``status``, ``count`` and
        ``distance``. ``distance`` comes from the first element of the
        ``results`` list, or is an empty string when ``results`` is missing
        or empty.
    """
    for row_number, raw_body in info.items():
        payload = json.loads(raw_body)
        record = {
            "row": row_number,
            "status": payload.get("status"),
            "count": payload.get("count"),
            "distance": "",
        }
        matches = payload.get("results")
        if matches:
            record["distance"] = matches[0].get("distance")
        yield record


def write_excel(res_info, path=None, sheet_name="评测数据+计算"):
    """Write the filtered results into columns E, F and G and save the workbook.

    Args:
        res_info: Iterable of dicts with the keys ``row``, ``distance``,
            ``status`` and ``count``.
        path: Workbook to open and overwrite. When omitted, the module-level
            ``filepath`` set by the script entry point is used, which keeps
            existing ``write_excel(res_info)`` calls working.
        sheet_name: Name of the worksheet to write to.
    """
    if path is None:
        # Backward-compatible fallback; only defined when run as a script.
        path = filepath
    wb = load_workbook(path)
    sheet = wb[sheet_name]
    for record in res_info:
        row = record.get("row")
        sheet[f"E{row}"] = record.get("distance")
        sheet[f"F{row}"] = record.get("status")
        sheet[f"G{row}"] = record.get("count")
    wb.save(path)


if __name__ == '__main__':
    # Script entry point: read the rows, query the API concurrently, then
    # filter the responses and write them back into the same workbook.
    # Previously used input workbooks:
    # filepath = r"C:\Users\fengzi\Desktop\性能横评_8月.xlsx"
    # filepath = r"C:\Users\fengzi\Desktop\test3.xlsx"
    filepath = r"C:\Users\fengzi\Desktop\性能横评_8月 new.xlsx"
    location_map = getLocation(filepath)
    with ThreadPoolExecutor(max_workers=15, thread_name_prefix="test-") as pool:
        # submit() instead of map() so each task's outcome can be inspected;
        # with map() an exception raised in a worker is lost unless the
        # results are iterated.
        futures = {
            pool.submit(sendGet, location): location[0]
            for location in location_map
        }
    # Leaving the with-block waits for all tasks, so every future is done here.
    failed_count = 0
    for future, row in futures.items():
        error = future.exception()
        if error is not None:
            failed_count += 1
            print(f"row {row}: request failed: {error!r}")
    if failed_count:
        print(f"{failed_count} request(s) failed; those rows are left unfilled")
    res_info = filter_data(info)
    write_excel(res_info)

 

posted @ 2021-11-05 12:04  力王7314  阅读(165)  评论(0)  编辑  收藏  举报