python3多线程真实案例

工作中遇到需要处理大批量数据的问题：Excel 中大约有 8 万条数据，需要逐条取出来调用 API，对返回的结果进行数据过滤后再写回 Excel。单线程大概跑了 2 个多小时，实在麻烦，万一代码里有什么 bug，快结束的时候才报错就前功尽弃了。于是换成多线程，15 个线程大概只跑了 10 多分钟，太棒了！需要注意的是线程数不能设置太多，否则 server 可能来不及响应，导致 timeout。

import requests
from openpyxl import load_workbook,workbook
from concurrent.futures import ThreadPoolExecutor
import threading
import time
import json


# Shared result store filled by the worker threads: maps location[0]
# (the key supplied by the caller) to the raw response body text.
info = {}


def sendGet(location, timeout=30):
    """Query the distance API for one entry and record the raw response.

    Args:
        location: Indexable with three items: ``location[0]`` is the key
            under which the response is stored in ``info``, ``location[1]``
            is sent as ``origins`` and ``location[2]`` as ``destination``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so that a stalled connection cannot block a worker
            thread indefinitely.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (for
            example on a timeout or connection failure); nothing is stored
            in ``info`` for that entry.
    """
    url = "https://xxxxxxx"
    # Progress banner: shows how many responses have been stored so far.
    print(f"========================================第{len(info)}条========================================")
    params = {
        "origins": location[1],
        "destination": location[2],
        "type": "1",
        "key": "xxxxx"
    }
    res = requests.get(url, params, timeout=timeout)
    info[location[0]] = res.text

def getLocation(filepath, sheet_name="评测数据+计算", first_row=2, last_row=76058):
    """Read the location pairs to compare from columns B and D of a sheet.

    Args:
        filepath: Path of the workbook to open with ``load_workbook``.
        sheet_name: Name of the worksheet to read.
        first_row: First worksheet row (1-based) to read.
        last_row: Last worksheet row (inclusive) to read.

    Returns:
        A list with one ``[row_number, b_value, d_value]`` list per
        worksheet row in ``first_row..last_row``, in row order.
    """
    wb = load_workbook(filepath)
    sheet = wb[sheet_name]
    # Each slice yields one tuple per worksheet row holding a single cell.
    first_cells = sheet[f"B{first_row}":f"B{last_row}"]
    second_cells = sheet[f"D{first_row}":f"D{last_row}"]
    paired = zip(first_cells, second_cells)
    # The row number travels with the values so results can be written
    # back to the same row later.
    return [
        [row, first[0].value, second[0].value]
        for row, (first, second) in enumerate(paired, start=first_row)
    ]
def filter_data(info):
    """Lazily reduce each raw JSON response to the fields of interest.

    Args:
        info: Mapping of row key to a JSON document given as a string.

    Yields:
        One dict per entry with the keys ``row``, ``status``, ``count``
        and ``distance``. ``distance`` is taken from the first element of
        the document's ``results`` value, or is ``""`` when ``results`` is
        missing or empty.
    """
    for row, body in info.items():
        payload = json.loads(body)
        matches = payload.get("results")
        yield {
            "row": row,
            "status": payload.get("status"),
            "count": payload.get("count"),
            "distance": matches[0].get("distance") if matches else "",
        }


def write_excel(res_info, path=None):
    """Write the filtered results into columns E, F and G and save the file.

    Args:
        res_info: Iterable of dicts with the keys ``row``, ``distance``,
            ``status`` and ``count``; ``row`` selects the worksheet row.
        path: Workbook to open and overwrite. When omitted, the
            module-level ``filepath`` set by the script entry point is
            used, which keeps existing one-argument calls working.
    """
    if path is None:
        path = filepath
    wb = load_workbook(path)
    sheet = wb["评测数据+计算"]
    for item in res_info:
        row = item.get("row")
        sheet[f"E{row}"] = item.get("distance")
        sheet[f"F{row}"] = item.get("status")
        sheet[f"G{row}"] = item.get("count")
    wb.save(path)


if __name__ == '__main__':
    # Workbook that is read for input and later overwritten with results.
    filepath = r"C:\Users\fengzi\Desktop\性能横评_8月 new.xlsx"
    location_map = getLocation(filepath)
    # Keep the worker count moderate: too many concurrent requests can
    # make the server respond slowly and time out.
    with ThreadPoolExecutor(max_workers=15,thread_name_prefix="test-") as pool:
        # submit() keeps a future per entry so failures can be inspected;
        # a discarded pool.map() iterator would hide worker exceptions.
        futures = [pool.submit(sendGet, location) for location in location_map]
    # Leaving the with-block waits for every submitted request to finish.
    for location, future in zip(location_map, futures):
        error = future.exception()
        if error is not None:
            print(f"row {location[0]} failed: {error!r}")
    res_info = filter_data(info)
    write_excel(res_info)

posted @ 2021-11-05 12:04 力王7314 阅读(165) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

疯子7314

python3多线程真实案例

公告