网站更新内容:请访问: https://bigdata.ministep.cn/

多线程调用

# -*- coding: utf-8 -*-
"""
Created on Thu Nov 24 10:10:13 2016

@author: Acer
"""

# -*- coding: utf-8 -*-
import sys, urllib, urllib2, json ,requests,os,uniout
import random
import _uniout
import pandas as pd
from pandas import DataFrame
import xlrd,openpyxl

##读取文件
import pandas as pd

##读取excel文档地址信息
def xlsx_read(path=path,sheet_name=u'Sheet1'):
    xlsx = pd.ExcelFile(path)
    sheet = xlsx.parse(sheet_name)
    return(sheet)
#sheet = xlsx_read()
##选取excel文档的需要地址信息
def sheet_values(sheet=sheet,address=u'地址'):
    sheet.columns=sheet.columns.str.strip()
    sheet.columns
    #sheet[u'地址']=sheet[u'收货地址'].str.replace(" ","")
    sheet[address]=sheet[u'收货地址'].str.replace(" ","")
    del sheet[u'收货地址']
    #del sheet[u'收货人姓名.1']
    location_data = sheet[[u'地址',u'收货人姓名']]
    return(location_data)
#location_data =  sheet_values(sheet)


##地址转换成url地址
def url_add(location):
    import random 
    AK = random.sample(api_key, 1)
    key = ",".join(AK)
    url = 'http://restapi.amap.com/v3/geocode/geo?key=' + key + '&address=' + location
    return(url)
#location_data['url']=location_data[u'地址'].apply(url_add)

##调用API,返回信息,'URLError: <urlopen error timed out> All times is failed ' 再次调用2次
def requests_get(url,n=2):
    import requests
    global Max_Num
    Max_Num = n
    for i in range(Max_Num):
        try:
            return(requests.get(url))
        except:
             if i < Max_Num - 1:
                 continue
             else:
                 pass

##results=pool.map(requests_get,sheet['url'])
##多线程调用
def ThreadPool(list_url,n=4):
    from multiprocessing.dummy import Pool as ThreadPool
    pool = ThreadPool(n)
    results=pool.map(requests_get,list_url)
    return results
#函数实例化
#results=ThreadPool(location_data['url'])

def url_data(results):
    import json
    import pandas as pd
    n_ok=0
    n_false=0
    location_items = ['formatted_address','location','province','city','district','street','number','lng','lat']
    temp = pd.DataFrame([],columns=location_items)
    for resq in results:
        try:
            data=json.loads(resq.text)
            n_ok=n_ok+1
            print str(n_ok)+'正在解析ing...................'
            '''
            上面有这里就只是显示下
            location_items = ['formatted_address','location','province','city','district','street','number']
            '''
            location_data = pd.DataFrame(data[u'geocodes'],columns=location_items)
            location_data['lng'] = location_data['location'][0].split(',')[0]  ##经度
            location_data['lat'] = location_data['location'][0].split(',')[1]  ##维度
            temp = pd.concat([temp,location_data])
            location_address_data = temp

        except:
            n_false=n_false+1
            print str(n_false)+'解析错误'
            pass
    return(location_address_data)
##测试用例
#data = url_data(results)


api_key=['8191d1b4718e17d8b5b2e2b9a9f31bb0','09b7d72a3dc2bd30e86b23dc11b382fc','efe64265959124ade43857e06322577b','00210b231b1895ddfc190142ccbfda59']


path = 'C:\\Users\\Acer\\Desktop\\temp_location_1123.xlsx'
path = 'C:\\Users\\Acer\\Desktop\\orders_location_1111.xlsx'
path = unicode(path,encoding='utf-8')


sheet = xlsx_read(path=path,sheet_name=u'Sheet1')
location_data =  sheet_values(sheet=sheet,address=u'地址')
location_data['url']=location_data[u'地址'].apply(url_add)
results=ThreadPool(location_data['url'])
data_values = url_data(results)
data_values.head()
sheet
help(sheet.to_excel)
posted @ 2021-10-05 11:26  ministep88  阅读(64)  评论(0编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/