宜出行人口热力图
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : QQ login; the slider captcha is not handled yet

import os
import time
from selenium import webdriver
from yichuxing.settings import qq_list
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class Login(object):
    """Log in to QQ through a browser and collect the session cookies.

    Keyword Args:
        qq_num: account number typed into the login form field with id "u".
        qq_passwd: password typed into the login form field with id "p".
    """

    LoginURL = "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"

    def __init__(self, **kwargs):
        self.qq_num = kwargs.get("qq_num")
        self.qq_passwd = kwargs.get("qq_passwd")

    def after_smoothly_login(self, driver):
        """Return the driver's cookies as a plain ``{name: value}`` dict."""
        return {elem["name"]: elem["value"] for elem in driver.get_cookies()}

    def _submit_and_collect(self, driver, clear_fields=False, maximize=False):
        """Fill in and submit the login form, then return cookies or None.

        Args:
            driver: a started WebDriver instance.
            clear_fields: clear each input before typing into it.
            maximize: maximize the window before clicking the submit button.

        Returns:
            The cookie dict when the resulting page title contains "宜出行";
            None for any other title (including the "手机统一登录" page).
        """
        driver.set_page_load_timeout(10)
        driver.get(self.LoginURL)
        for element_id, value in (("u", self.qq_num), ("p", self.qq_passwd)):
            if clear_fields:
                driver.find_element_by_id(element_id).clear()
            driver.find_element_by_id(element_id).send_keys(value)
        if maximize:
            driver.maximize_window()
        driver.find_element_by_id("go").click()
        # Fixed wait for the post-login redirect to finish.
        time.sleep(6)
        if "宜出行" in driver.title:
            return self.after_smoothly_login(driver)
        return None

    @staticmethod
    def _quit(driver):
        """Shut the browser down; cleanup failures must not mask the result."""
        if driver is None:
            return
        try:
            driver.quit()
        except Exception:
            # Best-effort cleanup: the cookies (or None) are already decided.
            pass

    def get_cookie_by_Chrome(self):
        """Log in with Chrome and return the cookie dict, or None on failure."""
        # Raw string: the original literal relied on invalid escape sequences.
        chromedriver = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
        # Fixed key: the original wrote "webdriver.chrme.driver".
        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = None
        try:
            driver = webdriver.Chrome(chromedriver)
            return self._submit_and_collect(driver, maximize=True)
        except Exception:
            # Any browser/login failure is reported to the caller as None.
            return None
        finally:
            # Always release the browser process; it used to be leaked.
            self._quit(driver)

    def get_cookie_by_PhantomJS(self):
        """Log in with PhantomJS and return the cookie dict, or None on failure."""
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
        )
        driver = None
        try:
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            return self._submit_and_collect(driver, clear_fields=True)
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
            return None
        finally:
            self._quit(driver)


class CookieException(Exception):
    """Raised when the current cookie is no longer valid."""

    def __init__(self):
        Exception.__init__(self)

"""
if __name__ == "__main__":
    #L = Login(qq_num="xxxx", qq_passwd="xxxx")
    #L.get_cookie_by_Chrome()
"""
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : Yichuxing (easygo) heat map crawler

import hashlib
import socket
import os
import json
import time
import random
import datetime
import requests
from yichuxing.settings import qq_list, s_fre, proxyMeta, is_proxy
from requests.exceptions import RequestException
#from utils.user_angents import agents
from data_utils.ali_oss import OSS2
from data_utils.time_convert import get_time_stamp
from yichuxing.yichuxing_utils.qqlogin import CookieException, Login
from data_utils.conmongodb import mongo_con_keepalive
from yichuxing.yichuxing_utils.create_grid import create_grid_by_center, get_gd_data


class Crawl():
    """Crawl heat-map data cell by cell, rotating QQ accounts as they wear out.

    Account state lives in the ``yichuxing_qq_status`` collection; an account
    whose ``status`` is True is considered used up and is not picked again.
    """

    db = mongo_con_keepalive()
    header = {
        "Host": "c.easygo.qq.com",
        "Connection": "keep-alive",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    }
    start_url = "http://c.easygo.qq.com/api/egc/heatmapdata"
    cookie_data = None
    # Per-request delay candidates in seconds; shorter when a proxy is used.
    if is_proxy:
        wait_time = [0.16, 0.17]
    else:
        wait_time = [3, 3.1, 3.2, 3.3, 3.4]
    time_stamp = get_time_stamp()
    time_local = time.localtime(int(time_stamp))
    date = time.strftime("%Y-%m-%d", time_local)
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    fre_data = {"qq": None, "pwd": None}  # account currently in use
    fre = 0  # successful requests made with the current account
    pid = os.getpid()
    oss = OSS2()
    path_dir = None
    website = "population_yichuxing"
    qq_status = "yichuxing_qq_status"

    def __init__(self):
        """Register this run's output directory and reset accounts once a day."""
        self.path_dir = "population/yichuxing/{0}/".format(self.time_stamp)
        self.db.get_collection('pathdir_dict').insert_one(
            {'pathdir': self.path_dir, 'website': self.website, 'flag': False}
        )
        status_coll = self.db.get_collection(self.qq_status)
        if status_coll.find_one({"date": self.date}) is None:
            # No record for today yet: wipe yesterday's state and re-seed.
            # delete_many replaces the deprecated Collection.remove.
            status_coll.delete_many({})
            print("新的一天,新的开始 初始化所有账号")
            status_coll.insert_many(
                [{"qq": i["qq"], "pwd": i["pwd"], "n": 0, 'status': False, "date": self.date}
                 for i in qq_list]
            )
        super(Crawl, self).__init__()

    def kill(self):
        """Terminate the current process via the shell ``kill`` command."""
        try:
            os.system("kill {0}".format(self.pid))
        except OSError as e:
            print("kill pid error: ", e)

    def get_cookie(self):
        """Pick a random unused account, log in, and store its cookies.

        When no unused account is left the process kills itself. If the login
        returns nothing, the previously stored cookies are left untouched.
        """
        available = list(
            self.db.get_collection(self.qq_status).find({"status": False}, {"_id": 0})
        )
        if not available:
            print("没有账号了, 杀死自己")
            self.kill()
            return
        self.fre = 0
        self.fre_data = random.choice(available)
        L = Login(qq_num=self.fre_data.get("qq"), qq_passwd=self.fre_data.get("pwd"))
        cookie_data = L.get_cookie_by_PhantomJS()
        #cookie_data = L.get_cookie_by_Chrome()
        if cookie_data:
            self.cookie_data = cookie_data

    def _retire_current_account(self):
        """Mark the active account as used up and log in with another one."""
        qq = self.fre_data.get("qq")
        self.db.get_collection(self.qq_status).update_one(
            {"qq": qq}, {"$set": {"status": True}}
        )
        self.get_cookie()

    def spyder_params(self, item):
        """Build the query parameters for one grid cell described by ``item``."""
        params = {
            "lng_min": item.get("lng_min"),
            "lat_max": item.get("lat_max"),
            "lng_max": item.get("lng_max"),
            "lat_min": item.get("lat_min"),
            "level": 16,
            "city": "",
            "lat": "undefined",
            "lng": "undefined",
            "_token": ""
        }
        return params

    def spyder(self, params):
        """Fetch one cell and return the decoded JSON body.

        Network errors are retried until a response arrives. The original
        retried by recursing without ``return``, so the retried result was
        lost and callers received None; a loop also avoids unbounded recursion.

        Raises:
            CookieException: on a non-200 status or a body that is not JSON.
        """
        while True:
            time.sleep(random.choice(self.wait_time))
            try:
                if self.fre >= s_fre:
                    print("账号: {0}, 抓取次数达到上限, 更换qq账号".format(self.fre_data.get("qq")))
                    self._retire_current_account()
                request_kwargs = {
                    "headers": self.header,
                    "cookies": self.cookie_data,
                    "params": params,
                }
                if is_proxy:
                    request_kwargs["proxies"] = self.proxies
                r = requests.get(self.start_url, **request_kwargs)
            except RequestException as exc:
                print("request error, retrying: ", exc)
                continue
            if r.status_code != 200:
                raise CookieException
            self.fre = self.fre + 1
            try:
                return r.json()
            except ValueError:
                # JSON decode errors are ValueError subclasses.
                raise CookieException

    def get(self, params):
        """Fetch one cell, switching account once if the cookie has expired."""
        try:
            return self.spyder(params)
        except CookieException:
            print("账号: {0}, cookie 失效,获取新账号登陆, 并抓取".format(self.fre_data.get("qq")))
            self._retire_current_account()
            return self.spyder(params)

    def create_filename(self, url):
        """Build ``<hostname>_<url host>_<md5 of url>_<unix seconds>.json``."""
        fname = '%s_%s_%s_%s.json' % (
            socket.gethostname(),
            url.split('//')[-1].split('/')[0].replace('.', '-'),
            hashlib.md5(url.encode()).hexdigest(),
            str(time.time()).split('.')[0],
        )
        return fname

    def start(self):
        """Crawl every grid cell of every selected city and upload the results."""
        self.get_cookie()
        for i in get_gd_data():
            print("begin: ", i)
            latlng_dict = create_grid_by_center(i)
            print("将要抓取的次数: ", len(latlng_dict))
            for o in latlng_dict:
                print("抓取范围: ", o)
                params = self.spyder_params(o)
                data_json = self.get(params)
                file_ = "{0}{1}".format(
                    self.path_dir,
                    self.create_filename("{0}{1}".format(self.start_url, params)),
                )
                if data_json.get("code") != 0:
                    print("code: {0}, 获取新的账号,再一次抓取".format(data_json.get("code")))
                    self._retire_current_account()
                    data_json = self.get(params)
                # Truthiness check also covers a missing "data" key, which
                # previously crashed on len(None).
                if data_json.get("code") == 0 and data_json.get("data"):
                    data_json["cityname"] = o["cityname"]
                    #print(data_json)
                    self.oss.uploadfiledata(file_, json.dumps(data_json))
                # NOTE(review): Cursor.count() is deprecated in newer PyMongo;
                # switch to count_documents once the installed version is confirmed.
                co = self.db.get_collection(self.qq_status).find({"status": False}).count()
                print("剩余可用qq count: ", co)


if __name__ == "__main__":
    c = Crawl()
    c.start()
# Number of fetches allowed per account
s_fre = 70

# Side length of each crawled square, also used as the shift between squares
# (original note: 0.04 -> 4 km)
lat_offset = 0.04
lng_offset = 0.04

# Whether to use the proxy. True: enabled, False: disabled
is_proxy = True

# City class -> number of rings to crawl
grade = {
    0: 6,
    1: 6,
    2: 5,
    3: 4,
    4: 4,
    5: 4,
}

# Proxy address
proxyMeta = "http://xxx:xxx@proxy.abuyun.com:9020"

# QQ accounts
qq_list = [
    {"qq": "xxx", "pwd": "xxx"},
]
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 16:28:43
# Desc   :

import json
import numpy as np
from yichuxing.settings import lat_offset, lng_offset, grade
from data_utils.conmongodb import mongo_con_keepalive
from data_utils.location_convert import bd09togcj02


db = mongo_con_keepalive()


def get_gd_data():
    """Return the cities to crawl, with their centers converted by bd09togcj02.

    Reads ``params_citys`` documents where ``exists_city`` is True, sorted by
    ``class``, and keeps those whose province is not "广东省" and whose class
    is 3. In each kept document ``center_lng`` / ``center_lat`` are replaced
    by ``lng`` / ``lat``.
    """
    city_list = db.get_collection("params_citys").find(
        {"exists_city": True}, {"_id": 0}
    ).sort("class")
    location = []
    for city in city_list:
        if city.get("province") == "广东省" or city.get("class") != 3:
            continue
        center_lng = city.get("center_lng")
        center_lat = city.get("center_lat")
        del city["center_lng"]
        del city["center_lat"]
        # Convert to the Tencent coordinate system.
        lng, lat = bd09togcj02(center_lng, center_lat)
        city["lng"] = lng
        city["lat"] = lat
        location.append(city)
    return location


def create_grid_by_center(location, n=None):
    """Split the square around a city center into crawlable cells.

    The square reaches ``n`` rings out from the center in every direction.
    Per the original note, with 4 km cells and 5 rings that is
    4*4*(5*2)^2 = 1600 square km; the cell size is set in settings.

    Args:
        location: dict with "lng", "lat", "class" and "cityname".
        n: number of rings; defaults to ``grade[location["class"]]``.

    Returns:
        A list of dicts with "lng_min", "lat_max", "lng_max", "lat_min" and
        "cityname", ordered row by row from the bottom-left cell.
    """
    lng, lat = location["lng"], location["lat"]
    city_class, cityname = location["class"], location["cityname"]
    if n is None:
        n = grade.get(city_class)
    n = float(n)
    bottom_lat = lat - lat_offset * n
    left_lng = lng - lng_offset * n
    # np.arange(start, stop, float_step) can gain or lose a final element
    # through rounding, so derive the cell count from n and step by integers.
    # The small epsilon keeps an integral 2*n from being rounded up by one.
    cells_per_side = max(int(np.ceil(2 * n - 1e-9)), 0)
    steps = np.arange(cells_per_side)
    lat_range = bottom_lat + lat_offset * steps
    # Loop-invariant, so it is built once instead of once per row.
    lng_range = left_lng + lng_offset * steps
    return [
        {
            "lng_min": lng_,
            "lat_max": lat_ + lat_offset,
            "lng_max": lng_ + lng_offset,
            "lat_min": lat_,
            "cityname": cityname,
        }
        for lat_ in lat_range
        for lng_ in lng_range
    ]
{
    "cityname": "北京市",
    "province": "北京市",
    "citycode": "131",
    "center_lat": 39.904211,  # 百度坐标
    "center_lng": 116.407394,
    "class": 0,
    "ftx_code": "bj",
    "meituan_code": "beijing",
    "meituan_id": 1,
    "dianping_id": 2,
    "dianping_code": "beijing",
    "gd_adcode": "110000",
    "gd_citycode": "010",
    "shunqi_code": "beijing",
    "xiecheng_code": "BJS",
    "xiecheng_status": true,
    "zhilian_code": "beijing",
    "baidu_id": 131,
    "exists_city": true
}

{
    "scale": "20,50,100,200",
    "lng_a": 116.550125,
    "lat_a": 39.843624999999996,
    "lng_b": 116.55662935278988,
    "lat_b": 39.84962393215385,
    "lng_g": 116.54429316621265,
    "lat_g": 39.842540318493164,
    "gps_s": "a",
    "count": 800,
    "grid_y": 159374,
    "grid_x": 466200,
    "max_data": 32000,
    "crawl_time": "2018-05-29 10:03:37",
    "city": "北京市",
}
经纬度解密代码
http://c.easygo.qq.com/eg_toc/js/map-d76c21c16d.bundle.js
# Convert a record's integer grid indices into degrees: one grid unit is
# 250e-6 degrees, and the extra 125e-6 is half a unit (presumably the cell centre).
# `d` is assumed to be one heatmap record with grid_x / grid_y keys — TODO confirm.
# Check against the sample record above: grid_x 466200 -> 116.550125 (its lng_a).
lng = 1e-6 * (250.0 * d['grid_x'] + 125.0)
lat = 1e-6 * (250.0 * d['grid_y'] + 125.0)
教程仅供技术研究学习使用,若有侵权,联系本人删除
本文作者:🦉. 城南
本文链接:https://www.cnblogs.com/dockers/p/9238535.html
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步