宜出行人口热力图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python
# -*- coding:utf-8 -*-
 
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : qq登陆 , 滑动验证暂没处理
 
 
import os
import time
from selenium import webdriver
from yichuxing.settings import qq_list
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
 
class Login(object):
    # 登陆qq,获取cookie
    LoginURL = "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"
    def __init__(self, **kwargs):
        self.qq_num = kwargs.get("qq_num")
        self.qq_passwd = kwargs.get("qq_passwd")
 
    def after_smoothly_login(self, driver):
        cookie = {}
        for elem in driver.get_cookies():
            cookie[elem["name"]] = elem["value"]
            #driver.quit()
        return cookie
 
    def get_cookie_by_Chrome(self):
        try:
            chromedriver = "C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
            os.environ["webdriver.chrme.driver"] = chromedriver
            driver = webdriver.Chrome(chromedriver)
            #driver = webdriver.Chrome()
            driver.set_page_load_timeout(10)
            driver.get(self.LoginURL)
            driver.find_element_by_id("u").send_keys(self.qq_num)
            driver.find_element_by_id("p").send_keys(self.qq_passwd)
            driver.maximize_window()
            driver.find_element_by_id("go").click()
            time.sleep(6)
 
            if "宜出行" in driver.title:
                return self.after_smoothly_login(driver)
            elif "手机统一登录" in driver.title:
                return None
 
        except Exception:
            # driver.close()
            return None
 
    def get_cookie_by_PhantomJS(self):
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
            )
            driver  = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.set_page_load_timeout(10)
            driver.get(self.LoginURL)
            driver.find_element_by_id("u").clear()
            driver.find_element_by_id("u").send_keys(self.qq_num)
            driver.find_element_by_id("p").clear()
            driver.find_element_by_id("p").send_keys(self.qq_passwd)
            driver.find_element_by_id("go").click()
            time.sleep(6)
 
            if "宜出行" in driver.title:
                return self.after_smoothly_login(driver)
            elif "手机统一登录" in driver.title:
                return None
 
        except:
            # driver.close()
            return None
 
class CookieException(Exception):
    """Raised when the current QQ session cookie is no longer valid."""

    def __init__(self):
        super(CookieException, self).__init__()
 
"""
if __name__ == "__main__":
    #L = Login(qq_num="xxxx", qq_passwd="xxxx")
    #L.get_cookie_by_Chrome()
"""

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python
# -*- coding:utf-8 -*-
 
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : 宜出行热力图
 
 
import hashlib
import socket
import os
import json
import time
import random
import datetime
import requests
from yichuxing.settings import qq_list, s_fre, proxyMeta, is_proxy
from requests.exceptions import RequestException
#from utils.user_angents import agents
from data_utils.ali_oss import OSS2
from data_utils.time_convert import get_time_stamp
from yichuxing.yichuxing_utils.qqlogin import CookieException, Login
from data_utils.conmongodb import mongo_con_keepalive
from yichuxing.yichuxing_utils.create_grid import create_grid_by_center, get_gd_data
 
 
class Crawl():
    """Crawl the yichuxing heat-map endpoint city by city and upload raw JSON to OSS.

    QQ accounts are rotated through the `yichuxing_qq_status` mongo collection:
    each account is used for at most `s_fre` requests (or until its cookie dies),
    then flagged status=True and replaced via `get_cookie`.
    """

    db = mongo_con_keepalive()
    # Static headers mimicking a desktop Chrome session on the map page.
    header = {
              "Host": "c.easygo.qq.com",
              "Connection": "keep-alive",
              "Accept": "application/json",
              "Accept-Encoding": "gzip, deflate",
              "Accept-Language": "zh-CN,zh;q=0.9",
              "X-Requested-With": "XMLHttpRequest",
              "Referer": "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw",
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
            }
    start_url = "http://c.easygo.qq.com/api/egc/heatmapdata"
    cookie_data = None  # current session cookies ({name: value}) or None
    # Short waits when requests go through the rotating proxy, longer when direct.
    if is_proxy:
        wait_time = [0.16, 0.17]
    else:
        wait_time = [3, 3.1, 3.2, 3.3, 3.4]

    time_stamp = get_time_stamp()
    time_local = time.localtime(int(time_stamp))
    date = time.strftime("%Y-%m-%d", time_local)
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    fre_data = {"qq": None, "pwd": None}  # account currently logged in
    fre = 0                               # requests made with the current account
    pid = os.getpid()
    oss = OSS2()
    path_dir = None
    website = "population_yichuxing"
    qq_status = "yichuxing_qq_status"     # mongo collection tracking account usage

    def __init__(self):
        # Register this run's OSS path so downstream jobs can find the batch.
        self.path_dir = "population/yichuxing/{0}/".format(self.time_stamp)
        self.db.get_collection('pathdir_dict').insert_one(
            {'pathdir': self.path_dir, 'website': self.website, 'flag': False}
        )
        # First run of a new day: reset every account to unused.
        if self.db.get_collection(self.qq_status).find_one({"date": self.date}) is None:
            # delete_many replaces the deprecated Collection.remove(); the file
            # already uses the pymongo-3 CRUD API (insert_one/update_one).
            self.db.get_collection(self.qq_status).delete_many({})
            print("新的一天,新的开始 初始化所有账号")
            self.db.get_collection(self.qq_status).insert_many(
                [{"qq": i["qq"], "pwd": i["pwd"], "n": 0,
                  'status': False, "date": self.date} for i in qq_list]
                )
        super(Crawl, self).__init__()

    def kill(self):
        """Terminate this process (used when no usable QQ account remains)."""
        try:
            os.system("kill {0}".format(self.pid))
        except OSError as e:
            print("kill pid error: ", e)

    def _retire_current_account(self):
        """Flag the current QQ account as used up, then log in with a fresh one."""
        qq = self.fre_data.get("qq")
        self.db.get_collection(self.qq_status).update_one(
            {"qq": qq}, {"$set": {"status": True}}
        )
        self.get_cookie()

    def get_cookie(self):
        """Pick a random unused account, log in, and store its cookies.

        Kills the process if every account has been exhausted.
        """
        # Renamed from `qq_list` to avoid shadowing the imported settings list.
        unused = list(self.db.get_collection(self.qq_status).find(
            {"status": False}, {"_id": 0}
        ))
        if unused:
            self.fre = 0
            self.fre_data = random.choice(unused)
            L = Login(qq_num=self.fre_data.get("qq"),
                      qq_passwd=self.fre_data.get("pwd"))
            cookie_data = L.get_cookie_by_PhantomJS()
            #cookie_data = L.get_cookie_by_Chrome()
            if cookie_data:
                self.cookie_data = cookie_data
        else:
            print("没有账号了, 杀死自己")
            self.kill()

    def spyder_params(self, item):
        """Build the query-string parameters for one grid cell (bounding box)."""
        params = {"lng_min": item.get("lng_min"),
                  "lat_max": item.get("lat_max"),
                  "lng_max": item.get("lng_max"),
                  "lat_min": item.get("lat_min"),
                  "level": 16,
                  "city": "",
                  "lat": "undefined",
                  "lng": "undefined",
                  "_token": ""
                }
        return params

    def spyder(self, params, retries=3):
        """Fetch one grid cell; return the parsed JSON dict or None.

        Raises CookieException when the response suggests the session is dead.
        `retries` (new, backward-compatible) bounds network-level retries —
        the original recursed on RequestException without a limit AND without
        returning the recursive result, so it always yielded None.
        """
        time.sleep(random.choice(self.wait_time))
        try:
            if self.fre >= s_fre:
                print("账号: {0}, 抓取次数达到上限, 更换qq账号".format(self.fre_data.get("qq")))
                self._retire_current_account()
            if is_proxy:
                r = requests.get(self.start_url, headers=self.header,
                                 cookies=self.cookie_data, params=params,
                                 proxies=self.proxies)
            else:
                r = requests.get(self.start_url, headers=self.header,
                                 cookies=self.cookie_data, params=params)
            if r.status_code == 200:
                self.fre = self.fre + 1
                try:
                    return r.json()
                except ValueError:
                    # Non-JSON body usually means we were bounced to the login page.
                    raise CookieException
            else:
                raise CookieException
        except RequestException:
            if retries <= 0:
                return None
            # BUG FIX: propagate the retried result instead of dropping it.
            return self.spyder(params, retries - 1)

    def get(self, params):
        """Fetch one cell, transparently re-logging-in once if the cookie died."""
        data_json = None
        try:
            data_json = self.spyder(params)
        except CookieException:
            print("账号: {0}, cookie 失效,获取新账号登陆, 并抓取".format(
                  self.fre_data.get("qq")))
            self._retire_current_account()
            data_json = self.spyder(params)
        return data_json

    def create_filename(self, url):
        """Build a unique, host-tagged filename for one crawled URL."""
        fname = '%s_%s_%s_%s.json' % (socket.gethostname(),
                                          url.split('//')[-1].split('/')[0].replace('.', '-'),
                                          hashlib.md5(url.encode()).hexdigest(),
                                          str(time.time()).split('.')[0])
        return fname

    def start(self):
        """Main loop: for each city, generate grid cells and fetch each one."""
        self.get_cookie()
        for i in get_gd_data():
            print("begin: ", i)
            latlng_dict = create_grid_by_center(i)
            print("将要抓取的次数: ", len(latlng_dict))
            for o in latlng_dict:
                print("抓取范围: ", o)
                params = self.spyder_params(o)
                data_json = self.get(params)
                file_ = "{0}{1}".format(
                    self.path_dir,
                    self.create_filename("{0}{1}".format(self.start_url, params)))
                if data_json is None:
                    # BUG FIX: network retries exhausted — the original crashed
                    # on data_json.get(...) here. Skip this cell instead.
                    continue
                if data_json.get("code") != 0:
                    print("code: {0}, 获取新的账号,再一次抓取".format(data_json.get("code")))
                    self._retire_current_account()
                    data_json = self.get(params)

                # Upload only non-empty, successful payloads.
                if data_json and data_json.get("code") == 0 and len(data_json.get("data")) > 0:
                    data_json["cityname"] = o["cityname"]
                    self.oss.uploadfiledata(file_, json.dumps(data_json))
        # NOTE(review): Cursor.count() is deprecated since pymongo 3.7; switch to
        # count_documents({"status": False}) once the driver version is confirmed.
        co = self.db.get_collection(self.qq_status).find({"status": False}).count()
        print("剩余可用qq count: ", co)
 
 
 
if __name__ == "__main__":
    c = Crawl()
    c.start()

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Maximum number of requests per QQ account before rotating to the next one.
s_fre = 70
# Edge length of each crawl grid cell in degrees (0.04° ≈ 4 km); also the pan step.
lat_offset = 0.04
lng_offset = 0.04
# Whether to route requests through the proxy. True: on, False: off.
is_proxy = True
# Rings to crawl around each city center, keyed by city class (0 = largest).
# BUG FIX: the dict literal was missing its closing brace (syntax error).
grade = {0: 6, 1: 6, 2: 5, 3: 4, 4: 4, 5: 4}
# Proxy endpoint (credentials redacted).
proxyMeta = "http://xxx:xxx@proxy.abuyun.com:9020"
# QQ accounts used for login (placeholders).
qq_list = [
    {"qq": "xxx", "pwd": "xxx"},
]

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# -*- coding:utf-8 -*-
 
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 16:28:43
# Desc   :
 
import json
import numpy as np
from yichuxing.settings import lat_offset, lng_offset, grade
from data_utils.conmongodb import mongo_con_keepalive
from data_utils.location_convert import bd09togcj02
 
 
# Shared mongo connection for this module.
db = mongo_con_keepalive()

def get_gd_data():
    """Return class-3 cities outside 广东省, centers converted to Tencent coords.

    Reads `params_citys` (exists_city only, sorted by class), swaps the Baidu
    center_lng/center_lat fields for GCJ-02 "lng"/"lat" keys, and returns the
    resulting list of city dicts.
    """
    cursor = db.get_collection("params_citys").find(
        {"exists_city": True}, {"_id": 0}
    ).sort("class")
    location = []
    for record in cursor:
        # Guard clauses instead of nested ifs: skip Guangdong and non-class-3.
        if record.get("province") == "广东省" or record.get("class") != 3:
            continue
        bd_lng = record.pop("center_lng")
        bd_lat = record.pop("center_lat")
        # Convert BD-09 (Baidu) to GCJ-02, which Tencent maps use.
        record["lng"], record["lat"] = bd09togcj02(bd_lng, bd_lat)
        location.append(record)
    return location
 
def create_grid_by_center(location, n=None):
    """Build the lat/lng bounding boxes tiling a square around a city center.

    The square extends n cells in each direction from the center, each cell
    being lat_offset x lng_offset degrees (~4 km x 4 km with the defaults),
    i.e. roughly (2n * 4 km)^2 of coverage.

    location: dict with "lng", "lat", "class" and "cityname" keys.
    n: ring-count override; defaults to grade[city class] from settings.
    Returns a list of dicts with lng_min/lat_min/lng_max/lat_max/cityname.
    """
    lng, lat = location["lng"], location["lat"]
    city_class, cityname = location["class"], location["cityname"]
    if n is None:
        n = grade.get(city_class)
    n = float(n)
    bottom_lat, top_lat = lat - lat_offset * n, lat + lat_offset * n
    left_lng, right_lng = lng - lng_offset * n, lng + lng_offset * n
    lat_range = np.arange(bottom_lat, top_lat, lat_offset)
    # PERF FIX: the longitude steps are identical for every row, so compute
    # them once instead of re-running np.arange inside the latitude loop.
    lng_range = np.arange(left_lng, right_lng, lng_offset)

    return [{"lng_min": lng_,
             "lat_max": lat_ + lat_offset,
             "lng_max": lng_ + lng_offset,
             "lat_min": lat_,
             "cityname": cityname}
            for lat_ in lat_range
            for lng_ in lng_range]

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
        "cityname" : "北京市",
    "province" : "北京市",
    "citycode" : "131",
    "center_lat" : 39.904211,   # 百度坐标
    "center_lng" : 116.407394,
    "class" : 0,
    "ftx_code" : "bj",
    "meituan_code" : "beijing",
    "meituan_id" : 1,
    "dianping_id" : 2,
    "dianping_code" : "beijing",
    "gd_adcode" : "110000",
    "gd_citycode" : "010",
    "shunqi_code" : "beijing",
    "xiecheng_code" : "BJS",
    "xiecheng_status" : true,
    "zhilian_code" : "beijing",
    "baidu_id" : 131,
    "exists_city" : true  {
    "scale" : "20,50,100,200",
    "lng_a" : 116.550125,
    "lat_a" : 39.843624999999996,
    "lng_b" : 116.55662935278988,
    "lat_b" : 39.84962393215385,
    "lng_g" : 116.54429316621265,
    "lat_g" : 39.842540318493164,
    "gps_s" : "a",
    "count" : 800,
    "grid_y" : 159374,
    "grid_x" : 466200,
    "max_data" : 32000,
    "crawl_time" : "2018-05-29 10:03:37",
    "city" : "北京市",
}
复制代码
复制代码

经纬度解密代码

http://c.easygo.qq.com/eg_toc/js/map-d76c21c16d.bundle.js 

            lng = 1e-6 * (250.0 * d['grid_x'] + 125.0)

            lat = 1e-6 * (250.0 * d['grid_y'] + 125.0)

  

 教程仅供技术研究学习使用,若有侵权,联系本人删除

本文作者:🦉. 城南

本文链接:https://www.cnblogs.com/dockers/p/9238535.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。

posted @   🐳.城南  阅读(10188)  评论(7编辑  收藏  举报
点击右上角即可分享
微信分享提示
评论
收藏
关注
推荐
深色
回顶
收起