ORDER BY today_used ASC' % (MAX_USED_TIMES)

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
python D:\pymine\clean\spider_map\get_bd_uid_rest_b.py
 
python D:\pymine\clean\spider_map\get_bd_uid_rest.py
 
python D:\pymine\clean\spider_map\get_bd_uid_28_other20_b.py
  
#MAX_USED_TIMES = 1900
python D:\pymine\clean\spider_map\get_bd_uid_28_other20.py
 
python D:\pymine\clean\spider_map\get_bd_uid.py
 
python D:\pymine\clean\spider_map\get_bd_uid.py
 
python D:\pymine\clean\spider_map\get_bd_uid.py

  

 

 

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
 
# Directory containing this script; its parent is added to sys.path.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: upper bound on a key's today_used counter (see db_get_one_effective).
# overrun_str: Chinese for "daily quota exceeded, access restricted"; not referenced
#   in the visible code -- presumably the quota error text returned by the API.
# DB_KEY_EXHAUST: sentinel returned when no usable key is left in the database.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file that tracks API-key usage, located next to this script
# (paths are built with a backslash separator, i.e. Windows style).
db = 'py_bdspider_status.db'
db = '%s\\%s' % (curPath, db)

# Build the city list from the text file (its name means "provincial capitals"):
# only every third line is used, with spaces and the newline stripped and the
# suffix '市' ("city") appended.
pcity_list = []
pcity_file = '%s\\%s' % (curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    c_ = 0
    for i in pf:
        c_ += 1
        if c_ == 3:
            c_ = 0
            pcity_list.append(i.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# NOTE: this is an alias, not a copy -- both names refer to the same list object,
# so the appends below also grow target_city_list_big.
target_city_list_pass = target_city_list_big

# Add every city from the file that is not already present. Because of the alias
# above, the membership test also sees cities appended earlier in this loop.
for i in pcity_list:
    if i not in target_city_list_big:
        target_city_list_pass.append(i)
 
 
# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()
 
 
# db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]
 
 
 
def db_get_one_effective():
    """Return the least-used API key that is still within its usage limit.

    Reads table ``baidu_map_key_used`` in the SQLite file ``db`` and picks the
    row with the smallest ``today_used`` among those not above MAX_USED_TIMES.

    Returns:
        The key from that row, or the DB_KEY_EXHAUST sentinel when no row
        qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        row = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # The original `conn.close` (no call, placed after both returns) never
        # ran, so each lookup left its connection open.
        conn.close()
    return DB_KEY_EXHAUST if row is None else row[0]
 
 
def db_update_one_today_used(key):
    """Increment the usage counter of *key* and stamp the update time.

    Args:
        key: value of the ``key`` column in table ``baidu_map_key_used``.
    """
    stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters replace the string-built SQL: the key can no longer
        # break the statement, and the timestamp is stored as text, so a
        # leading zero in the two-digit year is kept.
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (stamp, key),
        )
        conn.commit()
    finally:
        # Close the connection even when the statement fails.
        conn.close()
 
 
# Sub-directories (next to this script) for successful responses and for
# failure markers respectively.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Names (without the '.txt' part) of requests that already have a result file.
requested_file_list = []
# Both directory strings end with a backslash so a file name can be appended directly.
requested_file_dir_str, requested_file_dir_exception_str = '%s\\%s\\' % (curPath, dir_), '%s\\%s\\' % (
    curPath, dir_exception)
# Snapshot of the result directory taken once, at import time.
requested_file_dir = os.listdir(requested_file_dir_str)
 
 
def chk_if_requested_file():
    """Record the stem of every listed result file in requested_file_list.

    The stem is the part of the file name before the first '.txt'. Entries
    already present are not added again. The file names come from the
    module-level ``requested_file_dir`` listing.
    """
    stems = (entry.split('.txt')[0] for entry in requested_file_dir)
    for stem in stems:
        if stem in requested_file_list:
            continue
        requested_file_list.append(stem)
 
 
# Seed requested_file_list from the result files found at import time.
chk_if_requested_file()
 
 
def write_requested_res(request_name, str_, type_='.txt'):
    """Write a response body to the result directory and log the success.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)
    print('ok', threading.get_ident(), request_name)
 
 
def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Write a failure marker for *request_name* to the exception directory.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)
 
 
# Pending work: request_dic[city][district] is the list of request names still to fetch.
request_dic = {}

# Cities found in the workbook that are not in target_city_list_pass;
# filled by gen_request_dic_list().
target_city_list = []
 
 
def gen_request_dic_list():
    """Load the task workbook and queue every request that still needs fetching.

    Reads the first sheet of the .xlsx file next to this script, skipping
    row 0 (presumably a header row). Each remaining row must have exactly nine
    cells; the 4th, 6th and 7th are the request name, city and district.

    Side effects:
        * appends each city not listed in target_city_list_pass to
          target_city_list (once);
        * fills request_dic[city][district] with the request names whose
          '<city><district><name>' key is not yet in requested_file_list.

    NOTE(review): xlrd 2.0 and later only read .xls files, so this call needs
    xlrd < 2.0 -- confirm the installed version.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    workbook_path = '%s\\%s%s' % (curPath, fname_source, '.xlsx')
    table = xlrd.open_workbook(workbook_path).sheets()[0]
    # Build the lookup set once instead of scanning the list for every row.
    already_requested = set(requested_file_list)
    for row_idx in range(1, table.nrows):
        _, _, _, request_name, _, city, district, _, _ = table.row_values(row_idx)
        if city in target_city_list_pass:
            continue
        if city not in target_city_list:
            target_city_list.append(city)
        if '%s%s%s' % (city, district, request_name) in already_requested:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)
 
 
# Build the work queue at import time.
gen_request_dic_list()

fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# URL template for the Baidu place-suggestion endpoint. The R-QUERY, R-CITY and
# R-AK placeholders are replaced with the request name, the city and an API key,
# e.g. ...suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
 
 
def fun_(city, timeout=30):
    """Fetch place suggestions for every pending request name of *city*.

    For each name queued under request_dic[city], skip it when it is already
    recorded in requested_file_list; otherwise take an API key from the key
    table, issue the HTTP request, increment the key's usage counter and write
    the response text to the result directory. Any exception raised while
    requesting, updating the counter or writing the result is recorded as a
    marker file in the exception directory.

    Args:
        city: key into request_dic; also substituted into the URL as region.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block the worker thread indefinitely.
    """
    # .get() keeps a city with no pending work from raising KeyError in the thread.
    for district, names in request_dic.get(city, {}).items():
        for request_name in names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # Stop the whole worker: `break` only left the inner loop, so
                # every remaining district queried the database again.
                return
            url_ = base_url.replace('R-QUERY', request_name).replace('R-CITY', city).replace('R-AK', ak)
            try:
                bd_res_json_str = requests.get(url_, timeout=timeout).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception:
                # Best effort: leave a marker file (text means "Baidu request
                # failed") and carry on with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str)
 
 
class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` as one positional argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is passed through as-is (not unpacked).
        target, payload = self.func, self.args
        target(payload)
 
 
# Number of entries of target_city_list that main() iterates over.
thread_sum = len(target_city_list)
 
 
def main():
    """Start one worker thread per city with pending work and wait for all of them."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # A city can be listed without any pending request; starting a worker
        # for it would only raise KeyError inside fun_.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # `t.setDaemon = False` replaced the method with a bool instead of
        # setting the flag; assign the attribute (must happen before start()).
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()
 
 
# Start the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()

  

 

 

 

复制代码
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Directory containing this script; its parent is added to sys.path.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: upper bound on a key's today_used counter (see db_get_one_effective).
# overrun_str: Chinese for "daily quota exceeded, access restricted"; not referenced
#   in the visible code -- presumably the quota error text returned by the API.
# DB_KEY_EXHAUST: sentinel returned when no usable key is left in the database.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file that tracks API-key usage, located next to this script
# (paths are built with a backslash separator, i.e. Windows style).
db = 'py_bdspider_status.db'
db = '%s\\%s' % (curPath, db)

# Build the city list from the text file (its name means "provincial capitals"):
# only every third line is used, with spaces and the newline stripped.
# The trailing `+ ''` appends nothing, so names are kept without a suffix.
pcity_list = []
pcity_file = '%s\\%s' % (curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    c_ = 0
    for i in pf:
        c_ += 1
        if c_ == 3:
            c_ = 0
            pcity_list.append(i.replace(' ', '').replace('\n', '') + '')
pcity_sorted_list = sorted(pcity_list)

# Cities excluded from this run.
target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
target_city_list = []

# Target cities are the file's cities that are not in target_city_list_big.
for i in pcity_list:
    if i not in target_city_list_big:
        target_city_list.append(i)

# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()


# db_init_key_table()
# Keep only the cities from index 11 onward; the first 11 are not processed by this script.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return an API key that is still within its usage limit.

    Reads table ``baidu_map_key_used`` in the SQLite file ``db`` and takes the
    first row whose ``today_used`` is not above MAX_USED_TIMES. The query has
    no ORDER BY, so which qualifying row comes first is up to SQLite.

    Returns:
        The key from that row, or the DB_KEY_EXHAUST sentinel when no row
        qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        row = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=? ',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # The original `conn.close` (no call, placed after both returns) never
        # ran, so each lookup left its connection open.
        conn.close()
    return DB_KEY_EXHAUST if row is None else row[0]


def db_update_one_today_used(key):
    """Increment the usage counter of *key* and stamp the update time.

    Args:
        key: value of the ``key`` column in table ``baidu_map_key_used``.
    """
    stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters replace the string-built SQL: the key can no longer
        # break the statement, and the timestamp is stored as text, so a
        # leading zero in the two-digit year is kept.
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (stamp, key),
        )
        conn.commit()
    finally:
        # Close the connection even when the statement fails.
        conn.close()


# Sub-directories (next to this script) for successful responses and for
# failure markers respectively.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Names (without the '.txt' part) of requests that already have a result file.
requested_file_list = []
# Both directory strings end with a backslash so a file name can be appended directly.
requested_file_dir_str, requested_file_dir_exception_str = '%s\\%s\\' % (curPath, dir_), '%s\\%s\\' % (
    curPath, dir_exception)
# Snapshot of the result directory taken once, at import time.
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Record the stem of every listed result file in requested_file_list.

    The stem is the part of the file name before the first '.txt'. Entries
    already present are not added again. The file names come from the
    module-level ``requested_file_dir`` listing.
    """
    stems = (entry.split('.txt')[0] for entry in requested_file_dir)
    for stem in stems:
        if stem in requested_file_list:
            continue
        requested_file_list.append(stem)


# Seed requested_file_list from the result files found at import time.
chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Write a response body to the result directory and log the success.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Write a failure marker for *request_name* to the exception directory.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)


# Pending work: request_dic[city][district] is the list of request names still to fetch.
request_dic = {}


def gen_request_dic_list():
    """Load the task workbook and queue every request that still needs fetching.

    Reads the first sheet of the .xlsx file next to this script, skipping
    row 0 (presumably a header row). Each remaining row must have exactly nine
    cells; the 4th, 6th and 7th are the request name, city and district.

    Rows whose city is not in target_city_list are ignored. For the rest,
    request_dic[city][district] receives each request name whose
    '<city><district><name>' key is not yet in requested_file_list.

    NOTE(review): xlrd 2.0 and later only read .xls files, so this call needs
    xlrd < 2.0 -- confirm the installed version.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    workbook_path = '%s\\%s%s' % (curPath, fname_source, '.xlsx')
    table = xlrd.open_workbook(workbook_path).sheets()[0]
    # Build the lookup set once instead of scanning the list for every row.
    already_requested = set(requested_file_list)
    for row_idx in range(1, table.nrows):
        _, _, _, request_name, _, city, district, _, _ = table.row_values(row_idx)
        if city not in target_city_list:
            continue
        if '%s%s%s' % (city, district, request_name) in already_requested:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


# Build the work queue at import time.
gen_request_dic_list()

fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# URL template for the Baidu place-suggestion endpoint. The R-QUERY, R-CITY and
# R-AK placeholders are replaced with the request name, the city and an API key,
# e.g. ...suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city, timeout=30):
    """Fetch place suggestions for every pending request name of *city*.

    For each name queued under request_dic[city], skip it when it is already
    recorded in requested_file_list; otherwise take an API key from the key
    table, issue the HTTP request, increment the key's usage counter and write
    the response text to the result directory. Any exception raised while
    requesting, updating the counter or writing the result is recorded as a
    marker file in the exception directory.

    Args:
        city: key into request_dic; also substituted into the URL as region.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block the worker thread indefinitely.
    """
    # .get() keeps a city with no pending work from raising KeyError in the thread.
    for district, names in request_dic.get(city, {}).items():
        for request_name in names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # Stop the whole worker: `break` only left the inner loop, so
                # every remaining district queried the database again.
                return
            url_ = base_url.replace('R-QUERY', request_name).replace('R-CITY', city).replace('R-AK', ak)
            try:
                bd_res_json_str = requests.get(url_, timeout=timeout).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception:
                # Best effort: leave a marker file (text means "Baidu request
                # failed") and carry on with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` as one positional argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is passed through as-is (not unpacked).
        target, payload = self.func, self.args
        target(payload)


# Number of entries of target_city_list that main() iterates over.
thread_sum = len(target_city_list)


def main():
    """Start one worker thread per city with pending work and wait for all of them."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # Cities without any pending request get no worker.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # `t.setDaemon = False` replaced the method with a bool instead of
        # setting the flag; assign the attribute (must happen before start()).
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


# Start the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()
复制代码

 

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
 
# Directory containing this script; its parent is added to sys.path.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: upper bound on a key's today_used counter (see db_get_one_effective).
# overrun_str: Chinese for "daily quota exceeded, access restricted"; not referenced
#   in the visible code -- presumably the quota error text returned by the API.
# DB_KEY_EXHAUST: sentinel returned when no usable key is left in the database.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file that tracks API-key usage, located next to this script
# (paths are built with a backslash separator, i.e. Windows style).
db = 'py_bdspider_status.db'
db = '%s\\%s' % (curPath, db)

# Build the city list from the text file (its name means "provincial capitals"):
# only every third line is used, with spaces and the newline stripped and the
# suffix '市' ("city") appended.
pcity_list = []
pcity_file = '%s\\%s' % (curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    c_ = 0
    for i in pf:
        c_ += 1
        if c_ == 3:
            c_ = 0
            pcity_list.append(i.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

# Cities excluded from this run.
target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
target_city_list = []

# Target cities are the file's cities that are not in target_city_list_big.
for i in pcity_list:
    if i not in target_city_list_big:
        target_city_list.append(i)
 
# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()
 
 
# db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]
# Keep only the cities from index 11 onward; the first 11 are not processed by this script.
target_city_list =target_city_list[11:]
 
def db_get_one_effective():
    """Return the least-used API key that is still within its usage limit.

    Reads table ``baidu_map_key_used`` in the SQLite file ``db`` and picks the
    row with the smallest ``today_used`` among those not above MAX_USED_TIMES.

    Returns:
        The key from that row, or the DB_KEY_EXHAUST sentinel when no row
        qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        row = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # The original `conn.close` (no call, placed after both returns) never
        # ran, so each lookup left its connection open.
        conn.close()
    return DB_KEY_EXHAUST if row is None else row[0]
 
 
def db_update_one_today_used(key):
    """Increment the usage counter of *key* and stamp the update time.

    Args:
        key: value of the ``key`` column in table ``baidu_map_key_used``.
    """
    stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters replace the string-built SQL: the key can no longer
        # break the statement, and the timestamp is stored as text, so a
        # leading zero in the two-digit year is kept.
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (stamp, key),
        )
        conn.commit()
    finally:
        # Close the connection even when the statement fails.
        conn.close()
 
 
# Sub-directories (next to this script) for successful responses and for
# failure markers respectively.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Names (without the '.txt' part) of requests that already have a result file.
requested_file_list = []
# Both directory strings end with a backslash so a file name can be appended directly.
requested_file_dir_str, requested_file_dir_exception_str = '%s\\%s\\' % (curPath, dir_), '%s\\%s\\' % (
    curPath, dir_exception)
# Snapshot of the result directory taken once, at import time.
requested_file_dir = os.listdir(requested_file_dir_str)
 
 
def chk_if_requested_file():
    """Record the stem of every listed result file in requested_file_list.

    The stem is the part of the file name before the first '.txt'. Entries
    already present are not added again. The file names come from the
    module-level ``requested_file_dir`` listing.
    """
    stems = (entry.split('.txt')[0] for entry in requested_file_dir)
    for stem in stems:
        if stem in requested_file_list:
            continue
        requested_file_list.append(stem)
 
 
# Seed requested_file_list from the result files found at import time.
chk_if_requested_file()
 
 
def write_requested_res(request_name, str_, type_='.txt'):
    """Write a response body to the result directory and log the success.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)
    print('ok', threading.get_ident(), request_name)
 
 
def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Write a failure marker for *request_name* to the exception directory.

    Args:
        request_name: base name of the output file.
        str_: text to write (UTF-8).
        type_: file-name suffix appended to request_name.
    """
    # A request name may contain '?', which is not a legal file-name character
    # on Windows, so every '?' is dropped from the final path.
    target = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(str_)
 
 
# Pending work: request_dic[city][district] is the list of request names still to fetch.
request_dic = {}
 
 
def gen_request_dic_list():
    """Load the task workbook and queue every request that still needs fetching.

    Reads the first sheet of the .xlsx file next to this script, skipping
    row 0 (presumably a header row). Each remaining row must have exactly nine
    cells; the 4th, 6th and 7th are the request name, city and district.

    Rows whose city is not in target_city_list are ignored. For the rest,
    request_dic[city][district] receives each request name whose
    '<city><district><name>' key is not yet in requested_file_list.

    NOTE(review): xlrd 2.0 and later only read .xls files, so this call needs
    xlrd < 2.0 -- confirm the installed version.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    workbook_path = '%s\\%s%s' % (curPath, fname_source, '.xlsx')
    table = xlrd.open_workbook(workbook_path).sheets()[0]
    # Build the lookup set once instead of scanning the list for every row.
    already_requested = set(requested_file_list)
    for row_idx in range(1, table.nrows):
        _, _, _, request_name, _, city, district, _, _ = table.row_values(row_idx)
        if city not in target_city_list:
            continue
        if '%s%s%s' % (city, district, request_name) in already_requested:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)
 
 
# Build the work queue at import time.
gen_request_dic_list()

fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# URL template for the Baidu place-suggestion endpoint. The R-QUERY, R-CITY and
# R-AK placeholders are replaced with the request name, the city and an API key,
# e.g. ...suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
 
 
def fun_(city, timeout=30):
    """Fetch place suggestions for every pending request name of *city*.

    For each name queued under request_dic[city], skip it when it is already
    recorded in requested_file_list; otherwise take an API key from the key
    table, issue the HTTP request, increment the key's usage counter and write
    the response text to the result directory. Any exception raised while
    requesting, updating the counter or writing the result is recorded as a
    marker file in the exception directory.

    Args:
        city: key into request_dic; also substituted into the URL as region.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block the worker thread indefinitely.
    """
    # .get() keeps a city with no pending work from raising KeyError in the thread.
    for district, names in request_dic.get(city, {}).items():
        for request_name in names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # Stop the whole worker: `break` only left the inner loop, so
                # every remaining district queried the database again.
                return
            url_ = base_url.replace('R-QUERY', request_name).replace('R-CITY', city).replace('R-AK', ak)
            try:
                bd_res_json_str = requests.get(url_, timeout=timeout).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception:
                # Best effort: leave a marker file (text means "Baidu request
                # failed") and carry on with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str)
 
 
class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` as one positional argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is passed through as-is (not unpacked).
        target, payload = self.func, self.args
        target(payload)
 
 
# Number of entries of target_city_list that main() iterates over.
thread_sum = len(target_city_list)
 
 
def main():
    """Start one worker thread per city with pending work and wait for all of them."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # A city can be listed without any pending request; starting a worker
        # for it would only raise KeyError inside fun_.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # `t.setDaemon = False` replaced the method with a bool instead of
        # setting the flag; assign the attribute (must happen before start()).
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()
 
 
# Start the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()

  

posted @   papering  阅读(311)  评论(0)  编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
历史上的今天:
2016-08-29 three levels of abstraction
2016-08-29 Copying and Cloning Objects
点击右上角即可分享
微信分享提示