python D:\pymine\clean\spider_map\get_bd_uid_rest_b.py
python D:\pymine\clean\spider_map\get_bd_uid_rest.py
python D:\pymine\clean\spider_map\get_bd_uid_28_other20_b.py
#MAX_USED_TIMES = 1900
python D:\pymine\clean\spider_map\get_bd_uid_28_other20.py
python D:\pymine\clean\spider_map\get_bd_uid.py
python D:\pymine\clean\spider_map\get_bd_uid.py
python D:\pymine\clean\spider_map\get_bd_uid.py
"""Query the Baidu Map place-suggestion API for every request name in a workbook.

This variant skips the "big" cities and the provincial capitals and works on
every other city found in the workbook. One thread is started per city. Each
response is written to ``baidu_map_uid/<city><district><name>.txt``; failures
are recorded in ``baidu_map_uid_exception/``. API keys and their per-day usage
counters live in the SQLite table ``baidu_map_key_used``.
"""
import os
import sqlite3
import sys
import threading
import time
from urllib.parse import quote

import requests
import xlrd

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'
# Seconds to wait for the API before giving up, so a stalled connection
# cannot block a worker thread forever.
REQUEST_TIMEOUT = 10

db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the file holds a city name (without the trailing '市').
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Cities this script must NOT process: the big cities plus every provincial
# capital. Copy the list so extending it does not also mutate
# target_city_list_big (the original aliased the two names).
target_city_list_pass = list(target_city_list_big)
for pcity in pcity_list:
    if pcity not in target_city_list_pass:
        target_city_list_pass.append(pcity)


# One-off helper used to (re)load the key table; kept for reference.
# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE FROM baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#             conn.commit()
#     conn.close()
# db_init_key_table()


def db_get_one_effective():
    """Return the least-used API key still within MAX_USED_TIMES.

    Returns DB_KEY_EXHAUST when no key qualifies. The connection is always
    closed (the original returned before reaching its ``conn.close``).
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC',
            (MAX_USED_TIMES,)).fetchone()
    finally:
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Increment today's usage counter of ``key`` and stamp the update time."""
    conn = sqlite3.connect(db)
    try:
        # Bound as text, matching how the init helper above inserts update_time.
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (localtime_, key))
        conn.commit()
    finally:
        conn.close()


dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
requested_file_list = []
# Both directory strings end with a path separator; file names are appended directly.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def _file_key(request_name):
    """Return ``request_name`` as it appears in a result file name ('?' is stripped on write)."""
    return request_name.replace('?', '')


def chk_if_requested_file():
    """Refresh requested_file_list from the result directory.

    The directory is listed on every call so that files written since import
    are picked up; the original iterated a listing taken once at import time.
    """
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Write a successful API response to the result directory."""
    fname = '%s%s%s' % (requested_file_dir_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Record a failed request in the exception directory."""
    fname = '%s%s%s' % (requested_file_dir_exception_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


# city -> district -> [request_name, ...] still to be fetched.
request_dic = {}
# Cities found in the workbook that are not in target_city_list_pass.
target_city_list = []
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'


def gen_request_dic_list():
    """Fill target_city_list and request_dic from the source workbook.

    Rows whose city is in target_city_list_pass are skipped, as are rows whose
    result file already exists.
    """
    # NOTE(review): reading .xlsx needs an xlrd release that still supports it
    # (xlrd 2.0+ reads .xls only) — confirm the pinned version.
    data = xlrd.open_workbook(os.path.join(curPath, fname_source) + '.xlsx')
    table = data.sheets()[0]
    done = set(requested_file_list)
    for row_idx in range(1, table.nrows):  # row 0 is skipped (presumably a header)
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(row_idx)
        if city in target_city_list_pass:
            continue
        if city not in target_city_list:
            target_city_list.append(city)
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if _file_key(request_name_chk) in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


gen_request_dic_list()

# Example:
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<ak>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Fetch and store the suggestion result for every pending request of ``city``."""
    for district, request_names in request_dic.get(city, {}).items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if _file_key(request_name_chk) in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key left: stop this city entirely, not just the current district.
                return
            # Percent-encode the values so '&', '#', '?' etc. in a name
            # cannot corrupt the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(str(request_name), safe=''))
                    .replace('R-CITY', quote(str(city), safe=''))
                    .replace('R-AK', ak))
            try:
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and move on to the next request.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` passed as a single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


thread_sum = len(target_city_list)


def main():
    """Start one worker thread per target city with pending requests and wait for all."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        if city not in request_dic:
            # Everything for this city was already fetched.
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False  # the original assigned to t.setDaemon, which set nothing
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
"""Query the Baidu Map place-suggestion API for every request name in a workbook.

This variant works on the provincial capitals that are not "big" cities,
skipping the first 11 of them. One thread is started per city. Each response
is written to ``baidu_map_uid/<city><district><name>.txt``; failures are
recorded in ``baidu_map_uid_exception/``. API keys and their per-day usage
counters live in the SQLite table ``baidu_map_key_used``.
"""
import os
import sqlite3
import sys
import threading
import time
from urllib.parse import quote

import requests
import xlrd

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'
# Seconds to wait for the API before giving up, so a stalled connection
# cannot block a worker thread forever.
REQUEST_TIMEOUT = 10

db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the file holds a city name (without the trailing '市').
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
target_city_list = [pcity for pcity in pcity_list if pcity not in target_city_list_big]


# One-off helper used to (re)load the key table; kept for reference.
# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE FROM baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#             conn.commit()
#     conn.close()
# db_init_key_table()

# This script handles everything after the first 11 cities.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return an API key still within MAX_USED_TIMES.

    Returns DB_KEY_EXHAUST when no key qualifies. The connection is always
    closed (the original returned before reaching its ``conn.close``).
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=?',
            (MAX_USED_TIMES,)).fetchone()
    finally:
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Increment today's usage counter of ``key`` and stamp the update time."""
    conn = sqlite3.connect(db)
    try:
        # Bound as text, matching how the init helper above inserts update_time.
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (localtime_, key))
        conn.commit()
    finally:
        conn.close()


dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
requested_file_list = []
# Both directory strings end with a path separator; file names are appended directly.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def _file_key(request_name):
    """Return ``request_name`` as it appears in a result file name ('?' is stripped on write)."""
    return request_name.replace('?', '')


def chk_if_requested_file():
    """Refresh requested_file_list from the result directory.

    The directory is listed on every call so that files written since import
    are picked up; the original iterated a listing taken once at import time.
    """
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Write a successful API response to the result directory."""
    fname = '%s%s%s' % (requested_file_dir_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Record a failed request in the exception directory."""
    fname = '%s%s%s' % (requested_file_dir_exception_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


# city -> district -> [request_name, ...] still to be fetched.
request_dic = {}
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'


def gen_request_dic_list():
    """Fill request_dic from the source workbook.

    Only rows whose city is in target_city_list are kept; rows whose result
    file already exists are skipped.
    """
    # NOTE(review): reading .xlsx needs an xlrd release that still supports it
    # (xlrd 2.0+ reads .xls only) — confirm the pinned version.
    data = xlrd.open_workbook(os.path.join(curPath, fname_source) + '.xlsx')
    table = data.sheets()[0]
    done = set(requested_file_list)
    for row_idx in range(1, table.nrows):  # row 0 is skipped (presumably a header)
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(row_idx)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if _file_key(request_name_chk) in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


gen_request_dic_list()

# Example:
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<ak>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Fetch and store the suggestion result for every pending request of ``city``."""
    for district, request_names in request_dic.get(city, {}).items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if _file_key(request_name_chk) in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key left: stop this city entirely, not just the current district.
                return
            # Percent-encode the values so '&', '#', '?' etc. in a name
            # cannot corrupt the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(str(request_name), safe=''))
                    .replace('R-CITY', quote(str(city), safe=''))
                    .replace('R-AK', ak))
            try:
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and move on to the next request.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` passed as a single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


thread_sum = len(target_city_list)


def main():
    """Start one worker thread per target city with pending requests and wait for all."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        if city not in request_dic:
            # Everything for this city was already fetched.
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False  # the original assigned to t.setDaemon, which set nothing
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
"""Query the Baidu Map place-suggestion API for every request name in a workbook.

This variant works on the provincial capitals that are not "big" cities,
skipping the first 11 of them, and always picks the least-used API key. One
thread is started per city. Each response is written to
``baidu_map_uid/<city><district><name>.txt``; failures are recorded in
``baidu_map_uid_exception/``. API keys and their per-day usage counters live
in the SQLite table ``baidu_map_key_used``.
"""
import os
import sqlite3
import sys
import threading
import time
from urllib.parse import quote

import requests
import xlrd

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'
# Seconds to wait for the API before giving up, so a stalled connection
# cannot block a worker thread forever.
REQUEST_TIMEOUT = 10

db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the file holds a city name (without the trailing '市').
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
target_city_list = [pcity for pcity in pcity_list if pcity not in target_city_list_big]


# One-off helper used to (re)load the key table; kept for reference.
# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE FROM baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#             conn.commit()
#     conn.close()
# db_init_key_table()

# target_city_list = target_city_list[0:11]
# This script handles everything after the first 11 cities.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return the least-used API key still within MAX_USED_TIMES.

    Returns DB_KEY_EXHAUST when no key qualifies. The connection is always
    closed (the original returned before reaching its ``conn.close``).
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC',
            (MAX_USED_TIMES,)).fetchone()
    finally:
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Increment today's usage counter of ``key`` and stamp the update time."""
    conn = sqlite3.connect(db)
    try:
        # Bound as text, matching how the init helper above inserts update_time.
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1, update_time=? WHERE key=?',
            (localtime_, key))
        conn.commit()
    finally:
        conn.close()


dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
requested_file_list = []
# Both directory strings end with a path separator; file names are appended directly.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def _file_key(request_name):
    """Return ``request_name`` as it appears in a result file name ('?' is stripped on write)."""
    return request_name.replace('?', '')


def chk_if_requested_file():
    """Refresh requested_file_list from the result directory.

    The directory is listed on every call so that files written since import
    are picked up; the original iterated a listing taken once at import time.
    """
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Write a successful API response to the result directory."""
    fname = '%s%s%s' % (requested_file_dir_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Record a failed request in the exception directory."""
    fname = '%s%s%s' % (requested_file_dir_exception_str, request_name, type_)
    # '?' is not allowed in the file name, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


# city -> district -> [request_name, ...] still to be fetched.
request_dic = {}
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'


def gen_request_dic_list():
    """Fill request_dic from the source workbook.

    Only rows whose city is in target_city_list are kept; rows whose result
    file already exists are skipped.
    """
    # NOTE(review): reading .xlsx needs an xlrd release that still supports it
    # (xlrd 2.0+ reads .xls only) — confirm the pinned version.
    data = xlrd.open_workbook(os.path.join(curPath, fname_source) + '.xlsx')
    table = data.sheets()[0]
    done = set(requested_file_list)
    for row_idx in range(1, table.nrows):  # row 0 is skipped (presumably a header)
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(row_idx)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if _file_key(request_name_chk) in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


gen_request_dic_list()

# Example:
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<ak>
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Fetch and store the suggestion result for every pending request of ``city``."""
    for district, request_names in request_dic.get(city, {}).items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if _file_key(request_name_chk) in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key left: stop this city entirely, not just the current district.
                return
            # Percent-encode the values so '&', '#', '?' etc. in a name
            # cannot corrupt the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(str(request_name), safe=''))
                    .replace('R-CITY', quote(str(city), safe=''))
                    .replace('R-AK', ak))
            try:
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and move on to the next request.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` passed as a single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


thread_sum = len(target_city_list)


def main():
    """Start one worker thread per target city with pending requests and wait for all."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        if city not in request_dic:
            # Everything for this city was already fetched; fun_ would have nothing to do.
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False  # the original assigned to t.setDaemon, which set nothing
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
2016-08-29 three levels of abstraction
2016-08-29 Copying and Cloning Objects