effect request
import os

from bs4 import BeautifulSoup

# Walks a directory of saved Baidu Map first-page search results (one HTML
# file per query) and collects, for every query, the points of interest
# (name + address) found on the page into ``name_poi_dic``.

# Directory holding the saved result pages; must end with a path separator
# because file names are appended to it directly.
filepath = 'D:\\pymine\\clean\\spider_map\\baidu_map_html_firstpage_pc_test\\'
pathDir = os.listdir(filepath)

# Maps each request key (derived from the file name) to {'poi_list': [...]}.
name_poi_dic = {}
need_todo_request = ['搜索结果']
no_list = ['全国范围内未找到相关地点', '共找到0个搜索结果']
# Placeholder stored in poi_list when the page says nothing was found.
bd_no_this_name_str = '百度对此条无结果'
# Sample page message: "No results found; showing search results for
# 'Daxing store' instead."


def _strip_rank_prefix(text):
    """Drop the text up to and including the first '.' (e.g. '1.Name' -> 'Name').

    Text without a '.' is returned unchanged instead of raising IndexError.
    """
    _, dot, remainder = text.partition('.')
    return remainder if dot else text


def _pair_names_with_addresses(names, addresses):
    """Build one {'name', 'addr'} dict per name.

    Names beyond the end of ``addresses`` get the "no address" placeholder,
    so every name found on the page is kept.
    """
    pois = []
    for position, name in enumerate(names):
        if position < len(addresses):
            addr = addresses[position]
        else:
            addr = '百度此处无地址'
        pois.append({'name': name, 'addr': addr})
    return pois


def _split_name_and_address(soup):
    """Return {'name', 'addr'} from the first <td> containing '地址:', else None.

    ``find_all`` returns a list-like ResultSet that has no ``.text`` of its
    own, so each cell is inspected individually. ``partition`` splits on the
    first marker only, which keeps the result a pair even when the marker
    occurs more than once.
    """
    for cell in soup.find_all('td'):
        name, marker, addr = cell.text.partition('地址:')
        if marker:
            return {'name': name, 'addr': addr}
    return None


for allDir in pathDir:
    child = filepath + allDir

    # Unwanted variants are deleted and skipped. Skipping matters: the file no
    # longer exists, and because the key below is cut at '&' a variant would
    # otherwise reset the entry of the file it was derived from.
    if '&' in child or '170' in child:
        os.remove(child)
        continue

    # NOTE(review): the key is everything after 'baidu_map_html_firstpage_pc'
    # in the path, so with the current '..._pc_test\\' directory every key
    # starts with '_test' - confirm this is intended.
    requested_file = (
        child.split('baidu_map_html_firstpage_pc')[1]
        .split('&')[0]
        .split('.html')[0]
        .replace('\\', '')
    )
    name_poi_dic[requested_file] = {'poi_list': []}
    poi_list = name_poi_dic[requested_file]['poi_list']

    # Parse inside a context manager so the handle is always released, also
    # before the file is possibly deleted further down.
    with open(child, 'r', encoding='utf-8') as mybytes:
        soup = BeautifulSoup(mybytes, "lxml")
    soup_text = soup.text

    if '全国范围内未找到' in soup_text:
        # The page reports that nothing was found for this query.
        poi_list.append(bd_no_this_name_str)
    elif '商户免费标注' in soup_text:
        # Layout with 'n-blue' name elements and 'n-grey' address elements.
        names = [tag.text for tag in soup.find_all(class_='n-blue')]
        addresses = [tag.text for tag in soup.find_all(class_='n-grey')]
        poi_list.extend(_pair_names_with_addresses(names, addresses))
    elif 'm.hao123.com' in soup_text:
        # Layout whose titles carry a leading 'N.' prefix that is stripped.
        names = [
            _strip_rank_prefix(tag.text)
            for tag in soup.find_all(class_='text-ellipsis -ft-primary -ft-large')
        ]
        addresses = [
            tag.text
            for tag in soup.find_all(class_='dis-inf text-ellipsis -col-auto')
        ]
        poi_list.extend(_pair_names_with_addresses(names, addresses))
    elif '地址:' in soup_text:
        # Single-result layout: "<name>地址:<address>" inside a table cell.
        single_poi = _split_name_and_address(soup)
        if single_poi is not None:
            poi_list.append(single_poi)
        else:
            # Marker present but not inside any <td>: report it and keep the
            # file for inspection.
            print('TODO', requested_file)
    else:
        # Unrecognised page: delete it and report the key so it can be
        # requested again.
        os.remove(child)
        print('TODO', requested_file)