from lxml import etree
import requests
# Scrape the names of all cities
if __name__ == '__main__':
    # Fetch the aqistudy.cn history-data index page, collect the link text of
    # every city entry, and write the names to city.txt, one per line.
    url = 'https://www.aqistudy.cn/historydata/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # writing a misleading (likely empty) output file.
    response.raise_for_status()
    # NOTE(review): if the names come out garbled, the server may not declare
    # a charset; setting response.encoding = 'utf-8' before reading .text
    # would be the fix -- confirm against the live response.
    tree = etree.HTML(response.text)
    # Debug output: shows the parsed root element.
    print(tree)

    # Entries under div.hot come first, then those under div.all, which is
    # the order the names are written in.
    li_nodes = (
        tree.xpath('//div[@class="hot"]//ul/li')
        + tree.xpath('//div[@class="all"]//ul/div[2]/li')
    )

    all_city = []
    for li in li_nodes:
        names = li.xpath('./a/text()')
        # Skip an <li> without link text rather than raising IndexError.
        if names:
            all_city.append(names[0])

    # Open the file only after scraping succeeded, and let the context
    # manager close it so the data is flushed even if a write fails.
    with open('city.txt', 'w', encoding='utf-8') as fp:
        fp.writelines(city + '\n' for city in all_city)