# 爬取所有城市名称 (Scrape all city names)

from lxml import etree
import requests
# Scrape the names of all cities listed on the history-data page and write
# them to city.txt, one name per line.
if __name__ == '__main__':

    url = 'https://www.aqistudy.cn/historydata/'
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url=url, headers=headers, timeout=10)
    page_text = response.text
    tree = etree.HTML(page_text)
    print(tree)

    # "hot" holds the featured cities, "all" holds the complete listing.
    top_li_list = tree.xpath('//div[@class="hot"]//ul/li')
    all_li_list = tree.xpath('//div[@class="all"]//ul/div[2]/li')

    all_city = []
    for li in top_li_list + all_li_list:
        names = li.xpath('./a/text()')
        # Skip list items without link text instead of raising IndexError.
        if names:
            all_city.append(names[0])

    # Open the output only after scraping succeeded, and let the context
    # manager flush and close the file handle.
    with open('city.txt', 'w', encoding='utf-8') as fp:
        fp.writelines(city + '\n' for city in all_city)
# posted @ 2024-02-26 21:23  会秃头的小白  阅读(13)  评论(0)  编辑  收藏  举报