Simple page scraping with Python

import requests
import bs4
import csv

a = []  # collects one list of cell values per table row


# weather history for Shijiazhuang, April 2020
url = "http://www.tianqihoubao.com/lishi/shijiazhuang/month/202004.html"

r = requests.get(url)

soup = bs4.BeautifulSoup(r.text, "html.parser")

# every row of the history table is a <tr>
datas_tr = soup.find_all("tr")

head = 0                                   # 0 until the header row has been handled
for data_tr in datas_tr:
    b = []                                 # cell values of the current row
    if head == 0:
        # the first <tr> is the header row; its column names sit in <b> tags
        datas_b = data_tr.find_all("b")
        for data_b in datas_b:
            print(data_b.string)
            b.append(data_b.string)
        head = 1                           # header done, every following row is data

        print(b)
        print("*"*20)
    else:
        # data rows: the first <td> holds the date inside an <a>, the rest are plain text
        i = 0
        datas_td = data_tr.find_all("td")
        for data_td in datas_td:
            if i == 0:
                value = data_td.find("a").text
                i = i + 1
            else:
                value = data_td.string
            print(value)
            # strip the line breaks and padding spaces the page puts in every cell
            value = value.replace("\n", "").replace("\r", "").replace(" ", "")
            b.append(value)

        print(b)
        print("*"*20)
    a.append(b)
print(a)
print("爬取完毕,向csv输入ing")

# write the four cleaned columns of every row (header included) to the CSV;
# utf-8-sig keeps the Chinese text readable when the file is opened in Excel
with open('D:\\test.csv', "w", newline='', encoding="utf-8-sig") as csvfile:
    writer = csv.writer(csvfile)
    for i in a:
        print(i[0] + i[1] + i[2] + i[3])
        writer.writerow([i[0], i[1], i[2], i[3]])
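The history data sits in an ordinary HTML <table>, so pandas can also read it without walking the rows by hand. The following is only a minimal sketch, not the approach used above: it assumes the weather table is the first <table> on the page and that a parser such as lxml or html5lib is installed for pandas.read_html.

import io

import pandas as pd
import requests

url = "http://www.tianqihoubao.com/lishi/shijiazhuang/month/202004.html"
html = requests.get(url).text

# read_html returns one DataFrame per <table> found in the document;
# assumption: the weather history is the first table on this page
df = pd.read_html(io.StringIO(html))[0]

# utf-8-sig adds a BOM so Excel displays the Chinese text correctly
df.to_csv("D:\\test.csv", index=False, encoding="utf-8-sig")

The cells come back with their padding spaces still in place, so a little post-processing is needed if you want the same stripped values that the loop above produces.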

 

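The URL appears to encode both the city (shijiazhuang) and the month (202004). Assuming that pattern holds for other months, the same fetch-and-parse step can be repeated in a loop. This is a rough sketch only; the scrape_month helper and the output path are made up for illustration.

import csv

import bs4
import requests

def scrape_month(city, yyyymm):
    # hypothetical helper: fetch one month's history page and return its rows as lists of cell strings
    url = "http://www.tianqihoubao.com/lishi/" + city + "/month/" + yyyymm + ".html"
    soup = bs4.BeautifulSoup(requests.get(url).text, "html.parser")
    rows = []
    for tr in soup.find_all("tr"):
        cells = [cell.get_text(strip=True) for cell in tr.find_all(["th", "td"])]
        if cells:
            rows.append(cells)
    return rows

with open("D:\\shijiazhuang_2020.csv", "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f)
    for month in ["202003", "202004", "202005"]:
        for row in scrape_month("shijiazhuang", month):
            writer.writerow(row)

Each page most likely repeats its own header row, so the combined file will contain one header line per month; skip the first row of all but the first month if that matters.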