骑行入门

对每一个骑者的入门进行推荐,爬虫:

 

 

import re
import requests
from bs4 import BeautifulSoup
import csv

def paqu():
    f = open('../../Documents/Tencent Files/631836111/FileRecv/骑行入门.csv', 'a+', encoding='utf-8', newline='')
    csv_writer = csv.writer(f)
    csv_writer.writerow(["题目", "内容链接", "详细内容"])
    headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
        }
    link = 'http://www.biketo.com/beginnerguide/'
    r = requests.get(link, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, "lxml")
    li_list = soup.find_all('li')[4:140]
    for each in li_list:
        txt=each.find('a').string
        href=each.find('a')['href']
        print(txt + "      " + href)
        Response = requests.get(href, headers=headers, timeout=10)
        soup1 = BeautifulSoup(Response.text, "lxml")
        div_list = soup1.find_all('p', style='text-indent:2em;')
        #div_list= soup1.find_all('div',class_='article-content')

        txt1=[]
        for each1 in div_list:
            if(each1.string is not None):
                txt1.append(each1.string)
        #print(*txt1)
        csv_writer.writerow([txt, href,*txt1])

if __name__ == '__main__':
    paqu()

获得的数据:

 

posted @ 2021-03-14 21:39  忒儿  阅读(76)  评论(0编辑  收藏  举报