demo

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

class tieba():
    """Small Baidu Tieba crawler.

    Fetches a listing page, follows every thread link found on it, and
    prints the text of the first matching reply-content node of each thread.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def request(self, url):
        """Fetch *url* with a browser-like User-Agent.

        Returns the ``requests`` response object unchanged (no status check).
        Raises whatever ``requests.get`` raises, including a timeout error
        once ``REQUEST_TIMEOUT`` seconds elapse.
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
        return requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)

    def begain_page(self, url):
        """Crawl the listing page at *url* and visit each linked thread.

        Every ``<a>`` under an element with class ``thread-name-wraper`` is
        resolved against the Tieba root and handed to ``second_page``.
        """
        listing = BeautifulSoup(self.request(url).text, "lxml")
        for link in listing.select('.thread-name-wraper a'):
            href = link.get('href')
            if not href:
                # Anchors without an href cannot be followed; skip them
                # instead of raising KeyError.
                continue
            self.second_page(urljoin('https://tieba.baidu.com/', href))

    def second_page(self, full_url):
        """Fetch the thread at *full_url* and print its first matched text.

        Pages with no matching node are skipped silently.
        """
        thread = BeautifulSoup(self.request(full_url).text, "lxml")
        # The original selector '.j_lzl_c_b_a core_reply_content' searched for
        # a <core_reply_content> tag, which never exists, so indexing [0]
        # always raised IndexError. The string appears to be a copied class
        # attribute value, so match an element carrying both classes.
        node = thread.select_one('.j_lzl_c_b_a.core_reply_content')
        if node is None:
            return
        print(node.get_text())

# Script entry: crawl the Tieba front page; the crawler prints what it finds
# in each linked thread.
TIEBA = tieba()
TIEBA.begain_page(url='https://tieba.baidu.com/')

 

posted @ 2017-11-27 16:58  不可叽叽歪歪  阅读(135)  评论(0)  编辑  收藏  举报