BeautifulSoup

from bs4 import BeautifulSoup
import urllib2

# Quick smoke test: fetch one thread page and print its <title> tag.
html = urllib2.urlopen('http://tieba.baidu.com/p/5058456989')
try:
    # Naming the parser explicitly ("html.parser") avoids the warning that
    # BeautifulSoup emits when no parser is specified.
    bsobj = BeautifulSoup(html.read(), "html.parser")
finally:
    # Release the HTTP connection even if reading or parsing fails.
    html.close()
print(bsobj.title)

# Horizontal rule (100 dashes) printed between posts further down.
underline = '-' * 100

def get_title(url):
    """Fetch ``url`` and return the page parsed with BeautifulSoup.

    Despite its name, this returns the whole parsed document rather than
    just the ``<title>`` element; callers search the returned tree
    themselves.

    Args:
        url: Address of the page to download.

    Returns:
        The ``BeautifulSoup`` object for the page, or ``None`` when the
        server answers with an HTTP error status.
    """
    try:
        html = urllib2.urlopen(url)
    except urllib2.HTTPError:
        # HTTPError lives in the urllib2 namespace; the bare name was never
        # imported.  Report the failure as None, which the caller checks for.
        return None
    try:
        return BeautifulSoup(html.read(), "html.parser")
    finally:
        # Always release the connection, whether or not parsing succeeded.
        html.close()

url = 'http://tieba.baidu.com/p/4420237089?see_lz=1'
# NOTE: get_title() hands back the whole parsed page, not only its <title>.
title = get_title(url)
if title is None:
    print('title is none')
else:
    print(underline)
    # Only touch the parsed tree when the fetch succeeded; calling findAll
    # on None would raise AttributeError.
    tmp = title.findAll("div", {"class": "d_post_content j_d_post_content "})
    vmp = title.findAll("span", {"class": "tail-info"})
    # NOTE(review): the 1:-1:3 slice presumably picks one "tail-info" span
    # per post so it lines up with the post bodies -- confirm against the
    # page markup.
    for val, f in zip(tmp, vmp[1:-1:3]):
        print(val.get_text())
        print('%s %s' % (f.get_text(), underline))

posted @ 2017-04-14 15:42 好奇的小明 阅读(200) 评论(0) 收藏 举报

刷新页面返回顶部

走起走起

BeautifulSoup

公告