python bs4

# -*- coding: UTF-8 -*-
# Web scraper

import urllib2
#import bs4
import re
import sys
from bs4 import BeautifulSoup
# import time
reload(sys)
sys.setdefaultencoding("utf-8")


#html=urllib2.urlopen("http://121.196.21.238/report.html")
#html=urllib2.urlopen("http://image.baidu.com/")

# Read the locally saved report page into memory; the context manager closes
# the file as soon as the read finishes.
with open("report.html") as report_file:
    content = report_file.read()

# Parse the markup with the standard-library-backed "html.parser" backend.
bsObj = BeautifulSoup(content, features='html.parser')

# Dump the parsed tree, re-indented, for visual inspection.
print(bsObj.prettify())

# Print the title
# print bsObj.title

# #<a href="http://baijiahao.baidu.com/s?id=1665731690282269956" target="_blank" mon="p=1&amp;a=1&amp;pn=1">G15沈海高速轿车起火 现场火光冲天</a>
# titlist  = bsObj.findAll("a", {"target":"_blank"})


# # The following two calls do the same thing
# bsObj.findAll(id="text")
# bsObj.findAll("", {"id":"text"})

# for x in titlist:
#     print x.get_text()

# #http://t8.baidu.com/it/u=3571592872,3353494284&fm=79&app=86&size=h300&n=0&g=4n&f=jpeg?sec=1589296136&t=e713d1fe058c0dcb1714f9bc0fd4ee92

# titlist  = bsObj.find("table",{"id":"result_table"}).tr.next_siblings

# for i in titlist:
#     print i

# Collect every element whose CSS class is "testcase" and print each one.
# `class` is a reserved word in Python, so writing `class="testcase"` as a
# keyword argument is a SyntaxError; Beautiful Soup exposes the same filter
# through the `class_` keyword instead.
# The result is bound to `testcases` rather than `list` so the builtin
# `list` type is not shadowed for the rest of the module.
testcases = bsObj.find_all(class_="testcase")

for testcase in testcases:
    print(testcase)

 

posted on 2020-06-04 00:12  思此狂  阅读(133)  评论(0)  编辑  收藏  举报

导航