# Scrape campus news (original post title: "爬取校园新闻").
# From a blog post by Molemole dated 2018-04-03 16:17.
#
# Fetches the campus-news listing page, then for every <li> that carries a
# `.news-list-title` element prints the title text, the href of the item's
# first <a>, and five fixed-offset pieces of the `.show-info` text found on
# the linked article page.
import requests
from bs4 import BeautifulSoup

LIST_URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Seconds to wait on each HTTP request; without a timeout requests.get() can
# block forever on an unresponsive server.
REQUEST_TIMEOUT = 10

# Character ranges cut out of the article's `.show-info` text, printed one
# per line in this order. The offsets are tied to the site's current layout.
# NOTE(review): presumably these are the publish time, author, reviewer,
# source and trailing fields of the info bar -- confirm against a live page.
INFO_SLICES = (
    slice(0, 25),
    slice(30, 38),
    slice(38, 45),
    slice(46, 56),
    slice(62, None),
)

listing = requests.get(LIST_URL, timeout=REQUEST_TIMEOUT)
listing.encoding = 'utf-8'  # force UTF-8 instead of the guessed encoding
soup = BeautifulSoup(listing.text, 'html.parser')

for news in soup.select('li'):
    # Only <li> elements containing a news title are news entries; other
    # list items on the page are skipped.
    title = news.select_one('.news-list-title')
    if title is None:
        continue
    print(title.text)

    # The first <a> inside the item is used as the link to the article page.
    link = news.select_one('a')
    href = link.get('href') if link is not None else None
    if not href:
        # No usable link: nothing to fetch for this entry.
        continue
    print(href)

    detail = requests.get(href, timeout=REQUEST_TIMEOUT)
    detail.encoding = 'utf-8'
    detail_soup = BeautifulSoup(detail.text, 'html.parser')

    info = detail_soup.select_one('.show-info')
    if info is None:
        # Article page without an info bar: skip instead of raising IndexError.
        continue

    info_text = info.text
    for piece in INFO_SLICES:
        print(info_text[piece])