# 爬取校园新闻首页的新闻 (Crawl the news items listed on the campus-news front page)

import requests
from bs4 import BeautifulSoup
from datetime import  datetime
# Fetch the campus-news index page and print, for every news entry found,
# its title, its link, and its short description.
url = "http://news.gzcc.cn/html/xiaoyuanxinwen/"
res = requests.get(url, timeout=10)  # timeout so a stalled server cannot hang the script
res.encoding = "utf-8"  # decode res.text as UTF-8
soup = BeautifulSoup(res.text, "html.parser")
for li in soup.select("li"):
    # select() returns a list of matching tags. An <li> without a
    # .news-list-title element is not treated as a news entry and is skipped.
    titles = li.select(".news-list-title")
    if not titles:
        continue
    print(titles[0].text)

    # Index the result of select("a"), not the selector string itself.
    links = li.select("a")
    href = links[0].attrs.get("href") if links else None
    if href:
        print(href)

    descriptions = li.select(".news-list-description")
    if descriptions:
        print(descriptions[0].text)

# Fetch one news article and print its publication time and author, both
# taken from the text of the page's .show-info element.
url = "http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0329/9129.html"
ress = requests.get(url, timeout=10)  # timeout so a stalled server cannot hang the script
ress.encoding = "utf-8"  # decode ress.text as UTF-8
soup1 = BeautifulSoup(ress.text, "html.parser")

# select() returns a list of tags; use the first match's text, or an empty
# string when the element is missing.
info_tags = soup1.select(".show-info")
news = info_tags[0].text if info_tags else ""

# NOTE(review): assumes the info text looks like
# '发布时间:<YYYY-MM-DD HH:MM:SS> 作者:<name> ...' -- confirm against the live page.
# partition() removes the label as a real prefix; a 'YYYY-MM-DD HH:MM:SS'
# timestamp is 19 characters long.
publish_time = news.partition('发布时间:')[2][:19]

# The author is the first whitespace-delimited token after its label.
author_fields = news.partition('作者:')[2].split()
author = author_fields[0] if author_fields else ""

print("发布时间:" + publish_time + "作者:" + author)

# Parse a sample timestamp string into a datetime object.
# The text is kept under its own name so the builtin `str` is not shadowed,
# and the parsed value is bound to a name instead of being discarded.
time_text = '2018-04-2 18:20:19 '
# Strip surrounding whitespace so the format needs no trailing space.
published_at = datetime.strptime(time_text.strip(), '%Y-%m-%d %H:%M:%S')

# posted on 2018-04-02 22:05  121林伟聪  阅读(131)  评论(0)  编辑  收藏  举报