# Crawl the news articles listed on the campus news homepage.
"""Crawl the campus news front page and print every article it lists.

For each list entry that carries a title, the script prints the title, link
and summary, downloads the linked article, and prints its body text, its
info line, and the publish timestamp extracted from that info line.
"""
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup

NEWS_LIST_URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Seconds to wait for the server; requests waits indefinitely without this.
REQUEST_TIMEOUT = 10

# Layout of the publish timestamp embedded in an article's info line.
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
_TIMESTAMP_RE = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')


def fetch_soup(url):
    """Download *url* and return its HTML parsed with ``html.parser``.

    The response body is decoded as UTF-8.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def extract_publish_time(info):
    """Return the ``YYYY-MM-DD HH:MM:SS`` timestamp text found in *info*.

    The timestamp is located by pattern rather than by stripping a label:
    ``str.lstrip`` removes a *set of characters*, not a prefix, and fails as
    soon as the label is preceded by whitespace or uses different punctuation.

    Raises:
        ValueError: if *info* contains no timestamp in that layout.
    """
    match = _TIMESTAMP_RE.search(info)
    if match is None:
        raise ValueError('no publish timestamp in {!r}'.format(info))
    return match.group(0)


def show_article(url):
    """Fetch the article at *url*, print its details, return its publish time.

    Prints, in order: the text of ``#content``, the text of ``.show-info``,
    and the timestamp string extracted from the info text.

    Returns:
        datetime: the publish timestamp parsed with ``TIME_FORMAT``.

    Raises:
        IndexError: if the page lacks ``#content`` or ``.show-info``.
        ValueError: if the info text holds no parsable timestamp.
    """
    page = fetch_soup(url)
    print(page.select('#content')[0].text)

    info = page.select('.show-info')[0].text
    print(info)

    stamp = extract_publish_time(info)
    print(stamp)
    return datetime.strptime(stamp, TIME_FORMAT)


def main():
    """Walk the news list page and print every entry that has a title."""
    listing = fetch_soup(NEWS_LIST_URL)
    for item in listing.select('li'):
        titles = item.select('.news-list-title')
        if not titles:
            # Other <li> elements on the page (no title node) are not news.
            continue

        title = titles[0].text
        summary = item.select('.news-list-info')[0].contents[0].text
        link = item.select('a')[0].attrs['href']
        print(title, link, summary)

        show_article(link)


if __name__ == '__main__':
    main()