# Fetch all of the information for a single news article.
# News link: http://news.gzcc.cn/html/2019/xiaoyuanxinwen_0331/11110.html
"""Scrape the metadata and click count of a single GZCC news article."""

import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# Article fetched when this file is run as a script.
NEWS_URL = 'http://news.gzcc.cn/html/2019/xiaoyuanxinwen_0331/11110.html'

# Seconds to wait for an HTTP response; without a timeout a stalled server
# would block the script forever.
REQUEST_TIMEOUT = 10


def getTime(showinfo):
    """Parse the publication timestamp out of the split info line.

    Args:
        showinfo: Sequence of strings. ``showinfo[0]`` must look like
            ``"<label>:YYYY-MM-DD"`` and ``showinfo[1]`` like ``"HH:MM:SS"``.

    Returns:
        A naive ``datetime`` built from the date and time parts.

    Raises:
        IndexError: If ``showinfo`` is too short or ``showinfo[0]`` has no ':'.
        ValueError: If the combined text does not match
            ``'%Y-%m-%d %H:%M:%S'``.
    """
    date_part = showinfo[0].split(':')[1].strip()
    time_part = showinfo[1].strip()
    # The format string requires whitespace between date and time, so the two
    # parts are joined with an explicit space.
    return datetime.strptime(' '.join((date_part, time_part)),
                             '%Y-%m-%d %H:%M:%S')


def click(url):
    """Return the click-count line for the news article at ``url``.

    The article id is taken to be the last run of digits in ``url``; it is
    sent to the counter endpoint and the reply is reduced to the bare number.

    Args:
        url: Address of the news article, e.g. ``.../11110.html``.

    Returns:
        A string of the form ``'点击次数:<count>次'``.

    Raises:
        IndexError: If ``url`` contains no digits.
        requests.RequestException: If the counter request fails or times out.
    """
    # Whole digit runs are matched so that ids longer than five digits are not
    # cut into fragments.
    news_id = re.findall(r'\d+', url)[-1]
    click_url = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(
        news_id)
    res = requests.get(click_url, timeout=REQUEST_TIMEOUT)
    # Keep what follows the last '.html' and peel off the surrounding
    # "('" / "');" characters (presumably the reply ends in
    # ".html('<count>');" - verify against the live endpoint).
    news_click = res.text.split('.html')[-1].lstrip("('").rstrip("');")
    return '点击次数:' + news_click + '次'


def showInfo(info, soup):
    """Build a multi-line summary of the article's title and credits.

    Args:
        info: The ``.show-info`` text split on spaces. ``info[3]`` must hold
            ``'\\xa0'``-separated ``label:value`` fields, with the author at
            index 2, the reviewer at index 4 and the publishing unit at
            index 6.
        soup: Parsed article page; its first ``.show-title`` element supplies
            the title.

    Returns:
        A string with the title, author, publishing unit and reviewer, one
        per line.

    Raises:
        IndexError: If the title element or any expected field is missing.
    """
    news_title = soup.select('.show-title')[0].text
    # Split the credit line once instead of once per field.
    fields = info[3].split('\xa0')
    news_author = fields[2].split(':')[1]
    news_reviewer = fields[4].split(':')[1]
    news_origin = fields[6].split(':')[1]
    return ('标题:' + news_title + '\n'
            + '作者:' + news_author
            + '\n发布单位:' + news_origin
            + '\n审核:' + news_reviewer)


def main():
    """Download the article at ``NEWS_URL`` and print its details."""
    res = requests.get(NEWS_URL, timeout=REQUEST_TIMEOUT)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    info = soup.select('.show-info')[0].text.split(' ')
    print(info[3].split('\xa0'))
    print(click(NEWS_URL))
    print(showInfo(info, soup))


if __name__ == '__main__':
    main()