import re
from datetime import datetime

import pandas
import requests
from bs4 import BeautifulSoup

# Click-count endpoint; ``{}`` is filled with the news id taken from the URL.
_CLICK_API_URL = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'
# Label that precedes the timestamp in the ``.show-info`` text.
_PUBLISH_TIME_PREFIX = '发布时间:'
# A 'YYYY-mm-dd HH:MM:SS' timestamp is exactly this many characters long.
_TIMESTAMP_LENGTH = 19


def _extract_news_id(newurl):
    """Return the news id embedded in *newurl*.

    The id is the second '/'-separated piece of the text found between the
    first '_' and the last '.html' in the URL.

    Raises:
        ValueError: if the URL does not have that shape.
    """
    match = re.search(r'_(.*)\.html', newurl)
    if match is None:
        raise ValueError('no news id found in URL: {!r}'.format(newurl))
    pieces = match.group(1).split('/')
    if len(pieces) < 2:
        raise ValueError('no news id found in URL: {!r}'.format(newurl))
    return pieces[1]


def _parse_click_count(payload):
    """Return the integer wrapped by the last ``html(...)`` call in *payload*.

    Only the text after the final '.' is inspected.
    NOTE(review): presumably the endpoint answers with a script snippet that
    ends in ``.html('<count>');`` -- confirm against a live response.

    Raises:
        ValueError: if no numeric count is present.
    """
    tail = payload.split('.')[-1]
    match = re.search(r"html\('?(\d+)'?\)", tail)
    if match is None:
        raise ValueError('no click count in response: {!r}'.format(payload))
    return int(match.group(1))


def Click(newurl):
    """Fetch the click count of the news page at *newurl*.

    Args:
        newurl: URL of a single news page; its id is extracted from the path.

    Returns:
        The click count as an int.

    Raises:
        ValueError: if the id cannot be extracted from *newurl* or the
            response carries no numeric count.
    """
    click_url = _CLICK_API_URL.format(_extract_news_id(newurl))
    return _parse_click_count(requests.get(click_url).text)


def getdetail(url):
    """Download one news page and return its details as a dict.

    Args:
        url: link to a single news page.

    Returns:
        A dict with keys 'url', 'title' (text of the first ``.show-title``
        element), 'dt' (datetime parsed from the first ``.show-info``
        element) and 'click' (result of ``Click(url)``).

    Raises:
        IndexError: if the page has no ``.show-title`` or ``.show-info``.
        ValueError: if the timestamp cannot be parsed.
    """
    res = requests.get(url)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    info = soup.select('.show-info')[0].text
    # Drop the label as a real prefix; str.lstrip() would instead strip any
    # run of those characters.
    if info.startswith(_PUBLISH_TIME_PREFIX):
        info = info[len(_PUBLISH_TIME_PREFIX):]

    return {
        'url': url,
        'title': soup.select('.show-title')[0].text,
        'dt': datetime.strptime(info[:_TIMESTAMP_LENGTH], '%Y-%m-%d %H:%M:%S'),
        'click': Click(url),
    }


def onepage(pageurl):
    """Return the details of every news item listed on *pageurl*.

    Each ``<li>`` containing a ``.news-list-title`` element is treated as a
    news entry; the ``href`` of its first ``<a>`` is passed to ``getdetail``.

    Args:
        pageurl: URL of a news list page.

    Returns:
        A list of dicts as produced by ``getdetail``, in page order.
    """
    res = requests.get(pageurl)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    return [
        getdetail(item.select('a')[0]['href'])
        for item in soup.select('li')
        if item.select('.news-list-title')
    ]