使用正则表达式,取得点击次数,函数抽离
"""Regular-expression exercises.

1. Check whether an e-mail address is well formed.
2. Check a telephone number (landline format: area code, hyphen, number).
3. Split English text into words.
"""
import re

# Local part: one alphanumeric, then alphanumerics or underscores.
# Domain: a single lowercase label followed by ".com" or ".cn".
_EMAIL_RE = re.compile(r'[0-9a-zA-Z][0-9a-zA-Z_]*@[a-z]+\.(?:com|cn)')

# Landline: 3-5 digits, a hyphen, then 6-8 digits.
_TEL_RE = re.compile(r'[0-9]{3,5}-[0-9]{6,8}')


def em_match(e):
    """Report whether ``e`` is a well-formed e-mail address.

    Prints ``Success!`` when the whole input matches the e-mail pattern and
    ``Wrong layout`` otherwise.

    Args:
        e: Candidate address; coerced with ``str()`` before matching.

    Returns:
        bool: ``True`` if the address matched, ``False`` otherwise.
    """
    # fullmatch: the entire input must be an address, not merely contain one.
    valid = _EMAIL_RE.fullmatch(str(e)) is not None
    print('Success!' if valid else 'Wrong layout')
    return valid


def tel_match(n):
    """Report whether ``n`` is a landline number such as ``020-12345678``.

    Prints ``Success!`` when the whole input matches the landline pattern and
    ``Wrong layout`` otherwise.

    Args:
        n: Candidate number; coerced with ``str()`` before matching.

    Returns:
        bool: ``True`` if the number matched, ``False`` otherwise.
    """
    valid = _TEL_RE.fullmatch(str(n)) is not None
    print('Success!' if valid else 'Wrong layout')
    return valid


def word_split(mm):
    """Split text into whitespace-separated tokens and print them.

    Runs of whitespace count as a single separator, so leading, trailing or
    repeated blanks never yield empty tokens. Punctuation stays attached to
    the neighbouring word.

    Args:
        mm: Text to split; coerced with ``str()`` first.

    Returns:
        list[str]: The tokens in order of appearance.
    """
    mess = re.findall(r'\S+', str(mm))
    print(mess)
    return mess


if __name__ == '__main__':
    # Interactive demo; guarded so importing this module never blocks on input().
    e = input("请输入您的email:")
    em_match(e)
    n = input("请输入您的电话:")
    tel_match(n)
    m = (
        " Five score years ago, a great American, in whose symbolic shadow "
        "we stand today, signed the Emancipation Proclamation. This momentous "
        "decree came as a great beacon light of hope to millions of Negro "
        "slaves who had been seared in the flames of withering injustice. "
        "It came as a joyous daybreak to end the long night of bad "
        "captivity. "
    )
    word_split(m)
"""Fetch a campus news article and its click count from the gzcc.cn sites."""
import re

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a server response; without a timeout requests.get()
# may block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 10


def get_ncoding(url):
    """Extract the numeric news id from a news page URL.

    Args:
        url: News page address containing ``_<digits>/<id>.html``,
            e.g. ``.../xiaoyuanxinwen_0404/9183.html``.

    Returns:
        str: The id part of the URL (``'9183'`` for the example above).

    Raises:
        IndexError: If the URL does not contain the expected pattern.
    """
    return re.findall(r'_[0-9]+/(\d+)\.html', str(url))[0]


def click_count(num):
    """Query the click-count API for the news item with id ``num``.

    Args:
        num: News id, as returned by ``get_ncoding``.

    Returns:
        str: The click count as a string of digits.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        IndexError: If the response contains no ``.html('<digits>');`` call.
    """
    c_url = f'http://oa.gzcc.cn/api.php?op=count&id={num}&modelid=80'
    resc = requests.get(c_url, timeout=_REQUEST_TIMEOUT)
    resc.raise_for_status()
    resc.encoding = 'utf-8'
    # The body is searched as plain text for ``.html('<digits>');`` calls,
    # so no HTML parsing is needed. The last such call is taken as the count.
    return re.findall(r"\.html\('(\d+)'\);", resc.text)[-1]


def get_new_details(url):
    """Download a news page and extract its title, info line and body text.

    Args:
        url: Address of the news page.

    Returns:
        tuple[str, str, str]: ``(title, info, content)`` taken from the first
        elements with classes ``show-title``, ``show-info`` and
        ``show-content``; the content is stripped of surrounding whitespace.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        IndexError: If any of the three elements is missing from the page.
    """
    res = requests.get(url, timeout=_REQUEST_TIMEOUT)
    res.raise_for_status()
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    title = soup.select('.show-title')[0].get_text()
    info = soup.select('.show-info')[0].get_text()
    content = soup.select('.show-content')[0].get_text().strip()
    return title, info, content


if __name__ == '__main__':
    n_url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0404/9183.html'
    num_news = int(get_ncoding(n_url))
    cl_times = int(click_count(num_news))
    # Fetch the page once and unpack, instead of one request per field.
    tit, inf, cont = get_new_details(n_url)
    print(num_news, '\n', cl_times, '\n', tit, '\n', inf, '\n', cont)