使用正则表达式,取得点击次数,函数抽离
1、用正则表达式判断邮箱输入是否正确
import re

# Local part: alphanumerics followed by up to four dot-separated groups;
# domain: alphanumerics followed by up to four dot-separated labels.
# Named EMAIL_PATTERN rather than `str` so the builtin is not shadowed.
EMAIL_PATTERN = r'^[a-zA-Z0-9]+(\.[a-zA-Z0-9_-]+){0,4}@[a-zA-Z0-9]+(\.[a-zA-Z0-9]+){0,4}$'


def is_valid_email(address):
    """Return True if *address* matches EMAIL_PATTERN, else False."""
    return re.match(EMAIL_PATTERN, address) is not None


email = '840805339@qq.com'
if is_valid_email(email):
    print('success')
else:
    print('please input ...')
2、用正则表达式识别出全部电话号码
3、用正则表达式进行英文分词
import re

# Split on runs of digits; the digit runs themselves are discarded.
# Raw strings keep `\d` from being treated as a (invalid) string escape.
m = re.split(r'\d+', '123abc321cba')
print(m)
print("\n")
# With a capturing group, the matched digit runs are kept in the result list.
m = re.split(r'(\d+)', '123abc321cba')
print(m)
4、用正则表达式获取新闻编号
# Sample news URL to parse; `a` was used without ever being assigned.
a = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0404/9183.html'

# Capture everything between "xiaoyuanxinwen_" and ".html", then keep the
# last path segment (the news id). Dots are escaped so they match literally.
d = re.match(
    r'http://news\.gzcc\.cn/html/2018/xiaoyuanxinwen_(.*)\.html', a
).group(1).split('/')[-1]
# Same capture via findall, without trimming to the last path segment.
f = re.findall(r'_(.*)\.html', a)[0]
print(d)
print(f)
5、生成点击次数的request.URL
6、获取点击次数
import re
import requests  # required by requests.get below; was missing

clickUrl = 'http://oa.gzcc.cn/api.php?op=count&id=9183&modelid=80'
resc = requests.get(clickUrl)
resc.encoding = 'utf-8'
# Take the text after the last ".html", then strip the surrounding "('" and
# "');" characters. NOTE(review): presumably the body ends with
# ".html('<count>');" - confirm against a live response.
click = resc.text.split('.html')[-1].lstrip("('").rstrip("');")
print(click)
7、函数抽离:将获取点击次数的逻辑封装成函数
import re
import requests


def getClickCount(newsUrl):
    """Fetch the click count for a news article and return it.

    The numeric news id is taken from the trailing "/<digits>.html" of
    *newsUrl* and used to build the counter API URL, which is then requested.
    The id, the API URL and the count are printed along the way.

    Args:
        newsUrl: Article URL ending in "/<digits>.html".

    Returns:
        The click count as an int.

    Raises:
        ValueError: If *newsUrl* has no trailing numeric id, or the API
            response does not contain a ".html('<digits>')" fragment.
    """
    id_match = re.search(r"/(\d+)\.html$", newsUrl)
    if id_match is None:
        raise ValueError("no news id found in URL: {!r}".format(newsUrl))
    newId = id_match.group(1)
    print(newId)

    clickUrl = "http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80".format(newId)
    print(clickUrl)

    resc = requests.get(clickUrl)
    # The count is pulled out of a ".html('<digits>')" fragment in the body.
    count_match = re.search(r"\.html\('(\d+)'\)", resc.text)
    if count_match is None:
        raise ValueError("no click count found in response from " + clickUrl)
    num = count_match.group(1)
    print(num)
    return int(num)


getClickCount("http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0404/9183.html")