day 18 - 2 正则与 re 模块练习
1、爬虫的例子
# Crawler example (method 1): urllib + re.findall
#
# Flow:
#   1. Build the page URL and download the HTML.
#   2. The response body is bytes; decode it as UTF-8 to get the text that the
#      regular expression is matched against.
#   3. re.findall(pattern, text) returns a list with every match found.
import re
from urllib.request import urlopen


def getPage(url):
    """Download *url* and return the response body decoded as UTF-8 text."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def parsePage(s):
    """Extract the movie entries from one page of HTML.

    Returns a list of ``(id, title, rating_num, comment_num)`` string tuples,
    one per ``<div class="item">`` entry matched, in document order.
    """
    # Raw strings keep ``\d`` a regex escape instead of an invalid string
    # escape. re.S lets ``.`` cross newlines, since each entry spans many lines.
    ret = re.findall(
        r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
        r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
        s,
        re.S,
    )
    return ret


def main(num):
    """Fetch the page whose ``start`` query parameter is *num* and print its entries."""
    url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    response_html = getPage(url)
    ret = parsePage(response_html)
    print(ret)


if __name__ == '__main__':
    # 10 pages, with the start offset advancing by 25 per page.
    for start in range(0, 250, 25):
        main(start)
# Crawler example (method 2): requests + compiled regex + generator + JSON lines
import json
import re

import requests

# Compiled once at import time instead of on every parsePage() call.
# Raw strings keep ``\d`` a regex escape; re.S lets ``.`` cross newlines.
_ITEM_PATTERN = re.compile(
    r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
    r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
    re.S,
)


def getPage(url):
    """Download *url* with requests and return the response text."""
    response = requests.get(url)
    return response.text


def parsePage(s):
    """Lazily yield one dict per movie entry found in the HTML text *s*.

    Each dict has the string keys ``id``, ``title``, ``rating_num`` and
    ``comment_num``, taken from the named groups of the pattern.
    """
    for match in _ITEM_PATTERN.finditer(s):
        yield {
            "id": match.group("id"),
            "title": match.group("title"),
            "rating_num": match.group("rating_num"),
            "comment_num": match.group("comment_num"),
        }


def main(num):
    """Fetch the page whose ``start`` parameter is *num*; print and save its entries.

    Every entry is printed and appended to the file ``move_info7`` as one
    JSON object per line.
    """
    url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    response_html = getPage(url)
    ret = parsePage(response_html)
    # Prints the generator object itself; the entries are only produced when
    # the loop below iterates over it.
    print(ret)
    # ``with`` guarantees the file is flushed and closed, even on error.
    with open("move_info7", "a", encoding="utf8") as f:
        for obj in ret:
            print(obj)
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            data = json.dumps(obj, ensure_ascii=False)
            f.write(data + "\n")


if __name__ == '__main__':
    # 10 pages, with the start offset advancing by 25 per page.
    for start in range(0, 250, 25):
        main(start)
2、计算器
# Calculator: evaluate an arithmetic expression (+, -, *, /, parentheses)
# using regular expressions only, without eval().
import re
from decimal import Decimal

# Sample expression to evaluate (spaces are removed before evaluation).
a = '1 - 2 * ( ( 6 0 -3 0 +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) )'

# Leftmost multiplication/division; the right operand may carry a sign.
_MUL_DIV = re.compile(r'\d+\.?\d*[*/][+-]?\d+\.?\d*')
# A number with an optional leading minus sign.
_NUMBER = re.compile(r'-?\d+\.?\d*')
# An innermost parenthesised group, i.e. one with no nested parentheses.
_INNER_PAREN = re.compile(r'\([^()]+\)')


def _plain(number):
    """Return *number* as text without exponent notation.

    repr() of very small or very large floats looks like ``1e-05``; the number
    regex would read that as ``1`` and ``-05``. Other values are unchanged.
    """
    text = repr(number)
    if 'e' in text or 'E' in text:
        # Decimal parses the shortest repr exactly; 'f' prints it in full.
        text = '{:f}'.format(Decimal(text))
    return text


def _splice(text, match, replacement):
    """Replace exactly the span covered by *match* in *text*."""
    return text[:match.start()] + replacement + text[match.end():]


def format(new_equation):
    """Collapse adjacent sign pairs (``+-``, ``-+``, ``--``, ``++``) into one sign.

    Repeats until nothing changes, so longer runs such as ``---`` are reduced
    as well.
    """
    previous = None
    while previous != new_equation:
        previous = new_equation
        for pair, sign in (('+-', '-'), ('-+', '-'), ('--', '+'), ('++', '+')):
            new_equation = new_equation.replace(pair, sign)
    return new_equation


def cal(val_son):
    """Compute a single ``x/y`` or ``x*y`` sub-expression and return it as text.

    Returns None when *val_son* contains neither operator.
    """
    if '/' in val_son:
        left, right = val_son.split('/')
        return _plain(float(left) / float(right))
    elif '*' in val_son:
        left, right = val_son.split('*')
        return _plain(float(left) * float(right))
    return None


def no_brackets(val):
    """Evaluate an expression that has no inner parentheses.

    *val* may be wrapped in one pair of parentheses, which is stripped.
    Multiplications and divisions are reduced left to right first; the signed
    numbers that remain are then added together. Returns the result as text.
    """
    new_val = val.strip('()')
    while True:
        found = _MUL_DIV.search(new_val)
        if found is None:
            break
        # Replace only the matched span. str.replace would also rewrite equal
        # text inside longer numbers (the "2*3" inside "12*3").
        new_val = format(_splice(new_val, found, cal(found.group())))
    total = sum(float(token) for token in _NUMBER.findall(new_val))
    return _plain(total)


def func(new_equation):
    """Evaluate *new_equation* (no spaces) and return the result as text.

    Innermost parenthesised groups are evaluated and substituted one at a time
    until none remain; the flat remainder is then evaluated.
    """
    while True:
        group = _INNER_PAREN.search(new_equation)
        if group is None:
            return no_brackets(new_equation)
        new_equation = format(_splice(new_equation, group, no_brackets(group.group())))


if __name__ == '__main__':
    a = input("请输入要计算的式子>>>")
    new_equation = a.replace(' ', '')
    print(func(new_equation))