Python网页正则匹配搜索工具
主要解决问题:
使用浏览器自带的搜索工具时,只能按常规方式查找,搜索到的只是包含关键字的位置,无法把所有符合要求的内容提取出来;如果搜索工具支持正则表达式,提取内容就会更加准确、高效。为此,本文使用Python编写了一个小工具来解决这一问题。
程序涉及:
tkinter组件、re、requests、datetime
1、程序代码:
# Regex search tool for the text of a web page.
#
# Workflow: enter a URL and press "转到" to load the page source into the
# result box, then enter a regular expression and press "搜索" to replace the
# box content with a numbered list of every match.
from tkinter import *
import datetime
import re
import requests

# Seconds to wait for the server; without a timeout a dead host would block
# the Tk event loop (and therefore freeze the window) indefinitely.
REQUEST_TIMEOUT = 10

# Matches the timestamp header written by _timestamp_mark() at the very start
# of the result box, so that it can be excluded from the searched text.
# str(datetime.now()) omits the fractional part when microseconds are zero.
_MARK_RE = re.compile(r'\A\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)?:\n')


def _timestamp_mark():
    """Return the 'current time + colon + newline' header put before output."""
    return str(datetime.datetime.now()) + ':\n'


# Fetch the page
def GetUrl():
    """Download the URL from the address entry and show its body.

    On HTTP 200 the result box is cleared and filled with a timestamp header
    followed by the response text. An empty URL or a non-200 status appends
    the "输入错误" message; any exception raised while fetching is appended as
    text instead of being lost in the Tk callback machinery.
    """
    mark = _timestamp_mark()
    url = url_inpt.get().strip()
    if not url:
        out_text.insert(END, mark + "输入错误" + '\n\n')
        return
    try:
        response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
        status = response.status_code
        body = response.text
    except Exception as e:
        # GUI boundary: report every failure (bad URL, DNS, timeout, ...) to
        # the user rather than letting Tk print it to an invisible console.
        out_text.insert(END, mark + str(e) + '\n\n')
        return
    if status == 200:
        out_text.delete("1.0", END)
        out_text.insert(END, mark + body + '\n\n')
    else:
        out_text.insert(END, mark + "输入错误" + '\n\n')


# Search the loaded text
def Search():
    """Run the regex from the pattern entry over the result box content.

    The box content (minus a leading timestamp header, if present) is scanned
    with ``findall``; the box is then replaced by one line per match, each
    prefixed with its 1-based number left-justified to five columns. An
    invalid pattern replaces the box content with the error message. An empty
    pattern does nothing.
    """
    mark = _timestamp_mark()
    pat_str = pat_inpt.get()
    if not pat_str:
        return
    try:
        pat = re.compile(pat_str)
    except (re.error, OverflowError, RecursionError) as e:
        # Text indices are "line.column" strings starting at "1.0"; a bare 0
        # is rejected by Tk and would hide the real error.
        out_text.delete("1.0", END)
        out_text.insert(END, mark + str(e) + '\n\n')
        return
    # Search the whole buffer; only the timestamp header is skipped.
    web_str = _MARK_RE.sub('', out_text.get("1.0", END), count=1)
    result = pat.findall(web_str)
    out_text.delete("1.0", END)
    if result:
        lines = [
            "{0}{1}\n".format(str(num).ljust(5), str(item))
            for num, item in enumerate(result, start=1)
        ]
        out_text.insert(END, ''.join(lines))


def GetWidth():
    """Position every widget inside the window.

    The place geometry manager keeps relative sizes in sync with the window
    on its own, so this only has to run once; no periodic re-layout is needed.
    """
    # Row 1: URL label, entry, "go" and "clear" buttons.
    url_lbl.place(x=0.0, y=0.0, height=25, width=70)
    url_inpt.place(x=70, y=0.0, height=25, width=300)
    url_Butt.place(x=370, y=0.0, height=25, width=50)
    url_cls_Butt.place(x=420, y=0.0, height=25, width=50)

    # Row 2: pattern label, entry, "search" and "clear" buttons.
    pat_lbl.place(x=0.0, y=30, height=25, width=70)
    pat_inpt.place(x=70, y=30, height=25, width=300)
    pat_Butt.place(x=370, y=30, height=25, width=50)
    pat_cls_Butt.place(x=420, y=30, height=25, width=50)

    # Row 3 and below: result label, "clear result" button, text box, scrollbars.
    out_lbl.place(x=0.0, y=60, height=25, width=70)
    out_cls_Butt.place(x=370, y=60, height=25, width=80)
    out_text.place(relx=0.0, y=90, relheight=0.81, relwidth=0.95)
    scl_h.place(relx=0.95, y=90, relheight=0.85, width=20)    # vertical scrollbar
    scl_v.place(x=0.0, rely=0.96, height=20, relwidth=0.95)   # horizontal scrollbar


# Main window: entries, buttons and the result box
win = Tk()
win.title("网页正则搜索工具")
scl_h = Scrollbar(win)                     # vertical scrollbar (despite the name)
scl_v = Scrollbar(win, orient=HORIZONTAL)  # horizontal scrollbar

# Size the window and centre it on the screen.
width = 500
height = 600
screenwidth = win.winfo_screenwidth()
screenheight = win.winfo_screenheight()
alignstr = '{0}x{1}+{2}+{3}'.format(
    width, height, (screenwidth - width) // 2, (screenheight - height) // 2)
win.geometry(alignstr)

# Row 1: URL label, URL entry, "go" button, "clear" button.
url_lbl = Label(win, text="搜索网址url")
url_inpt = Entry(win)
url_inpt.insert(END, "https://www.rebooo.com/archives/3053")
url_Butt = Button(win, text="转到", command=GetUrl)
url_cls_Butt = Button(win, text="清除", command=lambda: url_inpt.delete(0, END))

# Row 2: pattern label, pattern entry, "search" button, "clear" button.
pat_lbl = Label(win, text="正则表达式")
pat_inpt = Entry(win)
pat_inpt.insert(END, "src=\"ed2k.*?\"")
pat_Butt = Button(win, text="搜索", command=Search)
pat_cls_Butt = Button(win, text="清除", command=lambda: pat_inpt.delete(0, END))

# Row 3: result label, result text box wired to both scrollbars.
out_lbl = Label(win, text="搜索结果")
out_text = Text(win, wrap='none')  # no wrapping, so long lines scroll sideways
out_text.config(xscrollcommand=scl_v.set, yscrollcommand=scl_h.set)
scl_h.config(command=out_text.yview)
scl_v.config(command=out_text.xview)
out_cls_Butt = Button(win, text="清除结果", command=lambda: out_text.delete("1.0", END))

# All widgets are laid out with place(); see GetWidth().
GetWidth()
win.mainloop()
2、打包成exe文件的方法
①安装pyinstaller:pip install pyinstaller
②打包命令:pyinstaller -F -w -i logo.ico retool.py (-F:打包成单个文件;-w:运行时不显示黑色命令窗口;-i:添加应用图标)
3、应用程序界面: