Python网页正则匹配搜索工具

主要解决问题:

使用浏览器自带的搜索工具时,只能按普通文本进行查找,而且找到的只是包含关键字的位置,无法提取所有符合要求的内容。如果浏览器的搜索工具支持正则表达式,提取内容就会更加准确、高效。为此,本文使用Python编写了一个小工具来解决这一问题。

 

程序涉及:

tkinter组件、re、requests 

 

1、程序代码:

  1 # 当前网页正则搜索工具
  2 from tkinter import *
  3 import re,requests,datetime
  4 
  5 # 获取网址
  6 def GetUrl():
  7     try:
  8         mark = str(datetime.datetime.now())+':\n'
  9         if url_inpt.get():
 10             url = url_inpt.get()
 11             response = requests.get(url=url)
 12             if response.status_code == 200:
 13                 out_text.delete(1.0,END)
 14                 out_text.insert(END,mark+response.text+'\n\n')
 15         else:
 16             out_text.insert(END,mark+"输入错误"+'\n\n')
 17     except Exception as e:
 18         out_text.insert(END,mark+str(e)+'\n\n')
 19 
 20 # 工具界面函数:输入、处理、输出
 21 def Search():
 22     pat_str=str(pat_inpt.get())
 23     try:
 24         mark = str(datetime.datetime.now())+':\n'
 25         if pat_inpt.get():
 26             pat_str = pat_inpt.get()
 27             web_str = out_text.get(27.0,END)
 28             if not isinstance(pat_str,list):
 29                 pat = re.compile(pat_str)
 30                 result = pat.findall(web_str)
 31                 out_text.delete(0.0,END)
 32                 for num,item in enumerate(result):
 33                     out_text.insert(END,"{0}{1}".format(str(num+1),' '*(5-len(str(num+1))))+str(item)+'\n')
 34             elif isinstance(pat_str,list):
 35                 pat = re.compile(pat_str[0],pat_str[1])
 36                 result = pat.findall(web_str)
 37                 out_text.delete(0,END)
 38                 out_text.insert(END,mark+str(result)+'\n\n')
 39     except Exception as e:
 40         out_text.delete(0,END)
 41         out_text.insert(END,mark+str(e)+'\n\n')
 42 
 43 def GetWidth():
 44     url_lbl.place(x=0.0,y=0.0,height=25,width=70)
 45     url_inpt.place(x=70,y=0.0,height=25,width=300)
 46     url_Butt.place(x=370,y=0.0,height=25,width=50)
 47     url_cls_Butt.place(x=420,y=0.0,height=25,width=50)
 48 
 49     pat_lbl.place(x=0.0,y=30,height=25,width=70)
 50     pat_inpt.place(x=70,y=30,height=25,width=300)
 51     pat_Butt.place(x=370,y=30,height=25,width=50)
 52     pat_cls_Butt.place(x=420,y=30,height=25,width=50)
 53 
 54     out_lbl.place(x=0.0,y=60,height=25,width=70)
 55     out_cls_Butt.place(x=370,y=60,height=25,width=80)
 56     out_text.place(relx=0.0,y=90,relheight=0.81,relwidth=0.95)
 57     scl_h.place(relx=0.95,y=90,relheight=0.85,width=20) # 纵轴滚动条
 58     scl_v.place(x=0.0,rely=0.96,height=20,relwidth=0.95) # 横轴滚动条
 59     win.after(50,GetWidth)
 60 
 61 # 工具界面:输入框、按钮、结果输出框
 62 win = Tk()
 63 win.title("网页正则搜索工具")
 64 scl_h = Scrollbar(win) # 纵轴
 65 scl_v = Scrollbar(win,orient=HORIZONTAL) # 横向、横轴
 66 # 设置窗口大小
 67 width = 500
 68 height = 600
 69 screenwidth = win.winfo_screenwidth()
 70 screenheight = win.winfo_screenheight()
 71 alignstr = '%dx%d+%d+%d'%(width,height,(screenwidth-width)/2,(screenheight-height)/2)
 72 win.geometry(alignstr)
 73 
 74 
 75 # 第一行:标签文字【网址】、网址输入框、跳转按钮
 76 url_lbl = Label(win,text="搜索网址url",width=8,height=1)
 77 url_lbl.grid(row=1,column=0,columnspan=1)
 78 url_inpt = Entry(win,width=int(width/12.5))
 79 url_inpt.grid(row=1,column=1,rowspan=1,columnspan=1)
 80 url_inpt.insert(END,"https://www.rebooo.com/archives/3053")
 81 url_Butt = Button(win,text="转到",command=GetUrl)
 82 url_Butt.grid(row=1,column=3)
 83 url_cls_Butt = Button(win,text="清除",command=lambda :url_inpt.delete(0,END))
 84 url_cls_Butt.grid(row=1,column=4)
 85 
 86 # 第二行:标签文字【正则表达式】、正则输入框、搜索按钮
 87 pat_lbl = Label(win,text="正则表达式",width=8,height=1)
 88 pat_lbl.grid(row=2,column=0,columnspan=1)
 89 pat_inpt = Entry(win,width=int(width/12.5))
 90 pat_inpt.grid(row=2,column=1,rowspan=1,columnspan=1)
 91 pat_inpt.insert(END,"src=\"ed2k.*?\"")
 92 pat_Butt = Button(win,text="搜索",command=Search)
 93 pat_Butt.grid(row=2,column=2)
 94 pat_cls_Butt = Button(win,text="清除",command=lambda :pat_inpt.delete(0,END))
 95 pat_cls_Butt.grid(row=1,column=4)
 96 
 97 # 第三行:结果输出框【搜索结果】
 98 out_lbl = Label(win,text="搜索结果",width=8,height=1)
 99 out_lbl.grid(row=3,column=0,columnspan=1)
100 out_text = Text(win,width=int(width/7.14),height=int(height/15.38)) # width=70,height=26
101 out_text.place(relx=0.0,rely=0.0,relheight=0.2,relwidth=0.6)
102 out_text.config(xscrollcommand=scl_v.set,yscrollcommand=scl_h.set,wrap='none')
103 scl_h.config(command=out_text.yview)
104 scl_v.config(command=out_text.xview)
105 out_cls_Butt = Button(win,text="清除结果",command=lambda :out_text.delete(0.0,END))
106 out_cls_Butt.grid(row=3,column=4)
107 GetWidth()
108 win.mainloop()

 

2、打包成exe文件的方法

①安装pyinstaller:pip install pyinstaller

②打包命令:pyinstaller -F -w -i logo.ico retool.py (-F:打包成单个文件;-w:运行时不显示黑色命令窗口;-i:添加应用图标)

 

3、应用程序界面:

 

posted @ 2021-03-07 00:00  猎奇会员  阅读(300)  评论(0)  编辑  收藏  举报