利用Python自动识别目标网页是否有注释

  本代码主要用于识别网页源代码中是否含有注释，并将注释显示出来，因为有些时候注释具有一定的信息收集价值。主要用到的模块包括：

  1. requests请求模块

  2. re正则表达式模块

 1 import requests
 2 import re
 3 import sys
 4 import optparse
 5 import termcolor
 6 
 7 class CommentFinder:
 8     def __init__(self) -> None:
 9         self.url = self.url_prefix_formatter(self.get_params())
10         self.headers = {
11             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'
12         }
13 
14 
15     def get_params(self):
16         parser = optparse.OptionParser('Usage: < Program > -u website url ')
17         parser.add_option('-u', '--url', dest='url', type='string', help='Specify website url')       
18         options, args = parser.parse_args()
19         if options.url is None :
20             print(parser.usage)
21             sys.exit(0)
22         return options.url
23     
24     def url_prefix_formatter(self, url):
25         if url.startswith('http://'):
26             return url
27         elif url.startswith('https://'):
28             return url
29         else:
30             return 'http://' + url
31     
32     def retrieve_webpage(self):
33         try:
34             response = requests.get(url=self.url, headers=self.headers)
35             if response.status_code == 200:
36                 return response.text
37         except Exception as e:
38             print(e)
39             sys.eixt(0)
40     
41     def run(self):
42         response = self.retrieve_webpage()
43         pattern = r'<!--(.*)-->'
44         if response:
45             comment_list = re.findall(pattern, response)
46             if len(comment_list)==0:
47                 print("No comment found on the target")
48             else:
49                 print("Has found the following comments on the target: %s" % self.url)
50                 i = 1
51                 for comment in comment_list:
52                     print('\t%d'%i,termcolor.colored(comment, 'blue'))
53                     i +=1
54 
if __name__ == '__main__':
    # Script entry point: build the finder (which parses the command line)
    # and immediately scan the target page for comments.
    CommentFinder().run()

 

posted @ 2022-05-29 22:35  Jason_huawen  阅读(46)  评论(0)  编辑  收藏  举报