利用Python下载目标网站图片并利用PIL模块提取出图片中的Exif信息

  本代码主要由以下几部分组成:

  1. 获取目标网站页面

  2. 利用xpath提取出图片的链接,并将图片下载到本地

  3. 利用PIL模块提取出Exif信息

 

import optparse
import os
import sys
from email import header
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree
from PIL import Image
from PIL.ExifTags import TAGS


class ImagesExifExtractor:
    """Download the images referenced by a web page and print their EXIF tags.

    Workflow (see :meth:`run`): fetch the page, collect every ``<img src>``
    link, save each image under ``IMAGE_DIR`` and print the EXIF tags that
    Pillow knows a name for.
    """

    # Firefox User-Agent string sent with every HTTP request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
    }
    # Directory (relative to the working directory) that receives the images.
    IMAGE_DIR = 'images'
    # Seconds to wait for a server before giving up on a request.
    TIMEOUT = 10

    def __init__(self, url=None) -> None:
        """Store the target URL.

        Args:
            url: Page to scan. When omitted, the URL is read from the
                ``-u/--url`` command-line option via :meth:`get_params`.
        """
        self.url = self.get_params() if url is None else url

    def get_params(self):
        """Return the ``-u/--url`` command-line value.

        Prints the usage string and exits when the option is missing.
        """
        parser = optparse.OptionParser("Usage: <Program> -u website url")
        parser.add_option('-u', '--url', dest='url', type='string',
                          help='Specify URL of target website')
        options, _args = parser.parse_args()
        if options.url is None:
            print(parser.usage)
            sys.exit(0)
        return options.url

    def get_web_page(self, url):
        """Fetch ``url`` and return the response body as text.

        Returns ``None`` (after printing the status) when the server does not
        answer with HTTP 200. Exits with status 1 when the request itself
        fails.
        """
        print("[-] Retrieve web page of target...\n")
        try:
            response = requests.get(url=url, headers=self.HEADERS,
                                    timeout=self.TIMEOUT)
        except requests.RequestException as e:
            print(e)
            sys.exit(1)

        # NOTE: status_code is an int, so it must be compared with 200,
        # not with the string '200'.
        if response.status_code != 200:
            print("[!] Unexpected HTTP status %s for %s"
                  % (response.status_code, url))
            return None
        return response.text

    def extract_images_links(self, response, base_url=None):
        """Return the ``src`` of every ``<img>`` element in the HTML text.

        Args:
            response: HTML document as a string.
            base_url: Optional page URL. When given, relative ``src`` values
                are resolved against it so they can be downloaded.

        ``<img>`` elements without a usable ``src`` and inline ``data:``
        images are skipped. Exits with status 1 when the document cannot be
        parsed.
        """
        try:
            html = etree.HTML(response)
        except (etree.LxmlError, ValueError, TypeError) as e:
            print(e)
            sys.exit(1)
        if html is None:
            # Nothing parseable in the document, hence no images.
            return []

        images_links = []
        for src in html.xpath('//img/@src'):
            src = src.strip()
            if not src or src.startswith('data:'):
                continue
            link = urljoin(base_url, src) if base_url else src
            print(link)
            images_links.append(link)
        return images_links

    @classmethod
    def _local_path(cls, url):
        """Return the path under ``IMAGE_DIR`` for ``url``, or ``None``.

        Only the last segment of the URL *path* is used, so query strings and
        fragments never end up in the file name. ``None`` is returned when
        the URL has no usable last segment (for example it ends with ``/``).
        """
        name = urlparse(url).path.rsplit('/', 1)[-1]
        if name in ('', '.', '..'):
            return None
        return os.path.join(cls.IMAGE_DIR, name)

    def extract_exif(self, img_filename):
        """Print and return the named EXIF tags of ``img_filename``.

        Returns a ``{tag_name: value}`` dict, or ``None`` when the file cannot
        be read as an image.
        """
        try:
            with Image.open(img_filename) as image:
                img_exif = image.getexif()
                print("[-] Extract exif data from the image [%s]" % img_filename)
                # Keep only the tag ids that Pillow can translate to a name.
                exif_data = {
                    TAGS[tag_id]: value
                    for tag_id, value in img_exif.items()
                    if tag_id in TAGS
                }
        except Exception as e:
            # Broad on purpose: the file comes from an arbitrary website, and
            # one unreadable image must not stop the rest of the batch.
            print(e)
            return None

        print(exif_data)
        return exif_data

    def download_image(self, url):
        """Download ``url`` into ``IMAGE_DIR``.

        Returns the path of the saved file, or ``None`` when the download
        failed. Failures are printed and never abort the run.
        """
        print("[-] Downdload image from %s" % url)
        filename = self._local_path(url)
        if filename is None:
            print("[!] Cannot derive a file name from %s" % url)
            return None

        try:
            # Fetch before opening the file so a failed request does not
            # leave an empty file behind.
            response = requests.get(url=url, headers=self.HEADERS,
                                    timeout=self.TIMEOUT)
            response.raise_for_status()
            os.makedirs(self.IMAGE_DIR, exist_ok=True)
            with open(filename, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError, ValueError) as e:
            print(e)
            return None

        print("[-] Saved image successfully: %s" % os.path.basename(filename))
        return filename

    def run(self):
        """Fetch the page, then download each image and print its EXIF data."""
        response = self.get_web_page(self.url)
        if response is None:
            sys.exit(1)

        for img_link in self.extract_images_links(response, base_url=self.url):
            filename = self.download_image(img_link)
            if filename is not None:
                self.extract_exif(filename)
if __name__ == "__main__":
    # Script entry point: build the extractor (which reads -u/--url from the
    # command line) and process the target page.
    ImagesExifExtractor().run()

 

posted @ 2022-05-22 11:00  Jason_huawen  阅读(164)  评论(0编辑  收藏  举报