利用Python下载目标网站图片并利用PIL模块提取出图片中的Exif信息
本代码主要由以下几部分组成:
1. 获取目标网站页面
2. 利用xpath提取出图片的链接
3. 利用PIL模块提取出Exif信息
from email import header  # NOTE(review): unused by this script; kept so the import list is unchanged
import optparse
import os
import posixpath
import sys
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree
from PIL import Image
from PIL.ExifTags import TAGS


class ImagesExifExtractor:
    """Download every image referenced by a web page and print its Exif data.

    The target URL is read from the ``-u/--url`` command-line option when the
    object is constructed. ``run()`` then fetches the page, collects the
    ``<img src>`` links, saves each image under ``images/`` and prints the
    Exif tags found in it.
    """

    # Browser-like User-Agent sent with every request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
    }
    # Seconds to wait for a server; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10
    # Directory (relative to the working directory) that receives the images.
    IMAGES_DIR = 'images'

    def __init__(self) -> None:
        self.url = self.get_params()

    def get_params(self):
        """Parse the command line and return the value of ``-u/--url``.

        Prints the usage string and exits with status 1 when the option is
        missing.
        """
        parser = optparse.OptionParser("Usage: <Program> -u website url")
        parser.add_option('-u', '--url', dest='url', type='string', help='Specify URL of target website')
        options, _args = parser.parse_args()
        if options.url is None:
            print(parser.usage)
            sys.exit(1)
        return options.url

    def get_web_page(self, url):
        """Fetch *url* and return the response body as text.

        Exits with status 1 on a network error or on any HTTP status other
        than 200, so callers never receive ``None``.
        """
        print("[-] Retrieve web page of target...\n")
        try:
            response = requests.get(url=url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            sys.exit(1)

        # status_code is an int: compare against 200, not the string '200'.
        if response.status_code != 200:
            print("[!] Unexpected HTTP status %d for %s" % (response.status_code, url))
            sys.exit(1)
        return response.text

    def extract_images_links(self, response, base_url=None):
        """Return the absolute URLs of all ``<img src>`` attributes in *response*.

        Args:
            response: HTML text of the page.
            base_url: URL used to resolve relative ``src`` values; defaults to
                the URL given on the command line.

        Returns:
            A list of unique http(s) URLs in document order. ``<img>`` tags
            without a usable ``src`` (missing, empty, ``data:`` URIs, ...) are
            skipped instead of aborting the run.
        """
        if not response:
            return []
        if base_url is None:
            base_url = self.url

        try:
            html = etree.HTML(response)
        except (etree.LxmlError, ValueError) as e:
            # ValueError: lxml rejects str input that carries an XML encoding declaration.
            print(e)
            sys.exit(1)
        if html is None:
            return []

        images_links = []
        seen = set()
        # Selecting the attribute directly skips <img> elements that have no src.
        for src in html.xpath('//img/@src'):
            src = str(src).strip()
            if not src:
                continue
            link = urljoin(base_url, src)
            if urlparse(link).scheme not in ('http', 'https'):
                continue
            if link in seen:
                continue
            seen.add(link)
            print(link)
            images_links.append(link)
        return images_links

    def extract_exif(self, img_filename):
        """Print and return the Exif tags stored in the image *img_filename*.

        Returns:
            A dict mapping Exif tag names to values (only tags known to
            ``PIL.ExifTags.TAGS``), or ``None`` when the file cannot be read
            as an image. A failure only affects this image; the program keeps
            running.
        """
        try:
            # The context manager closes the underlying file handle.
            with Image.open(img_filename) as image:
                img_exif = image.getexif()
                print("[-] Extract exif data from the image [%s]" % img_filename)
                exif_data = {
                    TAGS[k]: v
                    for k, v in img_exif.items()
                    if k in TAGS
                }
        except Exception as e:
            # Best effort per image: Pillow can raise several exception types
            # on missing, truncated or non-image files.
            print(e)
            return None

        print(exif_data)
        return exif_data

    @staticmethod
    def _local_filename(url):
        """Return the path under ``images/`` where the image at *url* is stored."""
        # Use only the URL path so query strings / fragments do not end up in the name.
        name = posixpath.basename(urlparse(url).path)
        if name in ('', '.', '..'):
            name = 'unnamed_image'
        return ImagesExifExtractor.IMAGES_DIR + '/' + name

    def download_image(self, url):
        """Download *url* into the ``images/`` directory.

        Returns:
            The local filename on success, ``None`` when the download or the
            write failed (the error is printed).
        """
        print("[-] Downdload image from %s" % url)
        filename = self._local_filename(url)
        try:
            # Fetch first so a failed request does not leave an empty file behind.
            response = requests.get(url=url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            os.makedirs(self.IMAGES_DIR, exist_ok=True)
            with open(filename, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(e)
            return None

        print("[-] Saved image successfully: %s" % filename.split('/')[-1])
        return filename

    def run(self):
        """Fetch the page, download each referenced image and print its Exif data."""
        response = self.get_web_page(self.url)
        images_links = self.extract_images_links(response)
        for img_link in images_links:
            filename = self.download_image(img_link)
            if filename is None:
                # Nothing was saved for this link; move on to the next image.
                continue
            self.extract_exif(filename)


if __name__ == "__main__":
    image_exif = ImagesExifExtractor()

    image_exif.run()
STRIVE FOR PROGRESS,NOT FOR PERFECTION