自己写python爬虫从百度上下载图片脚本
参考URL: https://blog.csdn.net/z704630835/article/details/82992036
1 下载脚本
# Import the required libraries
import json
import os

import requests

# Seconds to wait on any single HTTP request before giving up, so one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


# Crawl Baidu image search and parse the JSON result pages
def getManyPages(keyword, pages):
    """Fetch result pages from the Baidu image search JSON endpoint.

    Args:
        keyword: Search keyword of the images to download.
        pages: Number of result pages to request (30 results are asked
            for per page via the ``rn`` parameter).

    Returns:
        A list with one entry per page that was fetched and parsed
        successfully; each entry is the value of the ``data`` field of
        that page's JSON response. Pages that fail to download, fail to
        parse, or carry no ``data`` field are skipped.
    """
    url = 'https://image.baidu.com/search/acjson'
    urls = []
    # NOTE(review): offsets start at 30, so the results at offset 0 are
    # never requested -- confirm this is intended.
    for offset in range(30, 30 * pages + 30, 30):
        params = {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': offset,
            'rn': 30,
            'gsm': '1e',
            '1488942260214': '',
        }
        try:
            response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
            data = response.json().get('data')
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; catching the
            # base class also covers the simplejson-backed error that
            # requests may raise.
            print("解析出错")
            continue
        except requests.RequestException as err:
            print('请求失败:%s' % err)
            continue
        # A response without a 'data' field would otherwise put None in
        # the result and break the download loop later on.
        if data:
            urls.append(data)
    return urls


# Download the images and save them to disk
def getImg(dataList, localPath):
    """Download every thumbnail referenced in ``dataList``.

    Files are written as ``0.jpg``, ``1.jpg``, ... inside ``localPath``,
    numbered in download order.

    Args:
        dataList: List of result pages as returned by ``getManyPages``;
            each page is an iterable of dicts that may hold a
            ``thumbURL`` key.
        localPath: Directory to save the images into; it is created if
            it does not exist yet.
    """
    os.makedirs(localPath, exist_ok=True)
    x = 0
    for page in dataList:
        # Tolerate empty/None pages instead of raising TypeError.
        for item in page or ():
            thumb_url = item.get('thumbURL')
            if thumb_url is None:
                print('图片链接不存在')
                continue
            print('正在下载:%s' % thumb_url)
            try:
                ir = requests.get(thumb_url, timeout=REQUEST_TIMEOUT)
                # Do not store an HTTP error page under a .jpg name.
                ir.raise_for_status()
            except requests.RequestException as err:
                print('下载失败:%s' % err)
                continue
            # os.path.join works whether or not localPath ends with a
            # separator; the context manager closes the file handle.
            with open(os.path.join(localPath, '%d.jpg' % x), 'wb') as fh:
                fh.write(ir.content)
            x += 1


# Download images for a given keyword
if __name__ == '__main__':
    dataList = getManyPages('吃惊', 20)  # arg 1: keyword, arg 2: number of pages
    getImg(dataList, './data/chijing/')  # arg 2: directory to save into
2 通过人脸检测来过滤非人脸和剪切人脸
# Section 2.1: face detection with OpenCV

#!/usr/bin/env python
# -*- coding:utf-8-*-
import glob
import os
import os.path as osp
import shutil

import cv2

# Presumably provides mkdir_if_not_exist -- verify against io_helper.
from io_helper import *

cv_root = 'D:/install packages/opencv-3.4.2/data/haarcascades'
cv_face_model_path = cv_root + '/haarcascade_frontalface_alt2.xml'  # frontal face
cv_face_model_path2 = cv_root + '/haarcascade_profileface.xml'  # profile face (currently unused)


def test_face_detect_cv(filepath=''):
    """Detect frontal faces in one image and display them outlined.

    Args:
        filepath: Path of the image to inspect. The default empty string
            is the original placeholder; with it (or any unreadable
            path) a message is printed and nothing is displayed.
    """
    classifier1 = cv2.CascadeClassifier(cv_face_model_path)  # frontal face
    img = cv2.imread(filepath)  # read the image
    if img is None:
        # cv2.imread reports failure by returning None, not by raising.
        print('cannot read image: %s' % filepath)
        return
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    faceRects = classifier1.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=1, minSize=(10, 10))
    # Outline every detected face; an empty result draws nothing.
    for x, y, w, h in faceRects:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)
    cv2.imshow('a', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def face_detect_save(path=r'D:/AI/DataSet/emotion/fer2013/train_class'):
    """Crop detected faces out of a class-per-folder image data set.

    For each ``<path>/<class>/<name>.jpg``: when at least one frontal
    face is detected the crops are written to
    ``<path>/cut_face/<class>/``; otherwise the untouched image is
    copied to ``<path>/no_face/<class>/``.

    The first face keeps the source file name. Further faces found in
    the same image get ``_1``, ``_2``, ... appended to the stem, so they
    no longer overwrite one another.

    Args:
        path: Root directory of the data set.
    """
    # Without recursive=True, '**' matches exactly one directory level,
    # i.e. <path>/<class>/<file>.jpg.
    files = glob.glob(path + '/**/*.jpg')
    new_dir = path + '/cut_face'
    new_dir2 = path + '/no_face'
    mkdir_if_not_exist(new_dir)
    mkdir_if_not_exist(new_dir2)

    # OpenCV cascade classifier for frontal faces
    classifier1 = cv2.CascadeClassifier(cv_face_model_path)

    for filepath in files:
        # Name of the parent folder, independent of the path separator
        # (splitting on '\\' only worked for Windows-style paths).
        class_name = osp.basename(osp.dirname(filepath))
        chd_dir = new_dir + '/' + class_name
        mkdir_if_not_exist(chd_dir)
        chd_dir2 = new_dir2 + '/' + class_name
        mkdir_if_not_exist(chd_dir2)

        filename = osp.basename(filepath)
        img = cv2.imread(filepath)  # read the image
        if img is None:
            # Unreadable or corrupt file: skip it rather than crash in
            # cvtColor.
            print('skip unreadable image: %s' % filepath)
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale

        # Run the frontal-face detector
        faceRects = classifier1.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=1, minSize=(10, 10))
        # len() is used on purpose: the result may be a NumPy array,
        # whose truth value is ambiguous.
        if len(faceRects):
            stem, ext = osp.splitext(filename)
            for idx, (x, y, w, h) in enumerate(faceRects):
                face_roi = img[y:y + h, x:x + w, :]
                out_name = filename if idx == 0 else '%s_%d%s' % (stem, idx, ext)
                cv2.imwrite(chd_dir + '/' + out_name, face_roi)
        else:
            shutil.copy(filepath, chd_dir2 + '/' + filename)
    print('work is done .')


if __name__ == '__main__':
    face_detect_save()

# Section 2.2: face detection with the mtcnn package
# -----------------------------------------
使用 Python 公开包 mtcnn 来进行人脸检测和关键点检测。安装命令:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple mtcnn
import os

# Select the GPU through environment variables. As in the original
# ordering, this happens before TensorFlow is imported.
gpu_id = 3
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

import tensorflow as tf
from mtcnn.mtcnn import MTCNN

detector = MTCNN(scale_factor=0.99)

# NOTE(review): `img` is not defined in this snippet; the caller must
# load it first. Presumably an H x W x C image array (it is sliced with
# three indices below) -- confirm the colour order mtcnn expects.
src_h, src_w = img.shape[:2]  # image bounds used to clamp the padded box

face_list = detector.detect_faces(img)
for item in face_list:
    box = item['box']
    conf = item['confidence']
    keypoints_dict = item['keypoints']
    # Example of keypoints_dict:
    # {'left_eye': (14, 16), 'right_eye': (31, 12),
    #  'nose': (23, 25), 'mouth_left': (19, 35), 'mouth_right': (33, 32)}
    left_eyeXY = keypoints_dict['left_eye']
    right_eyeXY = keypoints_dict['right_eye']
    noseXY = keypoints_dict['nose']
    mouth_leftXY = keypoints_dict['mouth_left']
    mouth_rightXY = keypoints_dict['mouth_right']
    if conf > 0:
        print('detect a face .')
        x, y, w, h = box
        # Grow the box by `offset` pixels on every side, clamped so the
        # crop stays inside the image.
        offset = 5
        x = max(0, x - offset)
        y = max(0, y - offset)
        w = min(w + 2 * offset, src_w - x)
        h = min(h + 2 * offset, src_h - y)
        face_img = img[y:y + h, x:x + w, :]
-----------------------------------
2.3 使用关键点来进行人脸对齐