Python 图像去重（imagededup）：比较图片相似度
开源地址:
https://github.com/idealo/imagededup
安装库
pip install imagededup
示例代码
from imagededup.methods import PHash
from imagededup.utils import plot_duplicates

# Directory whose images are hashed and compared. The same directory must be
# given to plot_duplicates, because the keys of the duplicate map are file
# names taken from this directory.
IMAGE_DIR = '/tmp/close_eyes_jt/jingtiao_eyes_img'

# File whose duplicates are plotted as a visual spot check.
SAMPLE_FILENAME = 'ukbench00120.jpg'

phasher = PHash()

# Compute a hash encoding for every image in the directory.
encodings = phasher.encode_images(image_dir=IMAGE_DIR)

# Find duplicates among the encoded images: each file name maps to the list of
# file names reported as its duplicates (an empty list when there are none).
d_1 = phasher.find_duplicates(encoding_map=encodings)

# Show the duplicates of one given image. Only plot when the sample is present
# in the map and actually has duplicates.
# NOTE(review): plot_duplicates is expected to fail for a file name that is
# missing from the map or has no duplicates -- confirm for your version.
if d_1.get(SAMPLE_FILENAME):
    plot_duplicates(image_dir=IMAGE_DIR, duplicate_map=d_1, filename=SAMPLE_FILENAME)

repeat_img = set()  # file names already reported as a duplicate of a kept image
is_img = []  # images to keep (one representative per group of duplicates)
for k, v in d_1.items():
    # Keep an image when it has no duplicates at all, or when it has not yet
    # been listed as a duplicate of an image seen earlier.
    if not v or k not in repeat_img:
        is_img.append(k)
    # A set gives O(1) membership tests; only membership is ever queried.
    repeat_img.update(v)

print(len(is_img))
单张图片调用方法
import traceback

from imagededup.methods import PHash


def compare_image_similarity(photo_id, photo_path, encoding_map: dict) -> tuple:
    """
    Check whether an image is a near-duplicate of any previously seen image.

    The image is hashed with PHash, registered in ``encoding_map`` under
    ``photo_id`` and compared against every entry already in the map. When a
    near-duplicate is found the new entry is removed again, so the map only
    accumulates images that were accepted.

    :param photo_id: key identifying the image inside ``encoding_map``
    :param photo_path: path of the image file to hash
    :param encoding_map: mapping of photo id to hash, mutated in place;
        pass an empty dict on the first call and reuse it afterwards
    :return: ``(accepted, encoding)``; ``accepted`` is ``False`` only when a
        stored image has a score below 5 against this one. On any error the
        function is best-effort and returns ``True`` with whatever encoding
        was computed so far (``""`` if hashing never succeeded).
    """
    encoding = ""
    try:
        phasher = PHash()
        # Compute the hash encoding of the image.
        encoding = phasher.encode_image(photo_path)
        if not encoding:
            # NOTE(review): presumably encode_image yields no hash for an
            # unreadable image -- confirm. Storing an empty/None value in the
            # shared map would break every later comparison, so skip it and
            # keep the best-effort "accepted" result.
            return True, encoding
        encoding_map[photo_id] = encoding
        # Retrieve candidates with a score of at most 10; those scoring below 5
        # are treated as duplicates and filtered out.
        duplicates = phasher.find_duplicates(
            encoding_map=encoding_map,
            scores=True,
            max_distance_threshold=10,
        )
        # Result shape: {'001.jpg': [('002.jpg', 0)], '003.jpg': []}
        for _image_name, score in duplicates.get(photo_id) or []:
            if score < 5:
                # Reject: undo the registration so the map stays duplicate-free.
                encoding_map.pop(photo_id, None)
                return False, encoding
        return True, encoding
    except Exception as e:
        # Best-effort: report the failure and treat the image as accepted.
        print(e)
        traceback.print_exc()
        return True, encoding


if __name__ == "__main__":
    # Check a single image; call repeatedly with one shared dict to compare a
    # stream of images one by one.
    compare_image_similarity("11111.jpg", "/tmp/11111.jpg", {})