Python 哈希算法实现图片相似度比较
# coding: utf-8 import operator from PIL import Image import numpy as np import cv2 """图片处理: 图片截取、图片相似度比对、哈希算法比对""" def cmp_pic(pic1, pic2): """ 比对图片相似度 @param pic1: @param pic2: @return: """ a = Image.open(pic1) b = Image.open(pic2) return operator.eq(a, b) def image_interception(image): """ 图片截取 @param image: 目标图片 @return: """ img = cv2.imread(image) print('图片{}高度、宽度、通道数为:{}'.format(image, img.shape)) # (1792, 828, 3) 高度、宽度、通道数 cropped = img[170:650, 0:900] # 裁剪坐标为[y0:y1, x0:x1] cv2.imwrite(image, cropped) return image def aHash(img): """ 均值哈希算法 @param img: @return: """ # 缩放为8*8 img = cv2.resize(cv2.imread(img), (8, 8)) # 转换为灰度图 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # s为像素和初值为0,hash_str为hash值初值为'' s = 0 hash_str = '' # 遍历累加求像素和 for i in range(8): for j in range(8): s = s + gray[i, j] # 求平均灰度 avg = s / 64 # 灰度大于平均值为1相反为0生成图片的hash值 for i in range(8): for j in range(8): if gray[i, j] > avg: hash_str = hash_str + '1' else: hash_str = hash_str + '0' return hash_str def dHash(img): """ 差值感知算法 @param img: @return: """ # 缩放8*8 img = cv2.resize(cv2.imread(img), (9, 8)) # 转换灰度图 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) hash_str = '' # 每行前一个像素大于后一个像素为1,相反为0,生成哈希 for i in range(8): for j in range(8): if gray[i, j] > gray[i, j + 1]: hash_str = hash_str + '1' else: hash_str = hash_str + '0' return hash_str def pHash(img): """ 感知哈希算法(pHash) @param img: @return: """ # 缩放32*32 img = cv2.resize(cv2.imread(img), (32, 32)) # , interpolation=cv2.INTER_CUBIC # 转换为灰度图 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 将灰度图转为浮点型,再进行dct变换 dct = cv2.dct(np.float32(gray)) # opencv实现的掩码操作 dct_roi = dct[0:8, 0:8] hash = [] avreage = np.mean(dct_roi) for i in range(dct_roi.shape[0]): for j in range(dct_roi.shape[1]): if dct_roi[i, j] > avreage: hash.append(1) else: hash.append(0) return hash def classify_hist_with_split(image1, image2, size=(256, 256)): """ 通过得到RGB每个通道的直方图来计算相似度 @param image1: @param image2: @param size: @return: """ # 将图像resize后,分离为RGB三个通道,再计算每个通道的相似值 image1 = cv2.resize(cv2.imread(image1), size) image2 = cv2.resize(cv2.imread(image2), size) sub_image1 = cv2.split(image1) sub_image2 = cv2.split(image2) sub_data = 0 for im1, im2 in zip(sub_image1, sub_image2): sub_data += calculate(im1, im2) sub_data = sub_data / 3 # print(sub_data) return sub_data def calculate(image1, image2): """ 计算单通道的直方图的相似值 @param image1: @param image2: @return: """ hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 255.0]) hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 255.0]) # 计算直方图的重合度 degree = 0 for i in range(len(hist1)): if hist1[i] != hist2[i]: degree = degree + (1 - abs(hist1[i] - hist2[i]) / max(hist1[i], hist2[i])) else: degree = degree + 1 degree = degree / len(hist1) return degree def cmpHash(hash1, hash2): """ Hash值对比 @param hash1: @param hash2: @return: """ n = 0 # hash长度不同则返回-1代表传参出错 if len(hash1) != len(hash2): return -1 # 遍历判断 for i in range(len(hash1)): # 不相等则n计数+1,n最终为相似度 if hash1[i] != hash2[i]: n = n + 1 return n image_interception('11.png') image_interception('11.png') img1 = '1.png' img2 = '2.png' hash1 = aHash(img1) hash2 = aHash(img2) n = cmpHash(hash1, hash2) print('均值哈希算法相似度:', n) hash1 = dHash(img1) hash2 = dHash(img2) n = cmpHash(hash1, hash2) print('差值哈希算法相似度:', n) hash1 = pHash(img1) hash2 = pHash(img2) n = cmpHash(hash1, hash2) print('感知哈希算法相似度:', n) n = classify_hist_with_split(img1, img2) print('三直方图算法相似度:', n)