话不多说，先上几张结果图。
可以看到上述几种算法有明显差异：首先从效率上来说，当图集数量大于 100 时，SSIM > Hash > Gray > 特征结构匹配；
从结果精确度上来说，特征匹配 > Hash > Gray > SSIM。
def are_images_similar_ssim(img1_data, img2_data, img1_name, img2_name, threshold=0.85):
    """Compare two images by SSIM score.

    Returns (img1_name, img2_name) when the score reaches `threshold`,
    otherwise False. Returns False when either image fails to preprocess.
    """
    gray_a = preprocess_image(img1_data)
    gray_b = preprocess_image(img2_data)
    if gray_a is None or gray_b is None:
        return False
    # SSIM requires both inputs to share identical dimensions.
    if gray_a.shape != gray_b.shape:
        gray_b = cv2.resize(gray_b, (gray_a.shape[1], gray_a.shape[0]))
    score = ssim(gray_a, gray_b)
    if score < threshold:
        return False
    print(f"SSIM: {score}")
    return (img1_name, img2_name)
def are_images_similar_psnr(img1_data, img2_data, img1_name, img2_name, threshold=9):
    """Compare two images by PSNR.

    Returns (img1_name, img2_name) when PSNR >= `threshold`, otherwise False.
    Returns False when either image fails to preprocess.
    """
    first = preprocess_image(img1_data)
    second = preprocess_image(img2_data)
    if first is None or second is None:
        return False
    # Resize so both frames have identical dimensions before computing PSNR.
    if first.shape != second.shape:
        second = cv2.resize(second, (first.shape[1], first.shape[0]))
    score = psnr(first, second)
    print(f"PSNR: {score}")
    if score < threshold:
        return False
    print(f"符合PSNR: {score}")
    return (img1_name, img2_name)
def normalized_mutual_info(img1, img2):
    """Return the normalized mutual information (NMI) of two images' intensities.

    NMI = 2 * I(X;Y) / (H(X) + H(Y)), which lies in [0, 1]: 1.0 for identical
    intensity distributions, 0.0 for statistically independent ones. This matches
    the [0, 1] threshold (0.7) used by are_images_similar_nmi.

    Bug fix: the previous version returned entropy(pxy) - entropy(px*py)
    = H(X,Y) - (H(X) + H(Y)) = -I(X;Y), i.e. the NEGATIVE of the mutual
    information. That value is always <= 0, so a positive similarity threshold
    could never be reached.

    Parameters:
        img1, img2: equally-sized numpy arrays of pixel intensities.
    Returns:
        float NMI score in [0, 1].
    """
    hist_2d, _, _ = np.histogram2d(img1.ravel(), img2.ravel(), bins=256)
    pxy = hist_2d / float(np.sum(hist_2d))      # joint distribution P(X, Y)
    px = np.sum(pxy, axis=1)                    # marginal P(X)
    py = np.sum(pxy, axis=0)                    # marginal P(Y)
    px_py = px[:, None] * py[None, :]           # independence baseline P(X)P(Y)
    # Mutual information over the non-zero joint cells (0 * log 0 := 0).
    nz = pxy > 0
    mi = np.sum(pxy[nz] * np.log(pxy[nz] / px_py[nz]))
    hx = -np.sum(px[px > 0] * np.log(px[px > 0]))
    hy = -np.sum(py[py > 0] * np.log(py[py > 0]))
    if hx + hy == 0.0:
        # Both images are constant; NMI is undefined — report no shared information.
        nmi = 0.0
    else:
        nmi = 2.0 * mi / (hx + hy)
    print(nmi)
    return nmi
def are_images_similar_nmi(img1_data, img2_data, img1_name, img2_name, threshold=0.7):
    """Judge image similarity via normalized mutual information.

    Returns (img1_name, img2_name) when the NMI score reaches `threshold`,
    otherwise False. Returns False when either image fails to preprocess.
    """
    left = preprocess_image(img1_data)
    right = preprocess_image(img2_data)
    if left is None or right is None:
        return False
    # Both images must have the same dimensions before comparison.
    if left.shape != right.shape:
        right = cv2.resize(right, (left.shape[1], left.shape[0]))
    score = normalized_mutual_info(left, right)
    print(f"NMI: {score}")
    return (img1_name, img2_name) if score >= threshold else False
# ORB feature-structure matching: more accurate similarity check, but slower.
def feature_matching(img1_data, img2_data, img1_name, img2_name, threshold=0.5):
    """Match ORB descriptors between two images.

    Returns (img1_name, img2_name) when the ratio of cross-checked matches to
    the larger keypoint count reaches `threshold`, otherwise False. Returns
    False when either image cannot be decoded or yields no descriptors.
    """
    gray_a = cv2.imdecode(np.frombuffer(img1_data, np.uint8), cv2.IMREAD_GRAYSCALE)
    gray_b = cv2.imdecode(np.frombuffer(img2_data, np.uint8), cv2.IMREAD_GRAYSCALE)
    if gray_a is None or gray_b is None:
        return False
    # Resize so both frames have identical dimensions.
    if gray_a.shape != gray_b.shape:
        gray_b = cv2.resize(gray_b, (gray_a.shape[1], gray_a.shape[0]))
    detector = cv2.ORB_create()
    kp_a, desc_a = detector.detectAndCompute(gray_a, None)
    kp_b, desc_b = detector.detectAndCompute(gray_b, None)
    if desc_a is None or desc_b is None:
        return False
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    pairs = sorted(matcher.match(desc_a, desc_b), key=lambda m: m.distance)
    if pairs:
        ratio = len(pairs) / max(len(kp_a), len(kp_b))
        if ratio >= threshold:
            print(f"Match Ratio: {ratio}")
            return (img1_name, img2_name)
    return False
def perceptual_hashing(img1_data, img2_data, img1_name, img2_name, threshold=4):
    """Compare average-hash fingerprints of two images.

    Returns (img1_name, img2_name) when the Hamming distance between the two
    hashes is within `threshold`, otherwise False.
    """
    fingerprint_a = imagehash.average_hash(Image.open(BytesIO(img1_data)))
    fingerprint_b = imagehash.average_hash(Image.open(BytesIO(img2_data)))
    distance = fingerprint_a - fingerprint_b
    if distance > threshold:
        return False
    print(f"Hamming Distance: {distance}")
    return (img1_name, img2_name)
# Grayscale conversion + mean pixel-difference comparison.
def are_images_similar_gray(img1_data, img2_data, img1_name, img2_name, threshold=0.9):
    """Compare two images by mean absolute grayscale pixel difference.

    Returns (img1_name, img2_name) when the mean difference is within
    `threshold`, otherwise False. Any exception is logged and treated as
    "not similar".
    """
    try:
        base = preprocess_image(img1_data)
        other = preprocess_image(img2_data)
        if base is None or other is None:
            return False
        # Align dimensions before the per-pixel diff.
        if base.shape != other.shape:
            other = cv2.resize(other, (base.shape[1], base.shape[0]))
        mean_diff = cv2.absdiff(base, other).mean()
        if mean_diff > threshold:
            return False
        print(f"Gray Difference Mean: {mean_diff}")
        return (img1_name, img2_name)
    except Exception as e:
        print(f"Error in gray comparison: {e}")
        return False
# Perceptual-hash comparison.
def are_images_similar_perceptual_hash(img1_data, img2_data, img1_name, img2_name, threshold=5):
    """Compare perceptual hashes produced by get_image_hash_0416.

    Returns (img1_name, img2_name) when the Hamming distance between the two
    hashes is within `threshold`, otherwise False. Any exception is logged
    and treated as "not similar".
    """
    try:
        distance = get_image_hash_0416(img1_data) - get_image_hash_0416(img2_data)
        if distance > threshold:
            return False
        print(f"Hamming Distance: {distance}")
        return (img1_name, img2_name)
    except Exception as e:
        print(f"Error in perceptual hash comparison: {e}")
        return False
对 300 张图片计算差异的耗时统计：
差异值 Hash 算法平均耗时 76s；感知 Hash 算法平均耗时 109s；特征匹配平均耗时 130s（以上三种精确度较高）；Gray 灰度测试平均耗时 50s；SSIM 平均耗时 40s（灰度和 SSIM 的结果准确率较低）。