话不多说 先上几张结果图

 

 

 

 

 

 

 

可以看到上述的几种算法的明显差异 ,首先从效率上来说 当图集数量大于100时 SSIM>Hash>Gray>特征结构匹配  

从结果精确度上 特征匹配>Hash>Gray>SSIM

def are_images_similar_ssim(img1_data, img2_data, img1_name, img2_name, threshold=0.85):
    """Compare two images via SSIM.

    Returns (img1_name, img2_name) when the structural-similarity score is
    at or above *threshold*, otherwise False.
    """
    first = preprocess_image(img1_data)
    second = preprocess_image(img2_data)

    if first is None or second is None:
        return False

    # SSIM requires both inputs to share identical dimensions.
    if first.shape != second.shape:
        second = cv2.resize(second, (first.shape[1], first.shape[0]))

    score = ssim(first, second)
    if score >= threshold:
        print(f"SSIM: {score}")
        return (img1_name, img2_name)
    return False

def are_images_similar_psnr(img1_data, img2_data, img1_name, img2_name, threshold=9):
    """Compare two images via PSNR.

    Returns (img1_name, img2_name) when the PSNR value reaches *threshold*,
    otherwise False.
    """
    gray_a = preprocess_image(img1_data)
    gray_b = preprocess_image(img2_data)

    if gray_a is None or gray_b is None:
        return False

    # Resize the second image so both share identical dimensions.
    if gray_a.shape != gray_b.shape:
        gray_b = cv2.resize(gray_b, (gray_a.shape[1], gray_a.shape[0]))

    value = psnr(gray_a, gray_b)
    print(f"PSNR: {value}")
    if value >= threshold:
        print(f"符合PSNR: {value}")
        return (img1_name, img2_name)
    return False

def normalized_mutual_info(img1, img2):
    """Return the mutual information (in nats) between the intensity
    distributions of two equally-sized grayscale images.

    A 256-bin joint histogram of pixel values is built, then mutual
    information is computed as the KL divergence between the joint
    distribution and the product of its marginals:

        MI = H(X) + H(Y) - H(X, Y) = KL(p_xy || p_x * p_y)

    Bug fix: the original returned ``entropy(pxy) - entropy(px_py)``,
    i.e. H(X,Y) - H(X) - H(Y), the *negative* of the mutual information.
    That value is always <= 0, so the positive threshold check in
    are_images_similar_nmi could never succeed.
    """
    hist_2d, _, _ = np.histogram2d(img1.ravel(), img2.ravel(), bins=256)
    pxy = hist_2d / float(np.sum(hist_2d))
    px = np.sum(pxy, axis=1)  # marginal over img1 intensities
    py = np.sum(pxy, axis=0)  # marginal over img2 intensities
    px_py = px[:, None] * py[None, :]
    # entropy(pk, qk) computes KL(pk || qk); cells with pxy == 0 contribute 0,
    # and px_py == 0 implies pxy == 0, so no infinities arise.
    nmi = entropy(pxy.ravel(), px_py.ravel())
    print(nmi)
    return nmi

def are_images_similar_nmi(img1_data, img2_data, img1_name, img2_name, threshold=0.7):
    """Compare two images via mutual information of their intensity
    histograms; return (img1_name, img2_name) when the score reaches
    *threshold*, otherwise False.
    """
    gray1 = preprocess_image(img1_data)
    gray2 = preprocess_image(img2_data)

    if gray1 is None or gray2 is None:
        return False

    # Both images must have the same size before the joint histogram is built.
    if gray1.shape != gray2.shape:
        gray2 = cv2.resize(gray2, (gray1.shape[1], gray1.shape[0]))

    score = normalized_mutual_info(gray1, gray2)
    print(f"NMI: {score}")
    return (img1_name, img2_name) if score >= threshold else False


# Feature-structure comparison: precise, but comparatively slow.
def feature_matching(img1_data, img2_data, img1_name, img2_name, threshold=0.5):
    """Compare two images with ORB feature matching.

    Returns (img1_name, img2_name) when the ratio of cross-checked matches
    to the larger keypoint count reaches *threshold*, otherwise False.
    """
    gray_a = cv2.imdecode(np.frombuffer(img1_data, np.uint8), cv2.IMREAD_GRAYSCALE)
    gray_b = cv2.imdecode(np.frombuffer(img2_data, np.uint8), cv2.IMREAD_GRAYSCALE)

    if gray_a is None or gray_b is None:
        return False

    # Resize so both images have identical dimensions.
    if gray_a.shape != gray_b.shape:
        gray_b = cv2.resize(gray_b, (gray_a.shape[1], gray_a.shape[0]))

    detector = cv2.ORB_create()
    kp_a, desc_a = detector.detectAndCompute(gray_a, None)
    kp_b, desc_b = detector.detectAndCompute(gray_b, None)

    # Featureless images (e.g. solid color) yield no descriptors.
    if desc_a is None or desc_b is None:
        return False

    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    found = sorted(matcher.match(desc_a, desc_b), key=lambda m: m.distance)

    if found:
        ratio = len(found) / max(len(kp_a), len(kp_b))
        if ratio >= threshold:
            print(f"Match Ratio: {ratio}")
            return (img1_name, img2_name)
    return False

def perceptual_hashing(img1_data, img2_data, img1_name, img2_name, threshold=4):
    """Compare two images by average hash.

    Returns (img1_name, img2_name) when the Hamming distance between the
    hashes is at most *threshold*, otherwise False.
    """
    first_hash = imagehash.average_hash(Image.open(BytesIO(img1_data)))
    second_hash = imagehash.average_hash(Image.open(BytesIO(img2_data)))

    # imagehash overloads subtraction to yield the Hamming distance.
    distance = first_hash - second_hash

    if distance <= threshold:
        print(f"Hamming Distance: {distance}")
        return (img1_name, img2_name)
    return False

# Grayscale pixel-difference comparison.
def are_images_similar_gray(img1_data, img2_data, img1_name, img2_name, threshold=0.9):
    """Compare the mean absolute pixel difference of two grayscale images.

    Returns (img1_name, img2_name) when the mean difference is at most
    *threshold*, otherwise False.
    """
    try:
        gray_a = preprocess_image(img1_data)
        gray_b = preprocess_image(img2_data)

        if gray_a is None or gray_b is None:
            return False

        # Match dimensions before taking the per-pixel difference.
        if gray_a.shape != gray_b.shape:
            gray_b = cv2.resize(gray_b, (gray_a.shape[1], gray_a.shape[0]))

        mean_diff = cv2.absdiff(gray_a, gray_b).mean()
        if mean_diff <= threshold:
            print(f"Gray Difference Mean: {mean_diff}")
            return (img1_name, img2_name)
        return False
    except Exception as e:
        print(f"Error in gray comparison: {e}")
        return False


# Perceptual-hash comparison.
def are_images_similar_perceptual_hash(img1_data, img2_data, img1_name, img2_name, threshold=5):
    """Compare two images via their perceptual hashes.

    Returns (img1_name, img2_name) when the Hamming distance between the
    hashes is at most *threshold*, otherwise False.
    """
    try:
        # Hash subtraction yields the Hamming distance between the two hashes.
        distance = get_image_hash_0416(img1_data) - get_image_hash_0416(img2_data)
        if distance <= threshold:
            print(f"Hamming Distance: {distance}")
            return (img1_name, img2_name)
        return False
    except Exception as e:
        print(f"Error in perceptual hash comparison: {e}")
        return False

 

 300张图片计算差异

 差异值hash算法 平均耗时76s  感知hash值 平均耗时109s   特征匹配耗时 130s (以上三个精确度较高)  gray灰度测试 50s  SSIM 40s(灰度和SSIM的结果准确率较低)