使用Python递归比较两个文件夹下所有文件内容不同的文件

比较两份源代码文件异同

  • 使用Python递归遍历两个文件夹,找出其中内容存在差异的同名文件。
  • 典型的应用场景是代码审计:手上有同一个项目的两份源代码,一份是破解版,一份是正版,通过逐个文件比对,找出破解版改动了哪些代码,从而挖掘其中可能植入的后门代码。

具体代码如下:


import os
import filecmp
import difflib

# Directory layout: base_dir holds both copies of the project.
# "daoban" is the pirated/cracked tree, "zhengban" is the genuine one.
base_dir = "/home/viadmin/finddiffiles"
wordfencedaoban_dir, wordfencezhengban_dir = (
    os.path.join(base_dir, subdir)
    for subdir in ("wordfencedaoban", "wordfencezhengban")
)

# Collect every file under a folder (image files are excluded)
def get_files(directory):
    """Return the paths of all non-image files found beneath ``directory``.

    The tree is walked recursively with ``os.walk``; each returned path is
    the containing folder joined with the file name. Names ending in .png,
    .jpg, .jpeg, .gif or .bmp (compared case-insensitively) are skipped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if not name.lower().endswith(image_suffixes)
    ]

# Build the file lists for both trees
daoban_files = get_files(wordfencedaoban_dir)
zhengban_files = get_files(wordfencezhengban_dir)

# Echo the collected paths, one per line, under a heading for each tree
print("Files in wordfencedaoban:")
if daoban_files:
    print("\n".join(daoban_files))

print("\nFiles in wordfencezhengban:")
if zhengban_files:
    print("\n".join(zhengban_files))

# Compare file contents
def compare_files(file1, file2):
    """Return a unified diff of two files as a single string.

    Args:
        file1: Path of the "from" file; also used as the ``---`` header label.
        file2: Path of the "to" file; also used as the ``+++`` header label.

    Returns:
        The unified diff text, or an empty string when the decoded lines of
        both files are identical.
    """
    def read_lines(path):
        # Decode explicitly as UTF-8 and replace undecodable bytes so that a
        # binary or differently-encoded file yields a (lossy) diff instead
        # of aborting the whole run with UnicodeDecodeError.
        with open(path, 'r', encoding='utf-8', errors='replace') as handle:
            return handle.readlines()

    def terminated(line):
        # Only the last line of a file can lack a newline; without this the
        # next diff line would be glued onto it in the joined output.
        if line.endswith('\n'):
            return line
        return line + '\n\\ No newline at end of file\n'

    lines1 = read_lines(file1)
    lines2 = read_lines(file2)

    diff = difflib.unified_diff(lines1, lines2, fromfile=file1, tofile=file2)
    return ''.join(terminated(line) for line in diff)

# Record the files that differ.
# Each entry is either (path, message) for a file that exists in only one
# tree, or (daoban_path, zhengban_path, diff_text) for a same-named pair
# whose contents differ.
diff_files = []
for daoban_file in daoban_files:
    # Map the daoban file onto the same relative location in zhengban
    relative_path = os.path.relpath(daoban_file, wordfencedaoban_dir)
    zhengban_file = os.path.join(wordfencezhengban_dir, relative_path)

    if not os.path.exists(zhengban_file):
        diff_files.append((daoban_file, "Not found in zhengban"))
    elif not filecmp.cmp(daoban_file, zhengban_file, shallow=False):
        # shallow=False forces a byte-for-byte content comparison
        diff_content = compare_files(daoban_file, zhengban_file)
        diff_files.append((daoban_file, zhengban_file, diff_content))

# Check the opposite direction as well: a file that was removed from the
# daoban tree is also a change and would otherwise go unreported.
for zhengban_file in zhengban_files:
    relative_path = os.path.relpath(zhengban_file, wordfencezhengban_dir)
    if not os.path.exists(os.path.join(wordfencedaoban_dir, relative_path)):
        diff_files.append((zhengban_file, "Not found in daoban"))

# Print the results to the screen.
# Entries have two fields (path, message) or three (path, path, diff text);
# the optional third field is printed only when present.
separator = "-" * 80
for left, right, *details in diff_files:
    print(f"Difference found: {left} vs {right}")
    if details:
        print(details[0])
    print(separator)

# Write the results to a text file
output_file = os.path.join(base_dir, "diff_results.txt")
# Open with an explicit UTF-8 encoding: diff text may contain non-ASCII
# characters, and the locale-default encoding could fail to encode them.
with open(output_file, 'w', encoding='utf-8') as f:
    for diff in diff_files:
        f.write(f"Difference found: {diff[0]} vs {diff[1]}\n")
        # Three-field entries carry the unified diff text as the last field
        if len(diff) > 2:
            f.write(diff[2])
        f.write("-" * 80 + "\n")

print(f"Comparison completed. Results saved to {output_file}")

执行结果和方式

  • 执行过程就是将上述代码保存到类似compare.py文件,然后使用python3版本执行
  • 脚本只用到了Python标准库(os、filecmp、difflib),无需额外安装第三方库即可直接运行;这里为了便于测试,把遍历到的文件名称都打印了出来,可以根据实际情况把这部分打印代码注释掉!
posted @ 2024-09-12 10:57  皇帽讲绿帽带法技巧  阅读(69)  评论(0编辑  收藏  举报