# Python查询MongoDB数据库中重复数据的标题和数量
from urllib.parse import quote_plus
import pymongo
# Create the MongoClient instance and connect to MongoDB.
# NOTE(review): the connection string is empty in this file — presumably the
# real URI was removed; supply a valid MongoDB URI before running.
connection_string = ''
client = pymongo.MongoClient(connection_string)
db = client['spider']
collection = db['inner_books']

# Aggregation pipeline: group documents by their title, count the documents in
# each group, then keep only the groups that contain more than one document.
pipeline = [
    {
        '$group': {
            '_id': '$title',  # group by the title field
            'count': {'$sum': 1},  # number of documents sharing this title
        }
    },
    {
        '$match': {
            'count': {'$gt': 1},  # keep only titles that occur more than once
        }
    },
]

# Run the aggregation. aggregate() returns a cursor that can be consumed only
# once, so materialise it into a list: the rows are needed twice below (once
# for the per-title listing, once for the total). Iterating the raw cursor a
# second time would yield nothing and the total would always be 0.
results = list(collection.aggregate(pipeline))

# Print each duplicated title together with how many times it occurs.
for result in results:
    print(f"标题: {result['_id']}, 数量: {result['count']}")

# Total number of documents whose title is shared with at least one other
# document (the sum of the per-title counts).
total_duplicates = sum(result['count'] for result in results)
print(f"总的重复数据数量: {total_duplicates}")