18.MongoDB系列之批量更新写入Python版

在实际工作中,难免会遇到需要批量更新大量数据的情况,pymongo提供了便捷的客户端供使用

假设读者对pandas比较熟悉,下图为事先准备好的dataframe

1

import pandas as pd
from pymongo import MongoClient
from bson.codec_options import CodecOptions
from retry import retry
import pytz
from pymongo import UpdateOne


class MongoDbClient:
    """Small wrapper that holds a pymongo ``MongoClient`` built from a URI."""

    def __init__(self, uri):
        """Create the underlying ``MongoClient`` for the given connection URI."""
        self.mongoClient = MongoClient(uri)

    @retry(tries=3, delay=5)
    def get_collection(self, db, collection):
        """Return the named collection with timezone-aware datetime decoding.

        Args:
            db: Name of the database.
            collection: Name of the collection inside that database.

        Returns:
            The collection handle, configured with codec options that set
            ``tz_aware=True`` and the 'Asia/Shanghai' timezone.

        The call is wrapped by ``retry(tries=3, delay=5)``.
        """
        handle = self.mongoClient.get_database(db).get_collection(collection)
        shanghai_codec = CodecOptions(
            tz_aware=True,
            tzinfo=pytz.timezone('Asia/Shanghai'),
        )
        return handle.with_options(codec_options=shanghai_codec)

# Build the MongoDB client wrapper.
MONGO_CLIENT = MongoDbClient(
    "mongodb://username:password@127.0.0.1:27017/?connectTimeoutMS=60000"
    "&socketTimeoutMS=10000000"
)
# Get a handle on the concrete collection to write to.
township_mca_conn = MONGO_CLIENT.get_collection('study', 'township_mca')

# Number of operations accumulated before each bulk_write call.
BATCH_SIZE = 10000

actions = []
count = 0
# `df` is the DataFrame prepared beforehand; it is not defined in this snippet.
# Each row becomes one UpdateOne matched on city/area/street name.
# upsert=True inserts the document when no existing one matches the filter.
for index, item in df.iterrows():
    action = UpdateOne(
        {
            'city_name': item['city_name'],
            'area_name': item['area_name'],
            'street_name': item['street_name'],
        },
        {
            '$set': {
                'city_code': item['city_code'],
                'city_name': item['city_name'],
                'area_code': item['area_code'],
                'area_name': item['area_name'],
                'street_code': item['street_code'],
                'street_name': item['street_name'],
                'alias': [],
            },
        },
        upsert=True,
    )
    actions.append(action)
    if len(actions) == BATCH_SIZE:
        # Flush a full batch, then report the running total of operations sent.
        township_mca_conn.bulk_write(actions)
        count += len(actions)
        print(count)
        actions.clear()
if actions:
    # Flush the final, partially filled batch.
    township_mca_conn.bulk_write(actions)
    count += len(actions)
    print(count)
    actions.clear()

image.png

欢迎关注公众号算法小生沈健的技术博客

posted @ 2022-10-18 20:59  算法小生  阅读(72)  评论(0)  编辑  收藏  举报