Python 将 Redis 中 set 类型的大数据写入到 Parquet 文件中

import json

import pandas as pd

from crawler.db.redis_client import redis_cli

# Redis set that is exported, and the parquet file it is written to.
set_key = "agi_laion_parquet_update"
file2 = "2.parquet"

# SSCAN state: iteration starts at cursor 0, and `batch_size` is passed as
# the COUNT argument of every SSCAN call.
cursor, batch_size = 0, 1000

# Collects every decoded element read from the set.
elements = list()


def gen_part_data(data_list, remove=True):
    """Decode a batch of raw set members into Python objects.

    Args:
        data_list: Iterable of raw JSON documents (the members returned by
            ``SSCAN`` on ``set_key``).
        remove: When true (the default, which keeps the historical
            behaviour), each member is deleted from the Redis set right
            after it has been decoded. Pass ``False`` to leave the set
            untouched so the caller can delete the members once they have
            been safely persisted.

    Yields:
        The object produced by ``json.loads`` for each member, in order.

    Raises:
        ValueError: If a member is not valid JSON (``json.JSONDecodeError``);
            that member is not removed from the set.
    """
    for raw in data_list:
        item = json.loads(raw)
        if remove:
            redis_cli.srem(set_key, raw)
        yield item


# Walk the whole set with SSCAN. Nothing is deleted while scanning: members
# are removed only after the parquet file has been written, so a failure
# half-way through (bad JSON, Redis error, failed write) loses no data.
seen = set()
while True:
    cursor, batch = redis_cli.sscan(set_key, cursor=cursor, count=batch_size)
    # SSCAN may return the same member more than once; keep the first
    # occurrence only so the output has no duplicated rows.
    fresh = []
    for raw in batch:
        if raw not in seen:
            seen.add(raw)
            fresh.append(raw)
    elements.extend(gen_part_data(fresh, remove=False))
    # A returned cursor of 0 means the iteration is complete.
    if cursor == 0:
        break

df = pd.DataFrame(elements)
df.to_parquet(file2)

# The data is on disk now, so the exported members can be dropped from Redis.
# Only members that were actually exported are removed (anything added to the
# set in the meantime stays), and they are removed in chunks so that each
# SREM call carries many members instead of one round trip per member.
exported = list(seen)
for start in range(0, len(exported), batch_size):
    redis_cli.srem(set_key, *exported[start:start + batch_size])
posted @ 2023-03-13 17:54  公众号python学习开发  阅读(27)  评论(0)  编辑  收藏  举报