pandas分页读取亿级大数据csv文件

`

import math

import pandas as pd

# Load the CSV in fixed-size chunks instead of one monolithic read_csv call,
# then join the chunks into a single DataFrame.
# NOTE: every chunk is kept and concatenated, so the complete file still has
# to fit in memory once loading finishes.
chunk_size = 10000000  # rows requested per get_chunk() call
chunks = []
reader = pd.read_csv('/home/eric/data.csv', iterator=True)
try:
    while True:
        try:
            chunks.append(reader.get_chunk(chunk_size))
        except StopIteration:
            # get_chunk() signals the end of the file with StopIteration.
            print("Iteration is stopped.")
            break
finally:
    # Release the file handle even if parsing fails part-way through.
    reader.close()
df = pd.concat(chunks, ignore_index=True)
row_count = len(df)
print("rows:{0}".format(row_count))

limit = 100  # rows per page
total = math.ceil(row_count / limit)  # page count; the last page may be partial
# Page numbers are 1-based. The upper bound is total + 1 so that the final
# (possibly partial) page is visited as well.
for page in range(1, total + 1):
    start = (page - 1) * limit
    # Positional slice: rows [start, start + limit) of the combined frame.
    df2 = df.iloc[start:start + limit]
    print("page:{0}".format(page))

`

posted @ 2021-07-19 15:37  Eric小星  阅读(484)  评论(0)  编辑  收藏  举报