pandas分页读取亿级大数据csv文件
`
import math

import pandas as pd

# Read a very large CSV in fixed-size chunks, stitch the chunks into one
# DataFrame, then walk over that DataFrame one "page" of rows at a time.
# NOTE: the concatenated DataFrame still holds every row in memory; chunked
# reading only bounds the size of each individual parse step.
chunk_size = 10000000  # rows parsed per chunk

reader = pd.read_csv('/home/eric/data.csv', chunksize=chunk_size)
try:
    # Iterating the reader yields one DataFrame per chunk until the file
    # is exhausted, so no manual StopIteration handling is needed.
    chunks = list(reader)
finally:
    # Always release the underlying file handle, even if parsing fails.
    reader.close()
print("Iteration is stopped.")

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when no chunk was read.
df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
row_count = len(df)
print("rows:{0}".format(row_count))

page = 1  # current page number (1-based)
limit = 100  # rows per page
total = math.ceil(row_count / limit)  # page count; the last page may be partial

# The upper bound is total + 1 so that the final page is visited as well.
for page in range(1, total + 1):
    start = (page - 1) * limit
    # Positional slice; the last page is simply shorter when row_count is
    # not a multiple of limit.
    df2 = df.iloc[start:start + limit]
    print("page:{0}".format(page))
`