pandas read 系列 之 如何读取较大的数据
使用 chunksize 参数时，函数不再一次性返回整个 DataFrame，而是返回一个可迭代对象（iterator），每次迭代得到一个最多包含 chunksize 行的 DataFrame
# Stream the query result in batches: with ``chunksize`` set, ``pd.read_sql``
# returns an iterator of DataFrames instead of one DataFrame.
data = pd.read_sql(
    """
select * from youtable
""",
    db.session.bind,
    chunksize=20,
)

# Each chunk is a DataFrame.  Per-chunk work (for example
# ``chunk.to_dict("records")``) belongs inside this loop, with its result
# actually used; calling it and discarding the result only wastes time.
lis = []
for chunk in data:
    lis.append(chunk)

# ``pd.concat`` raises ValueError when given no objects, which happens if the
# iterator produced no chunks, so fall back to an empty frame in that case.
data = pd.concat(lis, ignore_index=True) if lis else pd.DataFrame()
源码
# Excerpt from the body of a method whose signature is not shown here.
# When a chunk size was supplied, hand the cursor to ``_query_iterator`` and
# return what it produces; the remaining arguments are forwarded unchanged.
if chunksize is not None:
return self._query_iterator(
cursor,
chunksize,
columns,
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
)
def _query_iterator(
    cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
):
    """Return generator through chunked result set.

    Repeatedly calls ``cursor.fetchmany(chunksize)`` and yields one wrapped
    result per non-empty batch, stopping at the first empty batch.

    Parameters
    ----------
    cursor
        Object exposing ``fetchmany(size)`` and ``close()``.
    chunksize
        Batch size passed to ``cursor.fetchmany`` on every iteration.
    columns, index_col, coerce_float, parse_dates
        Forwarded unchanged to ``_wrap_result`` for every batch.

    Yields
    ------
    The value returned by ``_wrap_result`` for each non-empty batch.

    Notes
    -----
    The cursor is closed when the generator finishes for any reason:
    exhaustion of the result set, early ``close()`` / garbage collection of
    the generator, or an exception raised while fetching or wrapping a batch.
    """
    try:
        while True:
            data = cursor.fetchmany(chunksize)
            # Some drivers hand back the batch as a tuple; normalise to a list.
            if isinstance(data, tuple):
                data = list(data)
            if not data:
                break
            yield _wrap_result(
                data,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
            )
    finally:
        # Runs on normal exhaustion as well as on early exit or error, so the
        # cursor is never left open.
        cursor.close()
本质上就是循环调用 cursor.fetchmany(chunksize) 方法：每次取出一批数据并包装后 yield 出去，直到取不到数据时关闭游标并结束