Python 使用 Vaex 处理海量数据
Vaex 文档：https://vaex.io/docs/examples.html
Examples — vaex 4.3.0 documentation
使用中的问题
# coding: utf-8
#
# Demo script: load a local CSV file with Vaex, then filter, select, sort and
# group it, printing each result.
#
# The `# type:` comments on the assignments below name the concrete DataFrame
# class so that an editor can offer code completion on those variables.
import python_utils
import vaex
from vaex import groupby, grids, utils, legacy, selections
import numpy as np
import pandas as pa
from pandas import Series, DataFrame

# Alternative: open an already converted HDF5 chunk instead of parsing the CSV.
# df = vaex.open("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv_chunk_0..hdf5")
df = vaex.read_csv("C:\\Users\\Anchnet\\Desktop\\ttt\\aa.csv")  # type: vaex.dataframe.DataFrameLocal

# Filtered view: rows whose column "e" equals the given product name.
df_a = df[df["e"] == "化纤针织内裤"]

# NOTE(review): DataFrame.select() appears to register a selection on `df`
# rather than return the matching rows, so this print likely shows `None` --
# confirm against the vaex documentation.
print(df.select(df["a"] == "义乌市智洋商品采购有限公司"))

# Sort by column "e" in descending order and print the result of count().
df_s = df.sort('e', ascending=False)  # type: vaex.dataframe.DataFrameLocal
print(df_s.count())

print(df_a)

# Group by column "i" and count the entries of "i" in each group.
# Alternative: sum column "i" per value of column "e" on the sorted frame.
# dv_group = df_s.groupby(df_s['e'], agg=vaex.agg.sum(df_s['i']))
dv_group = df.groupby(df['i'], agg=vaex.agg.count(df['i']))
print(dv_group)

# Print the runtime class of `df`.
# NOTE(review): the original note lists vaex.dataframe.DataFrameLocal right
# after this call -- presumably the observed output; confirm.
print(type(df))
加上 `# type: vaex.dataframe.DataFrameLocal` 类型注释之后，IDE 就可以对该变量进行代码补全啦！