python DataFrame数据格式常用操作(备份)
啊
# Common pandas DataFrame operations (cheat sheet).
#
# NOTE(review): these are standalone snippets rather than a runnable script.
# Names such as path, dfname, dat, x, data1, d..m, df1, df1_nona_cp and colm
# are placeholders that are not defined anywhere in this snippet.

# --- Before reading ---
import numpy as np  # required for np.nan further down
import pandas as pd

# --- Reading ---
a = pd.read_csv("d:/data/111.csv", encoding="GBK")  # read a GBK-encoded file
b = pd.read_csv("d:/data/222.csv")  # read a UTF-8 CSV file
# Set the format of one column while parsing: converters maps a column name
# to the callable applied to its values (must be a dict literal, 'id': str).
c = pd.read_csv(path + 'dat/import.csv', converters={'id': str})

# --- After reading ---
dfname.columns = ['kind', 'd1-1', 'd1-2', 'd1-3']  # rename the columns
dat = dat.drop('colname', axis=1)  # drop a single column

# Select by position (data filtering): row positions first, then column positions.
x1 = x.iloc[
    [12, 13, 19, 21],
    [1, 2, 4, 5, 6, 8, 9, 11, 12, 14, 15, 18, 19, 20, 21, 22],
]

# Filter out duplicate rows, keeping the first occurrence; returns a new frame.
result = data1.drop_duplicates(keep='first')

# Row-wise concatenation (similar to R's rbind). DataFrame.append was removed
# in pandas 2.0, so pd.concat is used; the original row labels are kept.
result = pd.concat([a, b, c, d, e, f, g, h, i, j, k, l, m], ignore_index=False)

df1_nona = df1.dropna()  # drop rows that contain missing values
# NOTE(review): df1_nona_cp is never assigned in this snippet; presumably it is
# a copy of df1_nona -- confirm before use.
df1_nona_cp.reset_index(drop=True, inplace=True)  # reset the index

# Drop rows that have a negative value in certain columns (position 2 onward):
# mark the negative cells as NaN, then drop every row containing NaN.
df1_nona_cp[df1_nona_cp.iloc[:, 2:] < 0] = np.nan
df1_nona_cp = df1_nona_cp.dropna()

# --- Grouping ---
df1_gp = df1.groupby('nyr')  # group df1 by the 'nyr' field
df1_gp_count = df1_gp.count()  # per-group count for every field

# --- 0/1 (dummy-variable) encoding ---
# Derive 'colmn' from the first four characters of 'colm', then drop 'colm'.
df1_nona_cp['colmn'] = df1_nona_cp['colm'].apply(lambda x: x[:4])
df1_nona_cp = df1_nona_cp.drop(['colm'], axis=1)
# NOTE(review): `colm` is used here as a DataFrame but is only a column name
# above; presumably df1_nona_cp was intended -- confirm.
af = colm[['y', 'colmn']]
dumm = pd.get_dummies(af['colmn'], prefix='colmn')
# Before this step, test which values are actually useful; only those
# dummy columns are kept.
dumm = dumm[['colmn_1', 'colmn_3', 'colmn_8']]
df1_nona_cp = df1_nona_cp.join(dumm)
del dumm, af
啊
以上为全部内容。