Python pandas 模块的基本使用
import pandas as pd

# Walkthrough of basic pandas DataFrame operations: inspection, sorting,
# selection, concatenation, de-duplication and export.

# Sample table used throughout. The country spellings are kept exactly as in
# the original data: the de-duplication step below relies on "Ruisa" and
# "Roman" comparing equal between the two frames.
df = pd.DataFrame(
    {
        "name": ["Tom", "Jerry", "Tony", "June"],
        "age": [16, 42, 38, 22],
        "province": ["A", "B", "C", "D"],
        "country": ["Amerian", "Japan", "Ruisa", "Roman"],
    }
)
print(df)

# Column labels.
print(df.columns)

# Row labels (the index).
print(df.index)

# All cell values.
print(df.values)

# First n rows.
print(df.head(2))

# Last n rows.
print(df.tail(2))

# Summary statistics. "age" is the only numeric column, so it is the only
# one summarised. For the ages [16, 42, 38, 22] the result is:
#   count   4.000000
#   mean   29.500000
#   std    12.476645
#   min    16.000000
#   25%    20.500000
#   50%    30.000000
#   75%    39.000000
#   max    42.000000
print(df.describe())

# Transpose: rows become columns and vice versa.
print(df.T)

# Sort by index, descending.
df1 = df.sort_index(ascending=False)
print(df1)

# Sort by the values of a column.
df2 = df.sort_values(by="age", ascending=False)
print(df2)
df3 = df.sort_values(by="province", ascending=True)
print(df3)

# Select a single column.
print(df["country"])

# Select several rows with a slice (end position excluded).
print(df[0:3])

# Select several columns, in the order given.
df4 = df.loc[:, ["country", "name"]]
print(df4)

# Select a single row by its index label.
df5 = df.loc[2]
print(df5)

# Select several rows and several columns at once.
print(df.loc[[0, 2], ["country", "name"]])

# Second frame with the same columns; both of its countries already occur
# in df.
df6 = pd.DataFrame(
    {
        "name": ["PF", "QC"],
        "age": [16, 22],
        "province": ["A", "B"],
        "country": ["Ruisa", "Roman"],
    }
)
print(df6)

# Stack the two frames; ignore_index=True renumbers the rows 0..5.
df7 = pd.concat([df, df6], ignore_index=True)
print(df7)

# De-duplicate on "country": duplicated() flags every repeat of an earlier
# value (rows 4 and 5 here), drop_duplicates() keeps the first occurrence.
res = df7.duplicated(subset="country")
print(res)
res1 = df7.drop_duplicates(subset="country")
print(res1)
print("--------------------")

# Export the combined frame. CSV alternative: df7.to_csv("lianxi.csv")
# Writing .xlsx needs an Excel writer engine (e.g. openpyxl) to be installed.
df7.to_excel("lianxi.xlsx", sheet_name="Sname")
"C:\Program Files\Python37\python.exe" C:/Users/Administrator/Desktop/note/exer/lianxi.py
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
Index(['name', 'age', 'province', 'country'], dtype='object')
RangeIndex(start=0, stop=4, step=1)
[['Tom' 16 'A' 'Amerian']
 ['Jerry' 42 'B' 'Japan']
 ['Tony' 38 'C' 'Ruisa']
 ['June' 22 'D' 'Roman']]
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
   name  age province country
2  Tony   38        C   Ruisa
3  June   22        D   Roman
             age
count   4.000000
mean   29.500000
std    12.476645
min    16.000000
25%    20.500000
50%    30.000000
75%    39.000000
max    42.000000
                0      1      2      3
name          Tom  Jerry   Tony   June
age            16     42     38     22
province        A      B      C      D
country   Amerian  Japan  Ruisa  Roman
    name  age province  country
3   June   22        D    Roman
2   Tony   38        C    Ruisa
1  Jerry   42        B    Japan
0    Tom   16        A  Amerian
    name  age province  country
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
0    Tom   16        A  Amerian
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
0    Amerian
1      Japan
2      Ruisa
3      Roman
Name: country, dtype: object
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
   country   name
0  Amerian    Tom
1    Japan  Jerry
2    Ruisa   Tony
3    Roman   June
name         Tony
age            38
province        C
country     Ruisa
Name: 2, dtype: object
   country  name
0  Amerian   Tom
2    Ruisa  Tony
  name  age province country
0   PF   16        A   Ruisa
1   QC   22        B   Roman
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
4     PF   16        A    Ruisa
5     QC   22        B    Roman
0    False
1    False
2    False
3    False
4     True
5     True
dtype: bool
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
--------------------

Process finished with exit code 0