Python 中 pandas DataFrame 的常用操作方法
DataFrame 的数据结构看上去有点像 Excel 表格。下面记录它的常用操作方法,以备查阅。
# -*- coding:utf-8 -*-
'''
pandas 使用演示
'''
import pandas as pd
# Demo 1: build two small DataFrames, merge them, then inspect, extend,
# trim, sort, modify and finally empty the merged result.

# "tprice" is the total price for a given date.
d10 = {"date": "2021-01-03", "tprice": 10000}
d11 = {"date": "2021-01-04", "tprice": 10200}
l1 = [d10, d11]
pd1 = pd.DataFrame(l1, columns=['date', 'tprice'])
print(pd1)

# "pnumber" is the number of items for a given date.
d20 = {"date": "2021-01-03", "pnumber": 500}
d21 = {"date": "2021-01-04", "pnumber": 425}
l2 = [d20, d21]
pd2 = pd.DataFrame(l2, columns=['date', 'pnumber'])
print(pd2)

# Merge the two DataFrames into a new one. Both sides share the key column
# name, so a single `on=` replaces the identical left_on/right_on pair.
pd_merge = pd.merge(pd1, pd2, on="date")
print(pd_merge)

# Number of rows in a DataFrame.
i_rows = pd_merge.shape[0]
print("pd_merge 行数:%r " % (i_rows,))

# Number of columns in a DataFrame.
i_cols = pd_merge.shape[1]
print("pd_merge 列数:%r" % (i_cols,))

# Check whether a DataFrame is empty.
if pd_merge.empty:
    print("pd_merge 为空")
else:
    print("pd_merge 不为空")
# The DataFrame is wrapped in a 1-tuple so that %-formatting always treats it
# as a single value rather than as the argument container itself.
print("pd_merge: %r" % (pd_merge,))

# Add a new column "avg": the average price per item.
pd_merge = pd_merge.eval("avg = tprice / pnumber")
print("新增一列后")
print("pd_merge: %r" % (pd_merge,))

# Drop the two columns "tprice" and "pnumber".
pd_merge.drop(["tprice", "pnumber"], axis=1, inplace=True)
print("删除tprice、pnumber两列后")
print("pd_merge: %r" % (pd_merge,))

# Sort by the "avg" column in descending order. The index labels travel with
# their rows, so after this step they are no longer in 0..n-1 order.
pd_merge.sort_values(by='avg', axis=0, ascending=False, inplace=True)
print("按avg降序排序后")
print("pd_merge: %r" % (pd_merge,))

# Modify the data: add 10 to every average and keep 2 decimal places.
# Done as one vectorized column operation, which does not depend on what the
# index labels currently are (unlike looping over range(i_rows) with .loc).
pd_merge["avg"] = (pd_merge["avg"] + 10).round(2)
print("修改dataFrame的数据")
print("pd_merge: %r" % (pd_merge,))

# Operate on the DataFrame in place: drop the rows labelled 0 and 1.
pd_merge.drop([0, 1], inplace=True)
if pd_merge.empty:
    print("pd_merge 为空")
else:
    print("pd_merge 不为空")
    print("pd_merge: %r" % (pd_merge,))
# Demo 2: a student score table.
student_rows = [
    ['Snow', 'M', 22],
    ['Tyrion', 'M', 32],
    ['Sansa', 'F', 18],
    ['Arya', 'F', 14],
]
df_student = pd.DataFrame(student_rows, columns=['name', 'gender', 'age'])
print("====学生成绩表====")
print(df_student)

# Add a "score" column; it needs one value per existing row.
scores = [80, 98, 67, 90]
df_student['score'] = scores
print("增加一列score:")
print(df_student)

# Insert a row by assigning to a new index label.
df_student.loc[4] = ["LiLei", "M", 25, 100]
print("某位置增加一行:")
print(df_student)

# Insert a row at the very front: store it under label -1, shift every label
# up by one so it becomes 0, then sort by index to move it to the top.
df_student.loc[-1] = ["Jim", "M", 26, 99]
df_student.index += 1
df_student.sort_index(inplace=True)
print("最前面插入一行:")
print(df_student)

# Split into two DataFrames at column position 2 (the source frame itself is
# not changed by the slicing).
boundary = 2
df_student1 = df_student.iloc[:, :boundary]
print("df_student1:")
print(df_student1)
df_student2 = df_student.iloc[:, boundary:]
print("df_student2:")
print(df_student2)

# Pull the "name" column out on its own as a one-column DataFrame.
df_student3 = df_student['name'].to_frame()
print("df_student3:")
print(df_student3)

# Convert a DataFrame to a plain string.
str_student3 = str(df_student3)
print("把一个dataFrame转成字符串string")
print(type(str_student3))
print(str_student3)
# Demo 3: insert a row from a Series.
df = pd.DataFrame(columns=['a', 'b'])
add_data = pd.Series({'a': 100, 'b': 1})
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# row is added by assigning to a new label with .loc instead. The Series
# index ('a', 'b') is matched against the column names, and len(df) is the
# next unused label on a default 0..n-1 index, which gives the same row
# numbering the old append(..., ignore_index=True) call produced.
df.loc[len(df)] = add_data
print("使用Series插入一行")
print(df)
#下面是运行结果
'''
date tprice
0 2021-01-03 10000
1 2021-01-04 10200
date pnumber
0 2021-01-03 500
1 2021-01-04 425
date tprice pnumber
0 2021-01-03 10000 500
1 2021-01-04 10200 425
pd_merge 行数:2
pd_merge 列数:3
pd_merge 不为空pd_merge:
date tprice pnumber
0 2021-01-03 10000 500
1 2021-01-04 10200 425
新增一列后pd_merge:
date tprice pnumber avg
0 2021-01-03 10000 500 20.0
1 2021-01-04 10200 425 24.0
删除tprice、pnumber两列后pd_merge:
date avg
0 2021-01-03 20.0
1 2021-01-04 24.0
按avg降序排序后pd_merge:
date avg
1 2021-01-04 24.0
0 2021-01-03 20.0
修改dataFrame的数据pd_merge:
date avg
1 2021-01-04 34.0
0 2021-01-03 30.0
pd_merge 为空
====学生成绩表====
name gender age
0 Snow M 22
1 Tyrion M 32
2 Sansa F 18
3 Arya F 14
增加一列score:
name gender age score
0 Snow M 22 80
1 Tyrion M 32 98
2 Sansa F 18 67
3 Arya F 14 90
某位置增加一行:
name gender age score
0 Snow M 22 80
1 Tyrion M 32 98
2 Sansa F 18 67
3 Arya F 14 90
4 LiLei M 25 100
最前面插入一行:
name gender age score
0 Jim M 26 99
1 Snow M 22 80
2 Tyrion M 32 98
3 Sansa F 18 67
4 Arya F 14 90
5 LiLei M 25 100
df_student1:
name gender
0 Jim M
1 Snow M
2 Tyrion M
3 Sansa F
4 Arya F
5 LiLei M
df_student2:
age score
0 26 99
1 22 80
2 32 98
3 18 67
4 14 90
5 25 100
df_student3:
name
0 Jim
1 Snow
2 Tyrion
3 Sansa
4 Arya
5 LiLei
把一个dataFrame转成字符串string
<class 'str'>
name
0 Jim
1 Snow
2 Tyrion
3 Sansa
4 Arya
5 LiLei
使用Series插入一行
a b
0 100 1
'''