"""Pandas basics, part 3: DataFrame.

A linear walk-through of the core DataFrame operations: construction,
column selection/addition/removal, row selection, and row
addition/removal.  Each step prints its result; the expected output is
recorded in the comment that follows the ``print`` call.

Reconstructed from a flattened Jupyter notebook export, so every section
below corresponds to one notebook cell.
"""

import numpy as np  # not used below; kept because the original notebook imported it
import pandas as pd

# ---------------------------------------------------------------------------
# Creating DataFrames
# Constructor signature: pd.DataFrame(data, index, columns, dtype)
# ---------------------------------------------------------------------------

# Create an empty DataFrame.
df = pd.DataFrame()
print(df)

# Create from a flat list: one unnamed column (labelled 0), default RangeIndex.
data = [1, 2, 3, 4, 5, 6]
df = pd.DataFrame(data)
print(df)
#    0
# 0  1
# 1  2
# 2  3
# 3  4
# 4  5
# 5  6

# Two columns of data (name, age): each inner list is one row.
data = [['xiaoming', 10], ['xiaochen', 13]]
df = pd.DataFrame(data, columns=['username', 'age'])
print(df)
#    username  age
# 0  xiaoming   10
# 1  xiaochen   13

# Create from a dict: keys become column names; `index` supplies row labels.
data = {
    'username': ['小黑', '小白', '小刘'],
    'income': [1000, 2000, 3000],
}
df = pd.DataFrame(data, index=[1, 2, 3])
print(df)
#   username  income
# 1       小黑    1000
# 2       小白    2000
# 3       小刘    3000

# Create from a dict of Series.  The row index is the union of the Series
# indexes; 'one' has no entry for 'd', so that cell is NaN and the column
# is stored as float.
d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(d)
print(df)
#    one  two
# a  1.0    1
# b  2.0    2
# c  3.0    3
# d  NaN    4

# ---------------------------------------------------------------------------
# Columns
# ---------------------------------------------------------------------------

# Select a single column by its name; the result is a Series.
print(df['one'])
# a    1.0
# b    2.0
# c    3.0
# d    NaN
# Name: one, dtype: float64

# Add a column.  The new Series is aligned on the row index, so 'd' gets NaN.
df['three'] = pd.Series([4, 5, 6], index=['a', 'b', 'c'])
print(df)
#    one  two  three
# a  1.0    1    4.0
# b  2.0    2    5.0
# c  3.0    3    6.0
# d  NaN    4    NaN

# Add a column computed from existing columns (NaN propagates through `+`).
df['four'] = df['one'] + df['three']
print(df)
#    one  two  three  four
# a  1.0    1    4.0   5.0
# b  2.0    2    5.0   7.0
# c  3.0    3    6.0   9.0
# d  NaN    4    NaN   NaN

# Delete a column with `del`.
del df['four']
print(df)
#    one  two  three
# a  1.0    1    4.0
# b  2.0    2    5.0
# c  3.0    3    6.0
# d  NaN    4    NaN

# Delete a column with `pop`; it also returns the removed column,
# which is discarded here.
df.pop('two')
print(df)
#    one  three
# a  1.0    4.0
# b  2.0    5.0
# c  3.0    6.0
# d  NaN    NaN

# ---------------------------------------------------------------------------
# Rows
# ---------------------------------------------------------------------------

# Select a row by label.
print(df.loc['a'])
# one      1.0
# three    4.0
# Name: a, dtype: float64

# Select a row by position: iloc[1] is the 2nd row.
print(df.iloc[1])
# one      2.0
# three    5.0
# Name: b, dtype: float64

# Select rows with a slice: the first two rows (same rows as df.iloc[0:2]).
print(df[0:2])
#    one  three
# a  1.0    4.0
# b  2.0    5.0

print(df)
#    one  three
# a  1.0    4.0
# b  2.0    5.0
# c  3.0    6.0
# d  NaN    NaN

# Add rows: build a second frame with the same columns ...
df2 = pd.DataFrame([[2, 5], [5, 6]], columns=['one', 'three'])
print(df2)
#    one  three
# 0    2      5
# 1    5      6

# ... and concatenate it below the first.  `DataFrame.append` was removed
# in pandas 2.0, so `pd.concat` is used instead.  Row labels are kept
# as-is (pass ignore_index=True to renumber them).
# NOTE: the original notebook output listed the df2 rows twice, apparently
# because the cell had been executed more than once; a single run gives
# the frame below.
df = pd.concat([df, df2])
print(df)
#    one  three
# a  1.0    4.0
# b  2.0    5.0
# c  3.0    6.0
# d  NaN    NaN
# 0  2.0    5.0
# 1  5.0    6.0

# Delete rows by label.  `drop` returns a new frame and leaves `df`
# untouched; every row carrying the label is removed.
dropped = df.drop(0)
print(dropped)
#    one  three
# a  1.0    4.0
# b  2.0    5.0
# c  3.0    6.0
# d  NaN    NaN
# 1  5.0    6.0