Pandas-DataFrame
一、Pandas-DataFrame特点
二、特征练习
import pandas as pd

# Build a DataFrame from a list of dicts. Each dict is one row; a key that is
# missing from a row ('c' in the first dict) shows up as NaN in that row.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df = pd.DataFrame(data)
print(df)
print('-----------')

# Build a DataFrame from a dict of equal-length lists. Each key is a column;
# the explicit `index` replaces the default 0..n-1 row labels.
data = {'Name': ['Tom', 'Jack', 'Steve', 'Ricky'], 'Age': [28, 34, 29, 42]}
df = pd.DataFrame(data, index=['s1', 's2', 's3', 's4'])
print(df)
print('-----------')

# Build a DataFrame from a dict of Series. The rows are the union of the
# Series indexes; 'one' has no label 'd', so that cell is NaN and the column
# is printed as float.
data = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(data)
print(df)
print('-----------')

# Build a DataFrame from a nested list (one inner list per row), naming the
# columns explicitly.
data = [['zhaosan', 12], ['lisi', 15], ['wangwu', 19]]
df = pd.DataFrame(data, columns=['name', 'age'])
print(df)
print('-----------')
```
   a   b     c
0  1   2   NaN
1  5  10  20.0
-----------
     Name  Age
s1    Tom   28
s2   Jack   34
s3  Steve   29
s4  Ricky   42
-----------
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
-----------
      name  age
0  zhaosan   12
1     lisi   15
2   wangwu   19
-----------
```

以上为上述代码的执行结果。
import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}
df = pd.DataFrame(data)

# Show the first two rows.
print(df.head(2))
print("-----------------")

# Show basic information about the DataFrame.
# NOTE: df.info() writes its report to stdout itself and returns None, so
# wrapping it in print() also emits a trailing "None" line. Call df.info() on
# its own if that extra line is not wanted.
print(df.info())
print("-----------------")

# Descriptive statistics (numeric columns only, so just 'Age' here).
print(df.describe())
print("-----------------")

# Sort by age, oldest first. sort_values returns a new DataFrame; df itself
# is left unchanged.
df_sorted = df.sort_values(by='Age', ascending=False)
print(df_sorted)
print("-----------------")

# Select a subset of columns.
print(df[['Name', 'Age']])
print("-----------------")

# Select rows by position: iloc slices exclude the stop, so 1:3 yields the
# second and third rows.
print(df.iloc[1:3])
print("-----------------")

# Select rows by label: loc slices INCLUDE the stop label, so :2 yields the
# rows labelled 0, 1 and 2 (the first three rows), not just two rows.
print(df.loc[:2])
print("-----------------")

# Grouped statistics: mean age per city.
print(df.groupby('City')['Age'].mean())
print("-----------------")

# Missing-value handling. 'Age' is int64 and cannot hold a missing value, so
# cast it to float first instead of relying on silent upcasting during the
# assignment (deprecated since pandas 2.1).
df['Age'] = df['Age'].astype('float64')
df.loc[2, 'Age'] = float('nan')
print(df)
print("-----------------")

# Replace the missing age with a fixed value.
df['Age'] = df['Age'].fillna(30)
print(df)
print("-----------------")

# Export to CSV without writing the row index as a column.
df.to_csv('output.csv', index=False)
执行结果:

```
    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles
-----------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Name    4 non-null      object
 1   Age     4 non-null      int64
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes
None
-----------------
             Age
count   4.000000
mean   32.500000
std     6.454972
min    25.000000
25%    28.750000
50%    32.500000
75%    36.250000
max    40.000000
-----------------
      Name  Age         City
3    David   40      Houston
2  Charlie   35      Chicago
1      Bob   30  Los Angeles
0    Alice   25     New York
-----------------
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
-----------------
      Name  Age         City
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
-----------------
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
-----------------
City
Chicago        35.0
Houston        40.0
Los Angeles    30.0
New York       25.0
Name: Age, dtype: float64
-----------------
      Name   Age         City
0    Alice  25.0     New York
1      Bob  30.0  Los Angeles
2  Charlie   NaN      Chicago
3    David  40.0      Houston
-----------------
      Name   Age         City
0    Alice  25.0     New York
1      Bob  30.0  Los Angeles
2  Charlie  30.0      Chicago
3    David  40.0      Houston
-----------------
```
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人