一、dataframe的增加操作

 1 import pandas as pd
 2 
 3 # 加载数据
 4 users = pd.read_excel("./users.xlsx")
 5 print("users:\n", users)
 6 print("users 的类型：\n", type(users))
 7 print("users 的列索引名称：\n", users.columns)
 8 print("*" * 100)
 9 # 获取age列
10 print("获取age:\n", users.loc[:, "age"])
11 
12 # 增加 next_year_age
13 # users.loc[:, "next_year_age"] = 18
14 users.loc[:, "next_year_age"] = users.loc[:, "age"] + 1
15 
16 print(users)

二、dataframe的删除操作

 1 import pandas as pd
 2 
 3 # 加载数据
 4 info = pd.read_csv("./meal_order_info.csv", encoding="ansi")
 5 # print("info:\n", info)
 6 print("info 的列索引：\n", info.columns)
 7 
 8 # 删除数据
 9 # drop
10 # 可以删除行，也可以删除列
11 # labels ---指定要删除的行或者列的名称
12 # axis ---按行的方向删除  还是按列的方向删除
13 # inplace  = True ，对原来df产生影响，没有返回值
14 # inplace = False  对原来的df不产生影响，会返回一个删除之后的结果，供我们查看
15 
16 # 删除 "org_id", "phone" 两列
17 res = info.drop(labels=["org_id", "phone"], axis=1, inplace=True)
18 # res = info.drop(labels=["org_id", "phone"], axis=1, inplace=False)
19 print(info.shape)
20 print(res)
21 
22 # 删除指定的行
23 info.drop(labels=[942, 943, 944],axis=0,inplace=True)
24 print(info.shape)

三、dataframe的修改操作

 1 import pandas as pd
 2 
 3 # 加载数据
 4 users = pd.read_excel("./users.xlsx")
 5 print("users:\n", users)
 6 print("users 的类型：\n", type(users))
 7 print("users 的列索引名称：\n", users.columns)
 8 print("*" * 100)
 9 
10 print("users[age]: ", users["age"][:5])
11 print("users[age]: ", users.loc[:5, "age"])
12 
13 # 修改数据 ---不太切合实际
14 # users.loc[:, "age"] = 18
15 
16 # 修改的时候，需要 涉及到一些条件，满足这个条件 才进行修改
17 
18 # age 年龄是偶数的年龄 改为 18岁
19 
20 # 利用bool数组进行设置条件
21 bool_index = users.loc[:, "age"] % 2 == 0
22 print(bool_index)
23 users.loc[bool_index, "age"] = 18
24 #
25 # print("users:\n",users)
26 
27 
28 # 将sex 列里面 为男 的所有数据 改为 女
29 bool_index = users.loc[:,"sex"] == "男"
30 
31 users.loc[bool_index,"sex"] = "女"
32 
33 print("users:\n",users)
34 
35 # 将 arithmetic_name  里面的值为关联规则  改为呵呵哒
36 bool_index = users.loc[:, "arithmetic_name"] == "关联规则"
37 
38 users.loc[bool_index, "arithmetic_name"] = "呵呵哒"
39 
40 print("users:\n",users)

四、dataframe的查询操作

 1 import pandas as pd
 2 
 3 # 1、加载数据
 4 detail = pd.read_excel("./meal_order_detail.xlsx")
 5 print("detail :\n", detail)
 6 print("detail 的类型:\n", type(detail))
 7 print("detail 列索引名称:\n", detail.columns)
 8 
 9 index = ["index_" + str(i) for i in range(detail.shape[0])]
10 print("index:\n", index)
11 # 给detail 重新设置行索引
12 detail.index = index
13 
14 # 获取元素---直接获取
15 # 数组 arr[行,列] ---行列同时索引
16 
17 # dataframe 直接获取元素 先列后行，这种是先后索引，不是同时索引
18 # 获取 dishes_name  这一列数据
19 print("获取单列数据：\n",detail["dishes_name"])
20 # 获取 dishes_name 与 dishes_id 与 order_id  这三列数据
21 # 获取多列 数据，需要传  df[[列1，列2，列3，...]]
22 print("获取多列数据：\n",detail[["dishes_name","dishes_id","order_id"]])
23 
24 
25 # 单列的数据----series
26 # 获取 dishes_name 这一列数据的 前10行
27 print("获取单列数据：\n",detail["dishes_name"][:10])
28 # head  默认获取前5行，可以传参，来获取指定的前n行
29 print("获取单列数据：\n",detail["dishes_name"].head(10))
30 # 也可以使用先单列 之后，再进行使用行名称列表来获取指定的行
31 print("获取单列数据：\n",detail["dishes_name"][["index_0","index_1","index_2"]])
32 # 也可以使用先单列 之后，再进行使用行下标列表来获取指定的行
33 print("获取单列数据：\n",detail["dishes_name"][[0,1,2,3]])
34 # 获取 dishes_name 这一列数据的 后10行
35 print("获取单列数据：\n", detail["dishes_name"][-10:])
36 # tail 默认获取后5行，也可以传参，来获取指定的后n行
37 print("获取单列数据：\n", detail["dishes_name"].tail(10))
38 
39 
40 ## 获取 dishes_name 与 dishes_id 与 order_id  这三列数据的前10行
41 print("获取多列数据：\n",detail[["dishes_name","dishes_id","order_id"]][:10])
42 print("获取多列数据：\n",detail[["dishes_name","dishes_id","order_id"]].head(10))
43 # dataframe 直接获取元素，只能先列，后行
44 # print("获取多列数据：\n",detail[["dishes_name","dishes_id","order_id"]][["index_0","index_1","index_2"]]) # 错误的
45 # print("获取多列数据：\n",detail[["dishes_name","dishes_id","order_id"]][[0,1,2,3,4]]) # 错误的
46 
47 
48 ## 获取 dishes_name 与 dishes_id 与 order_id  这三列数据的后10行
49 print("获取多列数据：\n", detail[["dishes_name", "dishes_id", "order_id"]][-10:])
50 print("获取多列数据：\n", detail[["dishes_name", "dishes_id", "order_id"]].tail(10))
51 
52 
53 # loc   同时索引的时候，只能使用名称
54 # df.loc[行名称,列名称]
55 # 使用loc 获取 dishes_name 与 dishes_id 与 order_id  这三列数据
56 print("使用loc获取多列数据：\n",detail.loc[:,["dishes_name","dishes_id","order_id"]] )
57 # 使用loc 获取 dishes_name 与 dishes_id 与 order_id  这三列数据的指定行
58 print("使用loc获取多列数据：\n",detail.loc[["index_0","index_1","index_2"],["dishes_name","dishes_id","order_id"]] )
59 # 可以使用名称切片，名称切片的时候，首尾都包含
60 print("使用loc获取多列数据：\n",detail.loc["index_0":"index_2",["dishes_name","dishes_id","order_id"]] )
61 # print("使用loc获取多列数据：\n", detail.loc[0:2, ["dishes_name", "dishes_id", "order_id"]]) # 错误的 # loc的时候不能使用下标
62 
63 
64 # iloc  同时索引的时候，只能使用下标
65 # df.iloc[行下标,列下标]
66 # 使用iloc 获取 dishes_name 与 dishes_id 与 order_id  这三列数据
67 print("使用iloc获取多列数据：\n",detail.iloc[:,[5,2,1]] )
68 # 都使用下标列表
69 print("使用iloc获取多列数据：\n",detail.iloc[[0,1,2],[5,2,1]] )
70 # 可以使用下标切片
71 print("使用iloc获取多列数据：\n",detail.iloc[0:3,[5,2,1]] )
72 # print("使用iloc获取多列数据：\n",detail.iloc["index_0":"index_2",[5,2,1]] ) # 错误的，iloc 不能使用名称
73 
74 
75 # ix  混合索引
76 # 名称和下标可以同时使用
77 # 行---名称，列 --可以是名称，也可以是下标
78 # 行--下标，列 --可以是名称，也可以是下标
79 # 使用ix 获取 dishes_name 与 dishes_id 与 order_id  这三列数据 的指定行数据
80 print("使用ix获取多列数据：\n", detail.ix[0:3, ["dishes_name", "dishes_id", "order_id"]])
81 print("使用ix获取多列数据：\n", detail.ix[["index_0", "index_1", "index_2"], [5, 2, 1]])
82 
83 
84 # print("使用ix获取多列数据：\n", detail.ix["index_0":5, [5, 2, 1]]) # 错误的 #  不能这样的混搭
85 
86 
87 # ix最为强大，但是效率最慢
88 # 直接获取方式 ---最快，但是大部风平台 不使用，我们不推荐
89 # loc 与iloc 效率适中， 我们推荐使用

发表于 2019-12-29 19:09 可西可彻 阅读(532) 评论(0) 收藏举报

【数据分析&数据挖掘】dataframe的增删改查

一、dataframe的增加操作

二、dataframe的删除操作

三、dataframe的修改操作

四、dataframe的查询操作

公告