pandas之filter
数据准备
import pandas as pd
product_info = {
"订单号": [ "2951110000099262111", "2181910000909928191", "2194560000121355545", "1194560000121311126", "1483160000121315483"],
"数量": [92, 61, 66, 33, 15],
"价格(USD)": [230, 122, 150, 190, 200],
"状态": ["Not Delivered", "Not Delivered", "Not Delivered", "Not Shipped", "Not Delivered"],
"订单日期": ["2022-02-12", "2022-03-02", "2022-01-22", "2022-02-22", "2022-02-15"],
"订单编号": ["444111", "444122", "444132", "444003", "444244"],
}
df = pd.DataFrame(product_info, index=['one', 'two', 'three', 'four', 'five'])
print(df)
'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''
方法介绍
按columns过滤
全列名过滤
# 过滤出列名为“订单号”和“订单编号”列
res_df = df.filter(items=["订单号", '订单编号'])
print(res_df)
'''
订单号 订单编号
one 2951110000099262111 444111
two 2181910000909928191 444122
three 2194560000121355545 444132
four 1194560000121311126 444003
five 1483160000121315483 444244
'''
模糊过滤
# 过滤出列名中包含“订单”的所有列
res_df = df.filter(like='订单', axis=1)
print(res_df)
'''
订单号 订单日期 订单编号
one 2951110000099262111 2022-02-12 444111
two 2181910000909928191 2022-03-02 444122
three 2194560000121355545 2022-01-22 444132
four 1194560000121311126 2022-02-22 444003
five 1483160000121315483 2022-02-15 444244
'''
正则过滤
# 过滤出以"订单"开头的所有列
res_df = df.filter(regex='^订单', axis=1)
print(res_df)
'''
订单号 订单日期 订单编号
one 2951110000099262111 2022-02-12 444111
two 2181910000909928191 2022-03-02 444122
three 2194560000121355545 2022-01-22 444132
four 1194560000121311126 2022-02-22 444003
five 1483160000121315483 2022-02-15 444244
'''
按index过滤
全名称过滤
# 过滤出index为"one", "four"的行
res_df = df.filter(items=["one", 'four'], axis=0)
print(res_df)
'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
'''
模糊过滤
# 过滤出index包含"e"的所有行
res_df = df.filter(like='e', axis=0)
print(res_df)
'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''
正则过滤
# 过滤出以"t"开头的所有行
res_df = df.filter(regex='^t', axis=0)
print(res_df)
'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
'''
多条件过滤
# 过滤出以"价格"开头或名称为"订单号"的所有列
res_df = df.filter(regex='^价格|订单号', axis=1)
print(res_df)
'''
订单号 价格(USD)
0 295111000262111 230
1 218191000928191 122
2 21945600355545 150
3 11945600311126 190
4 14831600315483 200
'''
# 过滤出以"t"开头或以"f"开头的所有行
res_df = df.filter(regex='^t|^f', axis=0)
print(res_df)
'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''
本文来自博客园,仅供参考学习,如有不当之处还望不吝赐教,不胜感激!转载请注明原文链接:https://www.cnblogs.com/rong-z/p/17610672.html
作者:cnblogs用户