2018.03.27 pandas duplicated 和 replace 使用
# .duplicated / .replace
import numpy as np
import pandas as pd

# Series test: flag repeated values, then remove them.
s = pd.Series([1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6])
print(s)
print(s.duplicated())  # boolean Series; True marks a value that repeats an earlier one
print(s[~s.duplicated()])  # invert the boolean mask to keep only the non-duplicated values

s_re = s.drop_duplicates()  # remove the repeated values directly
# inplace=True would modify the original object instead of returning a new one
print(s_re)
print('------')

# DataFrame test: a row counts as a duplicate when it repeats an earlier row.
df = pd.DataFrame({'key1': ['a', 'a', 3, 4, 5],
                   'key2': ['a', 'a', 'b', 'b', 'c']})
print(df)
print('---------------------')
print(df.duplicated())
print('---------------------')
print(df.drop_duplicates())
结果:
0     1
1     1
2     1
3     1
4     1
5     2
6     3
7     3
8     3
9     4
10    4
11    5
12    6
13    6
dtype: int64
0     False
1      True
2      True
3      True
4      True
5     False
6     False
7      True
8      True
9     False
10     True
11    False
12    False
13     True
dtype: bool
0     1
5     2
6     3
9     4
11    5
12    6
dtype: int64
0     1
5     2
6     3
9     4
11    5
12    6
dtype: int64
------
  key1 key2
0    a    a
1    a    a
2    3    b
3    4    b
4    5    c
---------------------
0    False
1     True
2    False
3    False
4    False
dtype: bool
---------------------
  key1 key2
0    a    a
2    3    b
3    4    b
4    5    c
# .replace()
# Requires numpy and pandas to be imported already as `np` and `pd`.
s = pd.Series(list('aaabbbcdd'))
print(s)
print(s.replace('a', np.nan))  # single value -> replacement
print(s.replace(['a', 'd'], np.nan))  # list of values -> one shared replacement
print(s.replace({'a': 'Hello', 'd': 'World'}))  # dict maps each value to its own replacement
结果:
0    a
1    a
2    a
3    b
4    b
5    b
6    c
7    d
8    d
dtype: object
0    NaN
1    NaN
2    NaN
3      b
4      b
5      b
6      c
7      d
8      d
dtype: object
0    NaN
1    NaN
2    NaN
3      b
4      b
5      b
6      c
7    NaN
8    NaN
dtype: object
0    Hello
1    Hello
2    Hello
3        b
4        b
5        b
6        c
7    World
8    World
dtype: object