Pandas笔记:数据离散化(one-hot)
import pandas as pd

# Discretize a small Series of heights and one-hot encode the resulting bins,
# first with quantile-based bins (qcut), then with hand-picked edges (cut).
data = pd.Series(
    [176, 174, 160, 180, 159, 163, 192, 184],
    index=[
        "No1:176",
        "No2:174",
        "No3:160",
        "No4:180",
        "No5:159",
        "No6:163",
        "No7:192",
        "No8:184",
    ],
)
print(data)

# Automatic grouping: let qcut choose the edges of 3 quantile-based bins.
# The result is deliberately not named `str`, which would shadow the builtin.
quantile_groups = pd.qcut(data, 3)
print()

# One-hot encode the bins. dtype=int keeps the columns as 0/1; without it,
# pandas >= 2.0 produces bool (True/False) columns.
quantile_dummies = pd.get_dummies(quantile_groups, prefix="height", dtype=int)
print(quantile_dummies)

# Custom grouping: explicit bin edges, giving (150, 165], (165, 180], (180, 195].
bins = [150, 165, 180, 195]
custom_groups = pd.cut(data, bins)
print(custom_groups)
print(custom_groups.value_counts())

# One-hot encode the custom bins, again as 0/1 integer columns.
custom_dummies = pd.get_dummies(custom_groups, prefix="身高", dtype=int)
print(custom_dummies)
No1:176 176 No2:174 174 No3:160 160 No4:180 180 No5:159 159 No6:163 163 No7:192 192 No8:184 184 dtype: int64 height_(158.999, 166.667] ... height_(178.667, 192.0] No1:176 0 ... 0 No2:174 0 ... 0 No3:160 1 ... 0 No4:180 0 ... 1 No5:159 1 ... 0 No6:163 1 ... 0 No7:192 0 ... 1 No8:184 0 ... 1 [8 rows x 3 columns] No1:176 (165, 180] No2:174 (165, 180] No3:160 (150, 165] No4:180 (165, 180] No5:159 (150, 165] No6:163 (150, 165] No7:192 (180, 195] No8:184 (180, 195] dtype: category Categories (3, interval[int64]): [(150, 165] < (165, 180] < (180, 195]] (165, 180] 3 (150, 165] 3 (180, 195] 2 dtype: int64 身高_(150, 165] 身高_(165, 180] 身高_(180, 195] No1:176 0 1 0 No2:174 0 1 0 No3:160 1 0 0 No4:180 0 1 0 No5:159 1 0 0 No6:163 1 0 0 No7:192 0 0 1 No8:184 0 0 1 Process finished with exit code 0