数据转换

'''
    转换数据
'''
import pandas as pd

# Demo: turn categorical string columns into numeric features.
# Every cell of the table, including the 'prize' values, is a string.
df = pd.DataFrame(
    [
        ['green', 'M', '10.2', 'class1'],
        ['red', 'L', '13.5', 'class2'],
        ['blue', 'XL', '15.3', 'class1'],
    ],
    columns=['color', 'size', 'prize', 'class label'],
)
print(df)

# Ordinal feature: replace each size string with an integer rank (M < L < XL).
size_mapping = {
    'XL': 3,
    'L': 2,
    'M': 1,
}
df['size'] = df['size'].map(size_mapping)
print(df)

# Class labels: give every distinct label an integer code.
# The labels are sorted before numbering because a bare set has no guaranteed
# iteration order, which would make the codes differ from run to run.
# Descending order reproduces the numbering shown in the sample output
# (class2 -> 0, class1 -> 1).
class_mapping = {
    label: idx
    for idx, label in enumerate(sorted(set(df['class label']), reverse=True))
}
df['class label'] = df['class label'].map(class_mapping)
print(df)

# One-hot encode the columns that still hold strings. Because 'prize' was
# entered as strings it is expanded into indicator columns as well, next to
# 'color'. dtype=int keeps the indicators as 0/1 on every pandas version
# (newer releases default to booleans).
result = pd.get_dummies(df, dtype=int)
print(result)

输出结果:
   color size prize class label
0  green    M  10.2      class1
1    red    L  13.5      class2
2   blue   XL  15.3      class1
   color  size prize class label
0  green     1  10.2      class1
1    red     2  13.5      class2
2   blue     3  15.3      class1
   color  size prize  class label
0  green     1  10.2            1
1    red     2  13.5            0
2   blue     3  15.3            1
   size  class label  color_blue  ...  prize_10.2  prize_13.5  prize_15.3
0     1            1           0  ...           1           0           0
1     2            0           0  ...           0           1           0
2     3            1           1  ...           0           0           1

 

'''
    转换数据----连续数据离散化
'''
import pandas as pd
import matplotlib.pyplot as mp

# Demo: discretize continuous values (ages) into interval bins and plot
# how many values fall into each bin.
ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]
# Goal: split the ages into the groups '18-25', '25-35', '35-60' and
# '60 and over'. Note that the first bin edge is 0, not 18, so the resulting
# intervals are (0, 25], (25, 35], (35, 60] and (60, 100].
bins = [0, 25, 35, 60, 100]
cut_1 = pd.cut(ages, bins)
print(cut_1)
# Count the members of each interval. Series.value_counts() is used because
# the top-level pd.value_counts() helper is deprecated in recent pandas.
data = pd.Series(cut_1).value_counts()
data.plot(kind='bar', rot=30)
mp.show()

输出结果:
[(0, 25], (0, 25], (0, 25], (25, 35], (0, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, interval[int64]): [(0, 25] < (25, 35] < (35, 60] < (60, 100]]

posted @ 2019-08-02 13:59  一如年少模样  阅读(199)  评论(0)  编辑  收藏  举报