数据归一化
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Build a 6x6 demo table holding the integers 0..35 in row-major order.
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
|   | 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 | 4 | 5 |
| 1 | 6 | 7 | 8 | 9 | 10 | 11 |
| 2 | 12 | 13 | 14 | 15 | 16 | 17 |
| 3 | 18 | 19 | 20 | 21 | 22 | 23 |
| 4 | 24 | 25 | 26 | 27 | 28 | 29 |
| 5 | 30 | 31 | 32 | 33 | 34 | 35 |
# Fit a min-max scaler on the table and rescale its columns into [0, 1].
sclar = MinMaxScaler(feature_range=(0, 1))
result = sclar.fit_transform(data)
result
array([[0. , 0. , 0. , 0. , 0. , 0. ],
[0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
[0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
[0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
[0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
[1. , 1. , 1. , 1. , 1. , 1. ]])
# Map the scaled values back to the original scale with the fitted scaler.
sclar_ = sclar.inverse_transform(result)
sclar_
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., 21., 22., 23.],
[24., 25., 26., 27., 28., 29.],
[30., 31., 32., 33., 34., 35.]])
使用 NumPy / pandas 运算手动实现数据归一化（Min-Max）
# Recreate the 6x6 demo table (integers 0..35, filled row by row).
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
|   | 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 | 4 | 5 |
| 1 | 6 | 7 | 8 | 9 | 10 | 11 |
| 2 | 12 | 13 | 14 | 15 | 16 | 17 |
| 3 | 18 | 19 | 20 | 21 | 22 | 23 |
| 4 | 24 | 25 | 26 | 27 | 28 | 29 |
| 5 | 30 | 31 | 32 | 33 | 34 | 35 |
# Manual min-max scaling, column by column: (x - min) / (max - min).
col_min = data.min(axis=0)
col_span = data.max(axis=0) - col_min
# A constant column has a zero span and would give 0/0 = NaN; dividing by 1
# instead maps it to 0, which is how MinMaxScaler treats such columns.
col_span = col_span.where(col_span != 0, 1)
x_nor = (data - col_min) / col_span
x_nor
|   | 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.2 | 0.2 | 0.2 | 0.2 | 0.2 | 0.2 |
| 2 | 0.4 | 0.4 | 0.4 | 0.4 | 0.4 | 0.4 |
| 3 | 0.6 | 0.6 | 0.6 | 0.6 | 0.6 | 0.6 |
| 4 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 |
| 5 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
# Undo the manual min-max scaling column by column: x * (max - min) + min.
col_min = data.min(axis=0)
col_span = data.max(axis=0) - col_min
x_nor_inverse = x_nor * col_span + col_min
x_nor_inverse
|   | 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| 0 | 0.0 | 1.0 | 2.0 | 3.0 | 4.0 | 5.0 |
| 1 | 6.0 | 7.0 | 8.0 | 9.0 | 10.0 | 11.0 |
| 2 | 12.0 | 13.0 | 14.0 | 15.0 | 16.0 | 17.0 |
| 3 | 18.0 | 19.0 | 20.0 | 21.0 | 22.0 | 23.0 |
| 4 | 24.0 | 25.0 | 26.0 | 27.0 | 28.0 | 29.0 |
| 5 | 30.0 | 31.0 | 32.0 | 33.0 | 34.0 | 35.0 |
数据标准化
# Fresh copy of the 6x6 demo table (integers 0..35) for the standardization demo.
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
|   | 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 | 4 | 5 |
| 1 | 6 | 7 | 8 | 9 | 10 | 11 |
| 2 | 12 | 13 | 14 | 15 | 16 | 17 |
| 3 | 18 | 19 | 20 | 21 | 22 | 23 |
| 4 | 24 | 25 | 26 | 27 | 28 | 29 |
| 5 | 30 | 31 | 32 | 33 | 34 | 35 |
from sklearn.preprocessing import StandardScaler

# Standardize the table with a fitted StandardScaler (zero mean, unit variance).
std = StandardScaler()
result1 = std.fit_transform(data)
result1
array([[-1.46385011, -1.46385011, -1.46385011, -1.46385011, -1.46385011,
-1.46385011],
[-0.87831007, -0.87831007, -0.87831007, -0.87831007, -0.87831007,
-0.87831007],
[-0.29277002, -0.29277002, -0.29277002, -0.29277002, -0.29277002,
-0.29277002],
[ 0.29277002, 0.29277002, 0.29277002, 0.29277002, 0.29277002,
0.29277002],
[ 0.87831007, 0.87831007, 0.87831007, 0.87831007, 0.87831007,
0.87831007],
[ 1.46385011, 1.46385011, 1.46385011, 1.46385011, 1.46385011,
1.46385011]])
# Mean over all entries of the standardized array; ~0 up to floating-point error.
result1.mean()
-3.700743415417188e-17
# Variance over all entries of the standardized array; ~1 up to floating-point error.
result1.var()
1.0000000000000002
# Recover the original values from the standardized array with the fitted scaler.
std.inverse_transform(result1)
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., 21., 22., 23.],
[24., 25., 26., 27., 28., 29.],
[30., 31., 32., 33., 34., 35.]])