预处理算法_9_数据标准化
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# <editable>
def execute():
    # <editable>
    """Read the target column(s) from MySQL, standardize them and print them.

    The columns named in ``params['columns']`` are selected from the table
    named in ``inputs['table']``, reduced to their numeric columns, and
    rescaled with scikit-learn: ``preprocessing.scale`` when
    ``params['method']`` is ``'0_scale'``, ``preprocessing.minmax_scale``
    for any other value.  The raw query result and the rescaled frame are
    both printed; nothing is returned.
    """
    # Load modules (kept function-local so the editable block is self-contained).
    from sklearn import preprocessing
    import pandas as pd
    from sqlalchemy import create_engine

    # Connect to the database.
    # NOTE(review): the credentials are hard-coded in the URL; consider
    # moving them to configuration before using this outside a demo.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

    # Select the target data.
    params = {
        "columns": "score",
        "method": "0_scale",
    }
    inputs = {"table": 'test'}
    # NOTE(review): the statement is built by string concatenation, which is
    # only safe while the column and table names are trusted constants.
    sql = 'select ' + params['columns'] + ' from ' + inputs['table']
    try:
        data_in = pd.read_sql_query(sql, engine)
    finally:
        # Release the pooled connections even if the query fails.
        engine.dispose()
    print(data_in)

    # Standardize. Only numeric columns can be rescaled.
    data_in = data_in.select_dtypes(include=['number'])
    if data_in.empty:
        # scikit-learn's scalers reject input with no rows or no columns,
        # so pass an empty selection through unchanged instead of crashing.
        data_out = data_in.copy()
    else:
        if params['method'] == '0_scale':
            scaled = preprocessing.scale(data_in)
        else:
            scaled = preprocessing.minmax_scale(data_in)
        # The scalers return a bare array; restore column labels and index.
        data_out = pd.DataFrame(scaled, columns=data_in.columns, index=data_in.index)

    # Write out the result.
    print(data_out)

    # Sample data (input frame, then the '0_scale' output):
    #
    #        score
    #     0   80.0
    #     1   20.0
    #     2    NaN
    #     3    5.0
    #     4    4.0
    #     5   20.0
    #
    #           score
    #     0  1.937494
    #     1 -0.207333
    #     2       NaN
    #     3 -0.743540
    #     4 -0.779287
    #     5 -0.207333
    # </editable>


if __name__ == '__main__':
    execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有，欢迎转载，但未经作者同意必须保留此段声明，且在文章页面明显位置给出原文链接。
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有，欢迎转载，但未经作者同意必须保留此段声明，且在文章页面明显位置给出原文链接。