Python统计分析-主成分分析
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# <editable>


def execute():
    # <editable>
    """Run a principal component analysis (PCA) on columns read from MySQL.

    The function reads the columns named in ``params['columns']`` from the
    table named in ``inputs['table']``, keeps only the numeric columns, fits
    a PCA model with ``params['n_components']`` components and prints:

    * the raw input data,
    * the explained variance ratio and the explained variance,
    * the projected data (columns ``comp_1`` .. ``comp_k``), rounded to
      four decimals.

    Raises:
        ValueError: if ``n_components`` is not in
            ``[1, min(n_samples, n_features)]`` for the numeric input data.
    """
    # Load modules (kept local so the editable region stays self-contained).
    from sklearn.decomposition import PCA
    import numpy as np
    import pandas as pd
    from sqlalchemy import create_engine

    # Connect to the database.
    # NOTE(review): the credentials are hard-coded in the URL; consider
    # loading them from configuration or the environment instead.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

    # Select the target data.
    params = {
        "columns": "SUNACTIVITY",
        "n_components": 1,
    }
    inputs = {"table": '纯随机性检验'}
    # NOTE(review): identifiers are concatenated into the SQL text; this is
    # only safe while `params` / `inputs` are trusted, hard-coded values.
    data_sql = 'select ' + params['columns'] + ' from ' + inputs['table']
    try:
        data_in = pd.read_sql_query(data_sql, engine)
    finally:
        # Release the connection pool; the engine is not needed after the read.
        engine.dispose()
    print(data_in)

    # Principal component analysis.
    data_in = data_in.select_dtypes(include=['number'])  # keep numeric columns only
    n_samples, n_features = data_in.shape
    n_components = int(params['n_components'])
    # PCA cannot extract more components than min(n_samples, n_features), so
    # validate against that bound rather than against n_features alone.
    max_components = min(n_samples, n_features)
    if not 1 <= n_components <= max_components:
        raise ValueError("\n降维后的维数为%r,该值必须要在[1,%r]之间." % (n_components, max_components))

    pca_model = PCA(n_components=n_components)
    pca_model.fit(data_in)
    print(pca_model.explained_variance_ratio_)
    print(pca_model.explained_variance_)

    # Perform the dimensionality reduction.
    data_out = pca_model.transform(data_in)
    columns = ['comp_' + str(i) for i in range(1, n_components + 1)]
    data_out = pd.DataFrame(data_out, columns=columns)
    data_out = np.around(data_out, decimals=4)

    # Write out the result.
    print(data_out)

    # Example run (printed input, explained variance ratio, explained
    # variance, and the projected data):
    #
    #     SUNACTIVITY
    # 0           5.0
    # 1          11.0
    # 2          16.0
    # 3          23.0
    # 4          36.0
    # 5          40.4
    # 6          29.8
    # 7          15.2
    # 8           7.5
    # 9           2.9
    # 10         83.4
    # 11         47.7
    # 12         47.8
    # 13         30.7
    # 14         12.2
    # 15         40.4
    # 16         29.8
    # 17         15.2
    # 18          7.5
    # 19          2.9
    # 20         12.6
    # [1.]
    # [394.82661905]
    #     comp_1
    # 0  -19.619
    # 1  -13.619
    # 2   -8.619
    # 3   -1.619
    # 4   11.381
    # 5   15.781
    # 6    5.181
    # 7   -9.419
    # 8  -17.119
    # 9  -21.719
    # 10  58.781
    # 11  23.081
    # 12  23.181
    # 13   6.081
    # 14 -12.419
    # 15  15.781
    # 16   5.181
    # 17  -9.419
    # 18 -17.119
    # 19 -21.719
    # 20 -12.019
# </editable>


if __name__ == '__main__':
    execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。