预处理算法_12_分组聚合
#!/usr/bin/env python # -*- coding:utf-8 -*- # <editable> def execute(): # <editable> ''' 载入模块 ''' import pandas as pd from sqlalchemy import create_engine ''' 连接数据库 ''' engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis') ''' 选择目标数据 ''' params = { "features": "score as fea", "label": 'score', "method": "count", } inputs = {"table": 'test'} sql = 'select ' + params['features'] + ',' + params['label'] + ' from ' + inputs['table'] data_in = pd.read_sql_query(sql, engine) data_in = data_in.fillna(float(20)) print(data_in) ''' 分组聚合 用法: obj.groupby(‘key’) obj.groupby([‘key1’,’key2’]) ''' b = params['label'].split(',') if params['method'] == 'count': data_out = data_in.groupby(b).count().reset_index() elif params['method'] == 'max': data_out = data_in.groupby(b).max().reset_index() elif params['method'] == 'mean': data_out = data_in.groupby(b).mean().reset_index() elif params['method'] == 'median': data_out = data_in.groupby(b).median().reset_index() elif params['method'] == 'size': data_out = data_in.groupby(b).size().reset_index() elif params['method'] == 'min': data_out = data_in.groupby(b).min().reset_index() elif params['method'] == 'std': data_out = data_in.groupby(b).std().reset_index() else: data_out = data_in.groupby(b).sum().reset_index() ''' 将结果写出 ''' print(data_out) ''' 数据示例 fea score 0 80.0 80.0 1 20.0 20.0 2 20.0 20.0 3 5.0 5.0 4 4.0 4.0 5 20.0 20.0 score fea 0 4.0 1 1 5.0 1 2 20.0 3 3 80.0 1 ''' # </editable> if __name__ == '__main__': execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接。
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接。