预处理算法_6_缺失值处理

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-


def execute():
    """Read the configured column(s) from MySQL, handle missing values, print the result.

    The strategy is selected by ``params['method']``:

    * ``'drop'`` -- drop every row that contains a missing value.
    * ``'Median_interpolation'`` -- fill with each numeric column's median.
    * ``'Mode_interpolation'`` -- fill with each column's first mode.
    * ``'slinear'`` / ``'quadratic'`` -- ``DataFrame.interpolate`` with that method.
    * ``'polynomial'`` -- fill with each numeric column's mean (see note in the body).
    * anything else -- fill with ``params['value']``, converted to ``float``
      when it represents a number.

    Returns:
        None. The processed frame is printed to stdout.
    """
    params = {"method": '', "columns": "score", "value": 20}
    inputs = {"table": 'test'}
    # <editable>
    # Load modules
    import unicodedata

    import pandas as pd
    from sqlalchemy import create_engine

    # Select the target data.
    # NOTE(review): the SQL text is built by concatenation because column and
    # table identifiers cannot be bound as query parameters; params/inputs
    # must therefore only ever hold trusted identifiers.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
    sql = 'select ' + params['columns'] + ' from ' + inputs['table']
    data_in = pd.read_sql_query(sql, engine)

    def to_float(value):
        """Return ``value`` as a float, or None when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            # TypeError covers None and other non-string, non-number objects.
            pass
        try:
            # Handles single Unicode numerals that float() rejects; using the
            # returned number avoids a second, failing float() call later.
            return unicodedata.numeric(value)
        except (TypeError, ValueError):
            return None

    # Missing-value handling
    method = params['method']
    if method == 'drop':
        data_out = data_in.dropna()
    elif method == 'Median_interpolation':
        # numeric_only keeps non-numeric columns from raising on newer pandas.
        data_out = data_in.fillna(data_in.median(numeric_only=True))
    elif method == 'Mode_interpolation':
        modes = data_in.mode()
        if modes.empty:
            # No mode exists (empty or all-missing data): nothing to fill with.
            data_out = data_in.copy()
        else:
            # mode() returns a DataFrame; passing it to fillna directly would
            # align on the row index and fill only the matching rows. Taking
            # the first row gives one fill value per column.
            data_out = data_in.fillna(modes.iloc[0])
    elif method in ('slinear', 'quadratic'):
        data_out = data_in.interpolate(method=method)
    elif method == 'polynomial':
        # NOTE(review): despite its name this option fills with the column
        # mean rather than interpolating; kept as-is -- confirm the intent.
        data_out = data_in.fillna(data_in.mean(numeric_only=True))
    else:
        number = to_float(params['value'])
        fill_value = params['value'] if number is None else number
        data_out = data_in.fillna(fill_value)

    # Write out the result
    print(data_out)
    # </editable>


# Run the missing-value handling step only when this file is executed as a script.
if __name__ == '__main__':
    execute()

 

posted @ 2021-03-03 19:52  我当道士那儿些年  阅读(94)  评论(0)  编辑  收藏  举报