预处理算法_8_异常值处理

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# <editable>

def outRange(Ser1, lower_quantile=0.25, upper_quantile=0.75):
    """Mask outliers in a numeric Series using the 1.5 * IQR fence rule.

    Args:
        Ser1: pandas Series of numeric values.
        lower_quantile: Quantile in [0, 1] marking the lower edge of the
            inter-quantile range. Anything ``float()`` accepts is allowed.
        upper_quantile: Quantile in [0, 1] marking the upper edge.

    Returns:
        A new Series in which every value above ``QU + 1.5 * IQR`` or below
        ``QL - 1.5 * IQR`` is replaced by NaN. ``Ser1`` itself is not modified.
    """
    QL = Ser1.quantile(float(lower_quantile))
    QU = Ser1.quantile(float(upper_quantile))
    IQR = QU - QL
    is_outlier = (Ser1 > QU + 1.5 * IQR) | (Ser1 < QL - 1.5 * IQR)
    # mask() returns a copy, so the caller's Series is left untouched.
    return Ser1.mask(is_outlier)


def _fill_missing(data_in, method):
    """Return a copy of ``data_in`` with missing values handled by ``method``.

    Supported methods:
        drop                  -- drop every row that contains a missing value
        Median_interpolation  -- fill with the column median
        Mode_interpolation    -- fill with the column mode
        Mean_interpolation    -- fill with the column mean
        slinear               -- first-order spline interpolation
        quadratic, polynomial -- second-order spline interpolation

    Any other value falls back to mean filling.

    Args:
        data_in: pandas DataFrame that may contain NaN values.
        method: Name of the strategy, as listed above.

    Returns:
        A new DataFrame; ``data_in`` is not modified.
    """
    if method == 'drop':
        return data_in.dropna()
    if method == 'Median_interpolation':
        return data_in.fillna(data_in.median(numeric_only=True))
    if method == 'Mode_interpolation':
        modes = data_in.mode()
        if modes.empty:
            # No non-missing value exists, so there is nothing to fill with.
            return data_in.copy()
        # mode() returns a DataFrame with one row per tied mode. Passing that
        # DataFrame to fillna() aligns on the row index and fills only the
        # matching rows, so take the first row to fill per column instead.
        return data_in.fillna(modes.iloc[0])
    if method == 'slinear':
        return data_in.interpolate(method='slinear')
    if method in ('quadratic', 'polynomial'):
        # 'polynomial' is the documented name; 'quadratic' is the pandas one.
        return data_in.interpolate(method='quadratic')
    # 'Mean_interpolation' and any unrecognised method both use the mean.
    return data_in.fillna(data_in.mean(numeric_only=True))


def execute():
    # <editable>
    """Load one column from MySQL, handle its missing values, print the result.

    The query result has its second row set to NaN, is printed, is optionally
    outlier-masked, and is then passed through the strategy named by
    ``params['method']``. The processed frame is printed as well.

    Outlier masking runs only when ``params`` contains both
    ``lower_quantile`` and ``upper_quantile``.
    """
    import pandas as pd
    import numpy as np
    import warnings
    warnings.filterwarnings('ignore')
    from sqlalchemy import create_engine

    # Connect to the database.
    # NOTE(review): the credentials are hard-coded in the URL below.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
    params = {
        "columns": "SUNACTIVITY",
        "method": "Mean_interpolation",
    }
    inputs = {"table": '纯随机性检验'}

    # Select the target data.
    # NOTE(review): the SQL is built by concatenation; this is only safe
    # while the column and table names are the constants defined above.
    data_sql = 'select ' + params['columns'] + ' from ' + inputs['table']
    data_in = pd.read_sql_query(data_sql, engine)
    # Set every column of the second row to NaN so there is a gap to handle.
    data_in.iloc[1] = np.nan
    print(data_in)

    # Optionally turn outliers into missing values before filling.
    if 'lower_quantile' in params and 'upper_quantile' in params:
        for column in data_in.columns:
            data_in[column] = outRange(
                data_in[column],
                params['lower_quantile'],
                params['upper_quantile'],
            )

    # Handle the missing values.
    data_out = _fill_missing(data_in, params['method'])

    # Write out the result.
    print(data_out)

# </editable>


# Run the pipeline only when this file is executed directly as a script.
if __name__ == '__main__':
    execute()

 

posted @ 2021-03-03 19:54  我当道士那儿些年  阅读(181)  评论(0)  编辑  收藏  举报