预处理算法_8_异常值处理
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# <editable>
def execute(params=None, inputs=None, engine=None):
    # <editable>
    """Read selected columns of a table, fill or drop missing values, print and return them.

    Called with no arguments the function behaves like the original script:
    it connects to the MySQL ``analysis`` database on 127.0.0.1, selects
    ``SUNACTIVITY`` from the table ``纯随机性检验`` and applies mean imputation.

    Args:
        params: Optional overrides for the built-in settings. Recognised keys:
            ``columns`` (text placed after ``select``), ``method`` (see
            below) and, for the currently unused ``outRange`` helper,
            ``lower_quantile`` / ``upper_quantile``.
        inputs: Optional overrides for the data source; ``table`` is the
            name placed after ``from``.
        engine: Optional connection object to hand to
            ``pandas.read_sql_query``. When omitted, a SQLAlchemy engine for
            the built-in MySQL URL is created.

    Supported ``method`` values:
        drop                  -- remove rows containing missing values
        Median_interpolation  -- fill with each column's median
        Mode_interpolation    -- fill with each column's most frequent value
        Mean_interpolation    -- fill with each column's mean
        slinear               -- ``DataFrame.interpolate(method='slinear')``
        quadratic             -- ``DataFrame.interpolate(method='quadratic')``
    Any other value falls back to mean imputation.

    Returns:
        The processed ``pandas.DataFrame`` (it is also printed).
    """
    import warnings

    import numpy as np
    import pandas as pd

    warnings.filterwarnings('ignore')

    # Built-in settings; caller-supplied dictionaries override individual keys.
    settings = {
        "columns": "SUNACTIVITY",
        "method": "Mean_interpolation",
    }
    settings.update(params or {})
    source = {"table": '纯随机性检验'}
    source.update(inputs or {})

    # Connect to the database.
    if engine is None:
        # Imported lazily so callers passing their own connection do not
        # need SQLAlchemy installed.
        from sqlalchemy import create_engine
        # NOTE(review): credentials are hard-coded in the URL; consider
        # loading them from configuration instead.
        engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

    # Select the target data. Column and table names are spliced into the
    # statement as identifiers, so they must come from a trusted source.
    data_sql = 'select ' + settings['columns'] + ' from ' + source['table']
    data_in = pd.read_sql_query(data_sql, engine)

    # NOTE(review): row 1 is overwritten with NaN before processing --
    # presumably a stand-in for the disabled outlier pass below; confirm
    # before running against real data. The guard avoids an IndexError on
    # tables with fewer than two rows.
    if len(data_in) > 1:
        data_in.iloc[1] = np.nan
    print(data_in)

    def outRange(Ser1):
        """Blank out values of ``Ser1`` lying outside the 1.5 * range fences.

        The fences are ``lower - 1.5 * (upper - lower)`` and
        ``upper + 1.5 * (upper - lower)``, where ``lower`` / ``upper`` are the
        quantiles named by ``lower_quantile`` / ``upper_quantile`` in the
        settings (defaulting to 0.25 and 0.75). ``Ser1`` is modified in
        place and returned.
        """
        QL = Ser1.quantile(float(settings.get('lower_quantile', 0.25)))
        QU = Ser1.quantile(float(settings.get('upper_quantile', 0.75)))
        IQR = QU - QL
        # Compute both fences before mutating the series.
        upper_fence = QU + 1.5 * IQR
        lower_fence = QL - 1.5 * IQR
        Ser1.loc[Ser1 > upper_fence] = None
        Ser1.loc[Ser1 < lower_fence] = None
        return Ser1

    # Outlier masking is currently disabled. To enable it:
    # for name in data_in.columns:
    #     data_in[name] = outRange(data_in[name])

    # Handle the missing values according to the requested method.
    method = settings['method']
    if method == 'drop':
        data_out = data_in.dropna()
    elif method == 'Median_interpolation':
        data_out = data_in.fillna(data_in.median())
    elif method == 'Mode_interpolation':
        # DataFrame.mode() returns a frame (one row per tied mode). Passing
        # that frame to fillna aligns on row labels and leaves most gaps
        # untouched, so fill with its first row instead.
        modes = data_in.mode()
        if len(modes):
            data_out = data_in.fillna(modes.iloc[0])
        else:
            # No non-missing value anywhere: nothing to fill with.
            data_out = data_in.copy()
    elif method in ('slinear', 'quadratic'):
        data_out = data_in.interpolate(method=method)
    else:
        # 'Mean_interpolation' and every unrecognised method name.
        data_out = data_in.fillna(data_in.mean())

    # Write out the result.
    print(data_out)
    return data_out
    # </editable>
# Run the processing step only when this file is executed as a script.
if __name__ == '__main__':
    execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。