拉格朗日插值法

复制代码
# -*- coding: utf-8 -*-
import pandas as pd
from scipy.interpolate import lagrange
# Source workbook containing the gaps, and the destination for the repaired copy.
inputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\data\\missing_data.xls'
outputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\tmp\\missing_data_sale.xls'

# header=None makes pandas treat the first sheet row as data rather than
# as column names.
# NOTE(review): confirm the workbook really has no header row.
data = pd.read_excel(
    inputfile,
    header=None,
)

# Disabled outlier filter: it would blank out values of the u'销量' column
# that are below 400 or above 5000 so that they get interpolated as well.
# data[u'销量'][(data[u'销量'] < 400) | (data[u'销量'] > 5000)] = None
def ployinterp(s,n,k=5):
    """Estimate the value of ``s`` at label ``n`` by Lagrange interpolation.

    The interpolating polynomial is built from the non-null values found
    among the ``k`` integer labels before ``n`` and the ``k`` integer labels
    after ``n``. Labels that do not exist in ``s`` (for example when ``n`` is
    close to either end of the series) and labels holding null values are
    skipped.

    Args:
        s: pandas Series with integer labels.
        n: Integer label of the value to estimate.
        k: Number of neighbouring labels to consider on each side of ``n``.

    Returns:
        The interpolating polynomial evaluated at ``n``.
    """
    neighbours = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels that are not in the index; selecting
    # with a plain list of labels raises KeyError for them on current pandas,
    # which made every call near the ends of the series fail.
    y = s.reindex(neighbours)
    # Keep only the neighbours that actually carry a value.
    y = y[y.notnull()]
    return lagrange(y.index, list(y))(n)
# Fill every missing cell, column by column, with a Lagrange estimate taken
# from its neighbours in the same column, then write the result out.
for i in data.columns:
    # The null mask is computed once per column instead of once per row.
    # Filling a cell never turns another cell null, so the set of cells to
    # visit is unchanged.
    missing = data[i].isnull()
    for j in missing[missing].index:
        # data[i] is re-read on every call so that cells filled earlier in
        # this column take part in later estimates.
        # .loc writes into `data` itself; the chained form data[i][j] = ...
        # may assign to a temporary copy and leave `data` untouched.
        data.loc[j, i] = ployinterp(data[i], j)
        print(data.loc[j, i])
# NOTE(review): writing the legacy '.xls' format depends on the Excel writer
# engines available in the installed pandas -- confirm it is supported.
data.to_excel(outputfile, header=None, index=False)
复制代码

 

posted @   你若精彩,蝴蝶自来  阅读(794)  评论(1)  编辑  收藏  举报
努力加载评论中...
点击右上角即可分享
微信分享提示