actanble

导航

错误查漏记载

</pre><pre name="code" class="python">
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 11:12:44 2016

@author: 白于空
"""

#遗漏模型,3D彩票;记录每个号值的历史遗漏

import numpy as np
import pandas as pd
# Load the draw results. The CSV is expected to have a column named
# "value" holding one comma-separated draw per row (e.g. "1,2,3").
data = pd.read_csv('F:/2014.csv')
data = np.array(data.value)
# Only the first 300 draws are used.
data = data[np.arange(0, 300)]
# Parse every draw string into an int32 array of its digits.
data = [np.array(row.split(','), dtype='int32') for row in data]

# ylz[k] is a 0/1 sequence over the draws: 1 when digit k appears in the
# draw, 0 otherwise.  Digits run from 0 to 9 inclusive (the original
# range stopped at 8 and silently skipped digit 9).
ylz = {}
for k in range(10):
    ylz[k] = [1 if k in draw else 0 for draw in data]

# For each 0/1 sequence, collect the 1-based positions of the hits after
# appending a sentinel hit at the end, so that the final (still open)
# omission run is closed and counted as well.
# Examples of the resulting omission sequence stored in ``zc``:
#   [0, 0, 0, 1, 1]          -> positions 4, 5, 6    -> [4, 1, 1]
#   [0, 1, 0, 0, 1, 1, 0, 0] -> positions 2, 5, 6, 9 -> [2, 3, 1, 3]
ylxl = {}
for s in range(10):
    # NOTE: the sentinel is appended to ylz[s] itself (as in the original
    # script), so every ylz sequence is one element longer than ``data``.
    ylz[s].append(1)
    ylxl[s] = [pos + 1 for pos, hit in enumerate(ylz[s]) if hit == 1]

# zc[d] is the flat omission sequence of digit d: the position of the
# first hit followed by the distances between consecutive hits.
zc = {}
for oi in range(10):
    tu = np.array(ylxl[oi])
    zc[oi] = [int(tu[0])] + np.diff(tu).tolist()
zc  # historical omission sequence of every digit (echoed in interactive sessions)
# Next step: derive per-digit statistics from zc (historical omission,
# maximum omission, mean omission, expected omission).


----------------上面是数组预处理,设为模块zcp3d---------------------下面一直到调用knn分类Classify时出现问题了;数据集我已经采集好了,你帮我看看那个分类;也可以把50-240改成30-300,这样数据集就多了;选一部分拿来训练,一部分检验,看看准确率。这个只是[1]的;如果分类成功了,跟我说下百分比。

import numpy as np
import zcp3d
'''
每一次的zcc和yxdl都得重新计算生成新的序列集;;50-240
'''
def sd(c):
    """Return the population standard deviation of the values in *c*.

    The original implementation overwrote its result on every loop pass,
    so it returned the squared deviation of a single element divided by
    the length, and failed with ``UnboundLocalError`` for inputs shorter
    than two elements.  This version averages the squared deviations of
    all elements and takes the square root.

    Parameters
    ----------
    c : array-like of numbers
        Non-empty one-dimensional sequence.

    Returns
    -------
    float
        ``sqrt(mean((c - mean(c)) ** 2))``; ``0.0`` for a single element.

    Raises
    ------
    ValueError
        If *c* is empty.
    """
    values = np.asarray(c, dtype=float)
    if values.size == 0:
        raise ValueError("sd() requires at least one value")
    deviations = values - values.mean()
    return float(np.sqrt(np.mean(deviations ** 2)))
def train_set(p):
    """Build the feature vector describing the first *p* draws of digit 1.

    The hit sequence ``ylz[1]`` (1 = digit 1 appeared in that draw) is cut
    to its first *p* entries, the gaps between consecutive hits are
    computed, and five summary features of those gaps are returned.

    The original slice ``tu[1:len-1] - tu[0:len-2]`` dropped the most
    recent gap, so the "last gap" feature was one hit out of date; all
    gaps are used here.

    Parameters
    ----------
    p : int
        Number of leading draws to use (draw indices ``0 .. p-1``).

    Returns
    -------
    list
        ``[mean gap, max gap, sd(gaps), min gap, most recent gap]``.

    Raises
    ------
    ValueError
        If the first *p* draws contain fewer than two hits, in which case
        no gap can be computed.
    """
    ylzc = ylz[1]
    ylzp = np.asarray(ylzc[0:p])
    # Indices of the draws in which the digit appeared.
    tu = np.flatnonzero(ylzp == 1)
    if len(tu) < 2:
        raise ValueError(
            "need at least two hits in the first %d draws to compute gaps" % p
        )
    # Distances between consecutive hits.
    zcc = np.diff(tu)
    train_x = [np.mean(zcc), np.max(zcc), sd(zcc), np.min(zcc), zcc[-1]]
    return train_x
# Build the kNN data set: one feature vector per prefix length 50..239.
# ``train`` holds the feature vectors, ``lable`` the matching targets.
train = []
lable = []
for ik in np.arange(50, 240):
    train.append(train_set(ik))
    # ``ylzc`` only exists inside train_set(), so the hit sequence of
    # digit 1 is read from ``ylz[1]`` directly (the original line raised
    # NameError).
    # NOTE(review): train_set(ik) uses draws 0..ik-1, so the "next" draw is
    # index ik; the original ``ik + 1`` offset is kept here - confirm
    # whether skipping draw ik is intended.
    lable.append(ylz[1][ik + 1])

def Classify(newInput, dataSet, labels, k):
    """Classify *newInput* by majority vote among its k nearest neighbours.

    Distances are Euclidean.  The original version read ``maxCount`` and
    ``maxIndex`` before assigning them, so every call ended in
    ``UnboundLocalError``; the vote is now resolved with ``max``.

    Parameters
    ----------
    newInput : array-like, shape (n_features,)
        Sample to classify.
    dataSet : array-like, shape (n_samples, n_features)
        Training samples.
    labels : sequence
        Label of each training sample; labels must be hashable.
    k : int
        Number of neighbours that vote.  Values larger than the number of
        training samples are clamped to that number.

    Returns
    -------
    object
        The label with the most votes.  On a tie, the label that was seen
        first when walking the neighbours from nearest to farthest wins.

    Raises
    ------
    ValueError
        If *dataSet* is empty or *k* is smaller than 1.
    """
    dataSet = np.asarray(dataSet, dtype=float)
    newInput = np.asarray(newInput, dtype=float)
    numSamples = dataSet.shape[0]
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(int(k), numSamples)

    # Broadcasting subtracts newInput from every row of dataSet.
    distance = np.sqrt(np.sum((dataSet - newInput) ** 2, axis=1))
    sortedDistIndices = np.argsort(distance)

    # Count the labels of the k nearest samples, nearest first.
    classCount = {}
    for idx in sortedDistIndices[:k]:
        voteLabel = labels[idx]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() returns the first key with the highest count, i.e. ties go to
    # the label inserted first.
    return max(classCount, key=classCount.get)

# Evaluate one held-out sample that lies t draws past the training range.
# (Original note: t must stay below 70.)
t = 1
dataSet, labels = np.array(train), lable
testX = train_set(240 + t)
# ``ylzc`` is local to train_set(); read the hit sequence of digit 1 from
# ``ylz[1]`` instead (the original line raised NameError).  The ``+ 1``
# offset mirrors the one used when the training labels were collected.
y_ = ylz[1][240 + t + 1]
k = 9
outputLabel = Classify(testX, dataSet, labels, k)
# Predicted label followed by the actual outcome.
print(outputLabel, y_)









        





posted on 2016-03-26 22:13  白于空  阅读(155)  评论(0)  编辑  收藏  举报