错误查漏记载
</pre><pre name="code" class="python">
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 11:12:44 2016

@author: 白于空

Omission ("gap") model for the 3D lottery: for every digit, record the
history of gaps between the draws in which that digit appears.

Module-level results:
    data -- object array holding one int32 array of drawn digits per draw
    ylz  -- dict: digit -> 0/1 list, 1 where the digit appears in that draw
    ylxl -- dict: digit -> 1-based positions of the hits plus a closing
            sentinel position len(ylz[digit]) + 1
    zc   -- dict: digit -> gap sequence (first hit position, then the
            distances between consecutive positions in ylxl)
"""
import numpy as np
import pandas as pd

# Decimal digits 0-9.  The previous np.arange(0, 9) stopped at 8, so digit 9
# was never tracked.
DIGITS = range(10)
# Number of leading draws taken from the CSV file.
N_DRAWS = 300

# The 'value' column holds one comma-separated string of digits per draw.
raw = np.array(pd.read_csv('F:/2014.csv').value)[:N_DRAWS]
data = np.empty(len(raw), dtype=object)
for i, row in enumerate(raw):
    data[i] = np.array(row.split(','), dtype='int32')

# Hit series: ylz[k][j] is 1 when digit k occurs in draw j, otherwise 0.
ylz = {}
for k in DIGITS:
    ylz[k] = [1 if k in draw else 0 for draw in data]

# Gap sequences.
#
# For a 0/1 series, a sentinel hit is appended so the trailing run of misses
# is closed; the result lists the 1-based position of the first hit followed
# by the distance between each pair of consecutive hits.  The entries always
# sum to len(series) + 1.
#   [0, 1, 0, 0, 1, 1, 0, 0] -> [2, 3, 1, 3]
#   [0, 0, 0, 1, 1]          -> [4, 1, 1]
ylxl = {}
zc = {}
for k in DIGITS:
    # The sentinel goes on a copy so the exported hit series in ylz stays
    # free of the artificial hit.
    closed = ylz[k] + [1]
    # Store the position of every hit (the old code stored the constant 1,
    # which made every gap come out as 0).
    ylxl[k] = [pos for pos, hit in enumerate(closed, start=1) if hit == 1]
    # prepend=0 turns the first 1-based position into the first gap, giving
    # one flat sequence instead of a nested [int, array] list.
    zc[k] = np.diff(ylxl[k], prepend=0).tolist()

# zc now holds the historical gap sequence of every digit for the 2014 draws.
# TODO: derive the per-digit statistics from zc: historical gaps, maximum
# gap, mean gap and expected gap.
----------------上面是数组预处理,设为模块 zcp3d----------------下面的代码一直到调用 kNN 分类函数 Classify 时出现了问题。数据集我已经采集好了,请帮我看看这个分类函数。也可以把 50-240 改成 30-300,这样数据集就更多了:选一部分用于训练,一部分用于检验,看看准确率。这里只处理了 [1](即 ylz[1],数字 1)的序列。如果分类成功了,请告诉我准确率(百分比)。
"""
k-NN experiment on the 3D-lottery hit series of digit 1.

Every sample is built from a history prefix of the hit series, so the gap
statistics are recomputed for each history length.  Training uses history
lengths TRAIN_START .. TRAIN_STOP - 1 (50-239).
"""
import numpy as np

# History lengths used to build the training set.
TRAIN_START = 50
TRAIN_STOP = 240


def _load_hit_series(digit=1):
    """Return the 0/1 hit list of *digit* from the zcp3d preprocessing module."""
    # Imported on first use: zcp3d builds its tables (including the CSV read)
    # at import time.  The old code imported zcp3d but then referenced a bare
    # `ylz`, which raised NameError.
    import zcp3d
    return zcp3d.ylz[digit]


def sd(c):
    """Return the mean squared deviation of *c* from its mean.

    This is the quantity the original formula sum((c[i] - mean)**2) / len(c)
    describes once it is summed over every element.  The old loop overwrote
    its result on each pass, skipped the last element and failed for
    single-element input.

    NOTE(review): despite the name, no square root is taken (the original
    formula had none) -- confirm whether a standard deviation was intended.

    Raises:
        ValueError: if *c* is empty.
    """
    values = np.asarray(c, dtype=float)
    if values.size == 0:
        raise ValueError("sd() needs at least one value")
    return float(np.mean((values - values.mean()) ** 2))


def train_set(p, series=None):
    """Build the feature vector from the first *p* entries of a hit series.

    Args:
        p: history length; only series[0:p] is used.
        series: 0/1 hit sequence.  Defaults to the digit-1 series of zcp3d.

    Returns:
        [mean gap, max gap, sd of gaps, min gap, most recent gap], where a
        gap is the distance between two consecutive hits inside the history.

    Raises:
        IndexError: if *p* exceeds the length of the series.
        ValueError: if the history holds fewer than two hits (no gap exists).
    """
    if series is None:
        series = _load_hit_series()
    if p > len(series):
        raise IndexError("history length %d exceeds series length %d"
                         % (p, len(series)))
    history = np.asarray(series[:p])
    hits = np.flatnonzero(history == 1)
    if hits.size < 2:
        raise ValueError("need at least two hits in the first %d draws" % p)
    # All consecutive differences.  The old slice tu[1:len-1] - tu[0:len-2]
    # silently dropped the most recent gap.
    gaps = np.diff(hits)
    return [float(np.mean(gaps)), float(np.max(gaps)), sd(gaps),
            float(np.min(gaps)), float(gaps[-1])]


def _build_dataset(series, start, stop):
    """Return (features, labels) for every history length in [start, stop)."""
    features = [train_set(p, series) for p in range(start, stop)]
    # series[0:p] ends at index p - 1, so the draw to predict is series[p].
    # The old code used p + 1, which skipped one draw.
    labels = [series[p] for p in range(start, stop)]
    return features, labels


def Classify(newInput, dataSet, labels, k):
    """Classify *newInput* by majority vote among its k nearest samples.

    Args:
        newInput: feature vector with one value per column of dataSet.
        dataSet: 2-D array-like, one training sample per row.
        labels: sequence of labels, labels[i] belonging to dataSet[i].
        k: number of neighbours that vote; capped at the number of samples.

    Returns:
        The label with the most votes.  On a tie the label whose first vote
        came from the closer neighbour wins.

    Raises:
        ValueError: if dataSet has no rows or k is smaller than 1.
    """
    samples = np.asarray(dataSet, dtype=float)
    numSamples = samples.shape[0]
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(int(k), numSamples)

    # Euclidean distance to every sample; broadcasting replaces np.tile.
    diff = samples - np.asarray(newInput, dtype=float)
    distance = np.sqrt(np.sum(diff ** 2, axis=1))
    nearest = np.argsort(distance)[:k]

    votes = {}
    for idx in nearest:
        voteLabel = labels[idx]
        votes[voteLabel] = votes.get(voteLabel, 0) + 1
    # max() returns the first label reaching the top count.  The old loop
    # compared against a maxCount that was never initialised.
    return max(votes, key=votes.get)


def main():
    """Train on history lengths 50-239 and classify one later draw."""
    series = _load_hit_series()
    train, lable = _build_dataset(series, TRAIN_START, TRAIN_STOP)

    # t is the offset of the test point behind the training range; keep it
    # below 70.
    t = 1
    dataSet, labels = np.array(train), lable
    testX = train_set(TRAIN_STOP + t, series)
    y_ = series[TRAIN_STOP + t]
    k = 9
    outputLabel = Classify(testX, dataSet, labels, k)
    print(outputLabel, y_)


if __name__ == "__main__":
    main()