Relief 过滤式特征选择
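给定训练集 $\{(x_1,y_1),(x_2,y_2),\ldots,(x_m,y_m)\}$,对每个示例 $x_i$,Relief 先在 $x_i$ 的同类样本中寻找其最近邻 $x_{i,nh}$(称为"猜中近邻",near-hit),再从 $x_i$ 的异类样本中寻找其最近邻 $x_{i,nm}$(称为"猜错近邻",near-miss)。属性 $j$ 对应的相关统计量分量为 $\delta^j=\sum_i\left[-\mathrm{diff}(x_i^j,x_{i,nh}^j)^2+\mathrm{diff}(x_i^j,x_{i,nm}^j)^2\right]$:若属性 $j$ 为离散型,则取值相同时 $\mathrm{diff}=0$,否则为 $1$;若为连续型,则 $\mathrm{diff}=|x_a^j-x_b^j|$,且取值需先归一化到 $[0,1]$ 区间。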
代码如下:
#!/usr/bin/env python2 # -*- coding: utf-8 -*- """ Created on Wed Feb 28 20:16:09 2018 @author: jzc """ import numpy as np import csv from random import randrange from sklearn import preprocessing #抽样次数 m=8 def Compute_Distance_Discrete(diff_distance): # 使用欧几里得距离计算最近邻 counter = np.power(diff_distance,2) counter = np.sum(counter) counter = np.sqrt(counter) return counter def loadcsv(filename): """载入文件""" lines = csv.reader(open(filename,'r')) data = list(lines) for i in range(1,len(data)): data[i] = [float(x) for x in data[i]] result = np.array(data[1:]) features = result[:,1:-1] labels = result[:,-1] return features,labels def Relief(features,labels): #初始化 (n_samples,n_features)=np.shape(features) distance = np.zeros((n_samples,n_samples)) weights = np.zeros(n_features) nearHit= list() nearMiss= list() distance_sort=list() """寻找每个样本的距离""" for i in range(0,n_samples): for j in range(0,n_samples): diff_distance = features[i]-features[j] if i==j: distance[i,j]=99999 else: distance[i,j] = Compute_Distance_Discrete(diff_distance) for i in range(0,m): one_sample = randrange(0,n_samples,1) #随机选择一个样本 one_feature = features[one_sample] for index in range(n_samples): distance_sort.append([distance[one_sample,index],index,labels[index]]) #从小到大排序 distance_sort.sort(key = lambda x:x[0]) """寻找样本的猜错近邻和猜中近邻""" for index in range(n_samples): if nearHit ==[] and distance_sort[index][2]==labels[one_sample]: nearHit = features[distance_sort[index][1]] elif nearMiss==[] and distance_sort[index][2]!=labels[one_sample]: nearMiss = features[distance_sort[index][1]] elif nearHit!=[] and nearMiss!=[]: break; else: continue; sum_nh = list() sum_nm =list() # 若属性j离散,Xaj==Xbj 则diff的值为0;否则为1 for k in range(len(one_feature[:-2])): if one_feature[k] != nearHit[k]: sum_nh.append(1) else: sum_nh.append(0) if one_feature[k] != nearMiss[k]: sum_nm.append(1) else: sum_nm.append(0) #print sum_nh,sum_nm #print one_feature[-2:]-nearHit[-2:] """若为属性j为连续, diff(Xaj-Xbj)=|Xaj-Xbj| 并且Xaj,Xbj要归一化到[0,1]区间""" 
weights[-2:] = weights[-2:]-np.power(one_feature[-2:]-nearHit[-2:],2) +np.power(one_feature[-2:]-nearMiss[-2:],2) weights[:-2] = weights[:-2]-np.power(sum_nh,2)+np.power(sum_nm,2) #print weights/n_samples return weights/n_samples filename = '/Users/jzc/DeepLearning(7.8-)/data/watermelon3_0.csv' features,labels = loadcsv(filename) #features[-2:] = preprocessing.normalize(features[-2:],norm='l2') #print features for x in range(1,10): result = Relief(features,labels) print result #print features[0],labels[0]
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 使用C#创建一个MCP客户端
· ollama系列1:轻松3步本地部署deepseek,普通电脑可用
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 按钮权限的设计及实现