import math
from math import log
import numpy as np
import operator
import csv
def loaddata():
    """Build the toy watermelon-style training set.

    Returns:
        dataSet: list of 17 samples; columns 0-5 are integer-coded
            features and the last column is the class label ('yes'/'no').
        feature_name: the six feature-column names.
    """
    rows = (
        (0, 0, 0, 0, 0, 0, 'yes'),
        (1, 0, 1, 0, 0, 0, 'yes'),
        (1, 0, 0, 0, 0, 0, 'yes'),
        (0, 0, 1, 0, 0, 0, 'yes'),
        (2, 0, 0, 0, 0, 0, 'yes'),
        (0, 1, 0, 0, 1, 1, 'yes'),
        (1, 1, 0, 1, 1, 1, 'yes'),
        (1, 1, 0, 0, 1, 0, 'yes'),
        (1, 1, 1, 1, 1, 0, 'no'),
        (0, 2, 2, 0, 2, 1, 'no'),
        (2, 2, 2, 2, 2, 0, 'no'),
        (2, 0, 0, 2, 2, 1, 'no'),
        (0, 1, 0, 1, 0, 0, 'no'),
        (2, 1, 1, 1, 0, 0, 'no'),
        (1, 1, 0, 0, 1, 1, 'no'),
        (2, 0, 0, 2, 2, 0, 'no'),
        (0, 0, 1, 1, 1, 0, 'no'),
    )
    # Callers mutate rows via slicing/extend, so hand out fresh lists.
    dataSet = [list(r) for r in rows]
    feature_name = ['a1', 'a2', 'a3', 'a4', 'a5', 'a6']
    return dataSet, feature_name
def entropy(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in dataSet.

    Each row's last element is its class label.

    The original implementation hard-coded exactly two classes (it copied
    the label keys into a 3-slot list and read only slots 0 and 1), which
    raised KeyError on single-class data, IndexError on more than three
    classes, and silently ignored the third class.  This version handles
    any number of classes.

    Args:
        dataSet: non-empty list of rows; row[-1] is the class label.

    Returns:
        Entropy in bits (0.0 for a pure, single-class set).
    """
    m = len(dataSet)
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]  # class label is the last column
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    e = 0.0
    for count in labelCounts.values():
        p = count / m
        e -= p * math.log2(p)  # p > 0 by construction, so log2 is safe
    return e
def splitDataSet(dataSet, axis, value):
    """Select the rows whose feature `axis` equals `value`.

    Args:
        dataSet: list of rows (lists).
        axis: column index to filter on.
        value: required value in that column.

    Returns:
        New list of new rows, each with column `axis` removed.
    """
    return [row[:axis] + row[axis + 1:] for row in dataSet if row[axis] == value]
def calcEnt(dataSet):
    """Shannon entropy (base 2) of the class-label column.

    Each row's last element is taken as its class label; works for any
    number of distinct labels.
    """
    total = len(dataSet)
    counts = {}
    for row in dataSet:
        counts[row[-1]] = counts.get(row[-1], 0) + 1
    ent = 0.0
    for c in counts.values():
        prob = float(c) / total
        ent -= prob * log(prob, 2)
    return ent
def chooseBestFeature(dataSet):
    """Return the index of the feature with the highest information gain (ID3).

    The original mixed two entropy implementations (the two-class-only
    `entropy` for the parent set, `calcEnt` for the children) and carried
    three dead locals (`ll`, `res`, `len1`); it now uses `calcEnt`
    throughout — identical values on two-class data, and no crash on
    datasets with more classes.

    Args:
        dataSet: non-empty list of rows; the last column is the class
            label, the rest are candidate features.

    Returns:
        Index of the best feature, or -1 if no split yields positive gain.
    """
    numFeatures = len(dataSet[0]) - 1
    baseEntropy = calcEnt(dataSet)  # entropy before any split
    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = {example[i] for example in dataSet}
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcEnt(subDataSet)  # weighted child entropy
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature
# Smoke-run: load the toy data and compute the first split feature once.
# NOTE(review): the return value is discarded and myDat/feature_name are
# rebuilt again before training below — this looks like leftover debug
# code; confirm it can be removed.
myDat, feature_name = loaddata()
chooseBestFeature(myDat)
def classVote(classList):
    """Majority vote over a non-empty list of class labels.

    Returns the most frequent label; on a tie, the label that first
    reached the winning count (i.e. first inserted) wins — same outcome
    as a stable descending sort on counts.
    """
    tally = {}
    for label in classList:
        tally[label] = tally.get(label, 0) + 1
    winner, _count = max(tally.items(), key=operator.itemgetter(1))
    return winner
def trainTree(dataSet, feature_name):
    """Recursively build an ID3 decision tree.

    Args:
        dataSet: non-empty list of rows; the last column is the class label.
        feature_name: names of the remaining feature columns (no longer
            mutated — the original's `del [feature_name[bestFeat]]`
            destructively shrank the *caller's* list as a side effect).

    Returns:
        A label string for a leaf, or a nested dict of the form
        {feature_name: {feature_value: subtree, ...}}.
    """
    classList = [example[-1] for example in dataSet]
    # All samples share one label: pure leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # No features left to split on: majority-vote leaf.
    if len(dataSet[0]) == 1:
        return classVote(classList)
    bestFeat = chooseBestFeature(dataSet)
    bestFeatName = feature_name[bestFeat]
    myTree = {bestFeatName: {}}
    featValues = [example[bestFeat] for example in dataSet]
    # Copy-without-element instead of in-place del: the caller's list
    # stays intact, and recursion no longer needs defensive re-copies.
    remaining_names = feature_name[:bestFeat] + feature_name[bestFeat + 1:]
    for value in set(featValues):
        subset = splitDataSet(dataSet, bestFeat, value)
        myTree[bestFeatName][value] = trainTree(subset, remaining_names)
    return myTree
# Reload a fresh copy of the data (trainTree deletes entries from the
# feature-name list it is given, so the earlier variables are unreliable),
# train the ID3 tree, and print its nested-dict form.
myDat, feature_name = loaddata()
myTree = trainTree(myDat, feature_name)
print(myTree)
# (removed: unrelated advertisement/footer text scraped from the source web page;
#  it was not Python code and made the file unparseable)