import numpy as np
import pandas
# Load the clinical records of the heart-disease patients from the workbook.
data = pandas.read_excel('心脏病患者临床数据.xlsx')
# Bare expression: shows the table in an interactive session, no effect in a
# plain script run.
data

# Encode sex as an integer: '男' (male) -> 0, any other value -> 1.
xingbie = [0 if sex == '男' else 1 for sex in data['性别']]

# Encode the age band: '<70' -> 1, '70-80' -> 2, any other value -> 3.
ages = [
    1 if band == '<70' else 2 if band == '70-80' else 3
    for band in data['年龄']
]

# Encode the length of stay: '<7' -> 1, '7-14' -> 2, any other value -> 3.
days = [
    1 if stay == '<7' else 2 if stay == '7-14' else 3
    for stay in data['住院天数']
]

# NOTE: data1 is the very same DataFrame object as data (no copy is made),
# so the encoded columns below also replace the text columns seen via data.
data1 = data
data1['性别'] = xingbie
data1['年龄'] = ages
data1['住院天数'] = days

# Convert the encoded table to a NumPy array for row-wise access.
data_arr = np.array(data1)
# Bare expression again: only displays the array in an interactive session.
data_arr
# Classify one patient profile with a naive Bayes model estimated by counting.
def _nb_joint(hits, class_rows, total_rows):
    """Return P(class) * prod_i P(feature_i | class) from raw match counts.

    A class that has no rows at all contributes 0.0 instead of dividing by
    zero.
    """
    if class_rows == 0:
        return 0.0
    likelihood = 1.0
    for hit in hits:
        likelihood *= hit / class_rows
    return likelihood * (class_rows / total_rows)


def NB(xingbie, ages, KILLP, drink, smoke, days, data=None):
    """Estimate which of two diagnoses is more likely for a patient profile.

    Every row of the table is read positionally: columns 0-5 are compared
    with ``xingbie``, ``ages``, ``KILLP``, ``drink``, ``smoke`` and ``days``
    (in that order) and column 6 is the diagnosis label.  Rows labelled
    '心梗' (myocardial infarction) form the first class; every other row is
    counted in the second class, which the printed message calls
    '不稳定性心绞痛' (unstable angina).

    The posterior of each class is P(class) * prod_i P(feature_i | class)
    divided by the sum of that quantity over both classes, so the two
    posteriors always add up to 1.  (Dividing by the product of the feature
    marginals instead is not consistent with the naive Bayes model and can
    yield "probabilities" greater than 1.)

    The two posteriors and a one-line verdict are printed; on a tie the
    second class is reported.

    Args:
        xingbie: Value to match in column 0 (encoded sex).
        ages: Value to match in column 1 (encoded age band).
        KILLP: Value to match in column 2.
        drink: Value to match in column 3.
        smoke: Value to match in column 4.
        days: Value to match in column 5 (encoded length of stay).
        data: Optional iterable of rows to estimate the model from.  When
            omitted, the module-level ``data_arr`` is used.

    Returns:
        A ``(y1_x, y2_x)`` tuple: the posterior of '心梗' and the posterior
        of the other class.

    Raises:
        ZeroDivisionError: If there are no rows, or if no class has a
            non-zero likelihood for the requested combination, which leaves
            the posterior undefined.  The exception type matches what the
            unguarded divisions used to raise.
    """
    rows = data_arr if data is None else data
    query = (xingbie, ages, KILLP, drink, smoke, days)

    # Per-feature match counts, kept separately for the two classes.
    mi_hits = [0] * len(query)
    other_hits = [0] * len(query)
    mi_rows = 0
    other_rows = 0
    for row in rows:
        if row[6] == '心梗':
            mi_rows += 1
            hits = mi_hits
        else:
            other_rows += 1
            hits = other_hits
        for col, wanted in enumerate(query):
            if row[col] == wanted:
                hits[col] += 1

    total_rows = mi_rows + other_rows
    if total_rows == 0:
        raise ZeroDivisionError('NB() needs at least one data row')

    joint_mi = _nb_joint(mi_hits, mi_rows, total_rows)
    joint_other = _nb_joint(other_hits, other_rows, total_rows)

    # Evidence P(x) under the naive Bayes model: the sum of the class joints.
    evidence = joint_mi + joint_other
    if evidence == 0:
        raise ZeroDivisionError(
            'no class has rows matching every requested feature value; '
            'the posterior is undefined'
        )

    y1_x = joint_mi / evidence
    print(y1_x)
    y2_x = joint_other / evidence
    print(y2_x)

    # Report the more likely diagnosis (a tie goes to the second class).
    if y1_x > y2_x:
        print('病人患心梗的可能更大,可能性为:', y1_x)
    else:
        print('病人患不稳定性心绞痛的可能更大,可能性为:', y2_x)
    return y1_x, y2_x
# Example query: sex = male (0), age < 70 (1), KILLP = 1, drinks = '是' (yes),
# smokes = '是' (yes), length of stay < 7 days (1).
NB(xingbie=0, ages=1, KILLP=1, drink='是', smoke='是', days=1)