python 隐马尔可夫链
马尔可夫状态转移矩阵
Out[72]:
Sunny Cloudy Rainy
Sunny 0.50 0.375 0.125
Cloudy 0.25 0.125 0.625
Rainy 0.25 0.375 0.375
混淆矩阵
dry dryish damp soggy
Sunny 0.60 0.20 0.15 0.05
Cloudy 0.25 0.25 0.25 0.25
Rainy 0.05 0.10 0.35 0.50
初始向量概率
Sunny 0.63
Cloudy 0.17
Rainy 0.20
dtype: float64
观测状态
obs=('soggy','dryish','dry')
# -*- coding: utf-8 -*-
"""
作者:罗干
Thomas luo
代码优雅是非常必要的
学习算法的最佳方式是分析代码
伪代码与代码的区别等价于猴子与人之间的区别
"""
from numpy import *
import pandas as pd
def viterbi(obs, states, start_p, trans_p, emit_p, use_max=False):
    """Return the highest-scoring hidden state for every observation step.

    A score vector over the hidden states is built for each time step with
    the HMM recursion ``score_t[y] = combine_x(score_{t-1}[x] * trans_p[x][y])
    * emit_p[y][obs[t]]``, and the single best state of each step is reported.
    No back-pointers are kept, so the result is a per-step best state, not a
    back-traced path.

    Args:
        obs: Sequence of observation labels; each one is used as a column
            label of ``emit_p``.
        states: Hidden-state labels; each one is used as a column label of
            ``trans_p`` and as a row label of the selected ``emit_p`` column.
        start_p: ``pd.Series`` of initial probabilities indexed by state.
        trans_p: ``pd.DataFrame`` with source states as rows and destination
            states as columns.
        emit_p: ``pd.DataFrame`` with states as rows and observation labels
            as columns.
        use_max: If false (the default, and the historical behaviour), the
            contributions of all predecessor states are summed (forward
            algorithm). If true, only the largest contribution is kept
            (Viterbi-style max-product score).

    Returns:
        A list with one single-entry dict ``{state: score}`` per observation,
        or an empty list when ``obs`` is empty.
    """
    if len(obs) == 0:
        return []

    # Initial step: emission probability of the first observation times the
    # initial state probability, aligned on the state labels.
    steps = [emit_p[obs[0]] * start_p]

    for t in range(1, len(obs)):
        # Column of the emission matrix selected by the current observation.
        emission = emit_p[obs[t]]
        previous = steps[-1]
        scores = []
        for state in states:
            # trans_p[state] holds the probabilities of moving from every
            # state into `state`; the product is aligned on state labels.
            candidates = previous * trans_p[state] * emission[state]
            # Series methods are used on purpose: the file does
            # `from numpy import *`, which shadows the builtin sum/max.
            scores.append(candidates.max() if use_max else candidates.sum())
        steps.append(pd.Series(scores, index=states))

    result = []
    for vector in steps:
        # Keep only the top-scoring state of this step as {state: score}.
        best = vector.sort_values(ascending=False).iloc[:1]
        result.append(dict(best))
    return result
# Example model: three hidden weather states and a three-step observation
# sequence, fed to viterbi() and printed.
states = ('Sunny', 'Cloudy', 'Rainy')
obs = ('soggy', 'dryish', 'dry')

# Initial probability of each hidden state.
start_p = pd.Series({'Sunny': 0.63, 'Cloudy': 0.17, 'Rainy': 0.20})

# Transition probabilities: one row per source state, one column per
# destination state.
trans_p = pd.DataFrame.from_dict(
    {
        'Sunny': [0.50, 0.375, 0.125],
        'Cloudy': [0.25, 0.125, 0.625],
        'Rainy': [0.25, 0.375, 0.375],
    },
    orient='index',
    columns=['Sunny', 'Cloudy', 'Rainy'],
)

# Emission probabilities: one row per hidden state, one column per
# observation label.
emit_p = pd.DataFrame.from_dict(
    {
        'Sunny': [0.60, 0.20, 0.15, 0.05],
        'Cloudy': [0.25, 0.25, 0.25, 0.25],
        'Rainy': [0.05, 0.10, 0.35, 0.50],
    },
    orient='index',
    columns=['dry', 'dryish', 'damp', 'soggy'],
)

tt = viterbi(obs, states, start_p, trans_p, emit_p)
print(tt)
[{'Rainy': 0.1}, {'Cloudy': 0.013656250000000002}, {'Sunny': 0.0061509375}]
计算结果(采用求和的前向算法,取每个时刻概率最大的隐藏状态):雨天、多云、晴天