from hmm_wb.prob_start import P as p_start
from hmm_wb.prob_trans import P as p_trans
from hmm_wb.prob_emit import P as p_emit
# Viterbi algorithm
def vtb(n, o, s, ps, pe, pt):
    """Run Viterbi decoding over observations ``o[0] .. o[n]``.

    Scores are combined by addition, so the tables are expected to hold
    log-probabilities.

    Args:
        n: Index of the last observation to decode (``len(o) - 1`` for the
            whole sequence). A negative value means "no observations".
        o: Indexable sequence of observations (e.g. a string).
        s: Iterable of state names; it is iterated several times.
        ps: Mapping ``state -> start score``.
        pe: Mapping ``state -> {observation -> emission score}``.
        pt: Mapping ``previous state -> {state -> transition score}``.

    Returns:
        A ``(ret, path)`` tuple. ``ret[x]`` is the best score of any state
        sequence ending in state ``x`` at step ``n``; ``path[x]`` is that
        sequence as a list of ``n + 1`` states. Both dicts are empty when
        ``n`` is negative.

    Raises:
        KeyError: If a state, transition or observation is missing from the
            score tables.
    """
    if n < 0:
        # Nothing to decode; the recursive formulation never terminated here.
        return {}, {}

    ret = {x: ps[x] + pe[x][o[0]] for x in s}

    # back[t - 1][x] is the best predecessor of state x at step t. Storing
    # back-pointers instead of copying a full path per state and step keeps
    # the work linear in n, and the loop avoids Python's recursion limit.
    back = []
    for t in range(1, n + 1):
        step_ret = {}
        step_back = {}
        for x in s:
            # Tuple comparison: highest score wins, ties go to the larger
            # predecessor name.
            step_ret[x], step_back[x] = max(
                (ret[lx] + pt[lx][x] + pe[x][o[t]], lx) for lx in s
            )
        ret = step_ret
        back.append(step_back)

    # Follow the back-pointers from every final state to rebuild its path.
    path = {}
    for x in s:
        states = [x]
        for step_back in reversed(back):
            states.append(step_back[states[-1]])
        states.reverse()
        path[x] = states
    return ret, path  # best scores and the matching paths
# Brute-force enumeration of every hidden-state path
def hmm(n, o, s, ps, pe, pt):
    """Return the joint probability of every state path for ``o[0] .. o[n]``.

    Probabilities are combined by multiplication, so the tables are expected
    to hold plain (non-log) probabilities. The result has one entry per
    possible path, i.e. its size grows exponentially with ``n``.

    Args:
        n: Index of the last observation to include. A negative value means
            "no observations".
        o: Indexable sequence of observations (e.g. a string).
        s: Iterable of state names (strings); it is iterated several times.
        ps: Mapping ``state -> start probability``.
        pe: Mapping ``state -> {observation -> emission probability}``.
        pt: Mapping ``previous state -> {state -> transition probability}``.

    Returns:
        Dict mapping a path key (state names joined with ``"-"``) to the
        probability of that path producing the observations. Empty when
        ``n`` is negative.

    Raises:
        KeyError: If a state, transition or observation is missing from the
            probability tables.
    """
    if n < 0:
        # Nothing to enumerate; the recursive formulation never terminated here.
        return {}

    # key -> (last state, probability). The last state is tracked explicitly
    # rather than parsed back out of the key, so state names may contain "-".
    paths = {x: (x, ps[x] * pe[x][o[0]]) for x in s}
    for t in range(1, n + 1):
        sym = o[t]
        extended = {}
        for key, (last, prob) in paths.items():
            for x in s:
                extended[key + "-" + x] = (x, prob * pt[last][x] * pe[x][sym])
        paths = extended
    return {key: prob for key, (_, prob) in paths.items()}
def fenci(path, obs):
    """Split ``obs`` into words according to a B/M/E/S tag sequence.

    ``'B'`` starts a word, ``'M'`` continues it, ``'E'`` ends it and ``'S'``
    marks a single-character word. Positions carrying any other tag are
    skipped.

    A word that is still open when a new ``'B'``/``'S'`` arrives, or when the
    tags run out, is emitted as-is instead of being discarded, so no
    character of a B/M/E/S-tagged position is lost even if the tag sequence
    is not well formed.

    Args:
        path: Sequence of tags, one per position of ``obs``.
        obs: Indexable sequence of characters (e.g. a string).

    Returns:
        List of words in order of appearance.

    Raises:
        IndexError: If ``path`` tags a position beyond the end of ``obs``.
    """
    words = []
    pending = ""  # characters of the word currently being built
    for i, tag in enumerate(path):
        if tag == 'B':
            if pending:
                # Previous word never saw its 'E'; keep it rather than drop it.
                words.append(pending)
            pending = obs[i]
        elif tag == 'M':
            pending += obs[i]
        elif tag == 'E':
            words.append(pending + obs[i])
            pending = ""
        elif tag == 'S':
            if pending:
                words.append(pending)
                pending = ""
            words.append(obs[i])
    if pending:
        # Tag sequence ended in the middle of a word.
        words.append(pending)
    return words
def get_stop(path="data/stopword.txt", encoding="gbk"):
    """Load the stop-word list, one entry per line.

    Args:
        path: File to read. Defaults to ``data/stopword.txt``, resolved
            against the current working directory.
        encoding: Text encoding of the file. Defaults to ``gbk``.

    Returns:
        List with every line of the file, stripped of surrounding
        whitespace, in file order. Blank lines yield empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(path, 'r', encoding=encoding) as f:
        # Iterate the file lazily instead of materialising readlines() first.
        return [line.strip() for line in f]
# obs: list of segmented words, stop: collection of stop words
def rm_stop(obs, stop):
    """Return the words of ``obs`` that are not stop words.

    Args:
        obs: Iterable of words (hashable, e.g. strings).
        stop: Iterable of stop words (hashable). It is read exactly once.

    Returns:
        New list with the surviving words in their original order.
    """
    # Build the lookup set once: membership tests become O(1) instead of a
    # scan over the whole stop list for every word, and a one-shot iterator
    # passed as ``stop`` is not exhausted by the first test.
    stop_words = frozenset(stop)
    return [word for word in obs if word not in stop_words]
if __name__ == '__main__':
    # Demo: segment one sentence with the B/M/E/S model and drop stop words.
    stats = "BMES"
    obs = "今天我来到北京清华大学"
    # Best score and best path for every possible final state.
    r, mpath = vtb(len(obs) - 1, obs, stats, p_start, p_emit, p_trans)
    # A complete segmentation has to finish a word, so the last tag can only
    # be E (end of word) or S (single-character word), never B or M.
    _, mk = max((r[k], k) for k in "ES")
    path = mpath[mk]  # highest-scoring valid tag sequence
    fc = fenci(path, obs)
    stp = get_stop()  # load the stop-word file
    words = rm_stop(fc, stp)  # named ``words`` so the builtin ``str`` is not shadowed
    print(r)
    print(path)
    print(words)
运行结果:
{'B': -69.1432668906028, 'M': -68.42252839082724, 'E': -68.04528930719714, 'S': -71.23304310663175}
['B', 'E', 'S', 'S', 'S', 'B', 'E', 'B', 'M', 'M', 'E']
['北京', '清华大学']
分类:
python
标签:
python数据分析
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· 上周热点回顾(2.24-3.2)