Python machine learning: NLP and word segmentation
# coding=UTF-8
import os
from snownlp import SnowNLP
from snownlp import sentiment

s = SnowNLP(u"奖励你")
print(s.words)        # word segmentation
print(s.keywords(3))  # top-3 keywords
print(s.sentences)    # sentence splitting
print(list(s.tags))   # part-of-speech tagging
print(s.sentiments)   # sentiment score in [0, 1]; higher means more positive
# print(os.getcwd())  # uncomment to check the current working directory
# Training data: neg.txt holds negative-sentiment samples and pos.txt holds positive-sentiment
# samples (note the argument order: negative file first, then positive). After training, a higher
# score means a more positive sentiment. Every time new words or phrases are added, the model has
# to be retrained; the computation is CPU-intensive, so it can be run on a GPU instead.
# (See the load/usage sketch after the console output below.)
sentiment.train(
    'D:\\Anaconda3\\Lib\\site-packages\\snownlp\\sentiment\\neg.txt',
    'D:\\Anaconda3\\Lib\\site-packages\\snownlp\\sentiment\\pos.txt'
)
sentiment.save('sentiment.marshal.20210521')  # save the retrained model under a new file name
Console output:
D:\Anaconda3\python.exe "E:/Program Files/wechat-bot-master/client/python/DemoNlp.py"
['奖励', '你']
[]
['奖励你']
[('奖励', 'v'), ('你', 'r')]
0.6153846153846156
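To use the retrained model in a later run, the saved file has to be loaded back into the sentiment classifier. The following is a minimal sketch, not from the original post: it assumes the file saved above ('sentiment.marshal.20210521') sits in the current working directory and relies on snownlp's module-level sentiment.load() and sentiment.classify() helpers; the test sentence is the same one used in the demo.

# coding=UTF-8
from snownlp import SnowNLP
from snownlp import sentiment

# Load the model written by sentiment.save() above; adjust the path if the
# file was saved somewhere other than the current working directory.
sentiment.load('sentiment.marshal.20210521')

# Score text with the module-level classifier directly ...
print(sentiment.classify(u"奖励你"))
# ... or through SnowNLP, whose .sentiments property uses the same in-memory classifier.
print(SnowNLP(u"奖励你").sentiments)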