我们观察用户评论发现:属性词往往和情感词伴随出现,原因是用户通常会在描述属性时表达情感,属性是情感表达的对象。还发现:属性词和专用情感词基本都是名词或形容词(形谓词),其中属性词(评价对象)多为名词,情感词(评价词)多为形容词或形谓词。因此可以先定位名词,再在同一分句内向后查找形容词或形谓词,把二者配对成一条"评价对象-评价词"记录。
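算法流程图如下(流程图在此处缺失,以下按后文代码的逻辑概括):逐行读取评论并依次检查每个词的词性;遇到名词时,把该名词记为评价对象;随后在遇到标点之前继续向后扫描,若遇到形容词或形谓词,则把该词记为评价词,并计算它与评价对象之间的词距;最后把评价对象、评价词和词距写入结果文件。代码中还对"不"、"不太"、"不怎么"、"不怎么样"等否定词做了单独处理。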
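评论数据如下(示例数据在此处缺失;从后文代码的解析方式看,每行应是一条已分词并标注词性的评论,词与词性以"/"分隔并交替出现,即"词/词性/词/词性/…"的形式,词性标记包括"名词"、"形容词"、"形谓词"、"标点"等):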
代码如下:
#encoding=utf-8
#############################
#
# 功能:给定一些中文的产品评论,希望从中找到评价对象及评价词。
#
# @author:licl
#
##############################
fdata = open('JD_DFB_comments_out.txt','r')
Output = open('Pattern_Result.txt','a')
try:
data = fdata.readlines()
listline = []
for line in data:
listline = line.replace("
","/")
listline =
listline.split("/")
i = 1
while i <
len(listline):
if listline[i] !=
"名词":
i = i
2
else:
new_list =
["","",""]
new_list[0]
= listline[i-1]
a =
i-1
i = i
2
while i
<
len(listline):
if listline[i] == "标点":
i = i 2
break
else:
if listline[i-1]=='不' or listline[i-1]=='不怎么样' or
listline[i-1]=='不怎么' or listline[i-1]=='不太':
new_list[1] = listline[i-1]
if listline[i] == "形容词" or listline[i] ==
"形谓词":
new_list[1] = listline[i-1]
b = i-1
t = (b-a)/2
new_list[2] = str(t)
for line in new_list:
Output.write(line " ")