文本摘要 Rouge 指标计算 python
# ROUGE n-gram overlap statistics (1-, 2- and 4-grams) for text summarisation.
# NOTE(review): the original header also mentions SU4, but no SU4 routine is
# visible in this part of the file.


def _ngram_counts(text, n):
    """Return a Counter mapping each n-gram of jieba tokens in *text* to its count."""
    from collections import Counter  # local import: no import block is visible here

    # jieba.cut with default arguments (the original comment calls this the
    # "accurate" segmentation mode).
    tokens = list(jieba.cut(text))
    # N-grams are kept as token tuples rather than concatenated strings, so
    # that ("ab", "c") and ("a", "bc") are not conflated into "abc".
    # When the text has fewer than n tokens the range is empty and so is the result.
    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))


def _rouge_n(pred, true, n):
    """Return (total_num, pred_num, correct_num) n-gram counts for one summary pair.

    total_num   -- number of n-grams in the reference summary *true*
    pred_num    -- number of n-grams in the predicted summary *pred*
    correct_num -- clipped overlap: each n-gram counts min(times in pred,
                   times in true), so repeated n-grams are handled
                   consistently with the two totals above.
    """
    pred_counts = _ngram_counts(pred, n)
    true_counts = _ngram_counts(true, n)
    total_num = sum(true_counts.values())
    pred_num = sum(pred_counts.values())
    # Counter intersection keeps the minimum count of every shared n-gram.
    correct_num = sum((pred_counts & true_counts).values())
    return total_num, pred_num, correct_num


def Rouge_1(pred, true):
    """Return (reference, predicted, matched) unigram counts for ROUGE-1."""
    return _rouge_n(pred, true, 1)


def Rouge_2(pred, true):
    """Return (reference, predicted, matched) bigram counts for ROUGE-2."""
    return _rouge_n(pred, true, 2)


def Rouge_4(pred, true):
    """Return (reference, predicted, matched) 4-gram counts for ROUGE-4."""
    return _rouge_n(pred, true, 4)


def update_rouge_score(rouge, data=None, mode='1'):
    """Accumulate n-gram counts into *rouge*, or finalise precision/recall/F1.

    Args:
        rouge: dict holding an entry under the key 'Rouge_' + mode whose value
            is a dict with 'total_gram', 'pred_gram' and 'correct_gram'.
        data: a (total_num, pred_num, correct_num) sequence as returned by the
            Rouge_* functions. When truthy, the counts are added to the
            running totals. When falsy (the default None), the running totals
            are turned into 'precision', 'recall' and 'f1' instead.
        mode: suffix selecting which entry of *rouge* to update.

    Returns:
        The same *rouge* dict, modified in place.
    """
    stats = rouge['Rouge_' + mode]
    if data:
        stats['total_gram'] += int(data[0])
        stats['pred_gram'] += int(data[1])
        stats['correct_gram'] += int(data[2])
    else:
        corr = stats['correct_gram']
        total = stats['total_gram']
        pred = stats['pred_gram']
        # NOTE(review): `e` is not defined in this block; presumably a small
        # module-level epsilon guarding against division by zero -- confirm.
        precision = corr / (e + pred)
        recall = corr / (e + total)
        stats['precision'] = precision
        stats['recall'] = recall
        stats['f1'] = (2 * precision * recall) / (e + precision + recall)
    return rouge