代码改变世界

医疗知识图谱问答 —— 数据同步

  北桥苏  阅读(120)  评论(0)  编辑  收藏  举报

前言

        前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来进入 python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文主要介绍知识图谱的构建(数据同步)部分。

 

环境

Anaconda3

Python3.8

Py2neo (新版)

 

数据来源 (结构)

 

编码

1. 引入依赖

1
2
import json
from py2neo import Graph, Node

2. 类的初始化 (连接 neo4j)

1
2
3
def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Store the data file location and open the Neo4j connection.

    All parameters default to the values that used to be hard-coded, so
    calling the constructor without arguments behaves as before.

    Args:
        data_path: Path of the medical data file that ``read_data`` opens.
        uri: Connection URI handed to py2neo's ``Graph``.
        auth: ``(user, password)`` tuple handed to py2neo's ``Graph``.
    """
    self.data_path = data_path
    # NOTE(review): the default password is kept only for backward
    # compatibility; prefer passing credentials in from configuration.
    self.neo4j = Graph(uri, auth=auth)

3.  读取数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def read_data(self):
    """Parse the medical data file into entity sets and relation lists.

    The file at ``self.data_path`` is read line by line; every non-blank
    line must hold one JSON object describing a disease. Only ``name`` is
    mandatory. Missing scalar fields default to ``''`` and missing list
    fields to an empty list.

    Returns:
        A 20-tuple, in this order:

        * ``diseases, symptoms, producers, departments, drugs, foods,
          checks`` - sets of entity names;
        * ``disease_info`` - list with one property dict per disease;
        * ``rels_symptom, rels_acompany, rels_commondrug,
          rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
          rels_check, rels_drug_producer, rels_department, rels_category``
          - lists of ``[start_name, end_name]`` pairs;
        * ``rels_drug_producer`` once more (the original return value
          repeats it and callers unpack 20 values, so it is preserved).

    Raises:
        KeyError: if a record has no ``name``.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Scalar disease properties that default to '' when absent.
    optional_fields = ('get_prob', 'yibao_status', 'easy_get', 'get_way',
                       'cure_lasttime', 'cured_prob', 'cost_money')
    # This check item is left out of the disease->check relations.
    # NOTE(review): presumably because its single quotes break Cypher
    # built by string formatting - confirm before removing the filter.
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    # Entity names (deduplicated into sets on return).
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []

    # One property dict per disease.
    disease_info = []

    # Relations, each entry is [start_name, end_name].
    rels_symptom = []         # disease -> symptom
    rels_acompany = []        # disease -> complication
    rels_category = []        # disease -> department
    rels_department = []      # first listed department -> second listed department
    rels_commondrug = []      # disease -> common drug
    rels_recommenddrug = []   # disease -> recommended drug
    rels_noteat = []          # disease -> food to avoid
    rels_doeat = []           # disease -> food that is fine
    rels_recommendeat = []    # disease -> recommended food
    rels_check = []           # disease -> check item
    rels_drug_producer = []   # producer -> drug

    # The context manager closes the file even when a line fails to parse.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for data in data_file:
            if not data.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            data_json = json.loads(data)
            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {field: data_json.get(field, '') for field in optional_fields}
            disease_dict['cure_department'] = []
            disease_dict['name'] = disease
            disease_dict['desc'] = data_json.get('desc', '')
            disease_dict['prevent'] = data_json.get('prevent', '')
            disease_dict['cause'] = data_json.get('cause', '')
            disease_info.append(disease_dict)

            symptom = data_json.get('symptom', [])
            rels_symptom += [[disease, symptom_i] for symptom_i in symptom]
            symptoms += symptom

            # Departments: a single entry is the disease's department; with
            # two entries the first is linked to the second and the disease
            # is attached to the first.
            if "cure_department" in data_json:
                cure_department = data_json['cure_department']
                departments += cure_department
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large = cure_department[0]
                    small = cure_department[1]
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            # Complications are collected together with the symptom names.
            acompanys = data_json.get('acompany', [])
            rels_acompany += [[disease, acompany] for acompany in acompanys]
            symptoms += acompanys

            commondrug = data_json.get('common_drug', [])
            rels_commondrug += [[disease, drug_c] for drug_c in commondrug]
            drugs += commondrug

            # Read independently of 'common_drug': a record carrying only
            # one of the two keys must neither raise nor be skipped.
            recommenddrug = data_json.get('recommand_drug', [])
            rels_recommenddrug += [[disease, drug_recom] for drug_recom in recommenddrug]
            drugs += recommenddrug

            noteat = data_json.get('not_eat', [])
            rels_noteat += [[disease, noteat_i] for noteat_i in noteat]
            foods += noteat

            doeat = data_json.get('do_eat', [])
            rels_doeat += [[disease, doeat_i] for doeat_i in doeat]
            foods += doeat

            recommendfood = data_json.get('recommand_eat', [])
            rels_recommendeat += [[disease, food_i] for food_i in recommendfood]
            foods += recommendfood

            # The skipped item stays in `checks`; only its relation is
            # omitted. The former `check_i.replace("'", "")` call discarded
            # its result (str is immutable) and has been removed.
            checkitem = data_json.get('check', [])
            rels_check += [[disease, check_i] for check_i in checkitem if check_i != skipped_check]
            checks += checkitem

            # Producer / drug pairs: the text before the first "(" is the
            # producer entry, the text after the last "(" minus ")" the drug.
            for name in data_json.get('drug_detail', []):
                producer = name.split("(")[0]
                producers.append(producer)
                rels_drug_producer.append([producer, name.split("(")[-1].replace(")", "")])

    return (set(diseases), set(symptoms), set(producers), set(departments),
            set(drugs), set(foods), set(checks), disease_info, rels_symptom,
            rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
            rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
            rels_department, rels_category, rels_drug_producer)

4. 创建节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def create_medical_nodes(self):
    """Create the graph nodes from the parsed data file.

    Only the disease nodes are created at the moment; the calls for the
    other node types are kept commented out, as in the original.

    The data is read through ``self.read_data()`` rather than through a
    module-level ``build_medical_graph`` object, so the method works on any
    instance.
    """
    print("start create nodes")
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     _rels_drug_producer_again) = self.read_data()

    # Disease nodes
    # self.create_node('Diseases', diseases)
    # Symptom nodes
    # self.create_node('Symptoms', symptoms)
    # Department nodes
    # self.create_node('Departments', departments)
    # Drug nodes
    # self.create_node('Drugs', drugs)
    # Food nodes
    # self.create_node('Foods', foods)
    # Drug producer nodes
    # self.create_node('Producers', producers)
    # Check item nodes
    # self.create_node('Checks', checks)

    # Disease nodes carry their full property set, hence the dedicated helper.
    self.create_disease_node('Diseases', disease_info)
 
# Generic node creation (name property only); disease nodes are created separately by create_disease_node.
def create_node(self, label, values):
    """Create one node per entry of ``values``, with only a ``name`` property.

    Args:
        label: Label given to every created node.
        values: Iterable of node names (strings).

    Returns:
        The number of entries processed.
    """
    created = 0
    for created, node_name in enumerate(values, start=1):
        print("节点: " + label + ", 名称为: " + node_name)
        self.neo4j.create(Node(label, name=node_name))
    return created
 
def create_disease_node(self, label, values):
    """Create one node per disease, carrying all of its properties.

    Args:
        label: Label given to every created node.
        values: Iterable of disease dicts; each must contain every key
            listed in ``property_keys`` below.

    Returns:
        The number of nodes created. (The counter was previously never
        incremented, so the method always returned 0.)
    """
    property_keys = ('name', 'desc', 'prevent', 'cause', 'get_prob',
                     'yibao_status', 'easy_get', 'get_way', 'cure_lasttime',
                     'cured_prob', 'cost_money', 'cure_department')
    count = 0
    for disease in values:
        print("节点" + label + ", 名称:" + disease['name'])
        node = Node(label, **{key: disease[key] for key in property_keys})
        self.neo4j.create(node)
        count += 1
    return count

5. 创建关联边

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def create_medical_rels(self):
    """Create the graph relationships from the parsed data file.

    Only the disease -> check relationships are created at the moment; the
    other calls are kept commented out, as in the original.

    The data is read through ``self.read_data()`` rather than through a
    module-level ``build_medical_graph`` object, so the method works on any
    instance.
    """
    print("start create rels")
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     _rels_drug_producer_again) = self.read_data()

    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> common drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that is fine
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # NOTE(review): "recomment_eat" differs from the "recommand_eat" data key - confirm the intended relationship name.
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug
    # NOTE(review): the label "drugs" is lower-case while drug nodes use "Drugs" elsewhere - confirm before enabling.
    # self.create_rel("Producers", "drugs", rels_drug_producer, "drug_of", "生产药品")
 
 
def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create one relationship per ``[start_name, end_name]`` pair.

    Node names and the relationship's ``name`` property are sent as Cypher
    query parameters instead of being spliced into the query text, so names
    containing quotes no longer break (or inject into) the statement.
    Labels and the relationship type cannot be parameterised in Cypher and
    are still formatted in; they must come from trusted code.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of ``[start_name, end_name]`` pairs. The parameter
            name shadows the builtin but is kept for backward compatibility.
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # The statement text is identical for every pair, so build it once.
    query = (
        "MATCH (start:%s), (end:%s) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        "CREATE (start)-[rel:%s {name: $rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]

        print("创建边:" + rel_name + ",(" + start_node + "->" + end_node + "),点1:" + s + "点2:" + e)

        self.neo4j.run(query, {"start_name": s, "end_name": e, "rel_attr": rel_attr})

    return count

6. 导出节点数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# 导出实体的节点分词
def export_data(self):
    """Write the entity names to ``dict/<kind>.txt``, one name per line.

    Files are written for symptoms, producers, departments, drugs, foods
    and checks. The disease export stays disabled, as in the original,
    where it was commented out.

    Changes from the previous version: the data is read through
    ``self.read_data()`` instead of a module-level object, every file is
    closed (and therefore flushed) after writing, ``checks.txt`` is written
    once instead of twice, the ``dict`` directory is created when missing,
    and names are written in sorted order so the output is reproducible.
    """
    import os

    (_diseases, symptoms, producers, departments, drugs, foods, checks,
     *_relations) = self.read_data()

    os.makedirs("dict", exist_ok=True)

    exports = (
        # ("diseases", _diseases),  # disabled in the original as well
        ("symptoms", symptoms),
        ("producers", producers),
        ("departments", departments),
        ("drugs", drugs),
        ("foods", foods),
        ("checks", checks),
    )
    for stem, names in exports:
        with open("dict/%s.txt" % stem, encoding="utf-8", mode="w") as out_file:
            out_file.write("\n".join(sorted(names)))

 

相关博文:
阅读排行:
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· DeepSeek “源神”启动!「GitHub 热点速览」
· 我与微信审核的“相爱相杀”看个人小程序副业
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
· 上周热点回顾(2.17-2.23)
点击右上角即可分享
微信分享提示