医疗知识图谱问答 —— 数据同步
2023-08-02 10:26 北桥苏 阅读(120) 评论(0) 编辑 收藏 举报
前言
前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来进入 python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文主要介绍知识图谱的构建。
环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)
编码
1. 引入依赖
1 2 | import json from py2neo import Graph, Node |
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json",
             uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Store the dataset location and open the Neo4j connection.

    All arguments default to the values that used to be hard-coded, so
    calling the constructor without arguments behaves as before.

    Args:
        data_path: Path of the dataset file, stored as ``self.data_path``.
        uri: Bolt URI handed to ``Graph``.
        auth: ``(user, password)`` tuple handed to ``Graph``.
    """
    # NOTE(review): the default password is committed in source; consider
    # passing ``auth`` from configuration or the environment instead.
    self.data_path = data_path
    self.neo4j = Graph(uri, auth=auth)
3. 读取数据
def read_data(self):
    """Parse the medical dataset into node names and relationship pairs.

    ``self.data_path`` is read as UTF-8 text holding one JSON object per
    line; blank lines are skipped. ``name``, ``desc``, ``prevent`` and
    ``cause`` must be present in every record (``KeyError`` otherwise).
    Every other field is optional and treated as empty when missing.

    Returns:
        A 20-item tuple, in this order:

        0-6   sets of names: diseases, symptoms (complications are merged
              into this set), producers, departments, drugs, foods, checks
        7     disease_info: one property dict per disease
        8-15  ``[disease, other]`` pairs: symptom, complication, common
              drug, recommended drug, not-eat, do-eat, recommended-eat,
              check
        16    ``[producer, drug]`` pairs
        17    ``[large_department, small_department]`` pairs
        18    ``[disease, department]`` pairs
        19    the same list object as item 16 (kept because callers
              unpack 20 values)
    """
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    disease_info = []

    rels_symptom = []
    rels_acompany = []
    rels_category = []
    rels_department = []
    rels_commondrug = []
    rels_recommenddrug = []
    rels_noteat = []
    rels_doeat = []
    rels_recommendeat = []
    rels_check = []
    rels_drug_producer = []

    # Scalar disease attributes that fall back to '' when a record omits them.
    optional_fields = (
        'get_prob', 'yibao_status', 'easy_get', 'get_way',
        'cure_lasttime', 'cured_prob', 'cost_money',
    )
    # (record key, relationship list, node-name list) for the fields that
    # are plain lists of names attached to the disease.
    list_fields = (
        ('symptom', rels_symptom, symptoms),
        ('acompany', rels_acompany, symptoms),
        ('common_drug', rels_commondrug, drugs),
        ('recommand_drug', rels_recommenddrug, drugs),
        ('not_eat', rels_noteat, foods),
        ('do_eat', rels_doeat, foods),
        ('recommand_eat', rels_recommendeat, foods),
    )
    # This check name is never linked to a disease (it still ends up in the
    # ``checks`` set). The original code also called ``str.replace`` on each
    # check name and discarded the result; that no-op has been removed.
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    # ``with`` guarantees the dataset file is closed, even on a parse error.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            if not line.strip():
                continue
            data_json = json.loads(line)

            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {
                field: data_json.get(field, '') for field in optional_fields
            }
            disease_dict['name'] = disease
            disease_dict['desc'] = data_json['desc']
            disease_dict['prevent'] = data_json['prevent']
            disease_dict['cause'] = data_json['cause']

            cure_department = data_json.get('cure_department', [])
            disease_dict['cure_department'] = cure_department
            departments += cure_department
            if len(cure_department) == 1:
                rels_category.append([disease, cure_department[0]])
            elif len(cure_department) == 2:
                # Two entries: the first is linked to the disease and the
                # pair itself is recorded as a department relationship.
                large, small = cure_department
                rels_department.append([large, small])
                rels_category.append([disease, large])
            disease_info.append(disease_dict)

            for key, rels, names in list_fields:
                items = data_json.get(key, [])
                rels.extend([disease, item] for item in items)
                names += items

            check_items = data_json.get('check', [])
            rels_check.extend(
                [disease, item] for item in check_items
                if item != skipped_check
            )
            checks += check_items

            # Each entry is split on "(": the text before the first "(" is
            # stored as the producer, the text after the last "(" (with ")"
            # removed) as the drug.
            for detail in data_json.get('drug_detail', []):
                parts = detail.split("(")
                producer = parts[0]
                producers.append(producer)
                rels_drug_producer.append(
                    [producer, parts[-1].replace(")", "")]
                )

    return (
        set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info,
        rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
        rels_noteat, rels_doeat, rels_recommendeat, rels_check,
        rels_drug_producer, rels_department, rels_category,
        rels_drug_producer,
    )
4. 创建节点
def create_medical_nodes(self):
    """Create graph nodes from the parsed dataset.

    Only the disease nodes (with their full property set) are created.
    The name-only node types are left as disabled steps below; enable a
    line to create that node type.
    """
    print("start create nodes")
    # NOTE(review): ``build_medical_graph`` is not defined in this block;
    # presumably a module-level instance of this class -- confirm.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     rels_drug_producer) = build_medical_graph.read_data()

    # Disabled steps (name-only nodes):
    # self.create_node('Diseases', diseases)
    # self.create_node('Symptoms', symptoms)
    # self.create_node('Departments', departments)
    # self.create_node('Drugs', drugs)
    # self.create_node('Foods', foods)
    # self.create_node('Producers', producers)
    # self.create_node('Checks', checks)

    # Disease nodes carry extra properties, so they are created separately.
    self.create_disease_node('Diseases', disease_info)
    return


def create_node(self, label, values):
    """Create one node per name in ``values``.

    Args:
        label: Node label given to every created node.
        values: Iterable of names; each becomes the node's ``name``.

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        self.neo4j.create(Node(label, name=val))
    return count


def create_disease_node(self, label, values):
    """Create one node per disease dict, copying its properties.

    Args:
        label: Node label given to every created node.
        values: Iterable of dicts; each must contain every key listed in
            ``property_names`` below.

    Returns:
        The number of nodes created. (The original never incremented the
        counter and always returned 0.)
    """
    property_names = (
        'name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
        'easy_get', 'get_way', 'cure_lasttime', 'cured_prob',
        'cost_money', 'cure_department',
    )
    count = 0
    for disease in values:
        count += 1
        print("节点" + label + ", 名称:" + disease['name'])
        properties = {key: disease[key] for key in property_names}
        self.neo4j.create(Node(label, **properties))
    return count
5. 创建关联边
def create_medical_rels(self):
    """Create relationships between existing nodes from the parsed dataset.

    Only the disease -> check relationships are created. The remaining
    relationship types are left as disabled steps below; enable a line to
    create that relationship type.
    """
    print("start create rels")
    # NOTE(review): ``build_medical_graph`` is not defined in this block;
    # presumably a module-level instance of this class -- confirm.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     rels_drug_producer) = build_medical_graph.read_data()

    # Disabled steps:
    # disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # disease -> common drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # disease -> food that may be eaten
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # producer -> drug (label must be "Drugs" to match the drug nodes)
    # self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")

    # disease -> check
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a relationship for every ``[start_name, end_name]`` pair.

    Node names and the relationship's ``name`` property are sent as query
    parameters, so names containing quotes no longer break the statement.
    Labels and the relationship type cannot be parameterized in Cypher
    and are still interpolated; pass only trusted values for them.

    Args:
        start_node: Label of the start node.
        end_node: Label of the end node.
        list: Iterable of two-item pairs: start name, end name. (The
            parameter name shadows the builtin; kept for callers.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # The statement text is identical for every pair, so build it once.
    query = (
        "MATCH (start:%s), (end:%s) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        "CREATE (start)-[rel:%s {name: $rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print(f"创建边:{rel_name},({start_node}->{end_node}),点1:{s}点2:{e}")
        self.neo4j.run(query, start_name=s, end_name=e, rel_attr=rel_attr)
    return count
6. 导出节点数据
# Export entity names as word lists, one name per line.
def export_data(self):
    """Write each set of entity names to its own file under ``dict/``.

    Every file is opened with ``with`` so it is flushed and closed, and
    ``dict/checks.txt`` is written once (the original wrote it twice with
    the same content). Names are sorted so repeated exports of the same
    data produce identical files.
    """
    # NOTE(review): ``build_medical_graph`` is not defined in this block;
    # presumably a module-level instance of this class -- confirm.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     rels_drug_producer) = build_medical_graph.read_data()

    word_lists = (
        # Disease names are not exported (disabled in the original):
        # ("dict/diseases.txt", diseases),
        ("dict/symptoms.txt", symptoms),
        ("dict/producers.txt", producers),
        ("dict/departments.txt", departments),
        ("dict/drugs.txt", drugs),
        ("dict/foods.txt", foods),
        ("dict/checks.txt", checks),
    )
    for path, words in word_lists:
        with open(path, encoding="utf-8", mode="w+") as word_file:
            word_file.write("\n".join(sorted(words)))
个人网站:www.zerofc.cn
公众号:ZEROFC_DEV
QQ交流群:515937120
QQ:2652364582
头条号:1637769351151619
B站:286666708
大鱼号:北桥苏
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· DeepSeek “源神”启动!「GitHub 热点速览」
· 我与微信审核的“相爱相杀”看个人小程序副业
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
· 上周热点回顾(2.17-2.23)