2023-08-03 12:57:25 来源: 博客园
【资料图】
前言
前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。本文进入 Python 项目,完成医疗知识图谱的构建以及问答机器人的代码实现。由于篇幅较长,本文主要介绍知识图谱的构建。
环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)
编码
1. 引入依赖
import json

from py2neo import Graph, Node
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json", uri="bolt://localhost:7687",
             auth=("neo4j", "beiqiaosu123456")):
    """Store the data file location and open the Neo4j connection.

    All arguments default to the values that used to be hard-coded, so
    calling the constructor without arguments behaves as before.

    Args:
        data_path: Path of the medical data file parsed by ``read_data``.
        uri: Bolt URI passed to ``py2neo.Graph``.
        auth: ``(user, password)`` tuple passed to ``py2neo.Graph``.
    """
    # NOTE(review): the default password is kept only for backward
    # compatibility; real deployments should pass ``auth`` explicitly
    # instead of relying on a credential committed to source.
    self.data_path = data_path
    self.neo4j = Graph(uri, auth=auth)
3. 读取数据
def read_data(self):
    """Parse the medical data file into entity collections and relation pairs.

    ``self.data_path`` is opened as UTF-8 and every non-blank line is decoded
    as one JSON object describing a disease. ``name``, ``desc``, ``prevent``
    and ``cause`` are required; every other key is optional and treated as
    empty when absent.

    Returns:
        A 20-item tuple, in this order:

        * ``diseases, symptoms, producers, departments, drugs, foods,
          checks`` -- sets of entity names;
        * ``disease_info`` -- list with one attribute dict per record;
        * ``rels_symptom, rels_acompany, rels_commondrug,
          rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
          rels_check, rels_drug_producer, rels_department, rels_category``
          -- lists of two-item ``[start, end]`` lists;
        * ``rels_drug_producer`` once more (the same list object). The
          duplicate is kept because callers unpack exactly 20 values.

    Raises:
        KeyError: If a record lacks ``name``, ``desc``, ``prevent`` or
            ``cause``.
    """
    # Scalar attributes that default to "" when a record does not carry them.
    optional_fields = (
        "get_prob", "yibao_status", "easy_get", "get_way",
        "cure_lasttime", "cured_prob", "cost_money",
    )
    # This check item is left out of ``rels_check`` (but stays in ``checks``),
    # as in the original code; presumably its quotes broke the hand-built
    # Cypher query. NOTE(review): the quote characters were mangled in the
    # published source; ASCII apostrophes are assumed -- confirm against the
    # data file.
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    # Entity names (duplicates are removed by set() on return).
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    # One attribute dict per disease.
    disease_info = []
    # Relation pairs.
    rels_symptom = []        # disease -> symptom
    rels_acompany = []       # disease -> complication
    rels_category = []       # disease -> department
    rels_department = []     # department -> department
    rels_commondrug = []     # disease -> common drug
    rels_recommenddrug = []  # disease -> recommended drug
    rels_noteat = []         # disease -> food to avoid
    rels_doeat = []          # disease -> food that may be eaten
    rels_recommendeat = []   # disease -> recommended food
    rels_check = []          # disease -> check item
    rels_drug_producer = []  # text before "(" -> text inside the parentheses

    # ``with`` guarantees the handle is closed; the original leaked it.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            if not line.strip():
                # A blank (e.g. trailing) line is not a record.
                continue
            data_json = json.loads(line)
            disease = data_json["name"]
            diseases.append(disease)

            disease_dict = {key: data_json.get(key, "") for key in optional_fields}
            disease_dict["cure_department"] = data_json.get("cure_department", [])
            disease_dict["name"] = disease
            disease_dict["desc"] = data_json["desc"]
            disease_dict["prevent"] = data_json["prevent"]
            disease_dict["cause"] = data_json["cause"]
            disease_info.append(disease_dict)

            symptom = data_json.get("symptom", [])
            rels_symptom += [[disease, item] for item in symptom]
            symptoms += symptom

            # Departments: one entry links the disease to it directly; two
            # entries additionally record the department-to-department pair.
            cure_department = data_json.get("cure_department", [])
            departments += cure_department
            if len(cure_department) == 1:
                rels_category.append([disease, cure_department[0]])
            elif len(cure_department) == 2:
                large, small = cure_department
                # NOTE(review): pair order follows the data as listed;
                # confirm it matches the intended edge direction.
                rels_department.append([large, small])
                rels_category.append([disease, large])

            # Complications are also collected as symptom entities.
            acompanys = data_json.get("acompany", [])
            rels_acompany += [[disease, item] for item in acompanys]
            symptoms += acompanys

            commondrug = data_json.get("common_drug", [])
            rels_commondrug += [[disease, item] for item in commondrug]
            drugs += commondrug

            recommenddrug = data_json.get("recommand_drug", [])
            rels_recommenddrug += [[disease, item] for item in recommenddrug]
            drugs += recommenddrug

            noteat = data_json.get("not_eat", [])
            rels_noteat += [[disease, item] for item in noteat]
            foods += noteat

            doeat = data_json.get("do_eat", [])
            rels_doeat += [[disease, item] for item in doeat]
            foods += doeat

            recommendfood = data_json.get("recommand_eat", [])
            rels_recommendeat += [[disease, item] for item in recommendfood]
            foods += recommendfood

            checkitem = data_json.get("check", [])
            rels_check += [[disease, item] for item in checkitem if item != skipped_check]
            checks += checkitem

            # Each drug_detail entry looks like "head(tail)": the head goes to
            # ``producers``, and [head, tail] becomes a relation pair.
            for name in data_json.get("drug_detail", []):
                parts = name.split("(")
                producers.append(parts[0])
                rels_drug_producer.append([parts[0], parts[-1].replace(")", "")])

    return (
        set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info,
        rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
        rels_noteat, rels_doeat, rels_recommendeat, rels_check,
        rels_drug_producer, rels_department, rels_category, rels_drug_producer,
    )
4. 创建节点
def create_medical_nodes(self):
    """Create graph nodes for the entities returned by ``read_data``.

    Only the disease nodes (with their full attribute set) are created at
    the moment; the name-only node types are left disabled below, exactly as
    in the original code.
    """
    print("start create nodes")
    # Use the instance itself rather than a module-level object so the
    # method works for any instance.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, *_relations) = self.read_data()

    # Name-only nodes, currently disabled. NOTE(review): enabling the
    # "Diseases" line together with create_disease_node below would create
    # two nodes per disease.
    # self.create_node("Diseases", diseases)
    # self.create_node("Symptoms", symptoms)
    # self.create_node("Departments", departments)
    # self.create_node("Drugs", drugs)
    # self.create_node("Foods", foods)
    # self.create_node("Producers", producers)
    # self.create_node("Checks", checks)

    # Disease nodes are created separately because they carry attributes.
    self.create_disease_node("Diseases", disease_info)


def create_node(self, label, values):
    """Create one ``label`` node per value, storing the value as ``name``.

    Args:
        label: Node label to apply.
        values: Iterable of name strings.

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        self.neo4j.create(Node(label, name=val))
    return count


def create_disease_node(self, label, values):
    """Create one ``label`` node per disease dict, copying its attributes.

    Args:
        label: Node label to apply.
        values: Iterable of dicts shaped like the ``disease_info`` entries
            built by ``read_data``.

    Returns:
        The number of nodes created (the original never incremented its
        counter and therefore always returned 0).
    """
    attribute_keys = (
        "name", "desc", "prevent", "cause", "get_prob", "yibao_status",
        "easy_get", "get_way", "cure_lasttime", "cured_prob", "cost_money",
        "cure_department",
    )
    count = 0
    for disease in values:
        print("节点" + label + ", 名称:" + disease["name"])
        properties = {key: disease[key] for key in attribute_keys}
        self.neo4j.create(Node(label, **properties))
        count += 1
    return count
5. 创建关联边
def create_medical_rels(self):
    """Create graph relationships from the pairs returned by ``read_data``.

    Only the disease -> check relationship is created at the moment; the
    other relationship types are left disabled below, exactly as in the
    original code.
    """
    print("start create rels")
    # Use the instance itself rather than a module-level object so the
    # method works for any instance.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     rels_drug_producer) = self.read_data()

    # Disabled relationship types (kept for reference).
    # disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # disease -> common drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # disease -> food that may be eaten
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # producer -> drug
    # NOTE(review): the label below is "drugs" while the node-creation code
    # uses "Drugs"; labels are case-sensitive, so confirm before enabling.
    # self.create_rel("Producers", "drugs", rels_drug_producer, "drug_of", "生产药品")

    # disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a ``rel_name`` relationship for every ``[start, end]`` pair.

    Node names and the relationship's ``name`` property are sent as Cypher
    query parameters, so names containing quotes no longer break (or inject
    into) the query text.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of two-item ``[start_name, end_name]`` sequences.
            (The parameter name shadows the builtin but is kept so existing
            callers keep working.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # Labels and relationship types cannot be Cypher parameters, so they are
    # still interpolated; they come from literals in this file, not from the
    # data. The text is identical for every pair, so build it once.
    query = (
        "MATCH (src:%s), (dst:%s) "
        "WHERE src.name = $start_name AND dst.name = $end_name "
        "CREATE (src)-[rel:%s {name: $rel_attr}]->(dst)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print(f"创建边:{rel_name},({start_node}->{end_node}),点1:{s}点2:{e}")
        self.neo4j.run(query, start_name=s, end_name=e, rel_attr=rel_attr)
    return count
6. 导出节点数据
# Export the entity names as dictionary files (one name per line).
def export_data(self):
    """Write the entity name sets from ``read_data`` to ``dict/<kind>.txt``.

    One UTF-8 file is written per entity kind (symptoms, producers,
    departments, drugs, foods, checks), with names separated by newlines.
    Names are sorted so the output is reproducible between runs, the
    ``dict`` directory is created when missing, and every file is closed
    after writing. The disease export stays disabled, as in the original.
    """
    import os

    # Use the instance itself rather than a module-level object so the
    # method works for any instance.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     *_unused) = self.read_data()

    exports = {
        # "diseases": diseases,  # disabled in the original code
        "symptoms": symptoms,
        "producers": producers,
        "departments": departments,
        "drugs": drugs,
        "foods": foods,
        "checks": checks,  # the original wrote this file twice
    }

    os.makedirs("dict", exist_ok=True)
    for kind, names in exports.items():
        with open("dict/%s.txt" % kind, encoding="utf-8", mode="w") as out_file:
            out_file.write("\n".join(sorted(names)))