知识图谱
程序员文章站
2022-06-12 18:06:59
...
主体类 MedicalGraph 介绍
class MedicalGraph:
    """Skeleton of the class that builds the medical knowledge graph.

    Every method here is an outline stub that does nothing and returns
    ``None``; the descriptions below restate the intent given by the
    original outline comments only.
    """

    def __init__(self):
        pass

    def read_file(self):
        """Read the data file and obtain the entities and entity relations."""
        pass

    def create_node(self, label, nodes):
        """Create graph nodes.

        NOTE(review): presumably one node per item of ``nodes``, tagged with
        ``label`` -- confirm against the real implementation.
        """
        pass

    def create_diseases_nodes(self, disease_info):
        """Create the disease nodes together with their attributes."""
        pass

    def create_graphNodes(self):
        """Create the entity nodes of the knowledge graph."""
        pass

    def create_graphRels(self):
        """Create the relationship edges between entities."""
        pass

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """Create relationship edges between entities.

        NOTE(review): presumably links ``start_node`` entities to ``end_node``
        entities for each pair in ``edges`` -- confirm against the real
        implementation.
        """
        pass
主体类 MedicalGraph 中关键代码讲解
def read_file(self):
    """
    Read the disease CSV at ``self.data_path`` and collect entities and relations.

    The file is decoded as gb18030 and each row is read positionally, in this
    column order: name, alias, part, age, infection, insurance, department,
    checklist, symptom, complication, treatment, drug, period, rate, money.

    Missing cells in the alias, part, complication and money columns are
    replaced by the placeholder "未知". All other columns are stringified
    as they are loaded.

    :return: a 14-tuple of
        (diseases, symptoms, aliases, parts, departments, complications, drugs,
         disease_to_alias, disease_to_symptom, diseases_to_part,
         disease_to_department, disease_to_complication, disease_to_drug,
         diseases_infos).
        The first seven items are sets of entity names, the next six are lists
        of ``[disease, entity]`` pairs, and the last is a list with one
        attribute dict per row (keys: name, age, infection, insurance,
        checklist, treatment, period, rate, money).
    """
    unknown = "未知"

    def is_missing(cell):
        # pandas loads empty CSV cells as NaN, and str(NaN) is the truthy
        # string "nan", so a plain truthiness test on str(cell) never fires.
        return pd.isna(cell) or not str(cell)

    # Entities
    diseases = []
    aliases = []
    symptoms = []
    parts = []
    departments = []
    complications = []
    drugs = []
    # Per-disease attributes: age, infection, insurance, checklist, treatment,
    # period, rate, money
    diseases_infos = []
    # Relations, each stored as a [disease, entity] pair
    disease_to_symptom = []
    disease_to_alias = []
    diseases_to_part = []
    disease_to_department = []
    disease_to_complication = []
    disease_to_drug = []

    all_data = pd.read_csv(self.data_path, encoding='gb18030').values
    for data in all_data:
        disease_dict = {}

        # Disease name; it was previously never recorded, so the returned
        # disease set was always empty.
        disease = str(data[0]).replace("...", " ").strip()
        disease_dict["name"] = disease
        diseases.append(disease)

        # Aliases: separators are normalised to spaces before splitting.
        if is_missing(data[1]):
            alias_text = unknown
        else:
            alias_text = re.sub("[,、;,.;]", " ", str(data[1]))
        for alias in alias_text.strip().split():
            aliases.append(alias)
            disease_to_alias.append([disease, alias])

        # Body parts; the placeholder is wrapped in a list so that it is not
        # iterated character by character.
        part_list = [unknown] if is_missing(data[2]) else str(data[2]).strip().split()
        for part in part_list:
            parts.append(part)
            diseases_to_part.append([disease, part])

        # Age group
        disease_dict["age"] = str(data[3]).strip()
        # Infectiousness
        disease_dict["infection"] = str(data[4]).strip()
        # Medical insurance
        disease_dict["insurance"] = str(data[5]).strip()

        # Departments
        for department in str(data[6]).strip().split():
            departments.append(department)
            disease_to_department.append([disease, department])

        # Examination items
        disease_dict["checklist"] = str(data[7]).strip()

        # Symptoms: the last whitespace-separated token is dropped
        # (presumably a trailing marker in the data -- TODO confirm).
        symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
        for symptom in symptom_list:
            symptoms.append(symptom)
            disease_to_symptom.append([disease, symptom])

        # Complications: the last token is dropped, as for symptoms.
        if is_missing(data[9]):
            complication_list = [unknown]
        else:
            complication_list = str(data[9]).strip().split()[:-1]
        for complication in complication_list:
            complications.append(complication)
            disease_to_complication.append([disease, complication])

        # Treatment: the last four characters are dropped
        # (presumably a fixed-length trailing marker -- TODO confirm).
        disease_dict["treatment"] = str(data[10]).strip()[:-4]

        # Drugs: the last token is dropped, as for symptoms.
        drug_string = str(data[11]).replace("...", " ").strip()
        for drug in drug_string.split()[:-1]:
            drugs.append(drug)
            disease_to_drug.append([disease, drug])

        # Recovery period
        disease_dict["period"] = str(data[12]).strip()
        # Cure rate
        disease_dict["rate"] = str(data[13]).strip()
        # Cost
        disease_dict["money"] = unknown if is_missing(data[14]) else str(data[14]).strip()

        diseases_infos.append(disease_dict)

    return set(diseases), set(symptoms), set(aliases), set(parts), set(departments), set(complications), \
        set(drugs), disease_to_alias, disease_to_symptom, diseases_to_part, disease_to_department, \
        disease_to_complication, disease_to_drug, diseases_infos