欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

二、知识图谱的搭建

程序员文章站 2022-06-12 17:13:39
...

先贴代码


"""Knowledge graph construction."""
__author__ = 'ding'
import string
import xlrd
from tqdm import tqdm
from py2neo import Graph, Node, Relationship, NodeSelector


class movieGraph:
    """Build a small movie knowledge graph in Neo4j through py2neo.

    Nodes are created under the labels ``Movie``, ``Genre`` and ``Person``.
    Movie nodes carry the properties ``name``, ``rating``, ``time``,
    ``genres``, ``content`` and ``actors``; they are linked to genre nodes by
    ``is`` relationships and to person nodes by ``acting`` relationships.
    """

    # Zero-based spreadsheet columns read by handle_excel (Excel letter in the
    # trailing comment).
    _COL_NAME = 0     # A
    _COL_ACTORS = 2   # C
    _COL_GENRES = 3   # D
    _COL_TIME = 4     # E
    _COL_CONTENT = 6  # G
    _COL_RATING = 7   # H

    # Cypher templates used by delete(), keyed by its ``pattern`` argument.
    _DELETE_QUERIES = {
        "n": "MATCH(n:{label}) DETACH DELETE n",
        "r": "MATCH (n)-[r:{label}]-(m) DETACH DELETE r",
        "nr": "MATCH (n)<-[r:{label}]-(m) DETACH DELETE r, n",
        "rm": "MATCH (n)-[r:{label}]->(m) DETACH DELETE r, m",
        "nrm": "MATCH (n)-[r:{label}]-(m) DETACH DELETE r, n, m",
    }

    def __init__(self, password="123"):
        """Connect to the Neo4j server at http://localhost:7474/db/data.

        Args:
            password: Password passed to ``py2neo.Graph``.
        """
        self.rdb = None
        self.graph = Graph("http://localhost:7474/db/data", password=password)
        self.selector = NodeSelector(self.graph)

    @staticmethod
    def _require(condition, message):
        """Raise AssertionError(message) when ``condition`` is false.

        An explicit raise is used instead of ``assert`` so the check is not
        stripped under ``python -O``; the exception type is kept for callers.
        """
        if not condition:
            raise AssertionError(message)

    @staticmethod
    def _split_multi(value):
        """Split a ``'|'``-separated string, dropping empty entries.

        An empty or missing value yields an empty list, so no node with an
        empty name is ever created from a blank field.
        """
        if not value:
            return []
        return [part for part in value.split('|') if part]

    def _get_or_create(self, label, **props):
        """Return the first ``label`` node matching ``props``, creating it if absent."""
        node = self.selector.select(label).where(**props).first()
        if not node:
            node = Node(label, **props)
            self.graph.create(node)
        return node

    def add_Movie_Cell(self, label='Movie', name=None, rating="", time='', genres='', content='', actors=''):
        """Create or update a movie node and link it to its genres and actors.

        Args:
            label: Node label used for both lookup and creation.
            name: Movie name; used as the lookup key. Required.
            rating: Stored as the ``rating`` property.
            time: Stored as the ``time`` property.
            genres: ``'|'``-separated genre names; each becomes a ``Genre``
                node linked by ``(movie)-[:is]->(genre)``.
            content: Stored as the ``content`` property.
            actors: ``'|'``-separated actor names; each becomes a ``Person``
                node linked by ``(movie)-[:acting]->(person)``.

        Raises:
            AssertionError: If ``name`` is None.
        """
        self._require(name is not None, "name is required")
        props = dict(name=name, rating=rating, time=time, genres=genres,
                     content=content, actors=actors)
        # Look the node up under the same label it is created with; a
        # hard-coded 'Movie' would never match nodes created under a custom
        # label and would duplicate them on every call.
        node = self.selector.select(label).where(name=name).first()
        if node:
            for key, value in props.items():
                node[key] = value
            self.graph.push(node)
        else:
            node = Node(label, **props)
            self.graph.create(node)
        for genre in self._split_multi(genres):
            genre_node = self._get_or_create('Genre', genre=genre)
            self.graph.create(Relationship(node, 'is', genre_node))
        for actor in self._split_multi(actors):
            actor_node = self._get_or_create('Person', name=actor)
            self.graph.create(Relationship(node, 'acting', actor_node))

    def add_Movie_Genre(self, label='Genre', genre=None, movie=""):
        """Ensure a genre node exists and link it to an existing movie.

        Args:
            label: Node label used for both lookup and creation.
            genre: Genre name; used as the lookup key. Required.
            movie: Name of an existing ``Movie`` node.

        Raises:
            AssertionError: If ``genre`` is None or no ``Movie`` node named
                ``movie`` exists (the genre node has already been ensured by
                then).
        """
        self._require(genre is not None, "genre is required")
        node = self._get_or_create(label, genre=genre)
        node_movie = self.selector.select('Movie').where(name=movie).first()
        self._require(node_movie is not None, "movie node not found: %r" % (movie,))
        # NOTE(review): this creates (genre)-[:is]->(movie), the reverse of the
        # (movie)-[:is]->(genre) direction used by add_Movie_Cell -- confirm
        # which direction queries rely on before mixing both entry points.
        self.graph.create(Relationship(node, 'is', node_movie))

    def add_Person_Cell(self, label='Person', name=None, borndata='', desc='', movie=''):
        """Create or update a person node and link it to an existing movie.

        Args:
            label: Node label used for both lookup and creation.
            name: Person name; used as the lookup key. Required.
            borndata: Stored as the ``borndata`` property.
            desc: Stored as the ``desc`` property.
            movie: Name of an existing ``Movie`` node.

        Raises:
            AssertionError: If ``name`` is None or no ``Movie`` node named
                ``movie`` exists (the person node has already been written by
                then).
        """
        self._require(name is not None, "name is required")
        node = self.selector.select(label).where(name=name).first()
        if node:
            node['name'] = name
            node['borndata'] = borndata
            node['desc'] = desc
            self.graph.push(node)
        else:
            node = Node(label, name=name, borndata=borndata, desc=desc)
            self.graph.create(node)
        node_movie = self.selector.select('Movie').where(name=movie).first()
        self._require(node_movie is not None, "movie node not found: %r" % (movie,))
        # NOTE(review): this creates (person)-[:acting]->(movie), the reverse
        # of the (movie)-[:acting]->(person) direction used by add_Movie_Cell.
        self.graph.create(Relationship(node, 'acting', node_movie))

    def delete(self, pattern="n", label=None):
        """Batch delete data or a subgraph in the database.

        Args:
            pattern: Type of subgraph to delete. ``"all"`` wipes the whole
                graph; ``"n"`` deletes nodes with the given label; ``"r"``,
                ``"nr"``, ``"rm"`` and ``"nrm"`` delete relationships of the
                given type, optionally with their end nodes. Any other value
                is a no-op.
            label: Node label (for ``"n"``) or relationship type (for the
                other patterns). Required unless ``pattern`` is ``"all"``.

        Raises:
            TypeError: If ``label`` is not a string for a pattern that needs it.
        """
        if pattern == "all":
            self.graph.delete_all()
            return
        template = self._DELETE_QUERIES.get(pattern)
        if template is None:
            return
        if not isinstance(label, str):
            raise TypeError("label must be a str for pattern %r, got %r" % (pattern, label))
        # NOTE(review): label is interpolated into the Cypher text unescaped;
        # only pass trusted label / relationship-type names.
        self.graph.run(template.format(label=label))

    def handle_excel(self, filename=None, custom_sheets=None):
        """Import movie rows from an Excel workbook into the graph.

        For every selected sheet, each row from index 1 on (row 0 is skipped,
        presumably a header) is passed to ``add_Movie_Cell`` using columns
        A (name), C (actors), D (genres), E (time), G (content) and H (rating).

        Args:
            filename: Path of the workbook to open with ``xlrd``. Required.
            custom_sheets: Optional iterable of sheet names; when given and
                non-empty, only those sheets that exist in the workbook are
                imported, in workbook order. Otherwise all sheets are imported.

        Returns:
            None. If any error occurs while processing a sheet, it is printed
            and the import stops without processing the remaining sheets.

        Raises:
            AssertionError: If ``filename`` is None.
        """
        self._require(filename is not None, "filename is required")
        data = xlrd.open_workbook(filename)
        data_sheets = data.sheet_names()
        if custom_sheets:
            wanted = set(custom_sheets)
            # Keep the workbook's own ordering so imports are deterministic.
            sheet_names = [sheet for sheet in data_sheets if sheet in wanted]
        else:
            sheet_names = data_sheets
        for sheet_name in sheet_names:
            table = data.sheet_by_name(sheet_name)
            if not table:
                continue
            try:
                for row in tqdm(range(1, table.nrows)):
                    self.add_Movie_Cell(
                        name=table.cell_value(row, self._COL_NAME),
                        rating=table.cell_value(row, self._COL_RATING),
                        time=table.cell_value(row, self._COL_TIME),
                        genres=table.cell_value(row, self._COL_GENRES),
                        content=table.cell_value(row, self._COL_CONTENT),
                        actors=table.cell_value(row, self._COL_ACTORS),
                    )
            except Exception as error:
                # Deliberate best-effort import: report the problem and stop.
                print('Error: %s' % error)
                return None

此处搭建了一个简单的知识图谱:电影、演员、电影种类各有自己的标签,电影节点的属性中还包含了许多信息,以便在查询时提供反馈。
代码比较简单,就不解释了。本人也才接触没多久,如有错误的地方,请不吝赐教。