欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

PLY库-实现最简单的sql语法的数据库

程序员文章站 2022-03-31 10:28:49
...
本文通过PLY实现一个简单的sql库

本文主要简述一下,有关语法解析的库的使用,并使用改实例编写一个简单的模仿最基本的sql语句的数据库,本文代码仅供示例参考。

语法解析与上下文

语法解析是一个比较大的内容,早起比较成熟的有lex与yacc,该工具相对比较成熟,详细的资料大家可以自行查阅资料,在编译中使用较多的是上下文无关文法主要的是BNF,大家有兴趣可查阅sql语句的BNF文档,标准的sql的文法相对复杂,本文只是为了实现最基本的工作故只是做了最简单的解析。

示例代码
import ply.lex as lex
import ply.yacc as yacc

import collections

tokens = (
    'LFPARENTH',
    'RGPARENTH',
    'TABLE',
    'CREATE',
    'INSERT',
    'UPDATE',
    'INTO',
    'VALUES',
    'SELECT',
    'COLUMN',
    "COMMA",
    'WHERE',
    'FROM',
    'AND',
    'SET',
    'EQUAL',
    'STAR',
    "END"
)


t_LFPARENTH = r'\('
t_RGPARENTH = r'\)'
t_SELECT = r'SELECT|select'
t_CREATE = r'CREATE|create'
t_INSERT = r'INSERT|insert'
t_UPDATE = r'UPDATE|update'
t_INTO = r'INTO|into'
t_VALUES = r'VALUES|values'
t_WHERE = r'WHERE|where'
t_FROM = r'FROM|from'
t_AND = r'AND|and'
t_SET = r'SET|set'
t_EQUAL = r'\='
t_TABLE = r'TABLE|table'
t_COMMA = r','
t_STAR = r'\*'
t_END = r';'


def t_COLUMN(t):
    r'[a-zA-Z0-9/.-]+'
    if t.value in ['FROM', 'from']:
        t.type = 'FROM'
    if t.value in ['CREATE', 'create']:
        t.type = 'CREATE'
    if t.value in ['TABLE', 'table']:
        t.type = 'TABLE'
    if t.value in ['INSERT', 'insert']:
        t.type = 'INSERT'
    if t.value in ['INTO', 'into']:
        t.type = 'INTO'
    if t.value in ['VALUES', 'values']:
        t.type = 'VALUES'
    if t.value in ['UPDATE', 'update']:
        t.type = 'UPDATE'
    if t.value in ['SET', 'set']:
        t.type = 'SET'
    if t.value in ['WHERE', 'where']:
        t.type = 'WHERE'
    if t.value in ['SELECT', 'select']:
        t.type = 'SELECT'
    if t.value in ['AND', 'and']:
        t.type = 'AND'
    return t


def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


t_ignore = ' \t'


def t_error(t):
    print("Illegal character {0}".format(t.value[0]))
    t.lexer.skip(1)


lexer = lex.lex()

datas = {}


class Stack(object):

    def __init__(self):
        self.is_columns = False
        self._stack = []

    def reset(self):
        self._stack = []

    def append(self, value):
        self._stack.append(value)

    def __iter__(self):
        return iter(self._stack)

    def __len__(self):
        return len(self._stack)

    def __str__(self):
        print(self._stack)
        return "stack"

    def __getitem__(self, item):
        return self._stack[item]

    def __setslice__(self, i, j, sequence):
        return self._stack[i:j]


stack = Stack()

current_action = None

columns_dict = {}

condition_dict = {}


def reset_action():
    global current_action, stack, columns_dict, condition_dict
    current_action = None
    stack.reset()
    columns_dict = {}
    condition_dict = {}


class Select(object):

    def __init__(self):
        self.values = []
        self.table = None

    def set_table(self, table):
        self.table = table
        return table in datas

    def add_stack(self, stack):
        [self.add_values(v) for v in stack if v not in self.values]

    def add_values(self, value):
        self.values.append(value)

    def action(self):
        """展示数据"""
        if self.table not in datas:
            print("table {0} not exists")
            return
        table = datas[self.table]
        if self.values:
            for v in self.values:
                if v in table:
                    print("   {0} = {1}".format(v, table[v]))
                else:
                    print("   {0} not in table {1}".format(v, self.table))
        else:
            for v in table:
                print("   {0} = {1}".format(v, table[v]))


class Create(object):

    def __init__(self):
        self.values = []
        self.table = None

    def set_table(self, table):
        self.table = table
        return table not in datas

    def add_stack(self, stack):
        [self.add_values(v) for v in stack if v not in self.values]

    def add_values(self, value):
        self.values.append(value)

    def action(self):
        datas[self.table] = collections.OrderedDict()
        for v in self.values:
            datas[self.table][v] = []
        print("create : ", datas)


class Insert(object):

    def __init__(self):
        self.values = []
        self.columns = set()
        self.table = None
        self._stack = None

    def set_table(self, table):
        self.table = table
        return table not in datas

    def add_stack(self, stack):
        # 判断是否输入的sql 为 insert into table(c1, c2, c3) values(1,2,3)
        self._stack = stack

    def action(self):
        table = datas[self.table]
        if self._stack.is_columns:
            if len(self._stack) and len(self._stack) % 2 == 0:
                index = int(len(self._stack) / 2)
                if index != len(table.keys()):
                    print("error default columns")
                    return
                for i in range(index):
                    if self._stack[i] in table:
                        table[self._stack[i]].append(self._stack[i + index])

            else:
                print(" error columns and values not equal")
                return
        else:
            if len(table.keys()) != len(self._stack):
                print("input values len {0} not equal table columes len {1}".
                      format(len(self._stack), len(table.keys())))
                return
            t_index = 0
            for v in table.keys():
                table[v].append(self._stack[t_index])
                t_index += 1
        print("insert : ", datas)


class Update(object):

    def __init__(self):
        self.values = []
        self.table = None
        self.condition_dict = {}
        self.columns_dict = {}
        self.index_list = None

    def set_table(self, table):
        self.table = table
        return table not in datas

    def add_stack(self, condition, colums):
        self.condition_dict = condition
        self.columns_dict = colums

    def check_dict_key(self, val_dict, table):
        for key in val_dict.keys():
            if key not in table:
                return False
        return True

    def find_keys(self, val_dict, table):
        keys = [key for key in val_dict]
        values = [val_dict[key] for key in val_dict]
        self.index_list = []
        print(keys)
        print(table)
        result_list = []
        if keys:
            first_line = table[keys[0]]
            for i in range(len(first_line)):
                detail_value = []
                for key in keys:
                    detail_value.append(table[key][i])
                result_list.append(detail_value)

        print(values)
        print(result_list)
        for index, v in enumerate(result_list):
            if v == values:
                self.index_list.append(index)

        print(self.index_list)
        return self.index_list

    def action(self):
        table = datas[self.table]
        if not (self.check_dict_key(self.condition_dict, table) and\
                self.check_dict_key(self.columns_dict, table)):
            print(" error found keys ")
            return
        index_list = self.find_keys(self.condition_dict, table)
        if not index_list:
            print(" update condition not found")
            return
        for k in self.columns_dict:
            for index in index_list:
                table[k][index] = self.columns_dict[k]


def p_statement_expr(t):
    '''expressions : expression
                    | expressions expression'''
    if current_action:
        current_action.action()
    reset_action()


def p_expression_start(t):
    '''expression :  exp_select
                    | exp_create
                    | exp_insert
                    | exp_update'''


def p_expression_select(t):
    '''exp_select : SELECT columns FROM COLUMN END
                    | SELECT STAR FROM COLUMN END'''
    print(t[1], t[2])
    global current_action
    current_action = Select()
    if not current_action.set_table(t[4]):
        print("{0} table not exists".format(t[4]))
        return
    if not t[2]:
        current_action.add_stack(stack)


def p_expression_create(t):
    '''exp_create : CREATE TABLE COLUMN LFPARENTH columns RGPARENTH END'''
    print(t[1])
    global current_action
    current_action = Create()
    if not current_action.set_table(t[3]):
        print("{0} table already exists".format(t[3]))
        return
    # 处理参数
    current_action.add_stack(stack)


def p_expression_insert(t):
    '''exp_insert : INSERT INTO COLUMN exp_insert_end'''
    print(t[1])
    global current_action
    current_action = Insert()
    if current_action.set_table(t[3]):
        print("{0} table not exists".format(t[3]))
        reset_action()
        return
    # 处理insert的参数
    current_action.add_stack(stack)


def p_expression_update(t):
    '''exp_update : UPDATE COLUMN SET exp_update_colums WHERE exp_update_condition END'''
    print(t[1])
    global current_action
    current_action = Update()
    if current_action.set_table(t[2]):
        print("{0} table not exists".format(t[2]))
        return
    print(condition_dict, columns_dict)
    current_action.add_stack(condition_dict, columns_dict)


def p_expression_update_columns(t):
    '''exp_update_colums : COLUMN EQUAL COLUMN
                         | COLUMN EQUAL COLUMN COMMA exp_update_colums'''

    columns_dict[t[1]] = t[3]


def p_expression_update_condition(t):
    '''exp_update_condition : COLUMN EQUAL COLUMN
                            | COLUMN EQUAL COLUMN AND exp_update_condition'''
    condition_dict[t[1]] = t[3]


def p_expresssion_insert_end(t):
    '''exp_insert_end : VALUES LFPARENTH columns RGPARENTH END
                      | LFPARENTH columns RGPARENTH VALUES LFPARENTH columns RGPARENTH END'''
    if len(t) == 9:
        stack.is_columns = True


def p_expression_columns(t):
    '''columns : COLUMN
               | COLUMN COMMA columns'''
    stack.append(t[1])


def p_error(p):
    if p:
        print("Syntax error at {0}".format(p.value))
    else:
        print("Syntax error at EOF")

while True:
    data = input("sql>")
    yacc.yacc()
    yacc.parse(data)

运行该示例代码如下;

sql>create table t1 (line1, line2, line3);
create
create :  {'t1': OrderedDict([('line3', []), ('line2', []), ('line1', [])])}
sql>insert into t1 values(1,2,3);
insert
insert :  {'t1': OrderedDict([('line3', ['3']), ('line2', ['2']), ('line1', ['1'])])}
sql>insert into t1 values(1,2,32);
insert
insert :  {'t1': OrderedDict([('line3', ['3', '32']), ('line2', ['2', '2']), ('line1', ['1', '1'])])}
sql>insert into t1 values(1,23, 33);
insert
insert :  {'t1': OrderedDict([('line3', ['3', '32', '33']), ('line2', ['2', '2', '23']), ('line1', ['1', '1', '1'])])}
sql>insert into t1 values(1,2, 43);
insert
insert :  {'t1': OrderedDict([('line3', ['3', '32', '33', '43']), ('line2', ['2', '2', '23', '2']), ('line1', ['1', '1', '1', '1'])])}
sql>update t1 set line1=0, line2=0 where line1=1 and line2=2;
update
{'line2': '2', 'line1': '1'} {'line2': '0', 'line1': '0'}
['line2', 'line1']
OrderedDict([('line3', ['3', '32', '33', '43']), ('line2', ['2', '2', '23', '2']), ('line1', ['1', '1', '1', '1'])])
['2', '1']
[['2', '1'], ['2', '1'], ['23', '1'], ['2', '1']]
[0, 1, 3]
sql>select * from t1;
select *
   line3 = ['3', '32', '33', '43']
   line2 = ['0', '0', '23', '0']
   line1 = ['0', '0', '1', '0']
sql>select line1, line2 from t1;
select None
   line2 = ['0', '0', '23', '0']
   line1 = ['0', '0', '1', '0']
sql>

本文仅仅是支持简单的语法,基本的select 语法也没有支持where条件语句,update的操作支持多条件多行的更改,创建表的语句create也并没有支持字段属性,所有的数据都是存储在python的字典中。

总结

本文只是作为在sql语法与编译器相关的一些基本的原理的知识的梳理,展示的脚本也仅仅是作为演示使用,大家有兴趣可自行学习库。由于本人才疏学浅,如有错误请批评指正。

相关标签: PLY SQL