欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  php教程

分享我写的PHP文法分析的源码

程序员文章站 2024-01-06 17:05:28
...
最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。

下载地址 http://download.csdn.net/detail/xuzuning/4529066


PHP code
		include 'ttrie.php';

class Rule extends TTrie {
  public $rule = array();
  public $savematch = 0;

  function __construct($s='') {
    $this->set( array(
        ' ' => 'Separated',
        "\r\n" => 'set_rule',
        "\n" => 'set_rule',
        "\t" => 'Separated',
        '->' => 'Separated',
        '→' => 'Separated',
        '' => 'set_parallel_rule',
        ));
    $this->match($s);
    if($this->rule[0][0] == $this->rule[0][1]) {
        if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'";
        else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }else {
        $c = $this->rule[0][0];
        $n = 0;
        foreach($this->rule as $r) if($r[0] == $c) $n++;
        if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }
  }
  function Separated() {
  }
  function set_rule() {
    $this->rule[] = $this->buffer;
    $this->buffer = array();
  }
  function set_parallel_rule() {
    $t = $this->buffer[0];
    $this->set_rule();
    $this->buffer[] = $t;
  }
}


class Grammar {
  var $closure = array();
  var $first = array();
  var $follow = array();
  var $rule = array();
  var $identifier = array();
  var $leay = array();
  var $forecast = array();
  var $stack = array();
  var $ll = 'LL(0)';
  var $lr = 'LR(0)';

  function __construct($s='') {
    $p = new Rule($s);
    $this->rule = $p->rule;
    $this->set_grammar();
  }

  function set_grammar() {
    foreach($this->rule as $rule) {
        if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0];
    }
    foreach($this->rule as $rule) {
        foreach($rule as $v)
            if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay))
                $this->leay[] = $v;
    }
    $this->set_first();
    $this->set_follow();
    $this->set_closure();
    $this->set_select();
    $this->set_forecast();
  }
  function set_first() {
    foreach($this->rule as $rule) $this->first[$rule[0]] = array();
    //直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中
    foreach($this->rule as $v) {
        if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1];
    }
    //反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε
    do {
        $t = serialize($this->first);
        foreach($this->rule as $rule) {
            for($i=1; $iidentifier)) {
                    $this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v]));
                    if(! in_array('#', $this->first[$v])) break;
                }else break;
            }
        }
    }while($t != serialize($this->first));
  }
  function set_follow() {
    foreach($this->rule as $rule) $this->follow[$rule[0]] = array();
    //直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $iidentifier) && in_array($rule[$i+1], $this->leay))
                $this->follow[$rule[$i]][] = $rule[$i+1];
        }
        if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#';
    }
    foreach($this->follow as &$v) if(! $v) $v[] = '#';
    //直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $iidentifier) && in_array($rule[$i+1], $this->identifier)) {
                $this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#'))));
            }
        }
    }
    //反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中
    do {
        $t = serialize($this->follow);
        foreach($this->rule as $rule) {
            $s = $rule[0];
            $d = end($rule);
            if(in_array($d, $this->leay)) continue;
            $p = prev($rule);
            if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s]));
            elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s]));
        }
    }while($t != serialize($this->follow));
  }
  function set_closure() {
    $shift = array();
    $this->closure[0][] = array('offs' => 1, 'rule' => 0);
    for($i=0 ; $i closure); $i++) {
        $cnt = count($this->closure);
        //构造闭包 closure
        $ex = array();
        $j = 0;
        $tmp = array();
        do {
            $size = count($this->closure[$i]);
            for($j=0; $jclosure[$i]); $j++) {
                $dfa = $this->closure[$i][$j];
                $rule = $this->rule[$dfa['rule']];
                if(isset($rule[$dfa['offs']])) {
                    $ch = $ex[] = $rule[$dfa['offs']];
                }
                foreach($this->rule as $r=>$rule) {
                    if(in_array($rule[0], $ex)) {
                        $t = array('offs' => 1, 'rule' => $r);
                        if(!isset($tmp[$r][1])) $this->closure[$i][] = $t;
                        $tmp[$r][1] = 1;
                    }
                }
            }
        }while(count($this->closure[$i]) != $size); //直到不再增大

        //判断状态转向 go
        $out = array();
        foreach($this->closure[$i] as $k=>$dfa) {
            $rule = $this->rule[$dfa['rule']];
            if(isset($rule[$dfa['offs']])) {
                $t = "$dfa[rule],$dfa[offs]";
                $ch = $rule[$dfa['offs']];
                $this->closure[$i][$k]['char'] = $ch;
                if(isset($out[$ch])) $shift[$t] = $out[$ch];
                if(isset($shift[$t])) {
                    $this->closure[$i][$k]['target'] = $shift[$t];
                    $dfa['offs']++;
                    if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa;

                } else {
                    $cnt = count($this->closure);
                    $this->closure[$i][$k]['target'] = $cnt;
                    $shift[$t] = $cnt;
                    $dfa['offs']++;
                    $this->closure[count($this->closure)][] = $dfa;
                    $out[$ch] = $cnt;
                }
            }
        }

        //构造状态转换表
        foreach($this->closure[$i] as $k=>$dfa) {
            if(isset($dfa['target'])) {
                $v = $dfa['char'];
                if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target'];
                else {
                    $this->action[$i][$v][] = "S$dfa[target]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            } else {
                $ch = $this->rule[$dfa['rule']][0];
                foreach($this->follow[$ch] as $v) {
                    $this->action[$i][$v][] = "r$dfa[rule]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            }
        }
        foreach($this->action[$i] as $c=>$v) {
            $v = array_unique($v);
            if(count($v) > 1) $this->lr = 'SLR(1)';
            $this->action[$i][$c] = $v;
        }
    }
  }

  function in_closure($t, $s) {
    foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true;
    return false;
    return in_array(serialize($t), array_map('serialize', $s));
  }
  function set_select() {
    foreach($this->rule as $i=>$rule) {
        $y = array($rule[1]);
        if(in_array($y[0], $this->leay)) {
            if($y[0] != '#') {
                $this->select[$i] = $y;
                continue;
            }
        }else $y = $this->first[$rule[1]];
        $x = $this->follow[$rule[0]];
        //SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X)
        $this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) );
    }
  }
  /**
   * 构造预测分析表
   **/
  function set_forecast() {
    foreach($this->select as $i=>$r) {
        $c = $this->rule[$i][0];
        $v = array_reverse(array_slice($this->rule[$i], 1));
        foreach($r as $k) {
            $this->forecast[$c][$k][] = $v;
        }
    }
    //检查冲突
    foreach($this->forecast as $c=>$r) {
        foreach($r as $k) {
            if(count($k) > 1) {
                $this->ll = 'LL(1)';
            }
        }
    }
  }

PHP code
		

			function ll_start($s) {
    $t = array();
    foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule;
    if($t) {
        foreach($t as $rule) printf('%s 存在左递归', preg_replace('/ /', ' → ', join(' ', $rule), 1));
        return;
    }
    $stack = array('#', key($this->forecast));
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($stack && $i forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0]));
        else $msg = '错误';
        printf("
%d
%s
%s
%s
", $step++, substr(join('', $stack), -50), substr($s, $i), $msg);
        if($r == $s{$i}) {
            array_pop($stack);
            $i++;
        }elseif(isset($this->forecast[$r][$s{$i}])) {
            array_pop($stack);
            if(current($this->forecast[$r][$s{$i}][0]) != '#')
                $stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]);
        }else break;
    }
  }
  function lr_start($s) {
    $State = array(0); //状态栈
    $Symbol = array('#'); //符号栈
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($i action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc';
        if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1);
        else $request = 'error';
        printf("
%d
%s
%s
%s
%s
", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request);

        if(isset($this->action[$sp][$ch])  isset($this->goto[$sp][$ch])) {
            $t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch];
            $n = substr($t, 1) + 0;
            if($t{0} == 'r') {
                for($j=0; $jrule[$n])-1; $j++) {
                    array_pop($State);
                    array_pop($Symbol);
                }
                if($n == 0) break;
                $c = $Symbol[] = $this->rule[$n][0];
                $State[] = $this->goto[end($State)][$c];
            }elseif($t{0} == 'S') {
                $State[] = $n;
                $Symbol[] = $ch;
                $i++;
            }else ;
        }else break;
    }
  }
  function report($in='') {
    if($in) $in = trim($in, '#') . '#';
    echo '';

    echo '';
    foreach($this->rule as $rule) {
        echo '';
    }
    echo '
文法
'; echo preg_replace('/ /', ' → ', join(' ', $rule), 1); echo '
'; $identifier = $this->identifier; echo ''; echo ''; echo '
标识符' . join(' ', $identifier) . '
'; $leay = array_diff($this->leay, array('#')); $leay[] = '#'; echo ''; echo ''; echo '
终结符' . join(' ', $leay) . '
'; echo ''; foreach($identifier as $ch) { echo ''; echo ''; echo ''; echo ''; echo ''; } echo '
标识符 推出空 FIRST集 FOLLOW集
' . $ch . '' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . '' . join(' ', $this->first[$ch]) . '' . join(' ', $this->follow[$ch]) . '
'; echo '
' . $this->ll . '文法分析
'; echo ''; foreach($this->rule as $i=>$rule) { echo ''; echo ''; echo ''; echo ''; } echo '
产生式 Select集
' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . '' . join(' ', $this->select[$i]) . '
'; $forecast = $this->forecast; echo ''; echo ''; echo '
预测分析表
'; echo ''; foreach($identifier as $ch) { echo ''; foreach($leay as $v) { $s = ''; if(isset($forecast[$ch][$v])) { foreach($forecast[$ch][$v] as $t) { if($s) $s .= '
'; $s .= $ch . ' → '. join(' ', array_reverse($t)); } if(count($forecast[$ch][$v]) > 1) $s = "$s"; }else $s .= ' '; echo ''; } echo ''; } echo '
' . join(' ', $leay) . '
' . $ch . '' . $s . '
'; if($in) { echo ''; echo '
测试字符串' . trim($in, '#') . '
'; echo ''; echo ''; $this->ll_start($in); echo '
步骤 分析栈 剩余字符 生成式或匹配
'; } echo ''; echo '
' . $this->lr . '文法分析
'; echo '
状态转移表
'; echo ''; echo ''; echo ''; foreach($this->action as $i=>$item) { echo ""; foreach($leay as $v) { $s = isset($item[$v]) ? join(',', $item[$v]) : ' '; if(strpos($s, ',')) $s = "$s"; echo ""; } foreach($identifier as $v) { $s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' '; echo ""; } echo ''; echo ''; } echo '
状态 Action Goto DFA
' . join(' ', $leay) . ' '. join(' ', $identifier) . '
$i$s$s' . $this->showDFA($i) .'
'; if($in) { echo ''; echo '
测试字符串' . trim($in, '#') . '
'; echo ''; echo ''; $this->lr_start($in); echo '
步骤 状态栈 符号栈 剩余字符 生成式
'; } } function showDFA($i) { $res = array(); foreach($this->closure[$i] as $dfa) { $rule = $this->rule[$dfa['rule']]; array_splice($rule, $dfa['offs'], 0, '·'); array_splice($rule, 1, 0, '→'); if(isset($dfa['target'])) $rule[] = " [$dfa[target]]"; $res[] = join('', $rule); } return join('; ', $res); } } for($i=1; $i$i"; $n = current($_GET) or $n = 1; $S = ''; include "$n.txt"; $p = new grammar($G); $p->report($S);

ttrie.php

PHP code
		$v) $this->set($k, $v);
        return;
    }
    $p = count($this->dict);
    $cur = 0; //当前节点号
    foreach(str_split($word) as $c) {
        if (isset($this->dict[$cur][$c])) { //已存在就下移
            $cur = $this->dict[$cur][$c];
            continue;
        }
        $this->dict[$p]= array(); //创建新节点
        $this->dict[$cur][$c] = $p; //在父节点记录子节点号
        $cur = $p; //把当前节点设为新插入的
        $p++;
    }
    $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点
  }

  function match($s) {
    $ret = array();
    $cur = 0; //当前节点,初始为根节点
    $i =& $this->input; //字符串当前偏移
    $p =& $this->backtracking; //字符串回溯位置
    $s .= "\0"; //附加结束符
    $len = strlen($s);
    $buf = '';
    while($i dict[$cur][$c])) { //如果存在
            $cur = $this->dict[$cur][$c]; //转到对应的位置
            if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先
                $i++;
                continue;
            }
            if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配!
                if($buf) {
                    $this->buffer[] = $buf;
                    $buf = '';
                }
                if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词

                $ar = explode(',', $this->dict[$cur]['acc']);
                call_user_func_array( array($this, array_shift($ar)), $ar );

                $p = $i + 1; //设置下一个回溯位置
                $cur = 0; //重置当前节点为根节点
            }
        } else { //不匹配
            $buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容
            $cur = 0; //重置当前节点为根节点
            $i = $p; //把当前偏移设为回溯位置
            $p = $i + 1; //设置下一个回溯位置
        }
        $i++; //下一个字符
    }
    if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0");
  }

  function __call($method, $param) {
    if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking);

  }
}