使用Scala从头实现一个简单的语法分析器组合字库

程序员文章站 2024-03-03 16:23:34

...

这个例子来源于scala圣经级教程《Functional Programming in Scala》，虽然原书的随书代码可以找到这些类的影子，但却没有关于如何使用这些类的示例代码，本人在阅读此书期间，，除了跟着书中的代码敲了一遍之外，还写了一些测试代码进行验证，贴出来作为blog主要是为了方便自己，也为那些同样在阅读此书的人参考。注释不多，本人也不可能写太多，因为这本书不是简单的入门书，而是一本进阶，升华用的内涵书籍，三言两语也解释不清其中的很多细节，如果看不懂，但是又感兴趣的就去看原书吧……

注：原书中给出的代码是不能直接运行成功的，原因是隐式方法 implicit def tok(s: String) = token(P.string(s)) 在JSON.scala 中不生效，所以本人做了一点点改动，让字符串显示转换为Parser类型。

语法分析器组合子的抽象接口：Parsers.scala：

package parsing

import java.util.regex._
import scala.util.matching.Regex
import testing._
import testing.Prop._
import language.higherKinds
import language.implicitConversions

trait Parsers[Parser[+_]] { self => // so inner classes may call methods of trait
  def run[A](p: Parser[A])(input: String): Either[ParseError,A]

  implicit def string(s: String): Parser[String]
  implicit def operators[A](p: Parser[A]) = ParserOps[A](p)
  implicit def asStringParser[A](a: A)(implicit f: A => Parser[String]):
    ParserOps[String] = ParserOps(f(a))

  def char(c: Char): Parser[Char] =
    string(c.toString) map (_.charAt(0))

  /*
   * A default `succeed` implementation in terms of `string` and `map`.
   * We leave `succeed` abstract, since `map` is defined below in terms of
   * `flatMap` and `succeed`, which would be a circular definition! But we include
   * the definition here in case implementations wish to use it
   * (say if they provide a custom implementation of `map`, breaking the cycle)
   */
  def defaultSucceed[A](a: A): Parser[A] =
    string("") map (_ => a)

  def succeed[A](a: A): Parser[A]

  def slice[A](p: Parser[A]): Parser[String]

  def many1[A](p: Parser[A]): Parser[List[A]] =
    map2(p, many(p))(_ :: _)

  def listOfN[A](n: Int, p: Parser[A]): Parser[List[A]] =
    if (n <= 0) succeed(List())
    else map2(p, listOfN(n-1, p))(_ :: _)

  def many[A](p: Parser[A]): Parser[List[A]] =
    map2(p, many(p))(_ :: _) or succeed(List())

  def or[A](p1: Parser[A], p2: => Parser[A]): Parser[A]

  def flatMap[A,B](p: Parser[A])(f: A => Parser[B]): Parser[B]

  implicit def regex(r: Regex): Parser[String]

  /*
  These can be implemented using a for-comprehension,
  which delegates to the `flatMap` and `map` implementations we've provided on `ParserOps`,
  or they can be implemented in terms of these functions directly.
  */
  def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
    flatMap(p)(a => map(p2)(b => (a,b)))

  def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
    for { a <- p; b <- p2 } yield f(a,b)

  def map[A,B](a: Parser[A])(f: A => B): Parser[B] =
    flatMap(a)(f andThen succeed)

  def label[A](msg: String)(p: Parser[A]): Parser[A]

  def scope[A](msg: String)(p: Parser[A]): Parser[A]

  def attempt[A](p: Parser[A]): Parser[A]

  /** Sequences two parsers, ignoring the result of the first.
    * We wrap the ignored half in slice, since we don't care about its result. */
  def skipL[B](p: Parser[Any], p2: => Parser[B]): Parser[B] =
    map2(slice(p), p2)((_,b) => b)

  /** Sequences two parsers, ignoring the result of the second.
    * We wrap the ignored half in slice, since we don't care about its result. */
  def skipR[A](p: Parser[A], p2: => Parser[Any]): Parser[A] =
    map2(p, slice(p2))((a,b) => a)

  def opt[A](p: Parser[A]): Parser[Option[A]] =
    p.map(Some(_)) or succeed(None)

  /** Parser which consumes zero or more whitespace characters. */
  def whitespace: Parser[String] = "\\s*".r

  /** Parser which consumes 1 or more digits. */
  def digits: Parser[String] = "\\d+".r

  /** Parser which consumes reluctantly until it encounters the given string. */
  def thru(s: String): Parser[String] = (".*?"+Pattern.quote(s)).r

  /** Unescaped string literals, like "foo" or "bar". */
  def quoted: Parser[String] = string("\"") *> thru("\"").map(_.dropRight(1))

  /** Unescaped or escaped string literals, like "An \n important \"Quotation\"" or "bar". */
  def escapedQuoted: Parser[String] =
    // rather annoying to write, left as an exercise
    // we'll just use quoted (unescaped literals) for now
    token(quoted label "string literal")

  /** C/Java style floating point literals, e.g .1, -1.0, 1e9, 1E-23, etc.
    * Result is left as a string to keep full precision
    */
  def doubleString: Parser[String] =
    token("[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?".r)

  /** Floating point literals, converted to a `Double`. */
  def double: Parser[Double] =
    doubleString map (_.toDouble) label "double literal"

  /** Attempts `p` and strips trailing whitespace, usually used for the tokens of a grammar. */
  def token[A](p: Parser[A]): Parser[A] =
    attempt(p) <* whitespace

  /** Zero or more repetitions of `p`, separated by `p2`, whose results are ignored. */
  def sep[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] = // use `Parser[Any]` since don't care about result type of separator
    sep1(p,p2) or succeed(List())

  /** One or more repetitions of `p`, separated by `p2`, whose results are ignored. */
  def sep1[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] =
    map2(p, many(p2 *> p))(_ :: _)

  /** Parses a sequence of left-associative binary operators with the same precedence. */
  def opL[A](p: Parser[A])(op: Parser[(A,A) => A]): Parser[A] =
    map2(p, many(op ** p))((h,t) => t.foldLeft(h)((a,b) => b._1(a,b._2)))

  /** Wraps `p` in start/stop delimiters. */
  def surround[A](start: Parser[Any], stop: Parser[Any])(p: => Parser[A]) =
    start *> p <* stop

  /** A parser that succeeds when given empty input. */
  def eof: Parser[String] =
    regex("\\z".r).label("unexpected trailing characters")

  /** The root of the grammar, expects no further input following `p`. */
  def root[A](p: Parser[A]): Parser[A] =
    p <* eof

  case class ParserOps[A](p: Parser[A]) {
    def |[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2) // use `self` to explicitly disambiguate reference to the `or` method on the `trait`
    def or[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2)

    def map[B](f: A => B): Parser[B] = self.map(p)(f)
    def many = self.many(p)

    def slice: Parser[String] = self.slice(p)

    def **[B](p2: => Parser[B]): Parser[(A,B)] =
      self.product(p,p2)
    def product[B](p2: => Parser[B]): Parser[(A,B)] =
      self.product(p,p2)

    def flatMap[B](f: A => Parser[B]): Parser[B] =
      self.flatMap(p)(f)

    def label(msg: String): Parser[A] = self.label(msg)(p)

    def scope(msg: String): Parser[A] = self.scope(msg)(p)

    def *>[B](p2: => Parser[B]) = self.skipL(p, p2)
    def <*(p2: => Parser[Any]) = self.skipR(p, p2)
    def token = self.token(p)
    def sep(separator: Parser[Any]) = self.sep(p, separator)
    def sep1(separator: Parser[Any]) = self.sep1(p, separator)
    def as[B](b: B): Parser[B] = self.map(self.slice(p))(_ => b)
    def opL(op: Parser[(A,A) => A]): Parser[A] = self.opL(p)(op)
  }
  object Laws {
    def equal[A](p1: Parser[A], p2: Parser[A])(in: Gen[String]): Prop =
      forAll(in)(s => run(p1)(s) == run(p2)(s))

    def mapLaw[A](p: Parser[A])(in: Gen[String]): Prop =
      equal(p, p.map(a => a))(in)
  }
}

case class Location(input: String, offset: Int = 0) {

  lazy val line = input.slice(0,offset+1).count(_ == '\n') + 1
  lazy val col = input.slice(0,offset+1).lastIndexOf('\n') match {
    case -1 => offset + 1
    case lineStart => offset - lineStart
  }

  def toError(msg: String): ParseError =
    ParseError(List((this, msg)))

  def advanceBy(n: Int) = copy(offset = offset+n)

  /* Returns the line corresponding to this location */
  def currentLine: String =
    if (input.length > 1) input.lines.drop(line-1).next
    else ""

  def columnCaret = (" " * (col-1)) + "^"
}

case class ParseError(stack: List[(Location,String)] = List()) {
  def push(loc: Location, msg: String): ParseError =
    copy(stack = (loc,msg) :: stack)

  def label[A](s: String): ParseError =
    ParseError(latestLoc.map((_,s)).toList)

  def latest: Option[(Location,String)] =
    stack.lastOption

  def latestLoc: Option[Location] =
    latest map (_._1)

  /**
  Display collapsed error stack - any adjacent stack elements with the
  same location are combined on one line. For the bottommost error, we
  display the full line, with a caret pointing to the column of the error.
  Example:

  1.1 file 'companies.json'; array
  5.1 object
  5.2 key-value
  5.10 ':'

  { "MSFT" ; 24,
  */
  override def toString =
    if (stack.isEmpty) "no error message"
    else {
      val collapsed = collapseStack(stack)
      val context =
        collapsed.lastOption.map("\n\n" + _._1.currentLine).getOrElse("") +
        collapsed.lastOption.map("\n" + _._1.columnCaret).getOrElse("")
      collapsed.map { case (loc,msg) => loc.line.toString + "." + loc.col + " " + msg }.mkString("\n") +
      context
    }

  /* Builds a collapsed version of the given error stack -
   * messages at the same location have their messages merged,
   * separated by semicolons */
  def collapseStack(s: List[(Location,String)]): List[(Location,String)] =
    s.groupBy(_._1).
      mapValues(_.map(_._2).mkString("; ")).
      toList.sortBy(_._1.offset)

  def formatLoc(l: Location): String = l.line + "." + l.col
}

object Parsers {

}

语法分析组合子的具体实现方式1, Reference.scala：


package parsing.instances

import ReferenceTypes._
import scala.util.matching.Regex
import parsing._

object ReferenceTypes {

  /** A parser is a kind of state action that can fail. */
  type Parser[+A] = ParseState => Result[A]

  /** `ParseState` wraps a `Location` and provides some extra
    * convenience functions. The sliceable parsers defined
    * in `Sliceable.scala` add an `isSliced` `Boolean` flag
    * to `ParseState`.
    */
  case class ParseState(loc: Location) {
    def advanceBy(numChars: Int): ParseState =
      copy(loc = loc.copy(offset = loc.offset + numChars))
    def input: String = loc.input.substring(loc.offset)
    def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
  }

  /* Likewise, we define a few helper functions on `Result`. */
  sealed trait Result[+A] {
    def extract: Either[ParseError,A] = this match {
      case Failure(e,_) => Left(e)
      case Success(a,_) => Right(a)
    }
    /* Used by `attempt`. */
    def uncommit: Result[A] = this match {
      case Failure(e,true) => Failure(e,false)
      case _ => this
    }
    /* Used by `flatMap` */
    def addCommit(isCommitted: Boolean): Result[A] = this match {
      case Failure(e,c) => Failure(e, c || isCommitted)
      case _ => this
    }
    /* Used by `scope`, `label`. */
    def mapError(f: ParseError => ParseError): Result[A] = this match {
      case Failure(e,c) => Failure(f(e),c)
      case _ => this
    }
    def advanceSuccess(n: Int): Result[A] = this match {
      case Success(a,m) => Success(a,n+m)
      case _ => this
    }
  }
  case class Success[+A](get: A, length: Int) extends Result[A]
  case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing]

  /** Returns -1 if s1.startsWith(s2), otherwise returns the
    * first index where the two strings differed. If s2 is
    * longer than s1, returns s1.length. */
  def firstNonmatchingIndex(s1: String, s2: String, offset: Int): Int = {
    var i = 0
    while (i < s1.length && i < s2.length) {
      if (s1.charAt(i+offset) != s2.charAt(i)) return i
      i += 1
    }
    if (s1.length-offset >= s2.length) -1
    else s1.length-offset
  }
}

object Reference extends Parsers[Parser] {

  def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
    val s0 = ParseState(Location(s))
    p(s0).extract
  }

  // consume no characters and succeed with the given value
  def succeed[A](a: A): Parser[A] = s => Success(a, 0)

  def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
    s => p(s) match {
      case Failure(e,false) => p2(s)
      case r => r // committed failure or success skips running `p2`
    }

  def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
    s => f(s) match {
      case Success(a,n) => g(a)(s.advanceBy(n))
                           .addCommit(n != 0)
                           .advanceSuccess(n)
      case f@Failure(_,_) => f
    }

  def string(w: String): Parser[String] = {
    val msg = "'" + w + "'"
    s => {
      val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
      if (i == -1) // they matched
        Success(w, w.length)
      else
        Failure(s.loc.advanceBy(i).toError(msg), i != 0)
    }
  }

  /* note, regex matching is 'all-or-nothing':
   * failures are uncommitted */
  def regex(r: Regex): Parser[String] = {
    val msg = "regex " + r
    s => r.findPrefixOf(s.input) match {
      case None => Failure(s.loc.toError(msg), false)
      case Some(m) => Success(m,m.length)
    }
  }

  def scope[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.push(s.loc,msg))

  def label[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.label(msg))

  def fail[A](msg: String): Parser[A] =
    s => Failure(s.loc.toError(msg), true)

  def attempt[A](p: Parser[A]): Parser[A] =
    s => p(s).uncommit

  def slice[A](p: Parser[A]): Parser[String] =
    s => p(s) match {
      case Success(_,n) => Success(s.slice(n),n)
      case f@Failure(_,_) => f
    }

  /* We provide an overridden version of `many` that accumulates
   * the list of results using a monolithic loop. This avoids
   * stack overflow errors for most grammars.
   */
  override def many[A](p: Parser[A]): Parser[List[A]] =
    s => {
      var nConsumed: Int = 0
      val buf = new collection.mutable.ListBuffer[A]
      def go(p: Parser[A], offset: Int): Result[List[A]] = {
        p(s.advanceBy(offset)) match {
          case Success(a,n) => buf += a; go(p, offset+n)
          case f@Failure(e,true) => f
          case Failure(e,_) => Success(buf.toList,offset)
        }
      }
      go(p, 0)
    }
}

语法分析组合子的具体实现方式2, Sliceable.scala：


package parsing.instances

import SliceableTypes._
import scala.util.matching.Regex
import parsing._

/*
This implementation is a bit trickier than the one in `Reference.scala`.
The main change is to add another piece of state to `ParseState`,
an `isSliced` flag, and an additional `Slice` constructor to `Result`.
If the `isSliced` flag is set, parsers avoid building a meaningful
result--see in particular the overridden implementations for `map`,
`map2`, and `many`.

This implementation runs up against some limitations of Scala's
type system--Scala does not appropriately refine type parameters when
pattern matching. Keep reading for more details on this.
*/
object SliceableTypes {

  /* A parser is a kind of state action that can fail.
   * This type is slightly fancier than the one discussed in the chapter,
   * to support efficient slicing. If the parser is surrounded by
   * a `slice` combinator, the `isSliced` field of `ParseState` will
   * be `true`, and we return a `Slice` output.
   */
  type Parser[+A] = ParseState => Result[A]

  /** `isSliced` indicates if the current parser is surround by a
    * `slice` combinator. This lets us avoid building up values that
    * will end up getting thrown away.
    *
    * There are several convenience functions on `ParseState` to make
    * implementing some of the combinators easier.
    */
  case class ParseState(loc: Location, isSliced: Boolean) {
    // some convenience functions
    def advanceBy(numChars: Int): ParseState =
      copy(loc = loc.copy(offset = loc.offset + numChars))
    def input: String = loc.input.substring(loc.offset)
    def unslice = copy(isSliced = false)
    def reslice(s: ParseState) = copy(isSliced = s.isSliced)
    def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
  }

  /** The result of a parse--a `Parser[A]` returns a `Result[A]`.
    *
    * There are three cases:
    *   - Success(a,n): a is the value, n is # of consumed characters
    *   - Slice(n): a successful slice; n is the # of consumed characters
    *   - Failure(n,isCommitted): a failing parse
    *
    * As usual, we define some helper functions on `Result`.
    * Defining functions on `Result` gives us better type
    * information--there are cases (see `map` and `map2` below) where
    * Scala will not appropriately refine type information when
    * pattern matching on `Result`.
    */
  sealed trait Result[+A] {
    def extract(input: String): Either[ParseError,A]
    def slice: Result[String]
    /* Used by `attempt`. */
    def uncommit: Result[A] = this match {
      case Failure(e,true) => Failure(e,false)
      case _ => this
    }
    /* Used by `flatMap` */
    def addCommit(isCommitted: Boolean): Result[A] = this match {
      case Failure(e,c) => Failure(e, c || isCommitted)
      case _ => this
    }
    /* Used by `scope`, `label`. */
    def mapError(f: ParseError => ParseError): Result[A] = this match {
      case Failure(e,c) => Failure(f(e),c)
      case _ => this
    }
    def advanceSuccess(n: Int): Result[A]
  }
  case class Slice(length: Int) extends Result[String] {
    def extract(s: String) = Right(s.substring(0,length))
    def slice = this
    def advanceSuccess(n: Int) = Slice(length+n)
  }
  case class Success[+A](get: A, length: Int) extends Result[A] {
    def extract(s: String) = Right(get)
    def slice = Slice(length)
    def advanceSuccess(n: Int) = Success(get, length+n)
  }
  case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing] {
    def extract(s: String) = Left(get)
    def slice = this
    def advanceSuccess(n: Int) = this
  }

  /** Returns -1 if s.startsWith(s2), otherwise returns the
    * first index where the two strings differed. If s2 is
    * longer than s1, returns s.length. */
  def firstNonmatchingIndex(s: String, s2: String, offset: Int): Int = {
    var i = 0
    while (i+offset < s.length && i < s2.length) {
      if (s.charAt(i+offset) != s2.charAt(i)) return i
      i += 1
    }
    if (s.length-offset >= s2.length) -1
    else s.length-offset
  }
}

object Sliceable extends Parsers[Parser] {

  def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
    val s0 = ParseState(Location(s), false)
    p(s0).extract(s)
  }

  // consume no characters and succeed with the given value
  def succeed[A](a: A): Parser[A] = s => Success(a, 0)

  def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
    s => p(s) match {
      case Failure(e,false) => p2(s)
      case r => r // committed failure or success skips running `p2`
    }

  /*
   * `Result` is an example of a Generalized Algebraic Data Type (GADT),
   * which means that not all the data constructors of `Result` have
   * the same type. In particular, `Slice` _refines_ the `A` type
   * parameter to be `String`. If we pattern match on a `Result`
   * and obtain a `Slice`, we expect to be able to assume that `A` was
   * in fact `String` and use this type information elsewhere.
   *
   * Unfortunately, Scala doesn't quite support this. Let's look
   * at an example, `map`.
   */

  /* Pattern matching on Slice should refine the type `A` to `String`,
   * and allow us to call `f(s.slice(n))`, since `f` accepts an
   * `A` which is known to be `String`. We resort to a cast here.
   */
  override def map[A,B](p: Parser[A])(f: A => B): Parser[B] =
    s => p(s) match {
      case Success(a,n) => Success(f(a),n)
      case Slice(n) => Success(f(s.slice(n).asInstanceOf[A]),n)
      case f@Failure(_,_) => f
    }

  /* See this gist for more information, examples, and discussion
   * of Scala's GADT support:
   * https://gist.github.com/1369239
   */

  /* This implementation is rather delicate. Since we need an `A`
   * to generate the second parser, we need to run the first parser
   * 'unsliced', even if the `flatMap` is wrapped in a `slice` call.
   * Once we have the `A` and have generated the second parser to
   * run, we can 'reslice' the second parser.
   *
   * Note that this implementation is less efficient than it could
   * be in the case where the choice of the second parser does not
   * depend on the first (as in `map2`). In that case, we could
   * continue to run the first parser sliced.
   *
   * Again, note the cast needed.
   */
  def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
    s => f(s.unslice) match {
      case Success(a,n) =>
        g(a)(s.advanceBy(n).reslice(s))
        .addCommit(n != 0)
        .advanceSuccess(n)
      case Slice(n) => g(s.slice(n).asInstanceOf[A])(s.advanceBy(n).reslice(s))
                       .advanceSuccess(n)
      case f@Failure(_,_) => f
    }

  // other functions are quite similar to impls in `Reference.scala`

  def string(w: String): Parser[String] = {
    val msg = "'" + w + "'"
    s => {
      val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
      if (i == -1) { // they matched
        if (s.isSliced) Slice(w.length)
        else            Success(w, w.length)
      }
      else
        Failure(s.loc.advanceBy(i).toError(msg), i != 0)
    }
  }

  // note, regex matching is 'all-or-nothing' - failures are
  // uncommitted
  def regex(r: Regex): Parser[String] = {
    val msg = "regex " + r
    s => r.findPrefixOf(s.input) match {
      case None => Failure(s.loc.toError(msg), false)
      case Some(m) =>
        if (s.isSliced) Slice(m.length)
        else            Success(m,m.length)
    }
  }

  def scope[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.push(s.loc,msg))

  def label[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.label(msg))

  def fail[A](msg: String): Parser[A] =
    s => Failure(s.loc.toError(msg), true)

  def attempt[A](p: Parser[A]): Parser[A] =
    s => p(s).uncommit

  def slice[A](p: Parser[A]): Parser[String] =
    s => p(s.copy(isSliced = true)).slice

  /* As with `map`, we require casts in a few places. */
  override def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
    s => p(s) match {
      case Success(a,n) => val s2 = s.advanceBy(n); p2(s2) match {
        case Success(b,m) => Success(f(a,b),n+m)
        case Slice(m) => Success(f(a,s2.slice(m).asInstanceOf[B]), n+m)
        case f@Failure(_,_) => f
      }
      case Slice(n) => val s2 = s.advanceBy(n); p2(s2) match {
        case Success(b,m) => Success(f(s.slice(n).asInstanceOf[A],b),n+m)
        case Slice(m) =>
          if (s.isSliced) Slice(n+m).asInstanceOf[Result[C]]
          else Success(f(s.slice(n).asInstanceOf[A],s2.slice(m).asInstanceOf[B]), n+m)
        case f@Failure(_,_) => f
      }
      case f@Failure(_,_) => f
    }

  override def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
    map2(p,p2)((_,_))

  /* We provide an overridden version of `many` that accumulates
   * the list of results using a monolithic loop. This avoids
   * stack overflow errors.
   */
  override def many[A](p: Parser[A]): Parser[List[A]] =
    s => {
      var nConsumed: Int = 0
      if (s.isSliced) {
        def go(p: Parser[String], offset: Int): Result[String] =
          p(s.advanceBy(offset)) match {
            case f@Failure(e,true) => f
            case Failure(e,_) => Slice(offset)
            case Slice(n) => go(p, offset+n)
            case Success(_,_) => sys.error("sliced parser should not return success, only slice")
          }
        go(p.slice, 0).asInstanceOf[Result[List[A]]]
      }
      else {
        val buf = new collection.mutable.ListBuffer[A]
        def go(p: Parser[A], offset: Int): Result[List[A]] = {
          p(s.advanceBy(offset)) match {
            case Success(a,n) => buf += a; go(p, offset+n)
            case f@Failure(e,true) => f
            case Failure(e,_) => Success(buf.toList,offset)
            case Slice(n) =>
              buf += s.input.substring(offset,offset+n).
                     asInstanceOf[A]
              go(p, offset+n)
          }
        }
        go(p, 0)
      }
    }
}

JSON解析器的具体实现, JSON.scala：

package parsing

import language.higherKinds
import language.implicitConversions

trait JSON

object JSON {

  case object JNull extends JSON
  case class JNumber(get: Double) extends JSON
  case class JString(get: String) extends JSON
  case class JBool(get: Boolean) extends JSON
  case class JArray(get: IndexedSeq[JSON]) extends JSON
  case class JObject(get: Map[String, JSON]) extends JSON

  def jsonParser[Parser[+_]](P: Parsers[Parser]): Parser[JSON] = {
    // we'll hide the string implicit conversion and promote strings to tokens instead
    // this is a bit nicer than having to write token everywhere

    //import P.{string => _, _}
    import P._
    //implicit def tok(s: String) = token(P.string(s))

    def tok(s: String) = P.token(P.string(s))

    def array = surround( tok("["), tok("]") )(
      value sep tok(",") map (vs => JArray(vs.toIndexedSeq)) ) scope "array"

    def obj = surround( tok("{"), tok("}") )(
      keyval sep tok(",") map (kvs => JObject(kvs.toMap)) ) scope "object"

    def keyval = escapedQuoted ** (tok(":") *> value)

    def lit = scope("literal") {
      tok("null").as(JNull) | double.map(JNumber(_)) |
        escapedQuoted.map(JString(_)) | tok("true").as(JBool(true)) | tok("false").as(JBool(false))
    }

    def value: Parser[JSON] = lit | obj | array

    root(whitespace *> (obj | array))
  }

}

示例代码用到的工具类CommonUtils.scala:


package parsing

object CommonUtils {


  def PrintDivision = {
    println();
    println("------------------------------------------------------------------");
    println()
  }


  def printResult[E](e: Either[E,JSON]) = e.fold(println, println)

}

示例代码用到的JSON数据,JsonTxt.scala：


package parsing

object JsonTxt {

  val jsonTxt = """
{
  "Company name" : "Microsoft Corporation",
  "Ticker"  : "MSFT",
  "Active"  : true,
  "Price"   : 30.66,
  "Shares outstanding" : 8.38e9,
  "Related companies" : [ "HPQ", "IBM", "YHOO", "DELL", "GOOG" ]
}
"""

  val malformedJson1 = """
{
  "Company name" ; "Microsoft Corporation"
}
"""

  val malformedJson2 = """
[
  [ "HPQ", "IBM",
  "YHOO", "DELL" ++
  "GOOG"
  ]
]
"""


}

Reference类的测试代码，ReferenceTest.scala：

package parsing

import parsing.instances.{Reference, ReferenceTypes}
import parsing.instances.ReferenceTypes.ParseState
import parsing.CommonUtils._

object ReferenceTest {


  def main(args: Array[String]): Unit = {

    val stringPar = Reference.string("aaa")

    //验证字符串aaabbb从索引位置2开始是不是紧跟着aaa
    val res = stringPar(ParseState(Location("aaabbb", 2)))
    println(res)

    PrintDivision

    val res1= stringPar(ParseState(Location("aaaaaa", 2)))
    println(res1)

    PrintDivision

    val res2= stringPar(ParseState(Location("aaa", 0)))
    println(res2)

    PrintDivision

    val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
    val dateRegexPar = Reference.regex(date)

    //验证字符串dateStr1从索引位置0开始的内容是否匹配日期格式
    val dateStr1 = "2018-05-06xxxx"
    val regexRes = dateRegexPar(ParseState(Location(dateStr1, 0)))
    println(regexRes)

    PrintDivision

    val dateStr2 = "2018x-05-06xxxx"
    val regexRes2 = dateRegexPar(ParseState(Location(dateStr2, 0)))
    println(regexRes2)

    PrintDivision


    //解析器如果报错，scope组合子可以在报错信息中追加指定的错误信息，
    // 这里追加的错误信息是： " added error msg "
    val scopePar = Reference.scope(" added error msg ")(dateRegexPar)
    val scopeRes = scopePar(ParseState(Location(dateStr2, 0)))
    println(scopeRes)

    PrintDivision


    //将最后一个错误信息替换为指定的内容，这里替换为： " label msg "
    val labelPar = Reference.label(" label msg ")(dateRegexPar)
    val labelRes = labelPar(ParseState(Location(dateStr2, 0)))
    println(labelRes)

    PrintDivision


    //无条件报错，构造错误提示时候，将错误信息设置为： " report msg abruptly "
    val failPar = Reference.fail[String](" report msg abruptly ")
    val failRes = failPar(ParseState(Location(dateStr2, 0)))
    println(failRes)

    PrintDivision


    //将错误信息中的 isCommitted字段设置为false
    val attemptPar = Reference.attempt(dateRegexPar)
    val attemptRes = attemptPar(ParseState(Location(dateStr2, 0)))
    println(attemptRes)

    PrintDivision


    //将解析正确的部分抽离出来
    val slicePar = Reference.slice(dateRegexPar)
    val sliceRes = slicePar(ParseState(Location(dateStr1, 0)))
    println(sliceRes)

    PrintDivision


    val matchIndex = ReferenceTypes.firstNonmatchingIndex("aaabbb", "aaa", 2)
    println(matchIndex)

    PrintDivision

    //解析并忽略引号
    val quotedPar = Reference.quoted
    val quotedRRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
    println(quotedRRes)

    PrintDivision


    val escapedPar = Reference.escapedQuoted
    val escapedRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
    println(escapedRes)

  }

}

采用Reference作为具体实现解析json的例子, JsonExampleWithReference.scala：

package parsing;


/**
  * JSON parsing example with Reference implementation
  */
object JsonExampleWithReference  extends  App {


  import parsing.instances.ReferenceTypes.Parser
  val P = parsing.instances.Reference
  val json: Parser[JSON] = JSON.jsonParser(P)

  import parsing.JsonTxt._
  import parsing.CommonUtils._

  PrintDivision
  printResult { P.run(json)(jsonTxt) }

  PrintDivision
  printResult { P.run(json)(malformedJson1) }

  PrintDivision
  printResult { P.run(json)(malformedJson2) }

}

采用Sliceable作为具体实现解析json的例子, JsonExampleWithSliceable.scala：

package parsing


object JsonExampleWithSliceable extends App {


  import parsing.instances.SliceableTypes.Parser

  val P = parsing.instances.Sliceable

  val json: Parser[JSON] = JSON.jsonParser(P)

  import parsing.JsonTxt._
  import parsing.CommonUtils._

  PrintDivision
  printResult { P.run(json)(jsonTxt) }

  PrintDivision
  printResult { P.run(json)(malformedJson1) }

  PrintDivision
  printResult { P.run(json)(malformedJson2) }

}

由于代码太长，示例运行结果就不贴出来了，读者可以拷贝上述代码进行运行，上述代码的代码组织结构如下所示：


├─parsing
│  │  CommonUtils.scala
│  │  JSON.scala
│  │  JsonExampleWithReference.scala
│  │  JsonExampleWithSliceable.scala
│  │  JsonTxt.scala
│  │  Parsers.scala
│  │  ReferenceTest.scala
│  │
│  └─instances
│          Reference.scala
│          Sliceable.scala

879675643@qq.com  lhever

.---.                                                                         
|   |   .              __.....__   .----.     .----.   __.....__              
|   | .'|          .-''         '.  \    \   /    /.-''         '.            
|   |<  |         /     .-''"'-.  `. '   '. /'   //     .-''"'-.  `. .-,.--.  
|   | | |        /     /________\   \|    |'    //     /________\   \|  .-. | 
|   | | | .'''-. |                  ||    ||    ||                  || |  | | 
|   | | |/.'''. \\    .-------------''.   `'   .'\    .-------------'| |  | | 
|   | |  /    | | \    '-.____...---. \        /  \    '-.____...---.| |  '-  
|   | | |     | |  `.             .'   \      /    `.             .' | |      
'---' | |     | |    `''-...... -'      '----'       `''-...... -'   | |      
      | '.    | '.                                                   |_|      
      '---'   '---'

相关标签： Scala json 语法分析组合子

上一篇： SpringBoot和Swagger结合提高API开发效率

下一篇： Android实现千变万化的ViewPager切换动画