欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

使用Scala从头实现一个简单的语法分析器组合字库

程序员文章站 2024-03-03 16:23:34
...

这个例子来源于scala圣经级教程《Functional Programming in Scala》,虽然原书的随书代码可以找到这些类的影子,但却没有关于如何使用这些类的示例代码,本人在阅读此书期间, ,除了跟着书中的代码敲了一遍之外,还写了一些测试代码进行验证,贴出来作为blog主要是为了方便自己,也为那些同样在阅读此书的人参考。注释不多,本人也不可能写太多,因为这本书不是简单的入门书,而是一本进阶,升华用的内涵书籍,三言两语也解释不清其中的很多细节,如果看不懂,但是又感兴趣的就去看原书吧……


注:原书中给出的代码是不能直接运行成功的,原因是隐式方法 implicit def tok(s: String) = token(P.string(s)) 在JSON.scala 中不生效, 所以本人做了一点点改动,让字符串显示转换为Parser类型。

语法分析器组合子的抽象接口:Parsers.scala:

package parsing

import java.util.regex._
import scala.util.matching.Regex
import testing._
import testing.Prop._
import language.higherKinds
import language.implicitConversions

trait Parsers[Parser[+_]] { self => // so inner classes may call methods of trait
  def run[A](p: Parser[A])(input: String): Either[ParseError,A]

  implicit def string(s: String): Parser[String]
  implicit def operators[A](p: Parser[A]) = ParserOps[A](p)
  implicit def asStringParser[A](a: A)(implicit f: A => Parser[String]):
    ParserOps[String] = ParserOps(f(a))

  def char(c: Char): Parser[Char] =
    string(c.toString) map (_.charAt(0))

  /*
   * A default `succeed` implementation in terms of `string` and `map`.
   * We leave `succeed` abstract, since `map` is defined below in terms of
   * `flatMap` and `succeed`, which would be a circular definition! But we include
   * the definition here in case implementations wish to use it
   * (say if they provide a custom implementation of `map`, breaking the cycle)
   */
  def defaultSucceed[A](a: A): Parser[A] =
    string("") map (_ => a)

  def succeed[A](a: A): Parser[A]

  def slice[A](p: Parser[A]): Parser[String]

  def many1[A](p: Parser[A]): Parser[List[A]] =
    map2(p, many(p))(_ :: _)

  def listOfN[A](n: Int, p: Parser[A]): Parser[List[A]] =
    if (n <= 0) succeed(List())
    else map2(p, listOfN(n-1, p))(_ :: _)

  def many[A](p: Parser[A]): Parser[List[A]] =
    map2(p, many(p))(_ :: _) or succeed(List())

  def or[A](p1: Parser[A], p2: => Parser[A]): Parser[A]

  def flatMap[A,B](p: Parser[A])(f: A => Parser[B]): Parser[B]

  implicit def regex(r: Regex): Parser[String]

  /*
  These can be implemented using a for-comprehension,
  which delegates to the `flatMap` and `map` implementations we've provided on `ParserOps`,
  or they can be implemented in terms of these functions directly.
  */
  def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
    flatMap(p)(a => map(p2)(b => (a,b)))

  def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
    for { a <- p; b <- p2 } yield f(a,b)

  def map[A,B](a: Parser[A])(f: A => B): Parser[B] =
    flatMap(a)(f andThen succeed)

  def label[A](msg: String)(p: Parser[A]): Parser[A]

  def scope[A](msg: String)(p: Parser[A]): Parser[A]

  def attempt[A](p: Parser[A]): Parser[A]

  /** Sequences two parsers, ignoring the result of the first.
    * We wrap the ignored half in slice, since we don't care about its result. */
  def skipL[B](p: Parser[Any], p2: => Parser[B]): Parser[B] =
    map2(slice(p), p2)((_,b) => b)

  /** Sequences two parsers, ignoring the result of the second.
    * We wrap the ignored half in slice, since we don't care about its result. */
  def skipR[A](p: Parser[A], p2: => Parser[Any]): Parser[A] =
    map2(p, slice(p2))((a,b) => a)

  def opt[A](p: Parser[A]): Parser[Option[A]] =
    p.map(Some(_)) or succeed(None)

  /** Parser which consumes zero or more whitespace characters. */
  def whitespace: Parser[String] = "\\s*".r

  /** Parser which consumes 1 or more digits. */
  def digits: Parser[String] = "\\d+".r

  /** Parser which consumes reluctantly until it encounters the given string. */
  def thru(s: String): Parser[String] = (".*?"+Pattern.quote(s)).r

  /** Unescaped string literals, like "foo" or "bar". */
  def quoted: Parser[String] = string("\"") *> thru("\"").map(_.dropRight(1))

  /** Unescaped or escaped string literals, like "An \n important \"Quotation\"" or "bar". */
  def escapedQuoted: Parser[String] =
    // rather annoying to write, left as an exercise
    // we'll just use quoted (unescaped literals) for now
    token(quoted label "string literal")

  /** C/Java style floating point literals, e.g .1, -1.0, 1e9, 1E-23, etc.
    * Result is left as a string to keep full precision
    */
  def doubleString: Parser[String] =
    token("[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?".r)

  /** Floating point literals, converted to a `Double`. */
  def double: Parser[Double] =
    doubleString map (_.toDouble) label "double literal"

  /** Attempts `p` and strips trailing whitespace, usually used for the tokens of a grammar. */
  def token[A](p: Parser[A]): Parser[A] =
    attempt(p) <* whitespace

  /** Zero or more repetitions of `p`, separated by `p2`, whose results are ignored. */
  def sep[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] = // use `Parser[Any]` since don't care about result type of separator
    sep1(p,p2) or succeed(List())

  /** One or more repetitions of `p`, separated by `p2`, whose results are ignored. */
  def sep1[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] =
    map2(p, many(p2 *> p))(_ :: _)

  /** Parses a sequence of left-associative binary operators with the same precedence. */
  def opL[A](p: Parser[A])(op: Parser[(A,A) => A]): Parser[A] =
    map2(p, many(op ** p))((h,t) => t.foldLeft(h)((a,b) => b._1(a,b._2)))

  /** Wraps `p` in start/stop delimiters. */
  def surround[A](start: Parser[Any], stop: Parser[Any])(p: => Parser[A]) =
    start *> p <* stop

  /** A parser that succeeds when given empty input. */
  def eof: Parser[String] =
    regex("\\z".r).label("unexpected trailing characters")

  /** The root of the grammar, expects no further input following `p`. */
  def root[A](p: Parser[A]): Parser[A] =
    p <* eof

  case class ParserOps[A](p: Parser[A]) {
    def |[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2) // use `self` to explicitly disambiguate reference to the `or` method on the `trait`
    def or[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2)

    def map[B](f: A => B): Parser[B] = self.map(p)(f)
    def many = self.many(p)

    def slice: Parser[String] = self.slice(p)

    def **[B](p2: => Parser[B]): Parser[(A,B)] =
      self.product(p,p2)
    def product[B](p2: => Parser[B]): Parser[(A,B)] =
      self.product(p,p2)

    def flatMap[B](f: A => Parser[B]): Parser[B] =
      self.flatMap(p)(f)

    def label(msg: String): Parser[A] = self.label(msg)(p)

    def scope(msg: String): Parser[A] = self.scope(msg)(p)

    def *>[B](p2: => Parser[B]) = self.skipL(p, p2)
    def <*(p2: => Parser[Any]) = self.skipR(p, p2)
    def token = self.token(p)
    def sep(separator: Parser[Any]) = self.sep(p, separator)
    def sep1(separator: Parser[Any]) = self.sep1(p, separator)
    def as[B](b: B): Parser[B] = self.map(self.slice(p))(_ => b)
    def opL(op: Parser[(A,A) => A]): Parser[A] = self.opL(p)(op)
  }
  object Laws {
    def equal[A](p1: Parser[A], p2: Parser[A])(in: Gen[String]): Prop =
      forAll(in)(s => run(p1)(s) == run(p2)(s))

    def mapLaw[A](p: Parser[A])(in: Gen[String]): Prop =
      equal(p, p.map(a => a))(in)
  }
}

case class Location(input: String, offset: Int = 0) {

  lazy val line = input.slice(0,offset+1).count(_ == '\n') + 1
  lazy val col = input.slice(0,offset+1).lastIndexOf('\n') match {
    case -1 => offset + 1
    case lineStart => offset - lineStart
  }

  def toError(msg: String): ParseError =
    ParseError(List((this, msg)))

  def advanceBy(n: Int) = copy(offset = offset+n)

  /* Returns the line corresponding to this location */
  def currentLine: String =
    if (input.length > 1) input.lines.drop(line-1).next
    else ""

  def columnCaret = (" " * (col-1)) + "^"
}

case class ParseError(stack: List[(Location,String)] = List()) {
  def push(loc: Location, msg: String): ParseError =
    copy(stack = (loc,msg) :: stack)

  def label[A](s: String): ParseError =
    ParseError(latestLoc.map((_,s)).toList)

  def latest: Option[(Location,String)] =
    stack.lastOption

  def latestLoc: Option[Location] =
    latest map (_._1)

  /**
  Display collapsed error stack - any adjacent stack elements with the
  same location are combined on one line. For the bottommost error, we
  display the full line, with a caret pointing to the column of the error.
  Example:

  1.1 file 'companies.json'; array
  5.1 object
  5.2 key-value
  5.10 ':'

  { "MSFT" ; 24,
  */
  override def toString =
    if (stack.isEmpty) "no error message"
    else {
      val collapsed = collapseStack(stack)
      val context =
        collapsed.lastOption.map("\n\n" + _._1.currentLine).getOrElse("") +
        collapsed.lastOption.map("\n" + _._1.columnCaret).getOrElse("")
      collapsed.map { case (loc,msg) => loc.line.toString + "." + loc.col + " " + msg }.mkString("\n") +
      context
    }

  /* Builds a collapsed version of the given error stack -
   * messages at the same location have their messages merged,
   * separated by semicolons */
  def collapseStack(s: List[(Location,String)]): List[(Location,String)] =
    s.groupBy(_._1).
      mapValues(_.map(_._2).mkString("; ")).
      toList.sortBy(_._1.offset)

  def formatLoc(l: Location): String = l.line + "." + l.col
}

object Parsers {

}


语法分析组合子的具体实现方式1, Reference.scala:


package parsing.instances

import ReferenceTypes._
import scala.util.matching.Regex
import parsing._

object ReferenceTypes {

  /** A parser is a kind of state action that can fail. */
  type Parser[+A] = ParseState => Result[A]

  /** `ParseState` wraps a `Location` and provides some extra
    * convenience functions. The sliceable parsers defined
    * in `Sliceable.scala` add an `isSliced` `Boolean` flag
    * to `ParseState`.
    */
  case class ParseState(loc: Location) {
    def advanceBy(numChars: Int): ParseState =
      copy(loc = loc.copy(offset = loc.offset + numChars))
    def input: String = loc.input.substring(loc.offset)
    def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
  }

  /* Likewise, we define a few helper functions on `Result`. */
  sealed trait Result[+A] {
    def extract: Either[ParseError,A] = this match {
      case Failure(e,_) => Left(e)
      case Success(a,_) => Right(a)
    }
    /* Used by `attempt`. */
    def uncommit: Result[A] = this match {
      case Failure(e,true) => Failure(e,false)
      case _ => this
    }
    /* Used by `flatMap` */
    def addCommit(isCommitted: Boolean): Result[A] = this match {
      case Failure(e,c) => Failure(e, c || isCommitted)
      case _ => this
    }
    /* Used by `scope`, `label`. */
    def mapError(f: ParseError => ParseError): Result[A] = this match {
      case Failure(e,c) => Failure(f(e),c)
      case _ => this
    }
    def advanceSuccess(n: Int): Result[A] = this match {
      case Success(a,m) => Success(a,n+m)
      case _ => this
    }
  }
  case class Success[+A](get: A, length: Int) extends Result[A]
  case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing]

  /** Returns -1 if s1.startsWith(s2), otherwise returns the
    * first index where the two strings differed. If s2 is
    * longer than s1, returns s1.length. */
  def firstNonmatchingIndex(s1: String, s2: String, offset: Int): Int = {
    var i = 0
    while (i < s1.length && i < s2.length) {
      if (s1.charAt(i+offset) != s2.charAt(i)) return i
      i += 1
    }
    if (s1.length-offset >= s2.length) -1
    else s1.length-offset
  }
}

object Reference extends Parsers[Parser] {

  def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
    val s0 = ParseState(Location(s))
    p(s0).extract
  }

  // consume no characters and succeed with the given value
  def succeed[A](a: A): Parser[A] = s => Success(a, 0)

  def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
    s => p(s) match {
      case Failure(e,false) => p2(s)
      case r => r // committed failure or success skips running `p2`
    }

  def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
    s => f(s) match {
      case Success(a,n) => g(a)(s.advanceBy(n))
                           .addCommit(n != 0)
                           .advanceSuccess(n)
      case f@Failure(_,_) => f
    }

  def string(w: String): Parser[String] = {
    val msg = "'" + w + "'"
    s => {
      val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
      if (i == -1) // they matched
        Success(w, w.length)
      else
        Failure(s.loc.advanceBy(i).toError(msg), i != 0)
    }
  }

  /* note, regex matching is 'all-or-nothing':
   * failures are uncommitted */
  def regex(r: Regex): Parser[String] = {
    val msg = "regex " + r
    s => r.findPrefixOf(s.input) match {
      case None => Failure(s.loc.toError(msg), false)
      case Some(m) => Success(m,m.length)
    }
  }

  def scope[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.push(s.loc,msg))

  def label[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.label(msg))

  def fail[A](msg: String): Parser[A] =
    s => Failure(s.loc.toError(msg), true)

  def attempt[A](p: Parser[A]): Parser[A] =
    s => p(s).uncommit

  def slice[A](p: Parser[A]): Parser[String] =
    s => p(s) match {
      case Success(_,n) => Success(s.slice(n),n)
      case f@Failure(_,_) => f
    }

  /* We provide an overridden version of `many` that accumulates
   * the list of results using a monolithic loop. This avoids
   * stack overflow errors for most grammars.
   */
  override def many[A](p: Parser[A]): Parser[List[A]] =
    s => {
      var nConsumed: Int = 0
      val buf = new collection.mutable.ListBuffer[A]
      def go(p: Parser[A], offset: Int): Result[List[A]] = {
        p(s.advanceBy(offset)) match {
          case Success(a,n) => buf += a; go(p, offset+n)
          case f@Failure(e,true) => f
          case Failure(e,_) => Success(buf.toList,offset)
        }
      }
      go(p, 0)
    }
}


语法分析组合子的具体实现方式2, Sliceable.scala:


package parsing.instances

import SliceableTypes._
import scala.util.matching.Regex
import parsing._

/*
This implementation is a bit trickier than the one in `Reference.scala`.
The main change is to add another piece of state to `ParseState`,
an `isSliced` flag, and an additional `Slice` constructor to `Result`.
If the `isSliced` flag is set, parsers avoid building a meaningful
result--see in particular the overridden implementations for `map`,
`map2`, and `many`.

This implementation runs up against some limitations of Scala's
type system--Scala does not appropriately refine type parameters when
pattern matching. Keep reading for more details on this.
*/
object SliceableTypes {

  /* A parser is a kind of state action that can fail.
   * This type is slightly fancier than the one discussed in the chapter,
   * to support efficient slicing. If the parser is surrounded by
   * a `slice` combinator, the `isSliced` field of `ParseState` will
   * be `true`, and we return a `Slice` output.
   */
  type Parser[+A] = ParseState => Result[A]

  /** `isSliced` indicates if the current parser is surround by a
    * `slice` combinator. This lets us avoid building up values that
    * will end up getting thrown away.
    *
    * There are several convenience functions on `ParseState` to make
    * implementing some of the combinators easier.
    */
  case class ParseState(loc: Location, isSliced: Boolean) {
    // some convenience functions
    def advanceBy(numChars: Int): ParseState =
      copy(loc = loc.copy(offset = loc.offset + numChars))
    def input: String = loc.input.substring(loc.offset)
    def unslice = copy(isSliced = false)
    def reslice(s: ParseState) = copy(isSliced = s.isSliced)
    def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
  }

  /** The result of a parse--a `Parser[A]` returns a `Result[A]`.
    *
    * There are three cases:
    *   - Success(a,n): a is the value, n is # of consumed characters
    *   - Slice(n): a successful slice; n is the # of consumed characters
    *   - Failure(n,isCommitted): a failing parse
    *
    * As usual, we define some helper functions on `Result`.
    * Defining functions on `Result` gives us better type
    * information--there are cases (see `map` and `map2` below) where
    * Scala will not appropriately refine type information when
    * pattern matching on `Result`.
    */
  sealed trait Result[+A] {
    def extract(input: String): Either[ParseError,A]
    def slice: Result[String]
    /* Used by `attempt`. */
    def uncommit: Result[A] = this match {
      case Failure(e,true) => Failure(e,false)
      case _ => this
    }
    /* Used by `flatMap` */
    def addCommit(isCommitted: Boolean): Result[A] = this match {
      case Failure(e,c) => Failure(e, c || isCommitted)
      case _ => this
    }
    /* Used by `scope`, `label`. */
    def mapError(f: ParseError => ParseError): Result[A] = this match {
      case Failure(e,c) => Failure(f(e),c)
      case _ => this
    }
    def advanceSuccess(n: Int): Result[A]
  }
  case class Slice(length: Int) extends Result[String] {
    def extract(s: String) = Right(s.substring(0,length))
    def slice = this
    def advanceSuccess(n: Int) = Slice(length+n)
  }
  case class Success[+A](get: A, length: Int) extends Result[A] {
    def extract(s: String) = Right(get)
    def slice = Slice(length)
    def advanceSuccess(n: Int) = Success(get, length+n)
  }
  case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing] {
    def extract(s: String) = Left(get)
    def slice = this
    def advanceSuccess(n: Int) = this
  }

  /** Returns -1 if s.startsWith(s2), otherwise returns the
    * first index where the two strings differed. If s2 is
    * longer than s1, returns s.length. */
  def firstNonmatchingIndex(s: String, s2: String, offset: Int): Int = {
    var i = 0
    while (i+offset < s.length && i < s2.length) {
      if (s.charAt(i+offset) != s2.charAt(i)) return i
      i += 1
    }
    if (s.length-offset >= s2.length) -1
    else s.length-offset
  }
}

object Sliceable extends Parsers[Parser] {

  def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
    val s0 = ParseState(Location(s), false)
    p(s0).extract(s)
  }

  // consume no characters and succeed with the given value
  def succeed[A](a: A): Parser[A] = s => Success(a, 0)

  def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
    s => p(s) match {
      case Failure(e,false) => p2(s)
      case r => r // committed failure or success skips running `p2`
    }

  /*
   * `Result` is an example of a Generalized Algebraic Data Type (GADT),
   * which means that not all the data constructors of `Result` have
   * the same type. In particular, `Slice` _refines_ the `A` type
   * parameter to be `String`. If we pattern match on a `Result`
   * and obtain a `Slice`, we expect to be able to assume that `A` was
   * in fact `String` and use this type information elsewhere.
   *
   * Unfortunately, Scala doesn't quite support this. Let's look
   * at an example, `map`.
   */

  /* Pattern matching on Slice should refine the type `A` to `String`,
   * and allow us to call `f(s.slice(n))`, since `f` accepts an
   * `A` which is known to be `String`. We resort to a cast here.
   */
  override def map[A,B](p: Parser[A])(f: A => B): Parser[B] =
    s => p(s) match {
      case Success(a,n) => Success(f(a),n)
      case Slice(n) => Success(f(s.slice(n).asInstanceOf[A]),n)
      case f@Failure(_,_) => f
    }

  /* See this gist for more information, examples, and discussion
   * of Scala's GADT support:
   * https://gist.github.com/1369239
   */

  /* This implementation is rather delicate. Since we need an `A`
   * to generate the second parser, we need to run the first parser
   * 'unsliced', even if the `flatMap` is wrapped in a `slice` call.
   * Once we have the `A` and have generated the second parser to
   * run, we can 'reslice' the second parser.
   *
   * Note that this implementation is less efficient than it could
   * be in the case where the choice of the second parser does not
   * depend on the first (as in `map2`). In that case, we could
   * continue to run the first parser sliced.
   *
   * Again, note the cast needed.
   */
  def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
    s => f(s.unslice) match {
      case Success(a,n) =>
        g(a)(s.advanceBy(n).reslice(s))
        .addCommit(n != 0)
        .advanceSuccess(n)
      case Slice(n) => g(s.slice(n).asInstanceOf[A])(s.advanceBy(n).reslice(s))
                       .advanceSuccess(n)
      case f@Failure(_,_) => f
    }

  // other functions are quite similar to impls in `Reference.scala`

  def string(w: String): Parser[String] = {
    val msg = "'" + w + "'"
    s => {
      val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
      if (i == -1) { // they matched
        if (s.isSliced) Slice(w.length)
        else            Success(w, w.length)
      }
      else
        Failure(s.loc.advanceBy(i).toError(msg), i != 0)
    }
  }

  // note, regex matching is 'all-or-nothing' - failures are
  // uncommitted
  def regex(r: Regex): Parser[String] = {
    val msg = "regex " + r
    s => r.findPrefixOf(s.input) match {
      case None => Failure(s.loc.toError(msg), false)
      case Some(m) =>
        if (s.isSliced) Slice(m.length)
        else            Success(m,m.length)
    }
  }

  def scope[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.push(s.loc,msg))

  def label[A](msg: String)(p: Parser[A]): Parser[A] =
    s => p(s).mapError(_.label(msg))

  def fail[A](msg: String): Parser[A] =
    s => Failure(s.loc.toError(msg), true)

  def attempt[A](p: Parser[A]): Parser[A] =
    s => p(s).uncommit

  def slice[A](p: Parser[A]): Parser[String] =
    s => p(s.copy(isSliced = true)).slice

  /* As with `map`, we require casts in a few places. */
  override def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
    s => p(s) match {
      case Success(a,n) => val s2 = s.advanceBy(n); p2(s2) match {
        case Success(b,m) => Success(f(a,b),n+m)
        case Slice(m) => Success(f(a,s2.slice(m).asInstanceOf[B]), n+m)
        case f@Failure(_,_) => f
      }
      case Slice(n) => val s2 = s.advanceBy(n); p2(s2) match {
        case Success(b,m) => Success(f(s.slice(n).asInstanceOf[A],b),n+m)
        case Slice(m) =>
          if (s.isSliced) Slice(n+m).asInstanceOf[Result[C]]
          else Success(f(s.slice(n).asInstanceOf[A],s2.slice(m).asInstanceOf[B]), n+m)
        case f@Failure(_,_) => f
      }
      case f@Failure(_,_) => f
    }

  override def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
    map2(p,p2)((_,_))

  /* We provide an overridden version of `many` that accumulates
   * the list of results using a monolithic loop. This avoids
   * stack overflow errors.
   */
  override def many[A](p: Parser[A]): Parser[List[A]] =
    s => {
      var nConsumed: Int = 0
      if (s.isSliced) {
        def go(p: Parser[String], offset: Int): Result[String] =
          p(s.advanceBy(offset)) match {
            case f@Failure(e,true) => f
            case Failure(e,_) => Slice(offset)
            case Slice(n) => go(p, offset+n)
            case Success(_,_) => sys.error("sliced parser should not return success, only slice")
          }
        go(p.slice, 0).asInstanceOf[Result[List[A]]]
      }
      else {
        val buf = new collection.mutable.ListBuffer[A]
        def go(p: Parser[A], offset: Int): Result[List[A]] = {
          p(s.advanceBy(offset)) match {
            case Success(a,n) => buf += a; go(p, offset+n)
            case f@Failure(e,true) => f
            case Failure(e,_) => Success(buf.toList,offset)
            case Slice(n) =>
              buf += s.input.substring(offset,offset+n).
                     asInstanceOf[A]
              go(p, offset+n)
          }
        }
        go(p, 0)
      }
    }
}


JSON解析器的具体实现, JSON.scala:

package parsing

import language.higherKinds
import language.implicitConversions

trait JSON

object JSON {

  case object JNull extends JSON
  case class JNumber(get: Double) extends JSON
  case class JString(get: String) extends JSON
  case class JBool(get: Boolean) extends JSON
  case class JArray(get: IndexedSeq[JSON]) extends JSON
  case class JObject(get: Map[String, JSON]) extends JSON

  def jsonParser[Parser[+_]](P: Parsers[Parser]): Parser[JSON] = {
    // we'll hide the string implicit conversion and promote strings to tokens instead
    // this is a bit nicer than having to write token everywhere

    //import P.{string => _, _}
    import P._
    //implicit def tok(s: String) = token(P.string(s))

    def tok(s: String) = P.token(P.string(s))

    def array = surround( tok("["), tok("]") )(
      value sep tok(",") map (vs => JArray(vs.toIndexedSeq)) ) scope "array"

    def obj = surround( tok("{"), tok("}") )(
      keyval sep tok(",") map (kvs => JObject(kvs.toMap)) ) scope "object"

    def keyval = escapedQuoted ** (tok(":") *> value)

    def lit = scope("literal") {
      tok("null").as(JNull) | double.map(JNumber(_)) |
        escapedQuoted.map(JString(_)) | tok("true").as(JBool(true)) | tok("false").as(JBool(false))
    }

    def value: Parser[JSON] = lit | obj | array

    root(whitespace *> (obj | array))
  }

}


示例代码用到的工具类CommonUtils.scala:


package parsing

object CommonUtils {


  def PrintDivision = {
    println();
    println("------------------------------------------------------------------");
    println()
  }


  def printResult[E](e: Either[E,JSON]) = e.fold(println, println)

}

示例代码用到的JSON数据,JsonTxt.scala:


package parsing

object JsonTxt {

  val jsonTxt = """
{
  "Company name" : "Microsoft Corporation",
  "Ticker"  : "MSFT",
  "Active"  : true,
  "Price"   : 30.66,
  "Shares outstanding" : 8.38e9,
  "Related companies" : [ "HPQ", "IBM", "YHOO", "DELL", "GOOG" ]
}
"""

  val malformedJson1 = """
{
  "Company name" ; "Microsoft Corporation"
}
"""

  val malformedJson2 = """
[
  [ "HPQ", "IBM",
  "YHOO", "DELL" ++
  "GOOG"
  ]
]
"""


}


Reference类的测试代码,ReferenceTest.scala:

package parsing

import parsing.instances.{Reference, ReferenceTypes}
import parsing.instances.ReferenceTypes.ParseState
import parsing.CommonUtils._

object ReferenceTest {


  def main(args: Array[String]): Unit = {

    val stringPar = Reference.string("aaa")

    //验证字符串aaabbb从索引位置2开始是不是紧跟着aaa
    val res = stringPar(ParseState(Location("aaabbb", 2)))
    println(res)

    PrintDivision

    val res1= stringPar(ParseState(Location("aaaaaa", 2)))
    println(res1)

    PrintDivision

    val res2= stringPar(ParseState(Location("aaa", 0)))
    println(res2)

    PrintDivision

    val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
    val dateRegexPar = Reference.regex(date)

    //验证字符串dateStr1从索引位置0开始的内容是否匹配日期格式
    val dateStr1 = "2018-05-06xxxx"
    val regexRes = dateRegexPar(ParseState(Location(dateStr1, 0)))
    println(regexRes)

    PrintDivision

    val dateStr2 = "2018x-05-06xxxx"
    val regexRes2 = dateRegexPar(ParseState(Location(dateStr2, 0)))
    println(regexRes2)

    PrintDivision


    //解析器如果报错,scope组合子可以在报错信息中追加指定的错误信息,
    // 这里追加的错误信息是: " added error msg "
    val scopePar = Reference.scope(" added error msg ")(dateRegexPar)
    val scopeRes = scopePar(ParseState(Location(dateStr2, 0)))
    println(scopeRes)

    PrintDivision


    //将最后一个错误信息替换为指定的内容,这里替换为: " label msg "
    val labelPar = Reference.label(" label msg ")(dateRegexPar)
    val labelRes = labelPar(ParseState(Location(dateStr2, 0)))
    println(labelRes)

    PrintDivision


    //无条件报错,构造错误提示时候,将错误信息设置为: " report msg abruptly "
    val failPar = Reference.fail[String](" report msg abruptly ")
    val failRes = failPar(ParseState(Location(dateStr2, 0)))
    println(failRes)

    PrintDivision


    //将错误信息中的 isCommitted字段设置为false
    val attemptPar = Reference.attempt(dateRegexPar)
    val attemptRes = attemptPar(ParseState(Location(dateStr2, 0)))
    println(attemptRes)

    PrintDivision


    //将解析正确的部分抽离出来
    val slicePar = Reference.slice(dateRegexPar)
    val sliceRes = slicePar(ParseState(Location(dateStr1, 0)))
    println(sliceRes)

    PrintDivision


    val matchIndex = ReferenceTypes.firstNonmatchingIndex("aaabbb", "aaa", 2)
    println(matchIndex)

    PrintDivision

    //解析并忽略引号
    val quotedPar = Reference.quoted
    val quotedRRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
    println(quotedRRes)

    PrintDivision


    val escapedPar = Reference.escapedQuoted
    val escapedRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
    println(escapedRes)

  }

}


采用Reference作为具体实现解析json的例子, JsonExampleWithReference.scala:

package parsing;


/**
  * JSON parsing example with Reference implementation
  */
object JsonExampleWithReference  extends  App {


  import parsing.instances.ReferenceTypes.Parser
  val P = parsing.instances.Reference
  val json: Parser[JSON] = JSON.jsonParser(P)

  import parsing.JsonTxt._
  import parsing.CommonUtils._

  PrintDivision
  printResult { P.run(json)(jsonTxt) }

  PrintDivision
  printResult { P.run(json)(malformedJson1) }

  PrintDivision
  printResult { P.run(json)(malformedJson2) }

}

采用Sliceable作为具体实现解析json的例子, JsonExampleWithSliceable.scala:

package parsing


object JsonExampleWithSliceable extends App {


  import parsing.instances.SliceableTypes.Parser

  val P = parsing.instances.Sliceable

  val json: Parser[JSON] = JSON.jsonParser(P)

  import parsing.JsonTxt._
  import parsing.CommonUtils._

  PrintDivision
  printResult { P.run(json)(jsonTxt) }

  PrintDivision
  printResult { P.run(json)(malformedJson1) }

  PrintDivision
  printResult { P.run(json)(malformedJson2) }

}

由于代码太长,示例运行结果就不贴出来了,读者可以拷贝上述代码进行运行,上述代码的代码组织结构如下所示:


├─parsing
│  │  CommonUtils.scala
│  │  JSON.scala
│  │  JsonExampleWithReference.scala
│  │  JsonExampleWithSliceable.scala
│  │  JsonTxt.scala
│  │  Parsers.scala
│  │  ReferenceTest.scala
│  │
│  └─instances
│          Reference.scala
│          Sliceable.scala

879675643@qq.com  lhever

.---.                                                                         
|   |   .              __.....__   .----.     .----.   __.....__              
|   | .'|          .-''         '.  \    \   /    /.-''         '.            
|   |<  |         /     .-''"'-.  `. '   '. /'   //     .-''"'-.  `. .-,.--.  
|   | | |        /     /________\   \|    |'    //     /________\   \|  .-. | 
|   | | | .'''-. |                  ||    ||    ||                  || |  | | 
|   | | |/.'''. \\    .-------------''.   `'   .'\    .-------------'| |  | | 
|   | |  /    | | \    '-.____...---. \        /  \    '-.____...---.| |  '-  
|   | | |     | |  `.             .'   \      /    `.             .' | |      
'---' | |     | |    `''-...... -'      '----'       `''-...... -'   | |      
      | '.    | '.                                                   |_|      
      '---'   '---'