使用Scala从头实现一个简单的语法分析器组合字库
程序员文章站
2024-03-03 16:23:34
...
这个例子来源于scala圣经级教程《Functional Programming in Scala》,虽然原书的随书代码可以找到这些类的影子,但却没有关于如何使用这些类的示例代码,本人在阅读此书期间, ,除了跟着书中的代码敲了一遍之外,还写了一些测试代码进行验证,贴出来作为blog主要是为了方便自己,也为那些同样在阅读此书的人参考。注释不多,本人也不可能写太多,因为这本书不是简单的入门书,而是一本进阶,升华用的内涵书籍,三言两语也解释不清其中的很多细节,如果看不懂,但是又感兴趣的就去看原书吧……
注:原书中给出的代码是不能直接运行成功的,原因是隐式方法 implicit def tok(s: String) = token(P.string(s)) 在JSON.scala 中不生效, 所以本人做了一点点改动,让字符串显示转换为Parser类型。
语法分析器组合子的抽象接口:Parsers.scala:
package parsing
import java.util.regex._
import scala.util.matching.Regex
import testing._
import testing.Prop._
import language.higherKinds
import language.implicitConversions
trait Parsers[Parser[+_]] { self => // so inner classes may call methods of trait
def run[A](p: Parser[A])(input: String): Either[ParseError,A]
implicit def string(s: String): Parser[String]
implicit def operators[A](p: Parser[A]) = ParserOps[A](p)
implicit def asStringParser[A](a: A)(implicit f: A => Parser[String]):
ParserOps[String] = ParserOps(f(a))
def char(c: Char): Parser[Char] =
string(c.toString) map (_.charAt(0))
/*
* A default `succeed` implementation in terms of `string` and `map`.
* We leave `succeed` abstract, since `map` is defined below in terms of
* `flatMap` and `succeed`, which would be a circular definition! But we include
* the definition here in case implementations wish to use it
* (say if they provide a custom implementation of `map`, breaking the cycle)
*/
def defaultSucceed[A](a: A): Parser[A] =
string("") map (_ => a)
def succeed[A](a: A): Parser[A]
def slice[A](p: Parser[A]): Parser[String]
def many1[A](p: Parser[A]): Parser[List[A]] =
map2(p, many(p))(_ :: _)
def listOfN[A](n: Int, p: Parser[A]): Parser[List[A]] =
if (n <= 0) succeed(List())
else map2(p, listOfN(n-1, p))(_ :: _)
def many[A](p: Parser[A]): Parser[List[A]] =
map2(p, many(p))(_ :: _) or succeed(List())
def or[A](p1: Parser[A], p2: => Parser[A]): Parser[A]
def flatMap[A,B](p: Parser[A])(f: A => Parser[B]): Parser[B]
implicit def regex(r: Regex): Parser[String]
/*
These can be implemented using a for-comprehension,
which delegates to the `flatMap` and `map` implementations we've provided on `ParserOps`,
or they can be implemented in terms of these functions directly.
*/
def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
flatMap(p)(a => map(p2)(b => (a,b)))
def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
for { a <- p; b <- p2 } yield f(a,b)
def map[A,B](a: Parser[A])(f: A => B): Parser[B] =
flatMap(a)(f andThen succeed)
def label[A](msg: String)(p: Parser[A]): Parser[A]
def scope[A](msg: String)(p: Parser[A]): Parser[A]
def attempt[A](p: Parser[A]): Parser[A]
/** Sequences two parsers, ignoring the result of the first.
* We wrap the ignored half in slice, since we don't care about its result. */
def skipL[B](p: Parser[Any], p2: => Parser[B]): Parser[B] =
map2(slice(p), p2)((_,b) => b)
/** Sequences two parsers, ignoring the result of the second.
* We wrap the ignored half in slice, since we don't care about its result. */
def skipR[A](p: Parser[A], p2: => Parser[Any]): Parser[A] =
map2(p, slice(p2))((a,b) => a)
def opt[A](p: Parser[A]): Parser[Option[A]] =
p.map(Some(_)) or succeed(None)
/** Parser which consumes zero or more whitespace characters. */
def whitespace: Parser[String] = "\\s*".r
/** Parser which consumes 1 or more digits. */
def digits: Parser[String] = "\\d+".r
/** Parser which consumes reluctantly until it encounters the given string. */
def thru(s: String): Parser[String] = (".*?"+Pattern.quote(s)).r
/** Unescaped string literals, like "foo" or "bar". */
def quoted: Parser[String] = string("\"") *> thru("\"").map(_.dropRight(1))
/** Unescaped or escaped string literals, like "An \n important \"Quotation\"" or "bar". */
def escapedQuoted: Parser[String] =
// rather annoying to write, left as an exercise
// we'll just use quoted (unescaped literals) for now
token(quoted label "string literal")
/** C/Java style floating point literals, e.g .1, -1.0, 1e9, 1E-23, etc.
* Result is left as a string to keep full precision
*/
def doubleString: Parser[String] =
token("[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?".r)
/** Floating point literals, converted to a `Double`. */
def double: Parser[Double] =
doubleString map (_.toDouble) label "double literal"
/** Attempts `p` and strips trailing whitespace, usually used for the tokens of a grammar. */
def token[A](p: Parser[A]): Parser[A] =
attempt(p) <* whitespace
/** Zero or more repetitions of `p`, separated by `p2`, whose results are ignored. */
def sep[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] = // use `Parser[Any]` since don't care about result type of separator
sep1(p,p2) or succeed(List())
/** One or more repetitions of `p`, separated by `p2`, whose results are ignored. */
def sep1[A](p: Parser[A], p2: Parser[Any]): Parser[List[A]] =
map2(p, many(p2 *> p))(_ :: _)
/** Parses a sequence of left-associative binary operators with the same precedence. */
def opL[A](p: Parser[A])(op: Parser[(A,A) => A]): Parser[A] =
map2(p, many(op ** p))((h,t) => t.foldLeft(h)((a,b) => b._1(a,b._2)))
/** Wraps `p` in start/stop delimiters. */
def surround[A](start: Parser[Any], stop: Parser[Any])(p: => Parser[A]) =
start *> p <* stop
/** A parser that succeeds when given empty input. */
def eof: Parser[String] =
regex("\\z".r).label("unexpected trailing characters")
/** The root of the grammar, expects no further input following `p`. */
def root[A](p: Parser[A]): Parser[A] =
p <* eof
case class ParserOps[A](p: Parser[A]) {
def |[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2) // use `self` to explicitly disambiguate reference to the `or` method on the `trait`
def or[B>:A](p2: => Parser[B]): Parser[B] = self.or(p,p2)
def map[B](f: A => B): Parser[B] = self.map(p)(f)
def many = self.many(p)
def slice: Parser[String] = self.slice(p)
def **[B](p2: => Parser[B]): Parser[(A,B)] =
self.product(p,p2)
def product[B](p2: => Parser[B]): Parser[(A,B)] =
self.product(p,p2)
def flatMap[B](f: A => Parser[B]): Parser[B] =
self.flatMap(p)(f)
def label(msg: String): Parser[A] = self.label(msg)(p)
def scope(msg: String): Parser[A] = self.scope(msg)(p)
def *>[B](p2: => Parser[B]) = self.skipL(p, p2)
def <*(p2: => Parser[Any]) = self.skipR(p, p2)
def token = self.token(p)
def sep(separator: Parser[Any]) = self.sep(p, separator)
def sep1(separator: Parser[Any]) = self.sep1(p, separator)
def as[B](b: B): Parser[B] = self.map(self.slice(p))(_ => b)
def opL(op: Parser[(A,A) => A]): Parser[A] = self.opL(p)(op)
}
object Laws {
def equal[A](p1: Parser[A], p2: Parser[A])(in: Gen[String]): Prop =
forAll(in)(s => run(p1)(s) == run(p2)(s))
def mapLaw[A](p: Parser[A])(in: Gen[String]): Prop =
equal(p, p.map(a => a))(in)
}
}
case class Location(input: String, offset: Int = 0) {
lazy val line = input.slice(0,offset+1).count(_ == '\n') + 1
lazy val col = input.slice(0,offset+1).lastIndexOf('\n') match {
case -1 => offset + 1
case lineStart => offset - lineStart
}
def toError(msg: String): ParseError =
ParseError(List((this, msg)))
def advanceBy(n: Int) = copy(offset = offset+n)
/* Returns the line corresponding to this location */
def currentLine: String =
if (input.length > 1) input.lines.drop(line-1).next
else ""
def columnCaret = (" " * (col-1)) + "^"
}
case class ParseError(stack: List[(Location,String)] = List()) {
def push(loc: Location, msg: String): ParseError =
copy(stack = (loc,msg) :: stack)
def label[A](s: String): ParseError =
ParseError(latestLoc.map((_,s)).toList)
def latest: Option[(Location,String)] =
stack.lastOption
def latestLoc: Option[Location] =
latest map (_._1)
/**
Display collapsed error stack - any adjacent stack elements with the
same location are combined on one line. For the bottommost error, we
display the full line, with a caret pointing to the column of the error.
Example:
1.1 file 'companies.json'; array
5.1 object
5.2 key-value
5.10 ':'
{ "MSFT" ; 24,
*/
override def toString =
if (stack.isEmpty) "no error message"
else {
val collapsed = collapseStack(stack)
val context =
collapsed.lastOption.map("\n\n" + _._1.currentLine).getOrElse("") +
collapsed.lastOption.map("\n" + _._1.columnCaret).getOrElse("")
collapsed.map { case (loc,msg) => loc.line.toString + "." + loc.col + " " + msg }.mkString("\n") +
context
}
/* Builds a collapsed version of the given error stack -
* messages at the same location have their messages merged,
* separated by semicolons */
def collapseStack(s: List[(Location,String)]): List[(Location,String)] =
s.groupBy(_._1).
mapValues(_.map(_._2).mkString("; ")).
toList.sortBy(_._1.offset)
def formatLoc(l: Location): String = l.line + "." + l.col
}
object Parsers {
}
语法分析组合子的具体实现方式1, Reference.scala:
package parsing.instances
import ReferenceTypes._
import scala.util.matching.Regex
import parsing._
object ReferenceTypes {
/** A parser is a kind of state action that can fail. */
type Parser[+A] = ParseState => Result[A]
/** `ParseState` wraps a `Location` and provides some extra
* convenience functions. The sliceable parsers defined
* in `Sliceable.scala` add an `isSliced` `Boolean` flag
* to `ParseState`.
*/
case class ParseState(loc: Location) {
def advanceBy(numChars: Int): ParseState =
copy(loc = loc.copy(offset = loc.offset + numChars))
def input: String = loc.input.substring(loc.offset)
def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
}
/* Likewise, we define a few helper functions on `Result`. */
sealed trait Result[+A] {
def extract: Either[ParseError,A] = this match {
case Failure(e,_) => Left(e)
case Success(a,_) => Right(a)
}
/* Used by `attempt`. */
def uncommit: Result[A] = this match {
case Failure(e,true) => Failure(e,false)
case _ => this
}
/* Used by `flatMap` */
def addCommit(isCommitted: Boolean): Result[A] = this match {
case Failure(e,c) => Failure(e, c || isCommitted)
case _ => this
}
/* Used by `scope`, `label`. */
def mapError(f: ParseError => ParseError): Result[A] = this match {
case Failure(e,c) => Failure(f(e),c)
case _ => this
}
def advanceSuccess(n: Int): Result[A] = this match {
case Success(a,m) => Success(a,n+m)
case _ => this
}
}
case class Success[+A](get: A, length: Int) extends Result[A]
case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing]
/** Returns -1 if s1.startsWith(s2), otherwise returns the
* first index where the two strings differed. If s2 is
* longer than s1, returns s1.length. */
def firstNonmatchingIndex(s1: String, s2: String, offset: Int): Int = {
var i = 0
while (i < s1.length && i < s2.length) {
if (s1.charAt(i+offset) != s2.charAt(i)) return i
i += 1
}
if (s1.length-offset >= s2.length) -1
else s1.length-offset
}
}
object Reference extends Parsers[Parser] {
def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
val s0 = ParseState(Location(s))
p(s0).extract
}
// consume no characters and succeed with the given value
def succeed[A](a: A): Parser[A] = s => Success(a, 0)
def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
s => p(s) match {
case Failure(e,false) => p2(s)
case r => r // committed failure or success skips running `p2`
}
def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
s => f(s) match {
case Success(a,n) => g(a)(s.advanceBy(n))
.addCommit(n != 0)
.advanceSuccess(n)
case f@Failure(_,_) => f
}
def string(w: String): Parser[String] = {
val msg = "'" + w + "'"
s => {
val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
if (i == -1) // they matched
Success(w, w.length)
else
Failure(s.loc.advanceBy(i).toError(msg), i != 0)
}
}
/* note, regex matching is 'all-or-nothing':
* failures are uncommitted */
def regex(r: Regex): Parser[String] = {
val msg = "regex " + r
s => r.findPrefixOf(s.input) match {
case None => Failure(s.loc.toError(msg), false)
case Some(m) => Success(m,m.length)
}
}
def scope[A](msg: String)(p: Parser[A]): Parser[A] =
s => p(s).mapError(_.push(s.loc,msg))
def label[A](msg: String)(p: Parser[A]): Parser[A] =
s => p(s).mapError(_.label(msg))
def fail[A](msg: String): Parser[A] =
s => Failure(s.loc.toError(msg), true)
def attempt[A](p: Parser[A]): Parser[A] =
s => p(s).uncommit
def slice[A](p: Parser[A]): Parser[String] =
s => p(s) match {
case Success(_,n) => Success(s.slice(n),n)
case f@Failure(_,_) => f
}
/* We provide an overridden version of `many` that accumulates
* the list of results using a monolithic loop. This avoids
* stack overflow errors for most grammars.
*/
override def many[A](p: Parser[A]): Parser[List[A]] =
s => {
var nConsumed: Int = 0
val buf = new collection.mutable.ListBuffer[A]
def go(p: Parser[A], offset: Int): Result[List[A]] = {
p(s.advanceBy(offset)) match {
case Success(a,n) => buf += a; go(p, offset+n)
case f@Failure(e,true) => f
case Failure(e,_) => Success(buf.toList,offset)
}
}
go(p, 0)
}
}
语法分析组合子的具体实现方式2, Sliceable.scala:
package parsing.instances
import SliceableTypes._
import scala.util.matching.Regex
import parsing._
/*
This implementation is a bit trickier than the one in `Reference.scala`.
The main change is to add another piece of state to `ParseState`,
an `isSliced` flag, and an additional `Slice` constructor to `Result`.
If the `isSliced` flag is set, parsers avoid building a meaningful
result--see in particular the overridden implementations for `map`,
`map2`, and `many`.
This implementation runs up against some limitations of Scala's
type system--Scala does not appropriately refine type parameters when
pattern matching. Keep reading for more details on this.
*/
object SliceableTypes {
/* A parser is a kind of state action that can fail.
* This type is slightly fancier than the one discussed in the chapter,
* to support efficient slicing. If the parser is surrounded by
* a `slice` combinator, the `isSliced` field of `ParseState` will
* be `true`, and we return a `Slice` output.
*/
type Parser[+A] = ParseState => Result[A]
/** `isSliced` indicates if the current parser is surround by a
* `slice` combinator. This lets us avoid building up values that
* will end up getting thrown away.
*
* There are several convenience functions on `ParseState` to make
* implementing some of the combinators easier.
*/
case class ParseState(loc: Location, isSliced: Boolean) {
// some convenience functions
def advanceBy(numChars: Int): ParseState =
copy(loc = loc.copy(offset = loc.offset + numChars))
def input: String = loc.input.substring(loc.offset)
def unslice = copy(isSliced = false)
def reslice(s: ParseState) = copy(isSliced = s.isSliced)
def slice(n: Int) = loc.input.substring(loc.offset, loc.offset + n)
}
/** The result of a parse--a `Parser[A]` returns a `Result[A]`.
*
* There are three cases:
* - Success(a,n): a is the value, n is # of consumed characters
* - Slice(n): a successful slice; n is the # of consumed characters
* - Failure(n,isCommitted): a failing parse
*
* As usual, we define some helper functions on `Result`.
* Defining functions on `Result` gives us better type
* information--there are cases (see `map` and `map2` below) where
* Scala will not appropriately refine type information when
* pattern matching on `Result`.
*/
sealed trait Result[+A] {
def extract(input: String): Either[ParseError,A]
def slice: Result[String]
/* Used by `attempt`. */
def uncommit: Result[A] = this match {
case Failure(e,true) => Failure(e,false)
case _ => this
}
/* Used by `flatMap` */
def addCommit(isCommitted: Boolean): Result[A] = this match {
case Failure(e,c) => Failure(e, c || isCommitted)
case _ => this
}
/* Used by `scope`, `label`. */
def mapError(f: ParseError => ParseError): Result[A] = this match {
case Failure(e,c) => Failure(f(e),c)
case _ => this
}
def advanceSuccess(n: Int): Result[A]
}
case class Slice(length: Int) extends Result[String] {
def extract(s: String) = Right(s.substring(0,length))
def slice = this
def advanceSuccess(n: Int) = Slice(length+n)
}
case class Success[+A](get: A, length: Int) extends Result[A] {
def extract(s: String) = Right(get)
def slice = Slice(length)
def advanceSuccess(n: Int) = Success(get, length+n)
}
case class Failure(get: ParseError, isCommitted: Boolean) extends Result[Nothing] {
def extract(s: String) = Left(get)
def slice = this
def advanceSuccess(n: Int) = this
}
/** Returns -1 if s.startsWith(s2), otherwise returns the
* first index where the two strings differed. If s2 is
* longer than s1, returns s.length. */
def firstNonmatchingIndex(s: String, s2: String, offset: Int): Int = {
var i = 0
while (i+offset < s.length && i < s2.length) {
if (s.charAt(i+offset) != s2.charAt(i)) return i
i += 1
}
if (s.length-offset >= s2.length) -1
else s.length-offset
}
}
object Sliceable extends Parsers[Parser] {
def run[A](p: Parser[A])(s: String): Either[ParseError,A] = {
val s0 = ParseState(Location(s), false)
p(s0).extract(s)
}
// consume no characters and succeed with the given value
def succeed[A](a: A): Parser[A] = s => Success(a, 0)
def or[A](p: Parser[A], p2: => Parser[A]): Parser[A] =
s => p(s) match {
case Failure(e,false) => p2(s)
case r => r // committed failure or success skips running `p2`
}
/*
* `Result` is an example of a Generalized Algebraic Data Type (GADT),
* which means that not all the data constructors of `Result` have
* the same type. In particular, `Slice` _refines_ the `A` type
* parameter to be `String`. If we pattern match on a `Result`
* and obtain a `Slice`, we expect to be able to assume that `A` was
* in fact `String` and use this type information elsewhere.
*
* Unfortunately, Scala doesn't quite support this. Let's look
* at an example, `map`.
*/
/* Pattern matching on Slice should refine the type `A` to `String`,
* and allow us to call `f(s.slice(n))`, since `f` accepts an
* `A` which is known to be `String`. We resort to a cast here.
*/
override def map[A,B](p: Parser[A])(f: A => B): Parser[B] =
s => p(s) match {
case Success(a,n) => Success(f(a),n)
case Slice(n) => Success(f(s.slice(n).asInstanceOf[A]),n)
case f@Failure(_,_) => f
}
/* See this gist for more information, examples, and discussion
* of Scala's GADT support:
* https://gist.github.com/1369239
*/
/* This implementation is rather delicate. Since we need an `A`
* to generate the second parser, we need to run the first parser
* 'unsliced', even if the `flatMap` is wrapped in a `slice` call.
* Once we have the `A` and have generated the second parser to
* run, we can 'reslice' the second parser.
*
* Note that this implementation is less efficient than it could
* be in the case where the choice of the second parser does not
* depend on the first (as in `map2`). In that case, we could
* continue to run the first parser sliced.
*
* Again, note the cast needed.
*/
def flatMap[A,B](f: Parser[A])(g: A => Parser[B]): Parser[B] =
s => f(s.unslice) match {
case Success(a,n) =>
g(a)(s.advanceBy(n).reslice(s))
.addCommit(n != 0)
.advanceSuccess(n)
case Slice(n) => g(s.slice(n).asInstanceOf[A])(s.advanceBy(n).reslice(s))
.advanceSuccess(n)
case f@Failure(_,_) => f
}
// other functions are quite similar to impls in `Reference.scala`
def string(w: String): Parser[String] = {
val msg = "'" + w + "'"
s => {
val i = firstNonmatchingIndex(s.loc.input, w, s.loc.offset)
if (i == -1) { // they matched
if (s.isSliced) Slice(w.length)
else Success(w, w.length)
}
else
Failure(s.loc.advanceBy(i).toError(msg), i != 0)
}
}
// note, regex matching is 'all-or-nothing' - failures are
// uncommitted
def regex(r: Regex): Parser[String] = {
val msg = "regex " + r
s => r.findPrefixOf(s.input) match {
case None => Failure(s.loc.toError(msg), false)
case Some(m) =>
if (s.isSliced) Slice(m.length)
else Success(m,m.length)
}
}
def scope[A](msg: String)(p: Parser[A]): Parser[A] =
s => p(s).mapError(_.push(s.loc,msg))
def label[A](msg: String)(p: Parser[A]): Parser[A] =
s => p(s).mapError(_.label(msg))
def fail[A](msg: String): Parser[A] =
s => Failure(s.loc.toError(msg), true)
def attempt[A](p: Parser[A]): Parser[A] =
s => p(s).uncommit
def slice[A](p: Parser[A]): Parser[String] =
s => p(s.copy(isSliced = true)).slice
/* As with `map`, we require casts in a few places. */
override def map2[A,B,C](p: Parser[A], p2: => Parser[B])(f: (A,B) => C): Parser[C] =
s => p(s) match {
case Success(a,n) => val s2 = s.advanceBy(n); p2(s2) match {
case Success(b,m) => Success(f(a,b),n+m)
case Slice(m) => Success(f(a,s2.slice(m).asInstanceOf[B]), n+m)
case f@Failure(_,_) => f
}
case Slice(n) => val s2 = s.advanceBy(n); p2(s2) match {
case Success(b,m) => Success(f(s.slice(n).asInstanceOf[A],b),n+m)
case Slice(m) =>
if (s.isSliced) Slice(n+m).asInstanceOf[Result[C]]
else Success(f(s.slice(n).asInstanceOf[A],s2.slice(m).asInstanceOf[B]), n+m)
case f@Failure(_,_) => f
}
case f@Failure(_,_) => f
}
override def product[A,B](p: Parser[A], p2: => Parser[B]): Parser[(A,B)] =
map2(p,p2)((_,_))
/* We provide an overridden version of `many` that accumulates
* the list of results using a monolithic loop. This avoids
* stack overflow errors.
*/
override def many[A](p: Parser[A]): Parser[List[A]] =
s => {
var nConsumed: Int = 0
if (s.isSliced) {
def go(p: Parser[String], offset: Int): Result[String] =
p(s.advanceBy(offset)) match {
case f@Failure(e,true) => f
case Failure(e,_) => Slice(offset)
case Slice(n) => go(p, offset+n)
case Success(_,_) => sys.error("sliced parser should not return success, only slice")
}
go(p.slice, 0).asInstanceOf[Result[List[A]]]
}
else {
val buf = new collection.mutable.ListBuffer[A]
def go(p: Parser[A], offset: Int): Result[List[A]] = {
p(s.advanceBy(offset)) match {
case Success(a,n) => buf += a; go(p, offset+n)
case f@Failure(e,true) => f
case Failure(e,_) => Success(buf.toList,offset)
case Slice(n) =>
buf += s.input.substring(offset,offset+n).
asInstanceOf[A]
go(p, offset+n)
}
}
go(p, 0)
}
}
}
JSON解析器的具体实现, JSON.scala:
package parsing
import language.higherKinds
import language.implicitConversions
trait JSON
object JSON {
case object JNull extends JSON
case class JNumber(get: Double) extends JSON
case class JString(get: String) extends JSON
case class JBool(get: Boolean) extends JSON
case class JArray(get: IndexedSeq[JSON]) extends JSON
case class JObject(get: Map[String, JSON]) extends JSON
def jsonParser[Parser[+_]](P: Parsers[Parser]): Parser[JSON] = {
// we'll hide the string implicit conversion and promote strings to tokens instead
// this is a bit nicer than having to write token everywhere
//import P.{string => _, _}
import P._
//implicit def tok(s: String) = token(P.string(s))
def tok(s: String) = P.token(P.string(s))
def array = surround( tok("["), tok("]") )(
value sep tok(",") map (vs => JArray(vs.toIndexedSeq)) ) scope "array"
def obj = surround( tok("{"), tok("}") )(
keyval sep tok(",") map (kvs => JObject(kvs.toMap)) ) scope "object"
def keyval = escapedQuoted ** (tok(":") *> value)
def lit = scope("literal") {
tok("null").as(JNull) | double.map(JNumber(_)) |
escapedQuoted.map(JString(_)) | tok("true").as(JBool(true)) | tok("false").as(JBool(false))
}
def value: Parser[JSON] = lit | obj | array
root(whitespace *> (obj | array))
}
}
示例代码用到的工具类CommonUtils.scala:
package parsing
object CommonUtils {
def PrintDivision = {
println();
println("------------------------------------------------------------------");
println()
}
def printResult[E](e: Either[E,JSON]) = e.fold(println, println)
}
示例代码用到的JSON数据,JsonTxt.scala:
package parsing
object JsonTxt {
val jsonTxt = """
{
"Company name" : "Microsoft Corporation",
"Ticker" : "MSFT",
"Active" : true,
"Price" : 30.66,
"Shares outstanding" : 8.38e9,
"Related companies" : [ "HPQ", "IBM", "YHOO", "DELL", "GOOG" ]
}
"""
val malformedJson1 = """
{
"Company name" ; "Microsoft Corporation"
}
"""
val malformedJson2 = """
[
[ "HPQ", "IBM",
"YHOO", "DELL" ++
"GOOG"
]
]
"""
}
Reference类的测试代码,ReferenceTest.scala:
package parsing
import parsing.instances.{Reference, ReferenceTypes}
import parsing.instances.ReferenceTypes.ParseState
import parsing.CommonUtils._
object ReferenceTest {
def main(args: Array[String]): Unit = {
val stringPar = Reference.string("aaa")
//验证字符串aaabbb从索引位置2开始是不是紧跟着aaa
val res = stringPar(ParseState(Location("aaabbb", 2)))
println(res)
PrintDivision
val res1= stringPar(ParseState(Location("aaaaaa", 2)))
println(res1)
PrintDivision
val res2= stringPar(ParseState(Location("aaa", 0)))
println(res2)
PrintDivision
val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
val dateRegexPar = Reference.regex(date)
//验证字符串dateStr1从索引位置0开始的内容是否匹配日期格式
val dateStr1 = "2018-05-06xxxx"
val regexRes = dateRegexPar(ParseState(Location(dateStr1, 0)))
println(regexRes)
PrintDivision
val dateStr2 = "2018x-05-06xxxx"
val regexRes2 = dateRegexPar(ParseState(Location(dateStr2, 0)))
println(regexRes2)
PrintDivision
//解析器如果报错,scope组合子可以在报错信息中追加指定的错误信息,
// 这里追加的错误信息是: " added error msg "
val scopePar = Reference.scope(" added error msg ")(dateRegexPar)
val scopeRes = scopePar(ParseState(Location(dateStr2, 0)))
println(scopeRes)
PrintDivision
//将最后一个错误信息替换为指定的内容,这里替换为: " label msg "
val labelPar = Reference.label(" label msg ")(dateRegexPar)
val labelRes = labelPar(ParseState(Location(dateStr2, 0)))
println(labelRes)
PrintDivision
//无条件报错,构造错误提示时候,将错误信息设置为: " report msg abruptly "
val failPar = Reference.fail[String](" report msg abruptly ")
val failRes = failPar(ParseState(Location(dateStr2, 0)))
println(failRes)
PrintDivision
//将错误信息中的 isCommitted字段设置为false
val attemptPar = Reference.attempt(dateRegexPar)
val attemptRes = attemptPar(ParseState(Location(dateStr2, 0)))
println(attemptRes)
PrintDivision
//将解析正确的部分抽离出来
val slicePar = Reference.slice(dateRegexPar)
val sliceRes = slicePar(ParseState(Location(dateStr1, 0)))
println(sliceRes)
PrintDivision
val matchIndex = ReferenceTypes.firstNonmatchingIndex("aaabbb", "aaa", 2)
println(matchIndex)
PrintDivision
//解析并忽略引号
val quotedPar = Reference.quoted
val quotedRRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
println(quotedRRes)
PrintDivision
val escapedPar = Reference.escapedQuoted
val escapedRes = quotedPar(ParseState(Location("\"aaa\"", 0)))
println(escapedRes)
}
}
采用Reference作为具体实现解析json的例子, JsonExampleWithReference.scala:
package parsing;
/**
* JSON parsing example with Reference implementation
*/
object JsonExampleWithReference extends App {
import parsing.instances.ReferenceTypes.Parser
val P = parsing.instances.Reference
val json: Parser[JSON] = JSON.jsonParser(P)
import parsing.JsonTxt._
import parsing.CommonUtils._
PrintDivision
printResult { P.run(json)(jsonTxt) }
PrintDivision
printResult { P.run(json)(malformedJson1) }
PrintDivision
printResult { P.run(json)(malformedJson2) }
}
采用Sliceable作为具体实现解析json的例子, JsonExampleWithSliceable.scala:
package parsing
object JsonExampleWithSliceable extends App {
import parsing.instances.SliceableTypes.Parser
val P = parsing.instances.Sliceable
val json: Parser[JSON] = JSON.jsonParser(P)
import parsing.JsonTxt._
import parsing.CommonUtils._
PrintDivision
printResult { P.run(json)(jsonTxt) }
PrintDivision
printResult { P.run(json)(malformedJson1) }
PrintDivision
printResult { P.run(json)(malformedJson2) }
}
由于代码太长,示例运行结果就不贴出来了,读者可以拷贝上述代码进行运行,上述代码的代码组织结构如下所示:
├─parsing
│ │ CommonUtils.scala
│ │ JSON.scala
│ │ JsonExampleWithReference.scala
│ │ JsonExampleWithSliceable.scala
│ │ JsonTxt.scala
│ │ Parsers.scala
│ │ ReferenceTest.scala
│ │
│ └─instances
│ Reference.scala
│ Sliceable.scala
879675643@qq.com lhever
.---.
| | . __.....__ .----. .----. __.....__
| | .'| .-'' '. \ \ / /.-'' '.
| |< | / .-''"'-. `. ' '. /' // .-''"'-. `. .-,.--.
| | | | / /________\ \| |' // /________\ \| .-. |
| | | | .'''-. | || || || || | | |
| | | |/.'''. \\ .-------------''. `' .'\ .-------------'| | | |
| | | / | | \ '-.____...---. \ / \ '-.____...---.| | '-
| | | | | | `. .' \ / `. .' | |
'---' | | | | `''-...... -' '----' `''-...... -' | |
| '. | '. |_|
'---' '---'