nswrap/types/combinators.go

256 lines
6.2 KiB
Go

package types
import (
"regexp"
)
var (
whitespace *regexp.Regexp
wordchars *regexp.Regexp
reservedwords *regexp.Regexp
)
func init() {
whitespace = regexp.MustCompile(" *")
wordchars = regexp.MustCompile(`[_0-9a-zA-Z]`)
reservedwords = regexp.MustCompile("^(void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex|const|restrict|volatile|struct|union|enum)$")
}
//Parser is a function that takes the string to be parsed plus an input Node
//and returns a new Node and the unparsed remainder string. If the parser fails
//to parse anything in the input, it should return a nil Node.
type Parser func(string, *Node) (string, *Node)
// Adders -- add elements to the Node tree
//ChildOf takes a node and adds results of a parser to it as a child
func ChildOf(ret *Node, p Parser) Parser {
return func(s string, n *Node) (string, *Node) {
dbg("ChildOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
s2, n2 := p(s, ret)
if n2 == nil {
return s, nil
}
if n2 == ret {
dbg("ChildOf(ret = %p) n2 = %p. WHAT\n", ret, n2)
ret.Children = n2.Children
} else {
dbg("ChildOf(ret = %p) AddChild()\n", ret)
ret.AddChild(n2)
}
return s2, ret
}
}
//Children takes a parser and returns a parser that adds the children of its
//output node to the tree. If multiple parsers are passed in, they are
//passed to Seq(...)
func Children(ps ...Parser) Parser {
if len(ps) > 1 {
return Children(Seq(ps...))
}
p := ps[0]
return func(s string, n *Node) (string, *Node) {
dbg("Children(%s %p)\n", n.Kind, n)
s2, n2 := p(s, n)
if n2 == nil {
return s, nil
}
for _, c := range n2.Children {
dbg("Children(%s %p) AddChild() from %p\n", n.Kind, n, n2)
if c != n {
n.AddChild(c)
}
}
return s2, n
}
}
//ChildrenOf takes a node and adds the children of a parser's output node
//to it as its children.
func ChildrenOf(ret *Node, p Parser) Parser {
return func(s string, n *Node) (string, *Node) {
dbg("ChildrenOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
return Children(p)(s, ret)
}
}
func NodeNamed(k string, p Parser) Parser {
return func(s string, n *Node) (string, *Node) {
s2, n2 := p(s, n)
if n2 != nil {
n2.Kind = k
}
return s2, n2
}
}
// Combinators -- combine one or more Parsers into a new Parser.
//Opt optionally runs a Parser, returning the input Node (instead of nil)
//if it fails
func Opt(p Parser) Parser {
return func(s string, n *Node) (string, *Node) {
s2, n2 := p(s, n)
if n2 == nil {
return s, n
}
return s2, n2
}
}
//OneOf picks the first matching parser and returns its result
func OneOf(ps ...Parser) Parser {
dbg("OneOf(%p)\n", ps)
return func(s string, n *Node) (string, *Node) {
for _, p := range ps {
s2, n2 := p(s, n)
if n2 != nil {
return s2, n2
}
}
return s, nil
}
}
//Seq applies parsers in sequence, adding results as children to the input
//node. Returns nil and the input string unless the entire sequence succeeds
func Seq(ps ...Parser) Parser {
dbg("Seq(%p)\n", ps)
p := func(s string, n *Node) (string, *Node) {
ret := NewNode("Seq")
s2, n2 := s, n
for _, p := range ps {
s2, n2 = p(s2, ret)
if n2 == nil {
return s, nil
}
if n2 != ret {
dbg("Seq(%p): AddChild()\n", ps)
ret.AddChild(n2)
}
}
return s2, ret
}
return Children(p)
}
//Nest is like Seq but subsequent children are nested inside their earlier
//siblings.
func Nest(ps ...Parser) Parser {
dbg("Nest(%p)\n", ps)
p := func(s string, n *Node) (string, *Node) {
ret := NewNode("Nest")
s2, n2 := Seq(ps...)(s, ret)
if n2 == nil {
return s, nil
}
ocs := n2.Children
ret.Children = []*Node{}
n3 := ret
for _, c := range ocs {
n3.AddChild(c)
n3 = c
}
return s2, ret
}
return Children(p)
}
//ZeroOrMore returns a sequence of zero or more nodes
func ZeroOrMore(p Parser) Parser {
ret := func(s string, n *Node) (string, *Node) {
ret := NewNode("ZeroOrMore")
dbg("ZeroOrMore(%s %p) ret = %p\n", n.Kind, n, ret)
var s2 string
var n2 *Node
for s2, n2 = p(s, n); n2 != nil; s2, n2 = p(s2, n) {
dbg("ZeroOrMore(%p): AddChild()\n", p)
ret.AddChild(n2)
}
if len(ret.Children) > 0 {
return s2, ret
}
return s, n
}
return Children(ret)
}
//OneOrMore is ZeroOrMore, but fails (returns nil) if the input parser does
//not match any elements.
func OneOrMore(p Parser) Parser {
return Seq(p, ZeroOrMore(p))
}
//Parenthesized matches the input parser surrounded by literal parenthesis.
func Parenthesized(p Parser) Parser {
return Children(Seq(Lit("("), p, Lit(")")))
}
//Bracketed matches the input parser surrounded by literal square brackets.
func Bracketed(p Parser) Parser {
return Seq(Lit("["), p, Lit("]"))
}
//AngBracketed matches the input parser surrounded by literal angled brackets.
func AngBracketed(p Parser) Parser {
return Children(Seq(Lit("<"), p, Lit(">")))
}
//CurlyBracketed matches the input parser surrounded by literal curly brackets.
func CurlyBracketed(p Parser) Parser {
return Children(Seq(Lit("{"), p, Lit("}")))
}
// Recognizers -- these functions return parsers that match tokens in the input
// stream. There is no separate tokenizer.
//Word matches an element with a word boundary after its end
func Word(f string) Parser {
return Lit(f, true)
}
//Lit matches a literal string
func Lit(f string, ws ...bool) Parser {
word := false
if len(ws) > 0 {
word = ws[0]
}
lenf := len(f)
return func(s string, n *Node) (string, *Node) {
ret := NewNode("Lit", f)
dbg("Lit(%p) %s ret = %p\n", n, f, ret)
if len(s) < lenf {
return s, nil
}
if f == s[:lenf] && !(word && len(s) > lenf && wordchars.Match([]byte{s[lenf]})) {
adv := lenf
if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil {
adv += loc[1]
}
return s[adv:], ret
}
return s, nil
}
}
//Regexp matches a regular expression at the beginning of the input string
func Regexp(f string) Parser {
f = "^" + f
r := regexp.MustCompile(f)
return func(s string, n *Node) (string, *Node) {
dbg("Regexp(%p) %s\n", n, f)
if loc := r.FindStringIndex(s); loc != nil {
lenf := loc[1]
adv := lenf
if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil {
adv += loc[1]
}
ret := NewNode("Regexp", s[:lenf])
dbg("Regexp(%p): ret = %p (%s)\n", n, ret, s[:lenf])
return s[adv:], ret
}
return s, nil
}
}