package types

import (
	"regexp"
)

// Regular expressions shared by the recognizers in this package. They are
// compiled by package-level initializers (rather than in an init function) so
// that they are already usable while other package-level variables are being
// initialized.
var (
	// whitespace matches a (possibly empty) run of space characters.
	whitespace = regexp.MustCompile(" *")
	// wordchars matches a single letter, digit or underscore.
	wordchars = regexp.MustCompile(`[_0-9a-zA-Z]`)
	// reservedwords matches a string that is exactly one of the listed keywords.
	reservedwords = regexp.MustCompile("^(void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex|const|restrict|volatile|struct|union|enum)$")
)

// Parser is a function that takes the string to be parsed plus an input Node
// and returns the unparsed remainder string and a Node. If the parser fails
// to parse anything in the input, it should return a nil Node.
type Parser func(string, *Node) (string, *Node)

// Adders -- add elements to the Node tree

// ChildOf takes a node and adds the result of a parser to it as a child.
//
// The returned Parser runs p with ret as its input node. The node supplied at
// parse time is only read for the debug message, so it must be non-nil. If p
// fails, the input string is returned unconsumed together with a nil Node.
// Otherwise ret is returned; p's result is attached to ret unless p handed
// back ret itself.
func ChildOf(ret *Node, p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		dbg("ChildOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
		s2, n2 := p(s, ret)
		if n2 == nil {
			return s, nil
		}
		if n2 == ret {
			// p returned ret itself, so there is nothing new to attach.
			dbg("ChildOf(ret = %p) n2 = %p. WHAT\n", ret, n2)
			return s2, ret
		}
		dbg("ChildOf(ret = %p) AddChild()\n", ret)
		ret.AddChild(n2)
		return s2, ret
	}
}

// Children takes a parser and returns a parser that adds the children of its
// output node to the input node. If zero or multiple parsers are passed in,
// they are first combined with Seq(...); an empty list therefore yields a
// parser that succeeds without consuming input instead of panicking.
func Children(ps ...Parser) Parser {
	if len(ps) != 1 {
		return Children(Seq(ps...))
	}
	p := ps[0]
	return func(s string, n *Node) (string, *Node) {
		dbg("Children(%s %p)\n", n.Kind, n)
		s2, n2 := p(s, n)
		if n2 == nil {
			return s, nil
		}
		for _, c := range n2.Children {
			dbg("Children(%s %p) AddChild() from %p\n", n.Kind, n, n2)
			// Never attach the input node to itself.
			if c != n {
				n.AddChild(c)
			}
		}
		return s2, n
	}
}

// ChildrenOf takes a node and adds the children of a parser's output node
// to it as its children.
func ChildrenOf(ret *Node, p Parser) Parser { return func(s string, n *Node) (string, *Node) { dbg("ChildrenOf(%s %p) %s %p\n",ret.Kind,ret,n.Kind,n) return Children(p)(s,ret) } } func NodeNamed(k string, p Parser) Parser { return func(s string, n *Node) (string, *Node) { s2,n2 := p(s,n) if n2 != nil { n2.Kind = k } return s2,n2 } } // Combinators -- combine one or more Parsers into a new Parser. //Opt optionally runs a Parser, returning the input Node (instead of nil) //if it fails func Opt(p Parser) Parser { return func(s string, n *Node) (string, *Node) { s2,n2 := p(s,n) if n2 == nil { return s,n } return s2,n2 } } //OneOf picks the first matching parser and returns its result func OneOf(ps ...Parser) Parser { dbg("OneOf(%p)\n",ps) return func(s string, n *Node) (string, *Node) { for _,p := range ps { s2,n2 := p(s,n) if n2 != nil { return s2,n2 } } return s,nil } } //Seq applies parsers in sequence, adding results as children to the input //node. Returns nil and the input string unless the entire sequence succeeds func Seq(ps ...Parser) Parser { dbg("Seq(%p)\n",ps) p := func(s string, n *Node) (string, *Node) { ret := NewNode("Seq") s2, n2 := s,n for _,p := range ps { s2, n2 = p(s2,ret) if n2 == nil { return s,nil } if n2 != ret { dbg("Seq(%p): AddChild()\n",ps) ret.AddChild(n2) } } return s2,ret } return Children(p) } //Nest is like Seq but subsequent children are nested inside their earlier //siblings. 
func Nest(ps ...Parser) Parser { dbg("Nest(%p)\n",ps) p := func(s string, n *Node) (string, *Node) { ret := NewNode("Nest") s2,n2 := Seq(ps...)(s,ret) if n2 == nil { return s,nil } ocs := n2.Children ret.Children = []*Node{} n3 := ret for _,c := range ocs { n3.AddChild(c) n3 = c } return s2,ret } return Children(p) } //ZeroOrMore returns a sequence of zero or more nodes func ZeroOrMore(p Parser) Parser { ret := func(s string, n *Node) (string, *Node) { ret := NewNode("ZeroOrMore") dbg("ZeroOrMore(%s %p) ret = %p\n",n.Kind,n,ret) var s2 string var n2 *Node for s2,n2 = p(s,n); n2 != nil; s2,n2 = p(s2,n) { dbg("ZeroOrMore(%p): AddChild()\n",p) ret.AddChild(n2) } if len(ret.Children) > 0 { return s2,ret } return s,n } return Children(ret) } //OneOrMore is ZeroOrMore, but fails (returns nil) if the input parser does //not match any elements. func OneOrMore(p Parser) Parser { return Seq(p,ZeroOrMore(p)) } //Parenthesized matches the input parser surrounded by literal parenthesis. func Parenthesized(p Parser) Parser { return Children(Seq(Lit("("),p,Lit(")"))) } //Bracketed matches the input parser surrounded by literal square brackets. func Bracketed(p Parser) Parser { return Seq(Lit("["),p,Lit("]")) } //AngBracketed matches the input parser surrounded by literal angled brackets. func AngBracketed(p Parser) Parser { return Children(Seq(Lit("<"),p,Lit(">"))) } //CurlyBracketed matches the input parser surrounded by literal curly brackets. func CurlyBracketed(p Parser) Parser { return Children(Seq(Lit("{"),p,Lit("}"))) } // Recognizers -- these functions return parsers that match tokens in the input // stream. There is no separate tokenizer. 
//Word matches an element with a word boundary after its end func Word(f string) Parser { return Lit(f,true) } //Lit matches a literal string func Lit(f string, ws ...bool) Parser { word := false if len(ws) > 0 { word = ws[0] } lenf := len(f) return func(s string, n *Node) (string, *Node) { ret := NewNode("Lit",f) dbg("Lit(%p) %s ret = %p\n",n,f,ret) if len(s) < lenf { return s,nil } if f == s[:lenf] && !(word && len(s) > lenf && wordchars.Match([]byte{s[lenf]})) { adv := lenf if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil { adv += loc[1] } return s[adv:],ret } return s,nil } } //Regexp matches a regular expression at the beginning of the input string func Regexp(f string) Parser { f = "^" + f r := regexp.MustCompile(f) return func(s string, n *Node) (string, *Node) { dbg("Regexp(%p) %s\n",n,f) if loc := r.FindStringIndex(s); loc != nil { lenf := loc[1] adv := lenf if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil { adv += loc[1] } ret := NewNode("Regexp",s[:lenf]) dbg("Regexp(%p): ret = %p (%s)\n",n,ret,s[:lenf]) return s[adv:],ret } return s,nil } }