Add type parsing infrastructure.

This commit is contained in:
Greg 2019-04-18 09:38:46 -04:00
parent 0b0b73c6bd
commit e7270e95cf
6 changed files with 655 additions and 5 deletions

1
.gitignore vendored
View File

@ -4,4 +4,3 @@ ast.txt
simple
complex
program
types

291
types/combinators.go Normal file
View File

@ -0,0 +1,291 @@
package types
import (
"fmt"
"regexp"
)
// Regular expressions shared by the recognizers in this package. They are
// compiled once, when the package is initialised.
var (
	// whitespace matches a (possibly empty) run of spaces.
	whitespace = regexp.MustCompile(" *")
	// wordchars matches a single character that may appear in a word.
	wordchars = regexp.MustCompile(`[_0-9a-zA-Z]`)
	// reservedwords matches a string that is exactly one reserved word.
	reservedwords = regexp.MustCompile("^(void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex|const|restrict|volatile|struct|union|enum)$")
)
// dbg prints a formatted debug message to standard output, but only when
// the package-level Debug flag is set.
func dbg(f string, xs ...interface{}) {
	if !Debug {
		return
	}
	fmt.Printf(f, xs...)
}
// Parser is the function signature shared by the combinators and
// recognizers in this file. It receives the remaining input and a node, and
// returns the input left over together with a result node. The parsers
// defined in this file signal failure by returning a nil node.
type Parser func(string, *Node) (string, *Node)
// Adders
// Child wraps p so that its result node, when it is non-nil and is not the
// input node itself, is attached to the input node as a child. The wrapped
// parser returns the input node on success and a nil node (with the input
// string untouched) on failure.
// FIXME -- broken?
func Child(p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		dbg("Child(%s %p)\n", n.Kind, n)
		rest, out := p(s, n)
		switch out {
		case nil:
			return s, nil
		case n:
			// p handed back the input node: nothing to attach.
		default:
			dbg("Child(%p): AddChild()\n", p)
			n.AddChild(out)
		}
		return rest, n
	}
}
// ChildOf runs p against the supplied node ret (not against the node the
// resulting parser is called with) and returns ret on success. If p yields a
// node other than ret, that node is attached to ret as a child. On failure
// the input string and a nil node are returned.
func ChildOf(ret *Node, p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		dbg("ChildOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
		rest, out := p(s, ret)
		if out == nil {
			return s, nil
		}
		if out != ret {
			dbg("ChildOf(ret = %p) AddChild()\n", ret)
			ret.AddChild(out)
			return rest, ret
		}
		// p returned ret itself, so its children are already in place.
		dbg("ChildOf(ret = %p) n2 = %p. WHAT\n", ret, out)
		return rest, ret
	}
}
// Children takes a parser and returns a parser that adds the children of
// its output node to the input node. The input node is returned on success;
// on failure the input string and a nil node are returned.
//
// If zero or several parsers are passed in, they are first combined with
// Seq(...). Calling Children with no parsers therefore yields a parser that
// succeeds without consuming input, instead of panicking on ps[0].
func Children(ps ...Parser) Parser {
	if len(ps) != 1 {
		return Children(Seq(ps...))
	}
	p := ps[0]
	return func(s string, n *Node) (string, *Node) {
		dbg("Children(%s %p)\n", n.Kind, n)
		s2, n2 := p(s, n)
		if n2 == nil {
			return s, nil
		}
		for _, c := range n2.Children {
			dbg("Children(%s %p) AddChild() from %p\n", n.Kind, n, n2)
			// Never make the input node a child of itself.
			if c != n {
				n.AddChild(c)
			}
		}
		return s2, n
	}
}
// ChildrenOf runs p against the supplied node ret and adds the children of
// p's output node to ret. The node passed to the resulting parser is only
// used for debug output.
func ChildrenOf(ret *Node, p Parser) Parser {
	adopt := Children(p)
	return func(s string, n *Node) (string, *Node) {
		dbg("ChildrenOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
		return adopt(s, ret)
	}
}
// NodeNamed runs p and, when it produces a node, sets that node's Kind to k.
// The results of p are otherwise passed through unchanged. Note that when p
// returns the input node itself, it is the input node that gets renamed.
func NodeNamed(k string, p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		rest, out := p(s, n)
		if out == nil {
			return rest, nil
		}
		out.Kind = k
		return rest, out
	}
}
// Combinators
// Id is the identity parser: it consumes nothing and hands back the input
// string and the input node unchanged.
func Id(s string, n *Node) (string, *Node) {
	return s, n
}
// Opt makes p optional. If p succeeds its results are returned; if p fails
// (returns a nil node) the original input string and input node are
// returned instead, so Opt itself never fails.
func Opt(p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		if rest, out := p(s, n); out != nil {
			return rest, out
		}
		return s, n
	}
}
// OneOf tries the given parsers in order against the same input and node,
// and returns the result of the first one that yields a non-nil node. If
// none succeeds, the input string and a nil node are returned.
func OneOf(ps ...Parser) Parser {
	dbg("OneOf(%p)\n", ps)
	return func(s string, n *Node) (string, *Node) {
		for i := range ps {
			rest, out := ps[i](s, n)
			if out == nil {
				continue
			}
			return rest, out
		}
		return s, nil
	}
}
// Longest runs every parser against the same input and node and returns the
// result of the one that leaves the least input unconsumed. When several
// parsers consume the same amount, the earliest one wins. If no parser
// succeeds, the input string and a nil node are returned.
//
// Doesn't work? Every alternative is run against the same node n, so any
// changes the losing alternatives make to n are not unwound.
func Longest(ps ...Parser) Parser {
	dbg("Longest(%p)\n", ps)
	return func(s string, n *Node) (string, *Node) {
		var (
			bestRest string
			bestNode *Node
		)
		for _, p := range ps {
			rest, node := p(s, n)
			if node == nil {
				continue
			}
			// Track success with bestNode rather than a magic "large"
			// remainder length, so arbitrarily long inputs work.
			if bestNode == nil || len(rest) < len(bestRest) {
				bestRest, bestNode = rest, node
			}
		}
		if bestNode == nil {
			return s, nil
		}
		return bestRest, bestNode
	}
}
// Seq applies the parsers one after another, each on the input left by the
// previous one, collecting their results as children of a fresh "Seq" node
// (the node passed to the resulting parser is not used). It returns that
// node and the remaining input only if every parser succeeds; otherwise it
// returns the original input string and a nil node.
func Seq(ps ...Parser) Parser {
	dbg("Seq(%p)\n", ps)
	return func(s string, n *Node) (string, *Node) {
		ret := NewNode("Seq")
		rest := s
		for i := range ps {
			next, out := ps[i](rest, ret)
			if out == nil {
				return s, nil
			}
			rest = next
			if out == ret {
				// The step already worked directly on ret.
				continue
			}
			dbg("Seq(%p): AddChild()\n", ps)
			ret.AddChild(out)
		}
		return rest, ret
	}
}
// SeqC sequences the parsers with Seq and wraps the result in Children, so
// the collected nodes are added to the input node.
func SeqC(ps ...Parser) Parser {
	sequence := Seq(ps...)
	return Children(sequence)
}
// Nest is like Seq, but each subsequent result is nested inside its earlier
// sibling: the first result becomes the child of a fresh "Nest" node, the
// second becomes a child of the first, and so on.
func Nest(ps ...Parser) Parser {
	dbg("Nest(%p)\n", ps)
	return func(s string, n *Node) (string, *Node) {
		rest, seq := Seq(ps...)(s, n)
		if seq == nil {
			return s, nil
		}
		ret := NewNode("Nest")
		parent := ret
		for i := range seq.Children {
			child := seq.Children[i]
			parent.AddChild(child)
			parent = child
		}
		return rest, ret
	}
}
// ZeroOrMore applies p repeatedly, each time against the input node n, and
// collects the results as children of a fresh "ZeroOrMore" node.
//
// If at least one child was collected, the "ZeroOrMore" node and the
// remaining input are returned. Otherwise the original input string and the
// input node are returned, so ZeroOrMore never fails.
//
// Repetition stops as soon as p fails or succeeds without consuming any
// input; the latter would otherwise repeat forever.
func ZeroOrMore(p Parser) Parser {
	return func(s string, n *Node) (string, *Node) {
		ret := NewNode("ZeroOrMore")
		dbg("ZeroOrMore(%s %p) ret = %p\n", n.Kind, n, ret)
		// rest is only advanced after a successful, consuming match, so it
		// does not depend on what a failing parser returns.
		rest := s
		for {
			next, out := p(rest, n)
			if out == nil || len(next) >= len(rest) {
				break
			}
			dbg("ZeroOrMore(%p): AddChild()\n", p)
			ret.AddChild(out)
			rest = next
		}
		if len(ret.Children) > 0 {
			return rest, ret
		}
		return s, n
	}
}
// OneOrMore requires p to match once and then accepts any further
// repetitions of p; it is Seq(p, Children(ZeroOrMore(p))).
func OneOrMore(p Parser) Parser {
	more := Children(ZeroOrMore(p))
	return Seq(p, more)
}
// Parenthesized matches p between the literals "(" and ")" and adds the
// resulting children to the input node.
func Parenthesized(p Parser) Parser {
	enclosed := Seq(Lit("("), p, Lit(")"))
	return Children(enclosed)
}
// Bracketed matches p between the literals "[" and "]". Unlike the other
// bracketing helpers it returns the Seq node itself rather than wrapping it
// in Children.
func Bracketed(p Parser) Parser {
	open, closing := Lit("["), Lit("]")
	return Seq(open, p, closing)
}
// AngBracketed matches p between the literals "<" and ">" and adds the
// resulting children to the input node.
func AngBracketed(p Parser) Parser {
	enclosed := Seq(Lit("<"), p, Lit(">"))
	return Children(enclosed)
}
// CurlyBracketed matches p between the literals "{" and "}" and adds the
// resulting children to the input node.
func CurlyBracketed(p Parser) Parser {
	enclosed := Seq(Lit("{"), p, Lit("}"))
	return Children(enclosed)
}
// Recognizers
// Word recognizes the literal f as a whole word: it is Lit with the
// word-boundary check switched on.
func Word(f string) Parser {
	const wholeWord = true
	return Lit(f, wholeWord)
}
// Lit recognizes the literal string f at the start of the input and skips
// any spaces that follow it. On success it returns a "Lit" node whose
// content is f; on failure it returns the input string and a nil node.
//
// If the optional first ws argument is true, f must be a whole word: the
// match is rejected when the character right after f is a word character.
func Lit(f string, ws ...bool) Parser {
	word := len(ws) > 0 && ws[0]
	lenf := len(f)
	return func(s string, n *Node) (string, *Node) {
		ret := NewNode("Lit", f)
		dbg("Lit(%p) %s ret = %p\n", n, f, ret)
		if len(s) < lenf || s[:lenf] != f {
			return s, nil
		}
		// In word mode the literal must not run straight into another
		// word character.
		if word && len(s) > lenf && wordchars.Match([]byte{s[lenf]}) {
			return s, nil
		}
		rest := s[lenf:]
		if loc := whitespace.FindStringIndex(rest); loc != nil {
			rest = rest[loc[1]:]
		}
		return rest, ret
	}
}
// Regexp recognizes the regular expression f at the start of the input and
// skips any spaces that follow the match. On success it returns a "Regexp"
// node whose content is the matched text; on failure it returns the input
// string and a nil node. The expression is compiled once, when Regexp is
// called, and panics there if f is not a valid pattern.
//
// The pattern is wrapped in a non-capturing group before being anchored.
// A bare "^" + f would anchor only the first alternative of a pattern such
// as `[\d]+|\*`, letting later alternatives match in the middle of the
// input.
func Regexp(f string) Parser {
	f = "^(?:" + f + ")"
	r := regexp.MustCompile(f)
	return func(s string, n *Node) (string, *Node) {
		dbg("Regexp(%p) %s\n", n, f)
		loc := r.FindStringIndex(s)
		if loc == nil {
			return s, nil
		}
		lenf := loc[1]
		adv := lenf
		if ws := whitespace.FindStringIndex(s[lenf:]); ws != nil {
			adv += ws[1]
		}
		ret := NewNode("Regexp", s[:lenf])
		dbg("Regexp(%p): ret = %p (%s)\n", n, ret, s[:lenf])
		return s[adv:], ret
	}
}

208
types/ctypes.go Normal file
View File

@ -0,0 +1,208 @@
package types
// Parsers for recognizing type names in C/Objective-C
// TypeName parses a SpecifierQualifierList optionally followed by an
// AbstractDeclarator and names the resulting node "TypeName".
func TypeName(s string, n *Node) (string, *Node) {
	body := Seq(
		SpecifierQualifierList,
		Opt(AbstractDeclarator),
	)
	return NodeNamed("TypeName", body)(s, n)
}
// AbstractDeclarator accepts either an optional Pointer followed by one or
// more DirectAbstractDeclarators, or a Pointer on its own. The first form is
// tried first.
func AbstractDeclarator(s string, n *Node) (string, *Node) {
	maybePointer := Opt(Pointer)
	directs := Children(OneOrMore(DirectAbstractDeclarator))
	withDirect := SeqC(maybePointer, directs)
	return OneOf(withDirect, Pointer)(s, n)
}
// DirectAbstractDeclarator accepts, in this order: a parenthesized
// AbstractDeclarator (node "Parenthesized"), one of several bracketed array
// forms (node "Array"), or a parenthesized optional ParameterList (node
// "Function").
//
// NOTE: array lengths are limited to digits (or "*" in the first length
// form); arbitrary expressions are not allowed.
func DirectAbstractDeclarator(s string, n *Node) (string, *Node) {
	// array names a bracketed body "Array".
	array := func(body Parser) Parser {
		return NodeNamed("Array", Bracketed(body))
	}
	// length names a regular-expression match "Length".
	length := func(pattern string) Parser {
		return NodeNamed("Length", Regexp(pattern))
	}
	alternatives := []Parser{
		ChildOf(NewNode("Parenthesized"), Parenthesized(AbstractDeclarator)),
		array(Opt(TypeQualifierList)),
		array(SeqC(Opt(TypeQualifierList), length(`[\d]+|\*`))),
		array(SeqC(Word("static"), Opt(TypeQualifierList), length(`[\d]+`))),
		array(SeqC(Opt(TypeQualifierList), Word("static"), length(`[\d]+`))),
		ChildOf(NewNode("Function"), Parenthesized(Opt(ParameterList))),
	}
	return OneOf(alternatives...)(s, n)
}
// ParameterList accepts an optional run of ParameterDeclarations each
// followed by ",", and then one final ParameterDeclaration.
func ParameterList(s string, n *Node) (string, *Node) {
	withComma := SeqC(ParameterDeclaration, Lit(","))
	leading := Opt(Children(OneOrMore(withComma)))
	return SeqC(leading, ParameterDeclaration)(s, n)
}
// ParameterDeclaration collects, under a new "ParameterDeclaration" node,
// the first of: the literal "..." (node "Ellipsis"), DeclarationSpecifiers
// followed by a Declarator, or DeclarationSpecifiers followed by an optional
// AbstractDeclarator.
func ParameterDeclaration(s string, n *Node) (string, *Node) {
	decl := NewNode("ParameterDeclaration")
	forms := OneOf(
		NodeNamed("Ellipsis", Lit("...")),
		SeqC(DeclarationSpecifiers, Declarator),
		SeqC(DeclarationSpecifiers, Opt(AbstractDeclarator)),
	)
	return ChildOf(decl, forms)(s, n)
}
// DeclarationSpecifiers accepts a StorageClassSpecifier, TypeSpecifier,
// TypeQualifier or Identifier (tried in that order), optionally followed by
// further DeclarationSpecifiers.
func DeclarationSpecifiers(s string, n *Node) (string, *Node) {
	// FunctionSpecifier is not handled yet.
	heads := []Parser{
		StorageClassSpecifier,
		TypeSpecifier,
		TypeQualifier,
		Identifier,
	}
	alternatives := make([]Parser, 0, len(heads))
	for _, head := range heads {
		alternatives = append(alternatives, SeqC(head, Opt(DeclarationSpecifiers)))
	}
	return OneOf(alternatives...)(s, n)
}
// StorageClassSpecifier accepts one of the words typedef, extern, static,
// auto or register and names the node "StorageClassSpecifier".
func StorageClassSpecifier(s string, n *Node) (string, *Node) {
	keywords := []string{"typedef", "extern", "static", "auto", "register"}
	alternatives := make([]Parser, 0, len(keywords))
	for _, kw := range keywords {
		alternatives = append(alternatives, Word(kw))
	}
	return NodeNamed("StorageClassSpecifier", OneOf(alternatives...))(s, n)
}
// Declarator accepts zero or more Pointers followed by a DirectDeclarator
// and names the resulting node "Declarator".
func Declarator(s string, n *Node) (string, *Node) {
	body := Seq(ZeroOrMore(Pointer), DirectDeclarator)
	return NodeNamed("Declarator", body)(s, n)
}
// DirectDeclarator accepts an Identifier or a parenthesized Declarator and
// names the resulting node "DirectDeclarator".
// INCOMPLETE: no other forms are handled yet.
func DirectDeclarator(s string, n *Node) (string, *Node) {
	forms := OneOf(
		Identifier,
		Parenthesized(Declarator),
	)
	return NodeNamed("DirectDeclarator", forms)(s, n)
}
// NullableAnnotation accepts one of the words _Nullable, _Nonnull or
// _Null_unspecified and names the node "NullableAnnotation".
func NullableAnnotation(s string, n *Node) (string, *Node) {
	keywords := []string{"_Nullable", "_Nonnull", "_Null_unspecified"}
	alternatives := make([]Parser, 0, len(keywords))
	for _, kw := range keywords {
		alternatives = append(alternatives, Word(kw))
	}
	return NodeNamed("NullableAnnotation", OneOf(alternatives...))(s, n)
}
// Pointer accepts a "*" (node "Pointer") followed by an optional
// TypeQualifierList, an optional NullableAnnotation and, optionally, a
// further Pointer.
func Pointer(s string, n *Node) (string, *Node) {
	star := NodeNamed("Pointer", Lit("*"))
	rule := SeqC(
		star,
		Opt(TypeQualifierList),
		Opt(NullableAnnotation),
		Opt(Pointer),
	)
	return rule(s, n)
}
// TypeQualifierList accepts one or more TypeQualifiers, adds them to the
// input node and names the result "TypeQualifierList".
func TypeQualifierList(s string, n *Node) (string, *Node) {
	qualifiers := OneOrMore(TypeQualifier)
	return NodeNamed("TypeQualifierList", Children(qualifiers))(s, n)
}
// SpecifierQualifierList accepts a TypeSpecifier, StructOrUnionSpecifier,
// TypedefName or TypeQualifier (tried in that order), optionally followed by
// a further SpecifierQualifierList, and names the result
// "SpecifierQualifierList".
func SpecifierQualifierList(s string, n *Node) (string, *Node) {
	heads := []Parser{
		TypeSpecifier,
		StructOrUnionSpecifier,
		TypedefName,
		TypeQualifier,
	}
	alternatives := make([]Parser, 0, len(heads))
	for _, head := range heads {
		alternatives = append(alternatives, SeqC(head, Opt(SpecifierQualifierList)))
	}
	// Earlier formulation, kept for reference:
	//	OneOrMore(OneOf(TypeQualifier,TypeSpecifier)))(s,n)
	return NodeNamed("SpecifierQualifierList", OneOf(alternatives...))(s, n)
}
// TypeSpecifier accepts one of the built-in type words listed below and
// names the node "TypeSpecifier".
func TypeSpecifier(s string, n *Node) (string, *Node) {
	keywords := []string{
		"void",
		"char",
		"short",
		"int",
		"long",
		"float",
		"double",
		"signed",
		"unsigned",
		"_Bool",
		"_Complex",
	}
	// Not included here: StructOrUnionSpecifier, EnumSpecifier, TypedefName.
	alternatives := make([]Parser, 0, len(keywords))
	for _, kw := range keywords {
		alternatives = append(alternatives, Word(kw))
	}
	return NodeNamed("TypeSpecifier", OneOf(alternatives...))(s, n)
}
// TypeQualifier accepts one of the words const, restrict or volatile and
// names the node "TypeQualifier".
func TypeQualifier(s string, n *Node) (string, *Node) {
	keywords := []string{"const", "restrict", "volatile"}
	alternatives := make([]Parser, 0, len(keywords))
	for _, kw := range keywords {
		alternatives = append(alternatives, Word(kw))
	}
	return NodeNamed("TypeQualifier", OneOf(alternatives...))(s, n)
}
// StructOrUnionSpecifier accepts either StructOrUnion, an optional
// Identifier and a StructDeclarationList, or StructOrUnion with an
// Identifier nested inside it. The result is named
// "StructOrUnionSpecifier".
func StructOrUnionSpecifier(s string, n *Node) (string, *Node) {
	withBody := Seq(StructOrUnion, Opt(Identifier), StructDeclarationList)
	tagOnly := Nest(StructOrUnion, Identifier)
	forms := OneOf(withBody, tagOnly)
	return NodeNamed("StructOrUnionSpecifier", Children(forms))(s, n)
}
// StructOrUnion accepts the word "struct" (node "Struct") or the word
// "union" (node "Union").
func StructOrUnion(s string, n *Node) (string, *Node) {
	structWord := NodeNamed("Struct", Word("struct"))
	unionWord := NodeNamed("Union", Word("union"))
	return OneOf(structWord, unionWord)(s, n)
}
// StructDeclarationList accepts one or more StructDeclarations and names
// the resulting node "StructDeclarationList".
func StructDeclarationList(s string, n *Node) (string, *Node) {
	declarations := OneOrMore(StructDeclaration)
	return NodeNamed("StructDeclarationList", declarations)(s, n)
}
// StructDeclaration accepts a SpecifierQualifierList, a
// StructDeclaratorList and a terminating ";", and names the resulting node
// "StructDeclaration".
func StructDeclaration(s string, n *Node) (string, *Node) {
	body := Seq(
		SpecifierQualifierList,
		StructDeclaratorList,
		Lit(";"),
	)
	return NodeNamed("StructDeclaration", body)(s, n)
}
// StructDeclaratorList accepts an optional run of StructDeclarators each
// followed by ",", then one final StructDeclarator, and names the resulting
// node "StructDeclaratorList".
func StructDeclaratorList(s string, n *Node) (string, *Node) {
	withComma := Seq(StructDeclarator, Lit(","))
	leading := Opt(OneOrMore(withComma))
	body := Seq(leading, StructDeclarator)
	return NodeNamed("StructDeclaratorList", body)(s, n)
}
// StructDeclarator accepts a Declarator and names the resulting node
// "StructDeclarator".
func StructDeclarator(s string, n *Node) (string, *Node) {
	rule := NodeNamed("StructDeclarator", Declarator)
	return rule(s, n)
}
// Generic accepts a TypeName and names the resulting node "Generic".
func Generic(s string, n *Node) (string, *Node) {
	rule := NodeNamed("Generic", TypeName)
	return rule(s, n)
}
// GenericList accepts a Generic followed by "," and a further GenericList,
// or failing that a single Generic.
func GenericList(s string, n *Node) (string, *Node) {
	several := SeqC(Generic, Lit(","), GenericList)
	return OneOf(several, Generic)(s, n)
}
// TypedefName accepts an Identifier (renamed "TypedefName") followed by an
// angle-bracketed GenericList, or failing that a plain Identifier. The
// result is named "TypedefName".
func TypedefName(s string, n *Node) (string, *Node) {
	withGenerics := SeqC(
		NodeNamed("TypedefName", Identifier),
		AngBracketed(GenericList),
	)
	forms := OneOf(withGenerics, Identifier)
	return NodeNamed("TypedefName", forms)(s, n)
}
func Identifier(s string, n *Node) (string, *Node) {
s2,n2 := NodeNamed("Identifier",
Regexp(`[_a-zA-Z][_0-9a-zA-Z]*`))(s,n)
if n2 == nil {
return s,nil
}
if reservedwords.MatchString(n2.Content) {
dbg("Identifier '%s' contains reserved word\n",n2.Content)
return s,nil
}
return s2,n2
}

76
types/main.go Normal file
View File

@ -0,0 +1,76 @@
package types
import (
"fmt"
)
// Debug switches on the package's debug output (see dbg). It is off by
// default.
var Debug bool
// Parse runs the TypeName parser on s, starting from a fresh "AST" node, and
// returns the resulting node, or nil if TypeName fails. Any input left over
// after the match is discarded.
func Parse(s string) *Node {
	root := NewNode("AST")
	_, tree := TypeName(s, root)
	return tree
}
// isAbstract reports whether one level of kind k (for example "Pointer" or
// "Array") can be stripped from the node, i.e. whether stripAbstract(k)
// returns a non-nil result.
func (n *Node) isAbstract(k string) bool {
	return n.stripAbstract(k) != nil
}
// stripAbstract strips one level of indirection of kind k (for example
// "Pointer" or "Array") from a node and returns the result as a new node of
// the same Kind. It returns nil if the node has fewer than two children or
// does not end in an indirection of kind k.
//
// The receiver and its children are never modified: the child list is
// copied first, and a Parenthesized child that needs trimming is replaced by
// a trimmed copy. This keeps isAbstract free of side effects.
func (n *Node) stripAbstract(k string) *Node {
	i := len(n.Children) - 1
	if i < 1 {
		return nil
	}
	ret := NewNode(n.Kind)
	// Private copy, so the edits below cannot leak into n.Children.
	cs := make([]*Node, len(n.Children))
	copy(cs, n.Children)
	if Debug {
		fmt.Printf("stripAbstract(): i = %d\n", i)
	}

	// Scan backwards skipping NullableAnnotation tags
	for ; i > 0 && cs[i].Kind == "NullableAnnotation"; i-- {
	}
	if cs[i].Kind == k {
		if Debug {
			fmt.Printf("stripAbstract(): last node is %s\n", k)
		}
		ret.Children = cs[:i]
		return ret
	}
	if i <= 1 || cs[i-1].Kind != "Parenthesized" {
		return nil
	}

	inner := cs[i-1].Children
	j := len(inner) - 1
	if j < 0 {
		// An empty Parenthesized node has nothing to strip.
		return nil
	}
	// Scan backwards skipping TypeQualifier tags
	for ; j > 0 && inner[j].Kind == "TypeQualifier"; j-- {
	}
	if inner[j].Kind != k {
		return nil
	}
	if j == 0 { // strip Parenthesized tag
		cs[i-1] = cs[i]
		ret.Children = cs[:i]
		return ret
	}
	// strip last child from a copy of the Parenthesized tag
	paren := NewNode(cs[i-1].Kind, cs[i-1].Content)
	paren.Children = append(paren.Children, inner[:j]...)
	cs[i-1] = paren
	ret.Children = cs
	return ret
}
// PointsTo, when called on a pointer node, returns a node describing the
// type pointed to. It returns nil when called on non-pointer types.
// A trace line is printed only when Debug is set.
func (n *Node) PointsTo() *Node {
	if Debug {
		fmt.Printf("PointsTo()\n")
	}
	return n.stripAbstract("Pointer")
}
// ArrayOf, when called on an array node, returns a node describing the type
// of the elements of the array. It returns nil when called on non-array
// types. A trace line is printed only when Debug is set.
func (n *Node) ArrayOf() *Node {
	if Debug {
		fmt.Printf("ArrayOf()\n")
	}
	return n.stripAbstract("Array")
}

69
types/node.go Normal file
View File

@ -0,0 +1,69 @@
package types
import (
"fmt"
"os"
"strings"
)
// Type definition and basic functions for Nodes
// Node is one element of the tree built by the parsers in this package.
type Node struct {
	// Kind names the type of node, e.g. "Lit" or "Pointer".
	Kind string
	// Content holds the text associated with the node, if any.
	Content string
	// Children are the node's sub-nodes, in the order they were added.
	Children []*Node
}
// NewNode allocates a node of kind k with an empty, non-nil child list. If
// any cs values are given, the first one becomes the node's Content and the
// rest are ignored.
func NewNode(k string, cs ...string) *Node {
	ret := &Node{Kind: k, Children: []*Node{}}
	if len(cs) > 0 {
		ret.Content = cs[0]
	}
	dbg("NewNode(%p) %s\n", ret, ret.Kind)
	return ret
}
// String renders the node and its descendants, one node per line. Each line
// is prefixed with one "-" per nesting level and shows the node's kind,
// address and content. The optional first ls value is the nesting level to
// start from (default 0). A nil node renders as the empty string.
//
// If an explicitly passed level exceeds 100, a message is printed and the
// process exits.
func (n *Node) String(ls ...int) string {
	if n == nil {
		return ""
	}
	depth := 0
	if len(ls) > 0 {
		depth = ls[0]
		if depth > 100 {
			fmt.Println("(*Node)String(): Recursion too deep")
			os.Exit(-1)
		}
	}
	var sb strings.Builder
	fmt.Fprintf(&sb, "%s<%s> %p '%s'\n", strings.Repeat("-", depth), n.Kind, n, n.Content)
	for i := range n.Children {
		sb.WriteString(n.Children[i].String(depth + 1))
	}
	return sb.String()
}
// AddChild appends c to n's children and returns n. Nodes of kind "Lit" are
// silently skipped, and a node that is already a child of n is not added a
// second time.
//
// Calling AddChild on a nil node, with a nil child, or with n as its own
// child is treated as a fatal error: a message is written to standard error
// and the process exits. The message is written whether or not Debug is
// set, so the process never exits without an explanation.
func (n *Node) AddChild(c *Node) *Node {
	fatal := func(msg string) {
		fmt.Fprintf(os.Stderr, "(%p)AddChild(%p): %s\n", n, c, msg)
		os.Exit(-1)
	}
	switch {
	case n == nil:
		fatal("Called on nil node")
	case c == nil:
		fatal("Child is nil")
	case n == c:
		fatal("Node cannot be its own child")
	}

	// Skip literals
	if c.Kind == "Lit" {
		return n
	}
	// Do we already have this child? (FIXME: Not needed?)
	for _, d := range n.Children {
		if c == d {
			return n
		}
	}
	dbg("(%p)AddChild(%p)\n", n, c)
	n.Children = append(n.Children, c)
	return n
}

View File

@ -65,11 +65,16 @@ var builtinTypes map[string]string = map[string]string{
"complex double": "C.complexdouble",
}*/
func (w *Wrapper) AddType(t,class string) {
func (w *Wrapper) AddType(t1,t2,class string) {
fmt.Printf("Type: %s\n",t1)
t := typeOrType2(t1,t2)
if _,ok := builtinTypes[t]; ok {
return
}
nt, err := goType(t,class)
if Debug {
fmt.Printf("AddType(): (%s) (%s) -> %s\n",t1,t2,nt)
}
if err != nil {
return
}
@ -264,7 +269,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
Properties: map[string]Property{},
Methods: map[string]Method{},
}
w.AddType(name,name)
w.AddType(name,"",name)
}
var avail bool
for _,c := range ns {
@ -278,7 +283,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
}
//_,avail = w.GetParms(x,name) // TODO
//if avail {
w.AddType(typeOrType2(x.Type,x.Type2),name)
w.AddType(x.Type,x.Type2,name)
i.Properties[p.Name] = p
//}
case *ast.ObjCMethodDecl:
@ -292,7 +297,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
}
m.Parameters, avail = w.GetParms(x,name)
if avail {
w.AddType(typeOrType2(x.Type,x.Type2),name)
w.AddType(x.Type,x.Type2,name)
i.Methods[m.Name] = m
}
case *ast.ObjCProtocol:
@ -346,6 +351,7 @@ func (w *Wrapper) GetParms(n *ast.ObjCMethodDecl,class string) ([]Parameter,bool
if Debug { fmt.Printf("GetParms(): ast.Unknown: %s\n",x.Name) }
}
}
// check that the method is available for this OS and not deprecated
a := func() bool {
if len(avail) == 0 {
return true
@ -360,6 +366,7 @@ func (w *Wrapper) GetParms(n *ast.ObjCMethodDecl,class string) ([]Parameter,bool
if !a {
return nil, false
}
// check that we found the right number of parameters
if len(ret) != len(n.Parameters) {
fmt.Printf("Error in method declaration %s: Wrong number of ParmVarDecl children: %d parameters but %d ParmVarDecl children\n",n.Name,len(n.Parameters),len(ret))
}