From e7270e95cff478a9ebb4a26fc63f22722eb993a1 Mon Sep 17 00:00:00 2001
From: Greg
Date: Thu, 18 Apr 2019 09:38:46 -0400
Subject: [PATCH] Add type parsing infrastructure.

---
 .gitignore           |   1 -
 types/combinators.go | 291 +++++++++++++++++++++++++++++++++++++++++++
 types/ctypes.go      | 208 +++++++++++++++++++++++++++++++
 types/main.go        |  76 +++++++++++
 types/node.go        |  69 ++++++++++
 wrap/main.go         |  15 ++-
 6 files changed, 655 insertions(+), 5 deletions(-)
 create mode 100644 types/combinators.go
 create mode 100644 types/ctypes.go
 create mode 100644 types/main.go
 create mode 100644 types/node.go

diff --git a/.gitignore b/.gitignore
index 1c61274..de2abf3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,3 @@ ast.txt
 simple
 complex
 program
-types
diff --git a/types/combinators.go b/types/combinators.go
new file mode 100644
index 0000000..e1a4729
--- /dev/null
+++ b/types/combinators.go
@@ -0,0 +1,291 @@
+package types
+
+import (
+	"fmt"
+	"regexp"
+)
+
+// Regular expressions shared by the recognizers in this file. They are
+// compiled once in init().
+var (
+	// whitespace matches a (possibly empty) run of space characters.
+	whitespace *regexp.Regexp
+	// wordchars matches a single identifier character: [_0-9a-zA-Z].
+	wordchars *regexp.Regexp
+	// reservedwords matches a string that is exactly one of the listed
+	// C keywords (the pattern is anchored with ^ and $).
+	reservedwords *regexp.Regexp
+)
+
+// init compiles the package-level regular expressions declared above.
+func init() {
+	whitespace = regexp.MustCompile(" *")
+	wordchars = regexp.MustCompile(`[_0-9a-zA-Z]`)
+	reservedwords = regexp.MustCompile("^(void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex|const|restrict|volatile|struct|union|enum)$")
+}
+
+// dbg formats f with xs and prints it to standard output, but only when the
+// package-level Debug flag is true. Note that the arguments are evaluated by
+// the caller even when Debug is false.
+func dbg(f string, xs ...interface{}) {
+	if Debug {
+		fmt.Printf(f,xs...)
+	}
+}
+
+// Parser is the signature shared by every recognizer and combinator: it takes
+// an input string and a node, and returns a string and a node.
+type Parser func(string, *Node) (string, *Node)
+
+// Adders
+
+//Child takes a parser and adds its output node (if non-nil) to the tree.
+//FIXME -- broken?
+func Child(p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		dbg("Child(%s %p)\n", n.Kind, n)
+		s2, n2 := p(s, n)
+		if n2 == nil {
+			// p failed: report failure and hand back the untouched input.
+			return s, nil
+		}
+		if n2 != n {
+			// p produced a node of its own, so attach it to the current node.
+			dbg("Child(%p): AddChild()\n", p)
+			n.AddChild(n2)
+		}
+		return s2, n
+	}
+}
+
+//ChildOf takes a node and adds results of a parser to it as a child.
+//The returned parser ignores the node it is called with (apart from debug
+//output): p is always run against ret, and ret is what a successful parse
+//returns.
+func ChildOf(ret *Node, p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		dbg("ChildOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
+		s2, n2 := p(s, ret)
+		if n2 == nil {
+			return s, nil
+		}
+		if n2 == ret {
+			// p handed ret straight back; n2 and ret are the same node, so
+			// this assignment leaves ret.Children as it already was.
+			dbg("ChildOf(ret = %p) n2 = %p. WHAT\n", ret, n2)
+			ret.Children = n2.Children
+		} else {
+			dbg("ChildOf(ret = %p) AddChild()\n", ret)
+			ret.AddChild(n2)
+		}
+		return s2, ret
+	}
+}
+
+//Children takes a parser returns a parser that adds the children of its
+//output node to the tree. If multiple parsers are passed in, they are
+//passed to Seq(...). Calling Children with no parsers at all is treated the
+//same way (an empty Seq, which matches without consuming anything) instead
+//of panicking on ps[0].
+func Children(ps ...Parser) Parser {
+	if len(ps) != 1 {
+		// Zero or several parsers: wrap them in a single Seq and recurse
+		// with exactly one parser.
+		return Children(Seq(ps...))
+	}
+	p := ps[0]
+	return func(s string, n *Node) (string, *Node) {
+		dbg("Children(%s %p)\n", n.Kind, n)
+		s2, n2 := p(s, n)
+		if n2 == nil {
+			return s, nil
+		}
+		for _, c := range n2.Children {
+			dbg("Children(%s %p) AddChild() from %p\n", n.Kind, n, n2)
+			// Never make n a child of itself.
+			if c != n {
+				n.AddChild(c)
+			}
+		}
+		return s2, n
+	}
+}
+
+//ChildrenOf takes a node and adds the children of a parser's output node
+//to it as its children.
+func ChildrenOf(ret *Node, p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		dbg("ChildrenOf(%s %p) %s %p\n", ret.Kind, ret, n.Kind, n)
+		// The node passed in by the caller is only used for debug output;
+		// the children are collected on ret.
+		return Children(p)(s, ret)
+	}
+}
+
+//NodeNamed runs p and, when it succeeds, sets the Kind of the node p
+//returned to k. The node is modified in place.
+func NodeNamed(k string, p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		s2, n2 := p(s, n)
+		if n2 != nil {
+			n2.Kind = k
+		}
+		return s2, n2
+	}
+}
+
+// Combinators
+
+//Id is the identity parser
+func Id(s string, n *Node) (string, *Node) {
+	return s, n
+}
+
+//Opt optionally runs a Parser, returning the input node if it fails
+func Opt(p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		s2, n2 := p(s, n)
+		if n2 == nil {
+			// Failure of p is not failure of Opt: succeed without consuming.
+			return s, n
+		}
+		return s2, n2
+	}
+}
+
+//OneOf picks the first matching parser and returns its result
+func OneOf(ps ...Parser) Parser {
+	dbg("OneOf(%p)\n", ps)
+	return func(s string, n *Node) (string, *Node) {
+		for _, p := range ps {
+			s2, n2 := p(s, n)
+			if n2 != nil {
+				return s2, n2
+			}
+		}
+		return s, nil
+	}
+}
+
+//Longest runs every parser in ps against the same input and node and returns
+//the result of the one that leaves the shortest remaining input; on a tie the
+//earliest parser wins. It fails only when every parser fails.
+//Doesn't work? May have side effects that do not get unwound.
+func Longest(ps ...Parser) Parser {
+	dbg("Longest(%p)\n", ps)
+	return func(s string, n *Node) (string, *Node) {
+		// bestNode == nil means "nothing has matched yet", so no magic
+		// upper bound on the remaining input length is needed.
+		var (
+			bestRest string
+			bestNode *Node
+		)
+		for _, p := range ps {
+			rest, node := p(s, n)
+			if node == nil {
+				continue
+			}
+			if bestNode == nil || len(rest) < len(bestRest) {
+				bestRest, bestNode = rest, node
+			}
+		}
+		if bestNode == nil {
+			return s, nil
+		}
+		return bestRest, bestNode
+	}
+}
+
+//Seq applies parsers in sequence, adding results as children to the input
+//node.
Returns nil and the input string unless the entire sequence succeeds
+func Seq(ps ...Parser) Parser {
+	dbg("Seq(%p)\n", ps)
+	return func(s string, n *Node) (string, *Node) {
+		// Every parser in the sequence is run against a fresh "Seq" node,
+		// not against the node the caller passed in.
+		ret := NewNode("Seq")
+		s2, n2 := s, n
+		for _, p := range ps {
+			s2, n2 = p(s2, ret)
+			if n2 == nil {
+				return s, nil
+			}
+			if n2 != ret {
+				dbg("Seq(%p): AddChild()\n", ps)
+				ret.AddChild(n2)
+			}
+		}
+		return s2, ret
+	}
+}
+
+//SeqC is shorthand for Children(Seq(ps...)).
+func SeqC(ps ...Parser) Parser {
+	return Children(Seq(ps...))
+}
+
+//Like Seq but subsequent children are nested inside their earlier siblings.
+func Nest(ps ...Parser) Parser {
+	dbg("Nest(%p)\n", ps)
+	return func(s string, n *Node) (string, *Node) {
+		s2, n2 := Seq(ps...)(s, n)
+		if n2 == nil {
+			return s, nil
+		}
+		ret := NewNode("Nest")
+		// n3 walks down the chain: each child becomes the parent of the next.
+		n3 := ret
+		for _, c := range n2.Children {
+			n3.AddChild(c)
+			n3 = c
+		}
+		return s2, ret
+	}
+}
+
+//ZeroOrMore returns a sequence of zero or more nodes. p is applied
+//repeatedly until it fails; the nodes it returns are collected on a new
+//"ZeroOrMore" node. If nothing was collected the input string and node are
+//returned unchanged. Repetition also stops as soon as p succeeds without
+//consuming any input, since repeating such a match would never terminate.
+func ZeroOrMore(p Parser) Parser {
+	return func(s string, n *Node) (string, *Node) {
+		ret := NewNode("ZeroOrMore")
+		dbg("ZeroOrMore(%s %p) ret = %p\n", n.Kind, n, ret)
+		rest := s
+		for {
+			s2, n2 := p(rest, n)
+			if n2 == nil {
+				break
+			}
+			dbg("ZeroOrMore(%p): AddChild()\n", p)
+			ret.AddChild(n2)
+			progressed := len(s2) < len(rest)
+			rest = s2
+			if !progressed {
+				break
+			}
+		}
+		if len(ret.Children) > 0 {
+			return rest, ret
+		}
+		return s, n
+	}
+}
+
+//OneOrMore matches p once and then zero or more further times.
+func OneOrMore(p Parser) Parser {
+	return Seq(p, Children(ZeroOrMore(p)))
+}
+
+//Parenthesized matches p between "(" and ")" and adds the resulting
+//children to the current node.
+func Parenthesized(p Parser) Parser {
+	return Children(Seq(Lit("("), p, Lit(")")))
+}
+
+//Bracketed matches p between "[" and "]". Unlike the other bracketing
+//helpers it returns the Seq node itself rather than passing it to Children.
+func Bracketed(p Parser) Parser {
+	return Seq(Lit("["), p, Lit("]"))
+}
+
+//AngBracketed matches p between "<" and ">" and adds the resulting children
+//to the current node.
+func AngBracketed(p Parser) Parser {
+	return Children(Seq(Lit("<"), p, Lit(">")))
+}
+
+//CurlyBracketed matches p between "{" and "}" and adds the resulting
+//children to the current node.
+func CurlyBracketed(p Parser) Parser {
+	return Children(Seq(Lit("{"), p, Lit("}")))
+}
+
+// Recognizers
+
+//Word matches the literal f as a whole word, i.e. f must not be immediately
+//followed by an identifier character.
+func Word(f string) Parser {
+	return Lit(f, true)
+}
+
+//Lit matches the literal string f at the start of the input and then skips
+//any spaces that follow it. If the optional first element of ws is true the
+//match is rejected when f is immediately followed by an identifier
+//character. On success a new "Lit" node whose Content is f is returned.
+func Lit(f string, ws ...bool) Parser {
+	word := false
+	if len(ws) > 0 {
+		word = ws[0]
+	}
+	lenf := len(f)
+	return func(s string, n *Node) (string, *Node) {
+		ret := NewNode("Lit", f)
+		dbg("Lit(%p) %s ret = %p\n", n, f, ret)
+		if len(s) < lenf {
+			return s, nil
+		}
+		if f == s[:lenf] && !(word && len(s) > lenf && wordchars.Match([]byte{s[lenf]})) {
+			adv := lenf
+			if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil {
+				adv += loc[1]
+			}
+			return s[adv:], ret
+		}
+		return s, nil
+	}
+}
+
+//Regexp matches the regular expression f at the start of the input and then
+//skips any spaces that follow. On success a new "Regexp" node whose Content
+//is the matched text is returned. It panics (via regexp.MustCompile) if f is
+//not a valid pattern.
+func Regexp(f string) Parser {
+	// Wrap f in a non-capturing group before anchoring. A bare "^"+f only
+	// anchors the first alternative of a pattern such as `[\d]+|\*`, which
+	// would let the second alternative match anywhere in the input.
+	f = "^(?:" + f + ")"
+	r := regexp.MustCompile(f)
+	return func(s string, n *Node) (string, *Node) {
+		dbg("Regexp(%p) %s\n", n, f)
+		if loc := r.FindStringIndex(s); loc != nil {
+			lenf := loc[1]
+			adv := lenf
+			if loc := whitespace.FindStringIndex(s[lenf:]); loc != nil {
+				adv += loc[1]
+			}
+			ret := NewNode("Regexp", s[:lenf])
+			dbg("Regexp(%p): ret = %p (%s)\n", n, ret, s[:lenf])
+			return s[adv:], ret
+		}
+		return s, nil
+	}
+}
+
diff --git a/types/ctypes.go b/types/ctypes.go
new file mode 100644
index 0000000..c3778a9
--- /dev/null
+++ b/types/ctypes.go
@@ -0,0 +1,208 @@
+package types
+
+// Parsers for recognizing type names in C/Objective-C
+
+//TypeName matches a SpecifierQualifierList optionally followed by an
+//AbstractDeclarator and labels the result "TypeName".
+func TypeName(s string, n *Node) (string, *Node) {
+	return NodeNamed("TypeName", Seq(
+		SpecifierQualifierList,
+		Opt(AbstractDeclarator),
+	))(s, n)
+}
+
+//AbstractDeclarator matches an optional Pointer followed by one or more
+//DirectAbstractDeclarators, or failing that a Pointer on its own.
+func AbstractDeclarator(s string, n *Node) (string, *Node) {
+	return OneOf(SeqC(
+		Opt(Pointer),
+		Children(OneOrMore(DirectAbstractDeclarator))),
+		Pointer,
+	)(s, n)
+}
+
+//DirectAbstractDeclarator matches a parenthesized abstract declarator, one
+//of several bracketed array forms, or a parenthesized (optional) parameter
+//list. Alternatives are tried in the order listed.
+func DirectAbstractDeclarator(s string, n *Node) (string, *Node) {
+	return OneOf(
+		ChildOf(NewNode("Parenthesized"), Parenthesized(AbstractDeclarator)),
+		NodeNamed("Array", Bracketed(Opt(TypeQualifierList))),
+		NodeNamed("Array", Bracketed(SeqC(Opt(TypeQualifierList), NodeNamed("Length", Regexp(`[\d]+|\*`))))), // NOTE: Does not allow arbitrary expressions
+		NodeNamed("Array", Bracketed(SeqC(Word("static"), Opt(TypeQualifierList), NodeNamed("Length", Regexp(`[\d]+`))))), // NOTE: Does not allow arbitrary expressions
+		NodeNamed("Array", Bracketed(SeqC(Opt(TypeQualifierList), Word("static"), NodeNamed("Length", Regexp(`[\d]+`))))), // NOTE: Does not allow arbitrary expressions
+		ChildOf(NewNode("Function"), Parenthesized(Opt(ParameterList))),
+	)(s, n)
+}
+
+//ParameterList matches one or more ParameterDeclarations separated by
+//commas.
+func ParameterList(s string, n *Node) (string, *Node) {
+	return SeqC(
+		Opt(Children(OneOrMore(SeqC(ParameterDeclaration, Lit(","))))),
+		ParameterDeclaration,
+	)(s, n)
+}
+
+//ParameterDeclaration matches "...", or DeclarationSpecifiers followed by a
+//Declarator, or DeclarationSpecifiers followed by an optional
+//AbstractDeclarator, collecting the result on a new "ParameterDeclaration"
+//node.
+func ParameterDeclaration(s string, n *Node) (string, *Node) {
+	return ChildOf(NewNode("ParameterDeclaration"), OneOf(
+		NodeNamed("Ellipsis", Lit("...")),
+		SeqC(DeclarationSpecifiers, Declarator),
+		SeqC(DeclarationSpecifiers, Opt(AbstractDeclarator)),
+	))(s, n)
+}
+
+//DeclarationSpecifiers matches a non-empty run of storage class specifiers,
+//type specifiers, type qualifiers and identifiers.
+func DeclarationSpecifiers(s string, n *Node) (string, *Node) {
+	return OneOf(
+		SeqC(StorageClassSpecifier, Opt(DeclarationSpecifiers)),
+		SeqC(TypeSpecifier, Opt(DeclarationSpecifiers)),
+		SeqC(TypeQualifier, Opt(DeclarationSpecifiers)),
+		SeqC(Identifier, Opt(DeclarationSpecifiers)),
+		//	SeqC(FunctionSpecifier,Opt(DeclarationSpecifiers)),
+	)(s, n)
+}
+
+//StorageClassSpecifier matches one of the words typedef, extern, static,
+//auto or register.
+func StorageClassSpecifier(s string, n *Node) (string, *Node) {
+	return NodeNamed("StorageClassSpecifier", OneOf(
+		Word("typedef"),
+		Word("extern"),
+		Word("static"),
+		Word("auto"),
+		Word("register"),
+	))(s, n)
+}
+
+//Declarator matches zero or more Pointers followed by a DirectDeclarator.
+func Declarator(s string, n *Node) (string, *Node) {
+	return NodeNamed("Declarator",
+		Seq(ZeroOrMore(Pointer), DirectDeclarator))(s, n)
+}
+
+//DirectDeclarator matches an Identifier or a parenthesized Declarator.
+func DirectDeclarator(s string, n *Node) (string, *Node) {
+	return NodeNamed("DirectDeclarator",
+		OneOf(
+			Identifier,
+			Parenthesized(Declarator),
+			// INCOMPLETE
+		),
+	)(s, n)
+}
+
+//NullableAnnotation matches one of the words _Nullable, _Nonnull or
+//_Null_unspecified.
+func NullableAnnotation(s string, n *Node) (string, *Node) {
+	return NodeNamed("NullableAnnotation", OneOf(
+		Word("_Nullable"),
+		Word("_Nonnull"),
+		Word("_Null_unspecified"),
+	))(s, n)
+}
+
+//Pointer matches "*" followed by an optional TypeQualifierList, an optional
+//NullableAnnotation and, recursively, an optional further Pointer.
+func Pointer(s string, n *Node) (string, *Node) {
+	return SeqC(
+		NodeNamed("Pointer", Lit("*")),
+		Opt(TypeQualifierList),
+		Opt(NullableAnnotation),
+		Opt(Pointer),
+	)(s, n)
+}
+
+//TypeQualifierList matches one or more TypeQualifiers.
+func TypeQualifierList(s string, n *Node) (string, *Node) {
+	return NodeNamed("TypeQualifierList",
+		Children((OneOrMore(TypeQualifier))),
+	)(s, n)
+}
+
+//SpecifierQualifierList matches a non-empty run of type specifiers,
+//struct/union specifiers, typedef names and type qualifiers.
+func SpecifierQualifierList(s string, n *Node) (string, *Node) {
+	return NodeNamed("SpecifierQualifierList",
+		OneOf(
+			SeqC(TypeSpecifier, Opt(SpecifierQualifierList)),
+			SeqC(StructOrUnionSpecifier, Opt(SpecifierQualifierList)),
+			SeqC(TypedefName, Opt(SpecifierQualifierList)),
+			SeqC(TypeQualifier, Opt(SpecifierQualifierList)),
+		),
+	)(s, n)
+	//	OneOrMore(OneOf(TypeQualifier,TypeSpecifier)))(s,n)
+}
+
+//TypeSpecifier matches one of the built-in type keywords listed below.
+func TypeSpecifier(s string, n *Node) (string, *Node) {
+	return NodeNamed("TypeSpecifier", OneOf(
+		Word("void"),
+		Word("char"),
+		Word("short"),
+		Word("int"),
+		Word("long"),
+		Word("float"),
+		Word("double"),
+		Word("signed"),
+		Word("unsigned"),
+		Word("_Bool"),
+		Word("_Complex"),
+		//StructOrUnionSpecifier,
+		//EnumSpecifier,
+		//TypedefName,
+	))(s, n)
+}
+
+//TypeQualifier matches one of the words const, restrict or volatile.
+func TypeQualifier(s string, n *Node) (string, *Node) {
+	return NodeNamed("TypeQualifier", OneOf(
+		Word("const"),
+		Word("restrict"),
+		Word("volatile"),
+	))(s, n)
+}
+
+//StructOrUnionSpecifier matches "struct"/"union" with an optional
+//identifier and a declaration list, or "struct"/"union" followed by an
+//identifier.
+func StructOrUnionSpecifier(s string, n *Node) (string, *Node) {
+	return NodeNamed("StructOrUnionSpecifier", Children(OneOf(
+		Seq(StructOrUnion, Opt(Identifier), StructDeclarationList),
+		Nest(StructOrUnion, Identifier),
+	)))(s, n)
+}
+
+//StructOrUnion matches the word struct or the word union.
+func StructOrUnion(s string, n *Node) (string, *Node) {
+	return OneOf(
+		NodeNamed("Struct", Word("struct")),
+		NodeNamed("Union", Word("union")))(s, n)
+}
+
+//StructDeclarationList matches one or more StructDeclarations.
+func StructDeclarationList(s string, n *Node) (string, *Node) {
+	return NodeNamed("StructDeclarationList", OneOrMore(StructDeclaration))(s, n)
+}
+
+//StructDeclaration matches a SpecifierQualifierList, a StructDeclaratorList
+//and a terminating ";".
+func StructDeclaration(s string, n *Node) (string, *Node) {
+	return NodeNamed("StructDeclaration", Seq(
+		SpecifierQualifierList,
+		StructDeclaratorList,
+		Lit(";"),
+	))(s, n)
+}
+
+//StructDeclaratorList matches one or more StructDeclarators separated by
+//commas.
+func StructDeclaratorList(s string, n *Node) (string, *Node) {
+	return NodeNamed("StructDeclaratorList", Seq(
+		Opt(OneOrMore(Seq(StructDeclarator, Lit(",")))),
+		StructDeclarator,
+	))(s, n)
+}
+
+//StructDeclarator matches a Declarator and labels it "StructDeclarator".
+func StructDeclarator(s string, n *Node) (string, *Node) {
+	return NodeNamed("StructDeclarator", Declarator)(s, n)
+}
+
+//Generic matches a TypeName and labels it "Generic".
+func Generic(s string, n *Node) (string, *Node) {
+	return NodeNamed("Generic", TypeName)(s, n)
+}
+
+//GenericList matches one or more Generics separated by commas.
+func GenericList(s string, n *Node) (string, *Node) {
+	return OneOf(
+		SeqC(Generic, Lit(","), GenericList),
+		Generic,
+	)(s, n)
+}
+
+//TypedefName matches an Identifier, optionally followed by an
+//angle-bracketed GenericList.
+func TypedefName(s string, n *Node) (string, *Node) {
+	return NodeNamed("TypedefName", OneOf(
+		SeqC(NodeNamed("TypedefName", Identifier), AngBracketed(GenericList)),
+		Identifier,
+	))(s, n)
+}
+
+//Identifier matches a C identifier ([_a-zA-Z][_0-9a-zA-Z]*) and rejects it
+//if the matched text is one of the reserved words.
+func Identifier(s string, n *Node) (string, *Node) {
+	s2, n2 := NodeNamed("Identifier",
+		Regexp(`[_a-zA-Z][_0-9a-zA-Z]*`))(s, n)
+	if n2 == nil {
+		return s, nil
+	}
+	if reservedwords.MatchString(n2.Content) {
+		dbg("Identifier '%s' contains reserved word\n", n2.Content)
+		return s, nil
+	}
+	return s2, n2
+}
+
diff --git a/types/main.go b/types/main.go
new file mode 100644
index 0000000..33bbb99
--- /dev/null
+++ b/types/main.go
@@ -0,0 +1,76 @@
+package types
+
+import (
+	"fmt"
+)
+
+var (
+	//Debug enables diagnostic output from this package when true.
+	Debug bool = false
+)
+
+//Parse runs the TypeName parser over s, starting from a fresh "AST" node,
+//and returns the resulting node (nil if s does not parse). Any unparsed
+//remainder of s is discarded.
+func Parse(s string) *Node {
+	_, n2 := TypeName(s, NewNode("AST"))
+	return n2
+}
+
+//Evaluate a node to determine if it is a pointer or array
+func (n *Node) isAbstract(k string) bool {
+	return n.stripAbstract(k) != nil
+}
+
+//Strip one level of pointer or array indirection from a node.
+//k is the Kind to strip ("Pointer" or "Array"). The result is a new node
+//with the same Kind as n; nil is returned if n does not end in a k. The
+//receiver and its children are left unmodified.
+func (n *Node) stripAbstract(k string) *Node {
+	i := len(n.Children) - 1
+	if i < 1 {
+		return nil
+	}
+	ret := NewNode(n.Kind)
+	// Work on a copy of the child list: the code below reorders and
+	// truncates it, and that must not leak back into n.
+	cs := make([]*Node, len(n.Children))
+	copy(cs, n.Children)
+
+	if Debug {
+		fmt.Printf("stripAbstract(): i = %d\n", i)
+	}
+	//Scan backwards skipping NullableAnnotation tags
+	for ; i > 0 && cs[i].Kind == "NullableAnnotation"; i-- {
+	}
+
+	if cs[i].Kind == k {
+		if Debug {
+			fmt.Printf("stripAbstract(): last node is %s\n", k)
+		}
+		ret.Children = cs[:i]
+		return ret
+	}
+	if i > 1 && cs[i-1].Kind == "Parenthesized" {
+		paren := cs[i-1]
+		j := len(paren.Children) - 1
+		if j < 0 {
+			// An empty Parenthesized node cannot end in k.
+			return nil
+		}
+		//Scan backwards skipping TypeQualifier tags
+		for ; j > 0 && paren.Children[j].Kind == "TypeQualifier"; j-- {
+		}
+		if paren.Children[j].Kind != k {
+			return nil
+		}
+		if j == 0 { // strip Parenthesized tag
+			cs[i-1] = cs[i]
+			ret.Children = cs[:i]
+			return ret
+		}
+		// strip last child from Parenthesized tag, on a clone so that the
+		// node still referenced by n keeps all of its children
+		trimmed := NewNode(paren.Kind, paren.Content)
+		trimmed.Children = append(trimmed.Children, paren.Children[:j]...)
+		cs[i-1] = trimmed
+		ret.Children = cs
+		return ret
+	}
+	return nil
+}
+
+//PointsTo, when called on a pointer node returns a node describing the type
+//pointed to. Otherwise returns nil when called on non-pointer types.
+func (n *Node) PointsTo() *Node {
+	if Debug {
+		fmt.Printf("PointsTo()\n")
+	}
+	return n.stripAbstract("Pointer")
+}
+
+//ArrayOf, when called on an array node returns a node describing the type
+//of the elements of the array. Otherwise returns nil when called on
+//non-array types.
+func (n *Node) ArrayOf() *Node {
+	if Debug {
+		fmt.Printf("ArrayOf()\n")
+	}
+	return n.stripAbstract("Array")
+}
+
diff --git a/types/node.go b/types/node.go
new file mode 100644
index 0000000..c0522d8
--- /dev/null
+++ b/types/node.go
@@ -0,0 +1,69 @@
+package types
+
+import (
+	"fmt"
+	"os"
+	"strings"
+)
+
+// Type definition and basic functions for Nodes
+
+//Node is one element of the parse tree: a Kind label, optional Content text
+//and an ordered list of child nodes.
+type Node struct {
+	Kind, Content string
+	Children      []*Node
+}
+
+//NewNode returns a new node of kind k with no children. If any cs are given,
+//the first one becomes the node's Content; the rest are ignored.
+func NewNode(k string, cs ...string) *Node {
+	c := ""
+	if len(cs) > 0 {
+		c = cs[0]
+	}
+	ret := &Node{Kind: k, Content: c, Children: []*Node{}}
+	dbg("NewNode(%p) %s\n", ret, ret.Kind)
+	return ret
+}
+
+//String renders the node and its descendants, one node per line, each line
+//prefixed with one "-" per level of depth. The optional first element of ls
+//is the starting depth (used by the recursion). A nil node renders as "".
+//The process exits if the depth exceeds 100.
+func (n *Node) String(ls ...int) string {
+	if n == nil {
+		return ""
+	}
+	var ret strings.Builder
+	level := 0
+	if len(ls) > 0 {
+		level = ls[0]
+		if level > 100 {
+			fmt.Println("(*Node)String(): Recursion too deep")
+			os.Exit(-1)
+		}
+	}
+	prefix := strings.Repeat("-", level)
+	fmt.Fprintf(&ret, "%s<%s> %p '%s'\n", prefix, n.Kind, n, n.Content)
+	for _, c := range n.Children {
+		ret.WriteString(c.String(level + 1))
+	}
+	return ret.String()
+}
+
+//AddChild appends c to n's children and returns n. Nodes of kind "Lit" and
+//nodes that are already children of n are silently skipped. Calling it on a
+//nil node, with a nil child, or with n itself as the child is a fatal
+//error: a message is written to standard error and the process exits.
+func (n *Node) AddChild(c *Node) *Node {
+	erp := func(s string) {
+		// Always report the reason before exiting, not only in Debug mode.
+		fmt.Fprintf(os.Stderr, "(%p)AddChild(%p): %s\n", n, c, s)
+		os.Exit(-1)
+	}
+	if n == nil {
+		erp("Called on nil node")
+	}
+	if c == nil {
+		erp("Child is nil")
+	}
+	if n == c {
+		erp("Node cannot be its own child")
+	}
+
+	// Skip literals
+	if c.Kind == "Lit" {
+		return n
+	}
+
+	// Do we already have this child? (FIXME: Not needed?)
+	for _, d := range n.Children {
+		if c == d {
+			return n
+		}
+	}
+	dbg("(%p)AddChild(%p)\n", n, c)
+	n.Children = append(n.Children, c)
+	return n
+}
+
diff --git a/wrap/main.go b/wrap/main.go
index 8dd78ee..84c429b 100644
--- a/wrap/main.go
+++ b/wrap/main.go
@@ -65,11 +65,16 @@ var builtinTypes map[string]string = map[string]string{
 	"complex double": "C.complexdouble",
 }*/
 
-func (w *Wrapper) AddType(t,class string) {
+func (w *Wrapper) AddType(t1,t2,class string) {
+	fmt.Printf("Type: %s\n",t1)
+	t := typeOrType2(t1,t2)
 	if _,ok := builtinTypes[t]; ok {
 		return
 	}
 	nt, err := goType(t,class)
+	if Debug {
+		fmt.Printf("AddType(): (%s) (%s) -> %s\n",t1,t2,nt)
+	}
 	if err != nil {
 		return
 	}
@@ -264,7 +269,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
 			Properties: map[string]Property{},
 			Methods: map[string]Method{},
 		}
-		w.AddType(name,name)
+		w.AddType(name,"",name)
 	}
 	var avail bool
 	for _,c := range ns {
@@ -278,7 +283,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
 			}
 			//_,avail = w.GetParms(x,name) // TODO
 			//if avail {
-				w.AddType(typeOrType2(x.Type,x.Type2),name)
+				w.AddType(x.Type,x.Type2,name)
 				i.Properties[p.Name] = p
 			//}
 		case *ast.ObjCMethodDecl:
@@ -292,7 +297,7 @@ func (w *Wrapper) add(name string, ns []ast.Node) {
 			}
 			m.Parameters, avail = w.GetParms(x,name)
 			if avail {
-				w.AddType(typeOrType2(x.Type,x.Type2),name)
+				w.AddType(x.Type,x.Type2,name)
 				i.Methods[m.Name] = m
 			}
 		case *ast.ObjCProtocol:
@@ -346,6 +351,7 @@ func (w *Wrapper) GetParms(n *ast.ObjCMethodDecl,class string) ([]Parameter,bool
 			if Debug { fmt.Printf("GetParms(): ast.Unknown: %s\n",x.Name) }
 		}
 	}
+	// check that the method is available for this OS and not deprecated
 	a := func() bool {
 		if len(avail) == 0 {
 			return true
@@ -360,6 +366,7 @@ func (w *Wrapper) GetParms(n *ast.ObjCMethodDecl,class string) ([]Parameter,bool
 	if !a {
 		return nil, false
 	}
+	// check that we found the right number of parameters
 	if len(ret) != len(n.Parameters) {
 		fmt.Printf("Error in method declaration %s: Wrong number of ParmVarDecl children: %d parameters but %d ParmVarDecl children\n",n.Name,len(n.Parameters),len(ret))
 	}