// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pointer

// This file defines the main datatypes and Analyze function of the pointer analysis.

import (
	"fmt"
	"go/token"
	"io"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"sort"

	"golang.org/x/tools/go/callgraph"
	"golang.org/x/tools/go/ssa"
	"golang.org/x/tools/go/types"
	"golang.org/x/tools/go/types/typeutil"
)

const (
	// optimization options; enable all when committing
	optRenumber = true // enable renumbering optimization (makes logs hard to read)
	optHVN      = true // enable pointer equivalence via Hash-Value Numbering

	// debugging options; disable all when committing
	debugHVN           = false // enable assertions in HVN
	debugHVNVerbose    = false // enable extra HVN logging
	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
	debugTimers        = false // show running time of each phase
)

// object.flags bitmask values.
const (
	otTagged   = 1 << iota // type-tagged object
	otIndirect             // type-tagged object with indirect payload
	otFunction             // function object
)

// An object represents a contiguous block of memory to which some
// (generalized) pointer may point.
//
// (Note: most variables called 'obj' are not *objects but nodeids
// such that a.nodes[obj].obj != nil.)
//
type object struct {
	// flags is a bitset of the node type (ot*) flags defined above.
	flags uint32

	// Number of following nodes belonging to the same "object"
	// allocation.  Zero for all other nodes.
	size uint32

	// data describes this object; it has one of these types:
	//
	// ssa.Value	for an object allocated by an SSA operation.
	// types.Type	for an rtype instance object or *rtype-tagged object.
	// string	for an intrinsic object, e.g. the array behind os.Args.
	// nil		for an object allocated by an intrinsic.
	//		(cgn provides the identity of the intrinsic.)
	data interface{}

	// The call-graph node (=context) in which this object was allocated.
	// May be nil for global objects: Global, Const, some Functions.
	cgn *cgnode
}

// nodeid denotes a node.
// It is an index within analysis.nodes.
// We use small integers, not *node pointers, for many reasons:
// - they are smaller on 64-bit systems.
// - sets of them can be represented compactly in bitvectors or BDDs.
// - order matters; a field offset can be computed by simple addition.
type nodeid uint32

// A node is an equivalence class of memory locations.
// Nodes may be pointers, pointed-to locations, neither, or both.
//
// Nodes that are pointed-to locations ("labels") have an enclosing
// object (see analysis.enclosingObject).
//
type node struct {
	// If non-nil, this node is the start of an object
	// (addressable memory location).
	// The following obj.size nodes implicitly belong to the object;
	// they locate their object by scanning back.
	obj *object

	// The type of the field denoted by this node.  Non-aggregate,
	// unless this is a tagged.T node (i.e. the thing
	// pointed to by an interface) in which case typ is that type.
	typ types.Type

	// subelement indicates which directly embedded subelement of
	// an object of aggregate type (struct, tuple, array) this is.
	subelement *fieldInfo // e.g. ".a.b[*].c"

	// Solver state for the canonical node of this pointer-
	// equivalence class.  Each node is created with its own state
	// but they become shared after HVN.
	solve *solverState
}

// An analysis instance holds the state of a single pointer analysis problem.
type analysis struct {
	config      *Config                     // the client's control/observer interface
	prog        *ssa.Program                // the program being analyzed
	log         io.Writer                   // log stream; nil to disable
	panicNode   nodeid                      // sink for panic, source for recover
	nodes       []*node                     // indexed by nodeid
	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
	trackTypes  map[types.Type]bool         // memoization of shouldTrack()
	constraints []constraint                // set of constraints
	cgnodes     []*cgnode                   // all cgnodes
	genq        []*cgnode                   // queue of functions to generate constraints for
	intrinsics  map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
	globalval   map[ssa.Value]nodeid        // node for each global ssa.Value
	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
	work        nodeset                     // solver's worklist
	result      *Result                     // results of the analysis
	track       track                       // pointerlike types whose aliasing we track
	deltaSpace  []int                       // working space for iterating over PTS deltas

	// Reflection & intrinsics:
	hasher              typeutil.Hasher // cache of type hashes
	reflectValueObj     types.Object    // type symbol for reflect.Value (if present)
	reflectValueCall    *ssa.Function   // (reflect.Value).Call
	reflectRtypeObj     types.Object    // *types.TypeName for reflect.rtype (if present)
	reflectRtypePtr     *types.Pointer  // *reflect.rtype
	reflectType         *types.Named    // reflect.Type
	rtypes              typeutil.Map    // nodeid of canonical *rtype-tagged object for type T
	reflectZeros        typeutil.Map    // nodeid of canonical T-tagged object for zero value
	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
}

// enclosingObj returns the first node of the addressable memory
// object that encloses node id.  Panic ensues if that node does not
// belong to any object.
func (a *analysis) enclosingObj(id nodeid) nodeid { // Find previous node with obj != nil. for i := id; i >= 0; i-- { n := a.nodes[i] if obj := n.obj; obj != nil { if i+nodeid(obj.size) <= id { break // out of bounds } return i } } panic("node has no enclosing object") } // labelFor returns the Label for node id. // Panic ensues if that node is not addressable. func (a *analysis) labelFor(id nodeid) *Label { return &Label{ obj: a.nodes[a.enclosingObj(id)].obj, subelement: a.nodes[id].subelement, } } func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) if a.log != nil { fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg) } a.result.Warnings = append(a.result.Warnings, Warning{pos, msg}) } // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries. func (a *analysis) computeTrackBits() { var queryTypes []types.Type for v := range a.config.Queries { queryTypes = append(queryTypes, v.Type()) } for v := range a.config.IndirectQueries { queryTypes = append(queryTypes, mustDeref(v.Type())) } for _, t := range queryTypes { switch t.Underlying().(type) { case *types.Chan: a.track |= trackChan case *types.Map: a.track |= trackMap case *types.Pointer: a.track |= trackPtr case *types.Slice: a.track |= trackSlice case *types.Interface: a.track = trackAll return } if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) { a.track = trackAll return } } } // Analyze runs the pointer analysis with the scope and options // specified by config, and returns the (synthetic) root of the callgraph. // // Pointer analysis of a transitively closed well-typed program should // always succeed. An error can occur only due to an internal bug. 
//
func Analyze(config *Config) (result *Result, err error) {
	if config.Mains == nil {
		return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
	}
	// Convert any internal panic into an error result so that a bug in
	// the analysis does not crash the client.  (recover only works in a
	// deferred function in the same goroutine.)
	defer func() {
		if p := recover(); p != nil {
			err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
			fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
			debug.PrintStack()
		}
	}()

	a := &analysis{
		config:      config,
		log:         config.Log,
		prog:        config.prog(),
		globalval:   make(map[ssa.Value]nodeid),
		globalobj:   make(map[ssa.Value]nodeid),
		flattenMemo: make(map[types.Type][]*fieldInfo),
		trackTypes:  make(map[types.Type]bool),
		atFuncs:     make(map[*ssa.Function]bool),
		hasher:      typeutil.MakeHasher(),
		intrinsics:  make(map[*ssa.Function]intrinsic),
		result: &Result{
			Queries:         make(map[ssa.Value]Pointer),
			IndirectQueries: make(map[ssa.Value]Pointer),
		},
		deltaSpace: make([]int, 0, 100),
	}

	// Flip to true to force logging to stderr when debugging a crash.
	if false {
		a.log = os.Stderr // for debugging crashes; extremely verbose
	}

	if a.log != nil {
		fmt.Fprintln(a.log, "==== Starting analysis")
	}

	// Pointer analysis requires a complete program for soundness.
	// Check to prevent accidental misconfiguration.
	for _, pkg := range a.prog.AllPackages() {
		// (This only checks that the package scope is complete,
		// not that func bodies exist, but it's a good signal.)
		if !pkg.Object.Complete() {
			return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Object.Path())
		}
	}

	// Cache symbols from the reflect package, if imported; the solver
	// treats reflect.Value and *reflect.rtype specially.
	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
		rV := reflect.Object.Scope().Lookup("Value")
		a.reflectValueObj = rV
		a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
		a.reflectType = reflect.Object.Scope().Lookup("Type").Type().(*types.Named)
		a.reflectRtypeObj = reflect.Object.Scope().Lookup("rtype")
		a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())

		// Override flattening of reflect.Value, treating it like a basic type.
		tReflectValue := a.reflectValueObj.Type()
		a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}

		// Override shouldTrack of reflect.Value and *reflect.rtype.
		// Always track pointers of these types.
		a.trackTypes[tReflectValue] = true
		a.trackTypes[a.reflectRtypePtr] = true

		a.rtypes.SetHasher(a.hasher)
		a.reflectZeros.SetHasher(a.hasher)
	}
	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
		a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
	}
	a.computeTrackBits()

	a.generate()
	a.showCounts()

	if optRenumber {
		a.renumber()
	}

	N := len(a.nodes) // excludes solver-created nodes

	if optHVN {
		if debugHVNCrossCheck {
			// Cross-check: run the solver once without
			// optimization, once with, and compare the
			// solutions.
			savedConstraints := a.constraints

			a.solve()
			a.dumpSolution("A.pts", N)

			// Restore.
			a.constraints = savedConstraints
			for _, n := range a.nodes {
				n.solve = new(solverState)
			}
			a.nodes = a.nodes[:N]

			// rtypes is effectively part of the solver state.
			a.rtypes = typeutil.Map{}
			a.rtypes.SetHasher(a.hasher)
		}

		a.hvn()
	}

	if debugHVNCrossCheck {
		runtime.GC()
		runtime.GC()
	}

	a.solve()

	// Compare solutions.
	if optHVN && debugHVNCrossCheck {
		a.dumpSolution("B.pts", N)

		if !diff("A.pts", "B.pts") {
			return nil, fmt.Errorf("internal error: optimization changed solution")
		}
	}

	// Create callgraph.Nodes in deterministic order.
	if cg := a.result.CallGraph; cg != nil {
		for _, caller := range a.cgnodes {
			cg.CreateNode(caller.fn)
		}
	}

	// Add dynamic edges to call graph.
	var space [100]int
	for _, caller := range a.cgnodes {
		for _, site := range caller.sites {
			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
				a.callEdge(caller, site, nodeid(callee))
			}
		}
	}

	return a.result, nil
}

// callEdge is called for each edge in the callgraph.
// calleeid is the callee's object node (has otFunction flag).
// func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { obj := a.nodes[calleeid].obj if obj.flags&otFunction == 0 { panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid)) } callee := obj.cgn if cg := a.result.CallGraph; cg != nil { // TODO(adonovan): opt: I would expect duplicate edges // (to wrappers) to arise due to the elimination of // context information, but I haven't observed any. // Understand this better. callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn)) } if a.log != nil { fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee) } // Warn about calls to non-intrinsic external functions. // TODO(adonovan): de-dup these messages. if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil { a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn) a.warnf(fn.Pos(), " (declared here)") } } // dumpSolution writes the PTS solution to the specified file. // // It only dumps the nodes that existed before solving. The order in // which solver-created nodes are created depends on pre-solver // optimization, so we can't include them in the cross-check. // func (a *analysis) dumpSolution(filename string, N int) { f, err := os.Create(filename) if err != nil { panic(err) } for id, n := range a.nodes[:N] { if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { panic(err) } var sep string for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { if l >= N { break } fmt.Fprintf(f, "%s%d", sep, l) sep = " " } fmt.Fprintf(f, "} : %s\n", n.typ) } if err := f.Close(); err != nil { panic(err) } } // showCounts logs the size of the constraint system. A typical // optimized distribution is 65% copy, 13% load, 11% addr, 5% // offsetAddr, 4% store, 2% others. 
// func (a *analysis) showCounts() { if a.log != nil { counts := make(map[reflect.Type]int) for _, c := range a.constraints { counts[reflect.TypeOf(c)]++ } fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) var lines []string for t, n := range counts { line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) lines = append(lines, line) } sort.Sort(sort.Reverse(sort.StringSlice(lines))) for _, line := range lines { fmt.Fprintf(a.log, "\t%s\n", line) } fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) // Show number of pointer equivalence classes. m := make(map[*solverState]bool) for _, n := range a.nodes { m[n.solve] = true } fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) } }