erigon-pulse/core/vm/absint_stackset.go
Suhabe Bugrara 8b31944611
Control Flow Analysis (#990)
* First

* More on SA interpreter

* Fixup

* Add cfg action to hack binary that invokes the SaInterpreter. Added an operation handler for PUSH1

* refactor cfg tests into separate file

* Move cfg tests into separate file

* More refactoring into new file

* dataflow interpreter

* work on cfg0

* finish cfg0

* df works on base examples

* refactor into dataflow spec

* add bounded stack

* add harder example

* fix switch pass thru

* fix switch pass thru

* bug fix, and better printing

* manual merge

* restore call to test gencfg

* abstract interpretation based cfg analysis

* fix post signature

* use uint256 instead uint64, add post function

* preprocess stmts

* initial implementation of resolve

* fix resolve

* fix resolve

* print stmts for edges

* print stmts for edges

* print states

* print states

* bug fixes, debugging

* fix jumpi dest - first working impl

* reachability analysis to filter out dead edges

* add all transfer functions

* larger contract bytecodes from solc compiler

* simple solidity contract goes thru

* add deposit contract bytecode

* rename deposit contract test

* fix new contract arg

* Address non-determinism leading to imprecise results

* improve debugging output

* improve debugging output

* improve debugging output

* fix for bug causing incorrect analysis results

* fix for bug causing incorrect analysis results

* fix for bug causing incorrect analysis results

* add more test cases

* fix coverage bug

* debugging for non-termination

* fix bad fixpoint check

* fix data inference

* fix transfer function for halting stmts

* switch to deposit contract test, disable debugging

* add anly counter to viz, fix stmt.valid check

* show all preds, adjust anlycounter behavior

* dfs instead of bfs to fail earlier

* viz improvements

* add worklist size to viz

* add test case for private functions

* valueset analysis

* add more checks to fail earlier in the analysis to help debugging, improve debugging output, catch additional bad jumps

* delete old code

* delete old code

* delete old code

* fix up minor changes to jump table

* copy over comments from cgf-1 branch

* remove minor diffs

* add recompiled deposit contract

* graph viz

* cleanup/refactoring

* initial impl of viz

* script to run cfg anly and generate dot file

* div example

* accept bytecode from cmd line

* add minimal deposit contract example

* replace valueset analysis with stackset analysis

* get in sync with master

* sync with master

* fix linting

* fix linting

* fix linting

* reformatting

* fix linting

* fix linting

* fix linting

* fix linting

* fix linting

* fix linting

* fix linting

Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-08-28 07:26:49 +01:00

806 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package vm
import (
"bufio"
"errors"
"fmt"
"github.com/emicklei/dot"
"github.com/holiman/uint256"
"github.com/logrusorgru/aurora"
"os"
"strconv"
"strings"
)
//////////////////////////////////////////////////
type AbsValueKind int
const (
absStackLen = 64
)
var DEBUG = false
var StopOnError = true
//////////////////////////
// stmt is the representation of an executable instruction - extension of an opcode
type astmt struct {
inferredAsData bool
ends bool
isBlockEntry bool
isBlockExit bool
opcode OpCode
operation *operation
pc int
numBytes int
value uint256.Int
}
func (stmt *astmt) String() string {
ends := ""
if stmt.ends {
ends = "ends"
}
return fmt.Sprintf("%v %v %v", stmt.opcode, ends, stmt.operation.isPush)
}
type program struct {
contract *Contract
stmts []*astmt
blocks []*block
entry2block map[int]*block
exit2block map[int]*block
}
func toProgram(contract *Contract) *program {
jt := newIstanbulInstructionSet()
program := &program{contract: contract}
codeLen := len(contract.Code)
inferIsData := make(map[int]bool)
for pc := 0; pc < codeLen; pc++ {
stmt := astmt{}
stmt.pc = pc
stmt.inferredAsData = inferIsData[pc]
op := contract.GetOp(uint64(pc))
stmt.opcode = op
stmt.operation = jt[op]
stmt.ends = stmt.operation == nil || stmt.operation.halts || stmt.operation.reverts
//fmt.Printf("%v %v %v", pc, stmt.opcode, stmt.operation.valid)
if op.IsPush() {
pushByteSize := stmt.operation.opNum
startMin := pc + 1
if startMin >= codeLen {
startMin = codeLen
}
endMin := startMin + pushByteSize
if startMin+pushByteSize >= codeLen {
endMin = codeLen
}
integer := new(uint256.Int)
integer.SetBytes(contract.Code[startMin:endMin])
stmt.value = *integer
stmt.numBytes = pushByteSize + 1
if !stmt.inferredAsData {
for datapc := startMin; datapc < endMin; datapc++ {
inferIsData[datapc] = true
}
}
} else {
stmt.numBytes = 1
}
program.stmts = append(program.stmts, &stmt)
if pc != len(program.stmts)-1 {
panic("Invalid length")
}
}
for pc, stmt := range program.stmts {
if !stmt.inferredAsData {
if pc == 0 {
stmt.isBlockEntry = true
} else if stmt.opcode == JUMPDEST {
stmt.isBlockEntry = true
} else if stmt.opcode == JUMPI && pc < len(program.stmts)-1 {
entry := program.stmts[pc+1]
entry.isBlockEntry = true
}
if pc == len(program.stmts)-1 {
stmt.isBlockExit = true
} else if stmt.opcode == JUMP || stmt.opcode == JUMPI {
stmt.isBlockExit = true
}
}
}
program.entry2block = make(map[int]*block)
program.exit2block = make(map[int]*block)
for entrypc, entry := range program.stmts {
if entry.isBlockEntry {
block := &block{entrypc: entrypc, stmts: make([]*astmt, 0)}
program.blocks = append(program.blocks, block)
for i := entrypc; i < len(program.stmts); i++ {
block.stmts = append(block.stmts, program.stmts[i])
if program.stmts[i].isBlockExit {
break
}
}
if len(block.stmts) > 0 {
block.exitpc = block.stmts[len(block.stmts)-1].pc
}
program.entry2block[block.entrypc] = block
program.exit2block[block.exitpc] = block
}
}
return program
}
/*
func printStmts(stmts []*astmt) {
for i, stmt := range stmts {
fmt.Printf("%v %v\n", i, stmt)
}
}*/
////////////////////////
type edge struct {
pc0 int
stmt *astmt
pc1 int
isJump bool
}
func (e edge) String() string {
return fmt.Sprintf("%v %v %v", e.pc0, e.pc1, e.stmt.opcode)
}
/*
func reverseSortEdges(edges []edge) {
sort.SliceStable(edges, func(i, j int) bool {
return edges[i].pc0 > edges[j].pc0
})
}
func sortEdges(edges []edge) {
sort.SliceStable(edges, func(i, j int) bool {
return edges[i].pc0 < edges[j].pc0
})
}*/
func printEdges(edges []edge) {
for _, edge := range edges {
fmt.Printf("%v\n", edge)
}
}
func getEntryReachableEdges(entry int, edges []edge) []edge {
pc2edges := make(map[int][]edge)
for _, e := range edges {
l := pc2edges[e.pc0]
l = append(l, e)
pc2edges[e.pc0] = l
}
workList := []int{entry}
visited := make(map[int]bool)
visited[entry] = true
for len(workList) > 0 {
var pc int
pc, workList = workList[0], workList[1:]
for _, edge := range pc2edges[pc] {
if !visited[edge.pc1] {
visited[edge.pc1] = true
workList = append(workList, edge.pc1)
}
}
}
var reachable []edge
for pc, exists := range visited {
if exists {
reachable = append(reachable, pc2edges[pc]...)
}
}
return reachable
}
////////////////////////
const (
BotValue AbsValueKind = iota
TopValue
ConcreteValue
)
func (d AbsValueKind) String() string {
return [...]string{"⊥", "", "AbsValue"}[d]
}
//////////////////////////////////////////////////
type AbsValue struct {
kind AbsValueKind
value uint256.Int //only when kind=ConcreteValue
pc int //only when kind=TopValue
fromDeepStack bool //only when Kind=TopValue
}
func (c0 AbsValue) String(abbrev bool) string {
if c0.kind == BotValue {
return c0.kind.String()
} else if c0.kind == TopValue {
if !abbrev {
return fmt.Sprintf("%v%v", c0.kind.String(), c0.pc)
}
return c0.kind.String()
} else if c0.value.IsUint64() {
return strconv.FormatUint(c0.value.Uint64(), 10)
}
return "256bit"
}
func AbsValueTop(pc int, fromDeepStack bool) AbsValue {
return AbsValue{kind: TopValue, pc: pc, fromDeepStack: fromDeepStack}
}
func AbsValueBot() AbsValue {
return AbsValue{kind: BotValue}
}
func AbsValueConcrete(value uint256.Int) AbsValue {
return AbsValue{kind: ConcreteValue, value: value}
}
func (c0 AbsValue) Eq(c1 AbsValue) bool {
if c0.kind != c1.kind {
return false
}
if c0.kind == ConcreteValue {
if !c0.value.Eq(&c1.value) {
return false
}
}
return true
}
//////////////////////////////////////////////////
type astack struct {
values []AbsValue
}
func (s *astack) Copy() *astack {
newStack := &astack{}
newStack.values = append(newStack.values, s.values...)
return newStack
}
func (s *astack) Push(value AbsValue) {
rest := s.values[0 : absStackLen-1]
s.values = nil
s.values = append(s.values, value)
s.values = append(s.values, rest...)
}
func (s *astack) Pop(pc int) AbsValue {
res := s.values[0]
s.values = s.values[1:absStackLen]
s.values = append(s.values, AbsValueTop(pc, true))
return res
}
func (s *astack) String(abbrev bool) string {
strs := make([]string, 0)
for _, c := range s.values {
strs = append(strs, c.String(abbrev))
}
return strings.Join(strs, " ")
}
func (s *astack) Eq(s1 *astack) bool {
for i := 0; i < absStackLen; i++ {
if !s.values[i].Eq(s1.values[i]) {
return false
}
}
return true
}
//////////////////////////////////////////////////
type astate struct {
stackset []*astack
anlyCounter int
worklistLen int
}
func emptyState() *astate {
return &astate{stackset: nil, anlyCounter: -1, worklistLen: -1}
}
func (state *astate) Copy() *astate {
newState := emptyState()
for _, stack := range state.stackset {
newState.stackset = append(newState.stackset, stack.Copy())
}
return newState
}
// botState generates initial state which is a stack of "bottom" values
func botState() *astate {
st := emptyState()
botStack := &astack{}
for i := 0; i < absStackLen; i++ {
botStack.values = append(botStack.values, AbsValueBot())
}
st.stackset = append(st.stackset, botStack)
return st
}
func ExistsIn(values []AbsValue, value AbsValue) bool {
for _, v := range values {
if value.Eq(v) {
return true
}
}
return false
}
func (state *astate) String(abbrev bool) string {
var elms []string
for i := 0; i < absStackLen; i++ {
var elm []string
var values []AbsValue
for _, stack := range state.stackset {
value := stack.values[i]
if !ExistsIn(values, value) {
elm = append(elm, value.String(abbrev))
values = append(values, value)
}
}
var e string
if len(values) > 1 {
e = fmt.Sprintf("{%v}", strings.Join(elm, ","))
} else {
e = fmt.Sprintf("%v", strings.Join(elm, ","))
}
elms = append(elms, e)
}
return strings.Join(elms, " ")
}
func (state *astate) Add(stack *astack) {
for _, existing := range state.stackset {
if existing.Eq(stack) {
return
}
}
state.stackset = append(state.stackset, stack)
}
//////////////////////////////////////////////////
type ResolveResult struct {
edges []edge
resolved bool
badJump *astmt
}
// resolve analyses given executable instruction at given program counter in the context of given state
// It either concludes that the execution of the instruction may result in a jump to an unpredictable
// destination (in this case, attrubute resolved will be false), or returns one (for a non-JUMPI) or two (for JUMPI)
// edges that contain program counters of destinations where the executions can possibly come next
func resolve(program *program, pc0 int, st0 *astate) ResolveResult {
stmt := program.stmts[pc0]
if stmt.ends {
return ResolveResult{resolved: true}
}
codeLen := len(program.contract.Code)
var edges []edge
isBadJump := false
//jump edges
for _, stack := range st0.stackset {
if stmt.opcode == JUMP || stmt.opcode == JUMPI {
jumpDest := stack.values[0]
if jumpDest.kind == TopValue {
isBadJump = true
} else if jumpDest.kind == ConcreteValue {
if jumpDest.value.IsUint64() {
pc1 := int(jumpDest.value.Uint64())
if program.stmts[pc1].opcode != JUMPDEST {
isBadJump = true
} else {
edges = append(edges, edge{pc0, stmt, pc1, true})
}
} else {
isBadJump = true
}
}
}
}
//fall-thru edge
if stmt.opcode != JUMP {
if pc0 < codeLen-stmt.numBytes {
edges = append(edges, edge{pc0, stmt, pc0 + stmt.numBytes, false})
}
}
if DEBUG {
fmt.Printf("\nResolve: %v %v\n", pc0, stmt)
printEdges(edges)
}
if isBadJump {
return ResolveResult{edges: edges, resolved: false, badJump: stmt}
}
return ResolveResult{edges: edges, resolved: true, badJump: nil}
}
func post(st0 *astate, edge edge) (*astate, error) {
st1 := emptyState()
stmt := edge.stmt
isStackTooShort := false
for _, stack0 := range st0.stackset {
elm0 := stack0.values[0]
if edge.isJump {
if elm0.kind == ConcreteValue && elm0.value.IsUint64() && int(elm0.value.Uint64()) != edge.pc1 {
continue
}
}
stack1 := stack0.Copy()
if stmt.opcode.IsPush() {
stack1.Push(AbsValueConcrete(stmt.value))
} else if stmt.operation.isDup {
value := stack1.values[stmt.operation.opNum-1]
stack1.Push(value)
isStackTooShort = isStackTooShort || value.fromDeepStack
} else if stmt.operation.isSwap {
opNum := stmt.operation.opNum
a := stack1.values[0]
b := stack1.values[opNum]
stack1.values[0] = b
stack1.values[opNum] = a
isStackTooShort = isStackTooShort || a.fromDeepStack || b.fromDeepStack
} else {
for i := 0; i < stmt.operation.numPop; i++ {
s := stack1.Pop(edge.pc0)
isStackTooShort = isStackTooShort || s.fromDeepStack
}
for i := 0; i < stmt.operation.numPush; i++ {
stack1.Push(AbsValueTop(edge.pc0, false))
}
}
st1.Add(stack1)
}
if isStackTooShort {
return st1, errors.New("abstract stack too short: reached unmodelled depth")
}
return st1, nil
}
func Leq(st0 *astate, st1 *astate) bool {
for _, stack0 := range st0.stackset {
var found bool
for _, stack1 := range st1.stackset {
if stack0.Eq(stack1) {
found = true
break
}
}
if !found {
return false
}
}
return true
}
func Lub(st0 *astate, st1 *astate) *astate {
newState := emptyState()
for _, stack := range st0.stackset {
newState.Add(stack)
}
for _, stack := range st1.stackset {
newState.Add(stack)
}
return newState
}
type block struct {
entrypc int
exitpc int
stmts []*astmt
}
func printAnlyState(program *program, prevEdgeMap map[int]map[int]bool, D map[int]*astate, badJumps map[int]bool) {
// es := make([]edge, len(edges))
// copy(es, edges)
// sortEdges(es)
var badJumpList []string
for pc, stmt := range program.stmts {
if stmt.inferredAsData {
//fmt.Printf("data: %v\n", stmt.inferredAsData)
continue
}
if stmt.opcode == JUMPDEST {
fmt.Println()
}
var valueStr string
if stmt.opcode.IsPush() {
valueStr = fmt.Sprintf("%v %v", stmt.opcode, stmt.value.Hex())
} else {
valueStr = fmt.Sprintf("%v", stmt.opcode)
}
pc0s := make([]string, 0)
for pc0 := range prevEdgeMap[pc] {
pc0s = append(pc0s, strconv.Itoa(pc0))
}
if badJumps[pc] {
out := fmt.Sprintf("[%5v] (w:%2v) %3v\t %-25v %-10v %v\n", aurora.Blue(D[pc].anlyCounter), aurora.Cyan(D[pc].worklistLen), aurora.Yellow(pc), aurora.Red(valueStr), aurora.Magenta(strings.Join(pc0s, ",")), D[pc].String(false))
fmt.Print(out)
badJumpList = append(badJumpList, out)
} else if prevEdgeMap[pc] != nil {
fmt.Printf("[%5v] (w:%2v) %3v\t %-25v %-10v %v\n", aurora.Blue(D[pc].anlyCounter), aurora.Cyan(D[pc].worklistLen), aurora.Yellow(pc), aurora.Green(valueStr), aurora.Magenta(strings.Join(pc0s, ",")), D[pc].String(true))
} else {
fmt.Printf("[%5v] (w:%2v) %3v\t %-25v\n", aurora.Blue(D[pc].anlyCounter), aurora.Cyan(D[pc].worklistLen), aurora.Yellow(pc), valueStr)
}
}
print("\nFull list of bad jumps:\n")
for _, badJump := range badJumpList {
fmt.Println(badJump)
}
g := dot.NewGraph(dot.Directed)
block2node := make(map[*block]*dot.Node)
for _, block := range program.blocks {
n := g.Node(fmt.Sprintf("%v\n%v", block.entrypc, block.exitpc)).Box()
block2node[block] = &n
}
for pc1 := range prevEdgeMap {
for pc0 := range prevEdgeMap[pc1] {
block1 := program.entry2block[pc1]
if block1 == nil {
continue
}
block0 := program.exit2block[pc0]
if block0 == nil {
continue
}
n0 := block2node[block0]
n1 := block2node[block1]
g.Edge(*n0, *n1)
}
}
path := "cfg.dot"
_ = os.Remove(path)
f, errcr := os.Create(path)
if errcr != nil {
panic(errcr)
}
defer f.Close()
w := bufio.NewWriter(f)
_, errwr := w.WriteString(g.String())
if errwr != nil {
panic(errwr)
}
_ = w.Flush()
}
func check(program *program, prevEdgeMap map[int]map[int]bool) {
for pc1, pc0s := range prevEdgeMap {
for pc0 := range pc0s {
s := program.stmts[pc0]
if s.ends {
msg := fmt.Sprintf("Halt has successor: %v -> %v", pc0, pc1)
panic(msg)
}
}
}
}
func AbsIntCfgHarness(contract *Contract) {
program := toProgram(contract)
startPC := 0
codeLen := len(program.contract.Code)
D := make(map[int]*astate)
for pc := 0; pc < codeLen; pc++ {
D[pc] = emptyState()
}
D[startPC] = botState()
prevEdgeMap := make(map[int]map[int]bool)
var workList []edge
{
resolution := resolve(program, startPC, D[startPC])
if !resolution.resolved {
fmt.Printf("Unable to resolve at pc=%x\n", startPC)
return
}
for _, e := range resolution.edges {
if prevEdgeMap[e.pc1] == nil {
prevEdgeMap[e.pc1] = make(map[int]bool)
}
prevEdgeMap[e.pc1][e.pc0] = true
}
workList = resolution.edges
}
check(program, prevEdgeMap)
anlyCounter := 0
badJumps := make(map[int]bool)
for len(workList) > 0 {
//sortEdges(workList)
var e edge
e, workList = workList[0], workList[1:]
//fmt.Printf("%v\n", e.pc0)
if e.pc0 == -1 {
fmt.Printf("---------------------------------------\n")
fmt.Printf("Verbose debugging for pc=%v\n", e.pc0)
DEBUG = true
}
if DEBUG {
fmt.Printf("pre pc=%v\t%v\n", e.pc0, D[e.pc0])
}
preDpc0 := D[e.pc0]
preDpc1 := D[e.pc1]
post1, err := post(preDpc0, e)
if err != nil {
if StopOnError {
printAnlyState(program, prevEdgeMap, D, nil)
fmt.Printf("FAILURE: pc=%v %v\n", e.pc0, err)
return
}
fmt.Printf("FAILURE: pc=%v %v\n", e.pc0, err)
}
if DEBUG {
fmt.Printf("post\t\t%v\n", post1)
fmt.Printf("Dprev\t\t%v\n", preDpc1)
}
if !Leq(post1, preDpc1) {
postDpc1 := Lub(post1, preDpc1)
if false {
fmt.Printf("\nedge %v %v\n", e.pc0, e.pc1)
//fmt.Printf("pre D[pc0]\t\t%v\n", preDpc0);
fmt.Printf("pre D[pc1]\t\t%v\n", preDpc1)
fmt.Printf("post\t\t\t%v\n", post1)
/*
for i := 0; i < absStackLen; i++ {
c0 := post1.stack[i]
c1 := preDpc1.stack[i]
if !ValueSetLeq(c0, c1) {
fmt.Printf("diff: \t\t\t%v %v %v %v %v\n", i, c0, c1, c0.kind, c1.kind)
if c0.kind == ConstValueKind && c1.kind == ConstValueKind {
fmt.Printf("\t\t\t\t\tEQ=%v\n", c0.value.Eq(&c1.value))
}
}
}*/
//fmt.Printf("lub\t\t\t%v\n", postDpc1)
printAnlyState(program, prevEdgeMap, D, nil)
}
D[e.pc1] = postDpc1
resolution := resolve(program, e.pc1, D[e.pc1])
if !resolution.resolved {
badJumps[resolution.badJump.pc] = true
fmt.Printf("FAILURE: Unable to resolve: anlyCounter=%v pc=%x\n", aurora.Red(anlyCounter), aurora.Red(e.pc1))
if StopOnError {
printAnlyState(program, prevEdgeMap, D, badJumps)
return
}
} else {
for _, e := range resolution.edges {
inWorkList := false
for _, w := range workList {
if w.pc0 == e.pc0 && w.pc1 == e.pc1 {
inWorkList = true
}
}
if !inWorkList {
head := []edge{e}
workList = append(head, workList...)
}
}
if prevEdgeMap[e.pc1] == nil {
prevEdgeMap[e.pc1] = make(map[int]bool)
}
prevEdgeMap[e.pc1][e.pc0] = true
}
}
DEBUG = false
decp1Copy := D[e.pc1]
decp1Copy.anlyCounter = anlyCounter
decp1Copy.worklistLen = len(workList)
D[e.pc1] = decp1Copy
anlyCounter++
check(program, prevEdgeMap)
}
print("\nFinal resolve....")
var finalEdges []edge
for pc := 0; pc < codeLen; pc++ {
resolution := resolve(program, pc, D[pc])
if !resolution.resolved {
badJumps[resolution.badJump.pc] = true
fmt.Println("Bad jump found during final resolve.")
}
finalEdges = append(finalEdges, resolution.edges...)
}
//need to run a DFS from the entry point to pick only reachable stmts
reachableEdges := getEntryReachableEdges(0, finalEdges)
fmt.Printf("\n# of unreachable edges: %v\n", len(finalEdges)-len(reachableEdges))
fmt.Printf("\n# of total edges: %v\n", len(finalEdges))
//printEdges(edges)
printAnlyState(program, prevEdgeMap, D, nil)
println("done valueset")
if len(badJumps) > 0 {
printAnlyState(program, prevEdgeMap, D, badJumps)
}
}