Victor Farazdagi 3d3b9d1217
Spec checker tool (#8722)
* Add specdocs static code analyzer

* docs pulling script

* update content pulling script

* add test

* better parsing of incoming docs

* update test

* implements analyzer

* separate tool

* remove analyzer code

* cleanup

* deep source fixes

* untrack raw specs files

* add back phase0 defs

* update spec texts

* re-arrange code

* updated spec list

* cleanup

* more comments and readme

* add merkle proofs specs

* add extra.md

* mark wrong length issue

* update readme

* update readme

* remove non-def snippets

* update comment

* check numrows

* ignore last empty line

Co-authored-by: Raul Jordan <raul@prysmaticlabs.com>
2021-04-15 14:54:07 +00:00

177 lines
5.0 KiB
Go

package main
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/urfave/cli/v2"
)
// reg1 matches the header of a Python function definition: the literal "def", a single whitespace
// character, and then (greedily, so up to the last "(" on the line) the function name, which is
// exposed as the first capture group.
var reg1 = regexp.MustCompile(`def\s(.*)\(.*`)
// checkNumRows controls whether the tool additionally reports spec comments whose number of rows
// differs from the reference snippet. When enabled, a comment must not only match the reference
// snippet line by line, it is also expected to be the last part of its comment block (i.e. to have
// exactly as many rows as the reference).
const checkNumRows = false
// check loads the reference spec snippets and then walks the directory named by dirFlag,
// running inspectFile on every regular entry whose name ends in ".go".
//
// It returns an error when the specs cannot be parsed, when the walk itself fails (for example
// the root directory does not exist or a sub-directory cannot be read), or when inspectFile
// fails on one of the files.
func check(cliCtx *cli.Context) error {
	// Obtain reference snippets.
	defs, err := parseSpecs()
	if err != nil {
		return err
	}

	// Walk the path, and process all contained Golang files.
	fileWalker := func(path string, info os.FileInfo, err error) error {
		// filepath.Walk passes a nil info when the root itself cannot be stat'ed.
		if info == nil {
			return fmt.Errorf("invalid input dir %q", path)
		}
		// Do not silently skip entries that could not be read: an unreadable directory would
		// otherwise make the check pass without ever looking at the files inside it.
		if err != nil {
			return fmt.Errorf("cannot walk %q: %w", path, err)
		}
		// Directories may carry a ".go" suffix too; only files can be parsed.
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".go") {
			return nil
		}
		return inspectFile(path, defs)
	}
	return filepath.Walk(cliCtx.String(dirFlag.Name), fileWalker)
}
// inspectFile parses the Go source file at path (including comments) and compares every comment
// group that contains Python pseudo-code against the reference snippets in defs, which maps a
// Python function name to its known reference bodies.
//
// Problems found in comments are only printed to stdout, prefixed with the source position; the
// returned error is non-nil solely when the file itself cannot be parsed.
func inspectFile(path string, defs map[string][]string) error {
	fileSet := token.NewFileSet()
	parsed, err := parser.ParseFile(fileSet, path, nil, parser.ParseComments)
	if err != nil {
		return err
	}

	visit := func(node ast.Node) bool {
		group, isGroup := node.(*ast.CommentGroup)
		if !isGroup {
			return true
		}

		// Comment groups without a Python "def" are of no interest.
		text := group.Text()
		span := reg1.FindStringIndex(text)
		if span == nil {
			return true
		}
		where := fileSet.Position(node.Pos())

		// Drop everything preceding the "def", then split the snippet into name and body.
		name, body := parseDefChunk(text[span[0]:])
		if name == "" {
			fmt.Printf("%s: cannot parse comment pseudo code\n", where)
			return false
		}

		// Compare against every known reference implementation of that function.
		references, known := defs[name]
		switch {
		case !known:
			fmt.Printf("%s: %q is not found in spec docs\n", where, name)
			return false
		case !matchesRefImplementation(name, references, body, where):
			fmt.Printf("%s: %q code does not match reference implementation in specs\n", where, name)
			return false
		}
		return true
	}
	ast.Inspect(parsed, visit)

	return nil
}
// parseSpecs reads every spec document listed in specDirs, plus data/extra.md, from specFS and
// builds a map from Python function name to all bodies found for that name (one function may be
// defined more than once across the documents).
//
// An error is returned as soon as any of the documents cannot be read.
func parseSpecs() (map[string][]string, error) {
	// All documents are concatenated into one string before the snippets are extracted.
	var combined strings.Builder
	appendSpec := func(specFilePath string) error {
		content, readErr := specFS.ReadFile(specFilePath)
		if readErr != nil {
			return fmt.Errorf("cannot read specs file: %w", readErr)
		}
		if _, writeErr := combined.Write(content); writeErr != nil {
			return fmt.Errorf("cannot copy specs file: %w", writeErr)
		}
		return nil
	}

	for dir, names := range specDirs {
		for _, name := range names {
			specPath := path.Join("data", dir, name)
			if err := appendSpec(specPath); err != nil {
				return nil, err
			}
		}
	}

	// extra.md carries additional definitions, so that pseudo-code which is not part of the specs
	// can be referenced as well.
	if err := appendSpec(path.Join("data", "extra.md")); err != nil {
		return nil, err
	}

	// Remove the opening fences, then cut on the remaining fences: every resulting piece is either
	// the content of a code block or the text between two blocks.
	withoutOpeners := strings.ReplaceAll(combined.String(), "```python", "")
	snippets := strings.Split(withoutOpeners, "```")

	result := make(map[string][]string, len(snippets))
	for _, snippet := range snippets {
		// Pieces that are not function definitions yield an empty name and are skipped.
		if name, body := parseDefChunk(snippet); name != "" {
			result[name] = append(result[name], body)
		}
	}
	return result, nil
}
// parseDefChunk extracts the function name and the function body from a Python "def" chunk.
//
// Leading newlines are stripped first. The chunk is accepted only if its first line starts with
// "def " and matches reg1; the returned name is the text between "def " and the last "(" of that
// line with surrounding spaces removed, and the returned body is the whole chunk (without the
// stripped leading newlines). For anything else two empty strings are returned.
func parseDefChunk(chunk string) (string, string) {
	chunk = strings.TrimLeft(chunk, "\n")
	if chunk == "" {
		return "", ""
	}
	chunkLines := strings.Split(chunk, "\n")
	// Ignore all snippets that do not define functions. HasPrefix is used instead of slicing the
	// first four bytes, because the first line may well be shorter than that.
	if !strings.HasPrefix(chunkLines[0], "def ") {
		return "", ""
	}
	defMatches := reg1.FindStringSubmatch(chunkLines[0])
	if len(defMatches) < 2 {
		return "", ""
	}
	return strings.Trim(defMatches[1], " "), chunk
}
// matchesRefImplementation reports whether input matches at least one of the reference snippets
// in refDefs (a function may have several reference implementations).
//
// Trailing newlines are dropped from both sides, and lines are compared with surrounding spaces
// removed. Only the first len(reference) lines of input are compared, so input may be followed by
// further comment text; an input with fewer lines than a reference never matches that reference.
// When checkNumRows is enabled, a note mentioning defName and pos is printed for every reference
// whose number of lines differs from the input.
func matchesRefImplementation(defName string, refDefs []string, input string, pos token.Position) bool {
	// The input does not change between iterations, so split it only once.
	inputLines := strings.Split(strings.TrimRight(input, "\n"), "\n")
	for _, refDef := range refDefs {
		refDefLines := strings.Split(strings.TrimRight(refDef, "\n"), "\n")

		// An input shorter than the reference cannot contain it; checking the length up front also
		// keeps inputLines[i] below within bounds.
		matchesPerfectly := len(inputLines) >= len(refDefLines)
		for i := 0; matchesPerfectly && i < len(refDefLines); i++ {
			a, b := strings.Trim(refDefLines[i], " "), strings.Trim(inputLines[i], " ")
			if a != b {
				matchesPerfectly = false
			}
		}

		// Mark potential issues, when there's some more comments in our code (which might be ok, as we are not required
		// to put specs comments as the last one in the doc block).
		if checkNumRows && len(refDefLines) != len(inputLines) {
			fmt.Printf("%s: %q potentially has issues (comment is longer than reference implementation)\n", pos, defName)
		}
		if matchesPerfectly {
			return true
		}
	}
	return false
}