Cleaned up error handling in network and node start-up (#7811)

This check-in catches errors in the node start-up code and makes sure
that the network is stopped if any node fails to start cleanly, and
that it returns an error - so that any calling code can take
appropriate action.
This commit is contained in:
Mark Holt 2023-06-28 18:21:15 +01:00 committed by GitHub
parent e49063006d
commit bcc2a4a2f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 55 additions and 22 deletions

View File

@ -29,6 +29,7 @@ type Network struct {
Logger log.Logger
BasePrivateApiAddr string
BaseRPCAddr string
Snapshots bool
Nodes []Node
wg sync.WaitGroup
peers []string
@ -88,14 +89,23 @@ func (nw *Network) Start(ctx *cli.Context) error {
// get the enode of the node
// - note this has the side effect of waiting for the node to start
if enode, err := getEnode(node); err == nil {
nw.peers = append(nw.peers, enode)
baseNode.StaticPeers = strings.Join(nw.peers, ",")
enode, err := getEnode(node)
// TODO do we need to call AddPeer to the nodes to make them aware of this one
// the current model only works for an appending node network where the peers gossip
// connections - not sure if this is the case ?
if err != nil {
if errors.Is(err, devnetutils.ErrInvalidEnodeString) {
continue
}
nw.Stop()
return err
}
nw.peers = append(nw.peers, enode)
baseNode.StaticPeers = strings.Join(nw.peers, ",")
// TODO do we need to call AddPeer to the nodes to make them aware of this one
// the current model only works for an appending node network where the peers gossip
// connections - not sure if this is the case ?
}
}
@ -138,17 +148,14 @@ func (nw *Network) startNode(nodeAddr string, cfg interface{}, nodeNumber int) (
app := erigonapp.MakeApp(fmt.Sprintf("node-%d", nodeNumber), node.run, erigoncli.DefaultFlags)
if err := app.Run(args); err != nil {
_, printErr := fmt.Fprintln(os.Stderr, err)
if printErr != nil {
nw.Logger.Warn("Error writing app run error to stderr", "err", printErr)
}
nw.Logger.Warn("App run returned error", "node", fmt.Sprintf("node-%d", nodeNumber), "err", err)
}
}()
return node, nil
return &node, nil
}
// getEnode returns the enode of the mining node
// getEnode returns the enode of the network node
func getEnode(n Node) (string, error) {
reqCount := 0
@ -156,6 +163,12 @@ func getEnode(n Node) (string, error) {
nodeInfo, err := n.AdminNodeInfo()
if err != nil {
if r, ok := n.(*node); ok {
if !r.running() {
return "", err
}
}
if reqCount < 10 {
var urlErr *url.Error
if errors.As(err, &urlErr) {
@ -193,14 +206,17 @@ func (nw *Network) Run(ctx go_context.Context, scenario scenarios.Scenario) erro
func (nw *Network) Stop() {
type stoppable interface {
Stop()
running() bool
}
for _, n := range nw.Nodes {
if stoppable, ok := n.(stoppable); ok {
stoppable.Stop()
for i, n := range nw.Nodes {
if stoppable, ok := n.(stoppable); ok && stoppable.running() {
nw.Logger.Info("Stopping", "node", i)
go stoppable.Stop()
}
}
nw.Logger.Info("Waiting for nodes to stop")
nw.Wait()
}

View File

@ -4,6 +4,7 @@ import (
go_context "context"
"sync"
"github.com/c2h5oh/datasize"
"github.com/ledgerwatch/erigon/cmd/devnet/args"
"github.com/ledgerwatch/erigon/cmd/devnet/requests"
"github.com/ledgerwatch/erigon/params"
@ -42,6 +43,14 @@ func (n *node) Stop() {
toClose.Close()
}
n.done()
}
// running reports whether n.ethNode is currently set (non-nil).
// NOTE(review): presumably ethNode is nil before run creates it and is cleared
// again on shutdown - confirm against the full Stop/run implementations.
func (n *node) running() bool {
return n.ethNode != nil
}
func (n *node) done() {
if n.wg != nil {
wg := n.wg
n.wg = nil
@ -59,6 +68,8 @@ func (n *node) run(ctx *cli.Context) error {
var logger log.Logger
var err error
defer n.done()
if logger, err = debug.Setup(ctx, false /* rootLogger */); err != nil {
return err
}
@ -68,6 +79,8 @@ func (n *node) run(ctx *cli.Context) error {
nodeCfg := enode.NewNodConfigUrfave(ctx, logger)
ethCfg := enode.NewEthConfigUrfave(ctx, nodeCfg, logger)
nodeCfg.MdbxDBSizeLimit = 512 * datasize.MB
n.ethNode, err = enode.New(nodeCfg, ethCfg, logger)
if err != nil {

View File

@ -3,6 +3,7 @@ package devnetutils
import (
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"net"
"os"
@ -19,6 +20,8 @@ import (
"github.com/ledgerwatch/log/v3"
)
var ErrInvalidEnodeString = errors.New("invalid enode string")
// ClearDevDB cleans up the dev folder used for the operations
func ClearDevDB(dataDir string, logger log.Logger) error {
logger.Info("Deleting nodes' data folders")
@ -56,7 +59,7 @@ func HexToInt(hexStr string) uint64 {
// UniqueIDFromEnode returns the unique ID from a node's enode, removing the `?discport=0` part
func UniqueIDFromEnode(enode string) (string, error) {
if len(enode) == 0 {
return "", fmt.Errorf("invalid enode string")
return "", ErrInvalidEnodeString
}
// iterate through characters in the string until we reach '?'
@ -73,14 +76,14 @@ func UniqueIDFromEnode(enode string) (string, error) {
}
if ati == 0 {
return "", fmt.Errorf("invalid enode string")
return "", ErrInvalidEnodeString
}
if _, apiPort, err := net.SplitHostPort(enode[ati+1 : i]); err != nil {
return "", fmt.Errorf("invalid enode string")
return "", ErrInvalidEnodeString
} else {
if _, err := strconv.Atoi(apiPort); err != nil {
return "", fmt.Errorf("invalid enode string")
return "", ErrInvalidEnodeString
}
}

View File

@ -175,7 +175,7 @@ func action(ctx *cli.Context) error {
network.Stop()
case syscall.SIGINT:
log.Info("Terminating network")
logger.Info("Terminating network")
os.Exit(-int(syscall.SIGINT))
}
}()

View File

@ -175,7 +175,7 @@ func (req *requestGenerator) PingErigonRpc() CallResult {
func NewRequestGenerator(target string, logger log.Logger) RequestGenerator {
return &requestGenerator{
client: &http.Client{
Timeout: time.Second * 600,
Timeout: time.Second * 10,
},
reqID: 1,
logger: logger,

View File

@ -51,7 +51,8 @@ func StartHTTPEndpoint(endpoint string, timeouts rpccfg.HTTPTimeouts, handler ht
}
go func() {
serveErr := httpSrv.Serve(listener)
if serveErr != nil && !errors.Is(serveErr, context.Canceled) && !errors.Is(serveErr, libcommon.ErrStopped) {
if serveErr != nil &&
!(errors.Is(serveErr, context.Canceled) || errors.Is(serveErr, libcommon.ErrStopped) || errors.Is(serveErr, http.ErrServerClosed)) {
log.Warn("Failed to serve http endpoint", "err", serveErr)
}
}()