Mark Holt 509a7af26a
Discovery zero refresh timer (#8661)
This fixes an issue where the mumbai testnet node struggle to find
peers. Before this fix in general test peer numbers are typically around
20 in total between eth66, eth67 and eth68. For new peers some can
struggle to find even a single peer after days of operation.

These are the numbers after 12 hours or running on a node which
previously could not find any peers: eth66=13, eth67=76, eth68=91.

The root cause of this issue is the following:

- A significant number of mumbai peers around the boot node return
network ids which are different from those currently available in the
DHT
- The available nodes are all consequently busy and return 'too many
peers' for long periods

These issues case a significant number of discovery timeouts, some of
the queries will never receive a response.

This causes the discovery read loop to enter a channel deadlock - which
means that no responses are processed, nor timeouts fired. This causes
the discovery process in the node to stop. From then on it just
re-requests handshakes from a relatively small number of peers.

This check in fixes this situation with the following changes:

- Remove the deadlock by running the timer in a separate go-routine so
it can run independently of the main request processing.
- Allow the discovery process matcher to match on port if no id match
can be established on initial ping. This allows subsequent node
validation to proceed and if the node proves to be valid via the
remainder of the look-up and handshake process it us used as a valid
peer.
- Completely unsolicited responses, i.e. those which come from a
completely unknown ip:port combination continue to be ignored.
-
2023-11-07 08:48:58 +00:00

155 lines
4.5 KiB
Go

// Copyright 2015 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
// bootnode runs a bootstrap node for the Ethereum Discovery Protocol.
package main
import (
"crypto/ecdsa"
"flag"
"fmt"
"net"
"os"
"github.com/ledgerwatch/erigon-lib/common"
"github.com/ledgerwatch/erigon/turbo/logging"
"github.com/ledgerwatch/erigon/cmd/utils"
"github.com/ledgerwatch/erigon/crypto"
"github.com/ledgerwatch/erigon/p2p/discover"
"github.com/ledgerwatch/erigon/p2p/enode"
"github.com/ledgerwatch/erigon/p2p/nat"
"github.com/ledgerwatch/erigon/p2p/netutil"
)
func main() {
var (
listenAddr = flag.String("addr", ":30301", "listen address")
genKey = flag.String("genkey", "", "generate a node key")
writeAddr = flag.Bool("writeaddress", false, "write out the node's public key and quit")
nodeKeyFile = flag.String("nodekey", "", "private key filename")
nodeKeyHex = flag.String("nodekeyhex", "", "private key as hex (for testing)")
natdesc = flag.String(utils.NATFlag.Name, "", utils.NATFlag.Usage)
netrestrict = flag.String("netrestrict", "", "restrict network communication to the given IP networks (CIDR masks)")
runv5 = flag.Bool("v5", false, "run a v5 topic discovery bootnode")
nodeKey *ecdsa.PrivateKey
err error
)
flag.Parse()
logger := logging.SetupLogger("bootnode")
natm, err := nat.Parse(*natdesc)
if err != nil {
utils.Fatalf("-nat: %v", err)
}
switch {
case *genKey != "":
nodeKey, err = crypto.GenerateKey()
if err != nil {
utils.Fatalf("could not generate key: %v", err)
}
if err = crypto.SaveECDSA(*genKey, nodeKey); err != nil {
utils.Fatalf("%v", err)
}
if !*writeAddr {
return
}
case *nodeKeyFile == "" && *nodeKeyHex == "":
utils.Fatalf("Use -nodekey or -nodekeyhex to specify a private key")
case *nodeKeyFile != "" && *nodeKeyHex != "":
utils.Fatalf("Options -nodekey and -nodekeyhex are mutually exclusive")
case *nodeKeyFile != "":
if nodeKey, err = crypto.LoadECDSA(*nodeKeyFile); err != nil {
utils.Fatalf("-nodekey: %v", err)
}
case *nodeKeyHex != "":
if nodeKey, err = crypto.HexToECDSA(*nodeKeyHex); err != nil {
utils.Fatalf("-nodekeyhex: %v", err)
}
}
if *writeAddr {
fmt.Printf("%x\n", crypto.MarshalPubkey(&nodeKey.PublicKey))
os.Exit(0)
}
var restrictList *netutil.Netlist
if *netrestrict != "" {
restrictList, err = netutil.ParseNetlist(*netrestrict)
if err != nil {
utils.Fatalf("-netrestrict: %v", err)
}
}
addr, err := net.ResolveUDPAddr("udp", *listenAddr)
if err != nil {
utils.Fatalf("-ResolveUDPAddr: %v", err)
}
conn, err := net.ListenUDP("udp", addr)
if err != nil {
utils.Fatalf("-ListenUDP: %v", err)
}
realaddr := conn.LocalAddr().(*net.UDPAddr)
if natm != nil {
if !realaddr.IP.IsLoopback() && natm.SupportsMapping() {
go nat.Map(natm, nil, "udp", realaddr.Port, realaddr.Port, "ethereum discovery", logger)
}
if ext, err := natm.ExternalIP(); err == nil {
realaddr = &net.UDPAddr{IP: ext, Port: realaddr.Port}
}
}
printNotice(&nodeKey.PublicKey, *realaddr)
ctx, cancel := common.RootContext()
defer cancel()
db, err := enode.OpenDB(ctx, "" /* path */, "" /* tmpDir */)
if err != nil {
panic(err)
}
ln := enode.NewLocalNode(db, nodeKey, logger)
cfg := discover.Config{
PrivateKey: nodeKey,
NetRestrict: restrictList,
}
if *runv5 {
if _, err := discover.ListenV5(ctx, "any", conn, ln, cfg); err != nil {
utils.Fatalf("%v", err)
}
} else {
if _, err := discover.ListenUDP(ctx, "any", conn, ln, cfg); err != nil {
utils.Fatalf("%v", err)
}
}
select {}
}
func printNotice(nodeKey *ecdsa.PublicKey, addr net.UDPAddr) {
if addr.IP.IsUnspecified() {
addr.IP = net.IP{127, 0, 0, 1}
}
n := enode.NewV4(nodeKey, addr.IP, 0, addr.Port)
fmt.Println(n.URLv4())
fmt.Println("Note: you're using cmd/bootnode, a developer tool.")
fmt.Println("We recommend using a regular node as bootstrap node for production deployments.")
}