diff --git a/p2p/dnsdisc/client.go b/p2p/dnsdisc/client.go
index b87278482..d3e8111ab 100644
--- a/p2p/dnsdisc/client.go
+++ b/p2p/dnsdisc/client.go
@@ -217,8 +217,11 @@ type randomIterator struct {
 	c        *Client
 
 	mu    sync.Mutex
-	trees map[string]*clientTree // all trees
 	lc    linkCache              // tracks tree dependencies
+	trees map[string]*clientTree // all trees
+	// buffers for syncableTrees
+	syncableList []*clientTree
+	disabledList []*clientTree
 }
 
 func (c *Client) newRandomIterator() *randomIterator {
@@ -238,10 +241,10 @@ func (it *randomIterator) Node() *enode.Node {
 
 // Close closes the iterator.
 func (it *randomIterator) Close() {
+	it.cancelFn()
+
 	it.mu.Lock()
 	defer it.mu.Unlock()
-
-	it.cancelFn()
 	it.trees = nil
 }
 
@@ -264,7 +267,7 @@ func (it *randomIterator) addTree(url string) error {
 // nextNode syncs random tree entries until it finds a node.
 func (it *randomIterator) nextNode() *enode.Node {
 	for {
-		ct := it.nextTree()
+		ct := it.pickTree()
 		if ct == nil {
 			return nil
 		}
@@ -282,26 +285,79 @@ func (it *randomIterator) nextNode() *enode.Node {
 	}
 }
 
-// nextTree returns a random tree.
-func (it *randomIterator) nextTree() *clientTree {
+// pickTree returns a random tree to sync from.
+func (it *randomIterator) pickTree() *clientTree {
 	it.mu.Lock()
 	defer it.mu.Unlock()
 
+	// Rebuild the trees map if any links have changed.
 	if it.lc.changed {
 		it.rebuildTrees()
 		it.lc.changed = false
 	}
-	if len(it.trees) == 0 {
-		return nil
-	}
-	limit := rand.Intn(len(it.trees))
-	for _, ct := range it.trees {
-		if limit == 0 {
-			return ct
+
+	for {
+		canSync, trees := it.syncableTrees()
+		switch {
+		case canSync:
+			// Pick a random tree.
+			return trees[rand.Intn(len(trees))]
+		case len(trees) > 0:
+			// No sync action can be performed on any tree right now. The only
+			// meaningful thing to do is to wait for any root record to get updated.
+			if !it.waitForRootUpdates(trees) {
+				// Iterator was closed while waiting.
+				return nil
+			}
+		default:
+			// There are no trees left, the iterator was closed.
+			return nil
 		}
-		limit--
 	}
-	return nil
+}
+
+// syncableTrees finds trees on which any meaningful sync action can be performed.
+func (it *randomIterator) syncableTrees() (canSync bool, trees []*clientTree) {
+	// Resize tree lists.
+	it.syncableList = it.syncableList[:0]
+	it.disabledList = it.disabledList[:0]
+
+	// Partition them into the two lists.
+	for _, ct := range it.trees {
+		if ct.canSyncRandom() {
+			it.syncableList = append(it.syncableList, ct)
+		} else {
+			it.disabledList = append(it.disabledList, ct)
+		}
+	}
+	if len(it.syncableList) > 0 {
+		return true, it.syncableList
+	}
+	return false, it.disabledList
+}
+
+// waitForRootUpdates waits for the closest scheduled root check time on the given trees.
+func (it *randomIterator) waitForRootUpdates(trees []*clientTree) bool {
+	var minTree *clientTree
+	var nextCheck mclock.AbsTime
+	for _, ct := range trees {
+		check := ct.nextScheduledRootCheck()
+		if minTree == nil || check < nextCheck {
+			minTree = ct
+			nextCheck = check
+		}
+	}
+
+	sleep := nextCheck.Sub(it.c.clock.Now())
+	it.c.cfg.Logger.Debug("DNS iterator waiting for root updates", "sleep", sleep, "tree", minTree.loc.domain)
+	timeout := it.c.clock.NewTimer(sleep)
+	defer timeout.Stop()
+	select {
+	case <-timeout.C():
+		return true
+	case <-it.ctx.Done():
+		return false // Iterator was closed.
+	}
 }
 
 // rebuildTrees rebuilds the 'trees' map.
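As a quick illustration of the behavior this change targets, here is a minimal, hypothetical consumer of the public dnsdisc API; it is a sketch, not part of the patch. The enrtree:// URL is a placeholder, and the zero-value Config falls back to the package defaults. With the reworked pickTree, a call to Next over trees that are still empty parks in waitForRootUpdates until the next scheduled root re-check instead of hot-spinning.

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/p2p/dnsdisc"
)

func main() {
	// Placeholder URL: substitute a real signed tree of the form
	// enrtree://<base32-pubkey>@<domain>.
	const treeURL = "enrtree://<base32-pubkey>@nodes.example.org"

	// A zero Config picks the package defaults (system DNS resolver, system
	// clock, default cache size and rate limit).
	c := dnsdisc.NewClient(dnsdisc.Config{})

	it, err := c.NewIterator(treeURL)
	if err != nil {
		panic(err)
	}
	defer it.Close()

	// Next blocks until a node is found or the iterator is closed. With this
	// change, an empty tree makes the iterator sleep until the next scheduled
	// root re-check rather than spinning over trees that have nothing to sync.
	for it.Next() {
		fmt.Println("discovered:", it.Node().URLv4())
	}
}
```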
diff --git a/p2p/dnsdisc/client_test.go b/p2p/dnsdisc/client_test.go
index 6a6705abf..741bee423 100644
--- a/p2p/dnsdisc/client_test.go
+++ b/p2p/dnsdisc/client_test.go
@@ -231,6 +231,53 @@ func TestIteratorRootRecheckOnFail(t *testing.T) {
 	checkIterator(t, it, nodes)
 }
 
+// This test checks that the iterator works correctly when the tree is initially empty.
+func TestIteratorEmptyTree(t *testing.T) {
+	var (
+		clock    = new(mclock.Simulated)
+		nodes    = testNodes(nodesSeed1, 1)
+		resolver = newMapResolver()
+		c        = NewClient(Config{
+			Resolver:        resolver,
+			Logger:          testlog.Logger(t, log.LvlTrace),
+			RecheckInterval: 20 * time.Minute,
+			RateLimit:       500,
+		})
+	)
+	c.clock = clock
+	tree1, url := makeTestTree("n", nil, nil)
+	tree2, _ := makeTestTree("n", nodes, nil)
+	resolver.add(tree1.ToTXT("n"))
+
+	// Start the iterator.
+	node := make(chan *enode.Node)
+	it, err := c.NewIterator(url)
+	if err != nil {
+		t.Fatal(err)
+	}
+	go func() {
+		it.Next()
+		node <- it.Node()
+	}()
+
+	// Wait for the client to get stuck in waitForRootUpdates.
+	clock.WaitForTimers(1)
+
+	// Now update the root.
+	resolver.add(tree2.ToTXT("n"))
+
+	// Wait for it to pick up the root change.
+	clock.Run(c.cfg.RecheckInterval)
+	select {
+	case n := <-node:
+		if n.ID() != nodes[0].ID() {
+			t.Fatalf("wrong node returned")
+		}
+	case <-time.After(5 * time.Second):
+		t.Fatal("it.Next() did not unblock within 5s of real time")
+	}
+}
+
 // updateSomeNodes applies ENR updates to some of the given nodes.
 func updateSomeNodes(keySeed int64, nodes []*enode.Node) {
 	keys := testKeys(nodesSeed1, len(nodes))
diff --git a/p2p/dnsdisc/sync.go b/p2p/dnsdisc/sync.go
index 36f02acba..073547c90 100644
--- a/p2p/dnsdisc/sync.go
+++ b/p2p/dnsdisc/sync.go
@@ -25,9 +25,9 @@ import (
 	"github.com/ethereum/go-ethereum/p2p/enode"
 )
 
-const (
-	rootRecheckFailCount = 5 // update root if this many leaf requests fail
-)
+// This is the number of consecutive leaf requests that may fail before
+// we consider re-resolving the tree root.
+const rootRecheckFailCount = 5
 
 // clientTree is a full tree being synced.
 type clientTree struct {
@@ -89,13 +89,22 @@ func (ct *clientTree) syncRandom(ctx context.Context) (n *enode.Node, err error)
 	ct.gcLinks()
 
 	// Sync next random entry in ENR tree. Once every node has been visited, we simply
-	// start over. This is fine because entries are cached.
+	// start over. This is fine because entries are cached internally by the client LRU
+	// and also by DNS resolvers.
 	if ct.enrs.done() {
 		ct.enrs = newSubtreeSync(ct.c, ct.loc, ct.root.eroot, false)
 	}
 	return ct.syncNextRandomENR(ctx)
 }
 
+// canSyncRandom checks if any meaningful action can be performed by syncRandom.
+func (ct *clientTree) canSyncRandom() bool {
+	// Note: the check for non-zero leaf count is very important here.
+	// If we're done syncing all nodes, and no leaves were found, the tree
+	// is empty and we can't use it for sync.
+	return ct.rootUpdateDue() || !ct.links.done() || !ct.enrs.done() || ct.enrs.leaves != 0
+}
+
 // gcLinks removes outdated links from the global link cache. GC runs once
 // when the link sync finishes.
 func (ct *clientTree) gcLinks() {
@@ -184,10 +193,14 @@ func (ct *clientTree) updateRoot(ctx context.Context) error {
 // rootUpdateDue returns true when a root update is needed.
 func (ct *clientTree) rootUpdateDue() bool {
 	tooManyFailures := ct.leafFailCount > rootRecheckFailCount
-	scheduledCheck := ct.c.clock.Now().Sub(ct.lastRootCheck) > ct.c.cfg.RecheckInterval
+	scheduledCheck := ct.c.clock.Now() >= ct.nextScheduledRootCheck()
 	return ct.root == nil || tooManyFailures || scheduledCheck
 }
 
+func (ct *clientTree) nextScheduledRootCheck() mclock.AbsTime {
+	return ct.lastRootCheck.Add(ct.c.cfg.RecheckInterval)
+}
+
 // slowdownRootUpdate applies a delay to root resolution if is tried
 // too frequently. This avoids busy polling when the client is offline.
 // Returns true if the timeout passed, false if sync was canceled.
@@ -218,10 +231,11 @@ type subtreeSync struct {
 	root    string
 	missing []string // missing tree node hashes
 	link    bool     // true if this sync is for the link tree
+	leaves  int      // counter of synced leaves
 }
 
 func newSubtreeSync(c *Client, loc *linkEntry, root string, link bool) *subtreeSync {
-	return &subtreeSync{c, loc, root, []string{root}, link}
+	return &subtreeSync{c, loc, root, []string{root}, link, 0}
 }
 
 func (ts *subtreeSync) done() bool {
@@ -253,10 +267,12 @@ func (ts *subtreeSync) resolveNext(ctx context.Context, hash string) (entry, err
 		if ts.link {
 			return nil, errENRInLinkTree
 		}
+		ts.leaves++
 	case *linkEntry:
 		if !ts.link {
 			return nil, errLinkInENRTree
 		}
+		ts.leaves++
 	case *branchEntry:
 		ts.missing = append(ts.missing, e.children...)
 	}
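For completeness, a sketch of the shutdown path that the reordering in Close addresses: pickTree holds the iterator mutex while waitForRootUpdates sleeps, so Close cancels the context before taking that mutex; otherwise a concurrent Close would have to wait out the sleep. The snippet below is a hypothetical consumer (placeholder URL, minimal error handling), not part of the patch.

```go
package main

import (
	"fmt"
	"time"

	"github.com/ethereum/go-ethereum/p2p/dnsdisc"
)

func main() {
	c := dnsdisc.NewClient(dnsdisc.Config{})

	// Placeholder URL: substitute a real signed tree of the form
	// enrtree://<base32-pubkey>@<domain>.
	it, err := c.NewIterator("enrtree://<base32-pubkey>@nodes.example.org")
	if err != nil {
		fmt.Println("invalid tree URL:", err)
		return
	}

	done := make(chan struct{})
	go func() {
		defer close(done)
		// Next blocks while the tree is unreachable or still empty; it returns
		// false once the iterator is closed.
		for it.Next() {
			fmt.Println("discovered:", it.Node().URLv4())
		}
	}()

	// Stop discovery from another goroutine. Cancelling the iterator context
	// before grabbing the mutex is what lets Close unblock a sleeping Next.
	time.Sleep(2 * time.Second)
	it.Close()
	<-done
}
```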