From d744aaa2cd56519016e6a6058859887b359e20b5 Mon Sep 17 00:00:00 2001 From: Jim McDonald Date: Thu, 16 Jan 2020 16:57:38 +0000 Subject: [PATCH] Better resync checking and running (#4516) * Separate out fallen behind/resync check * Remove hard-coded resync interval * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync * Merge branch 'master' into resync --- beacon-chain/sync/rpc_status.go | 35 ++++++++++++++++++++++----------- beacon-chain/sync/service.go | 1 + 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/beacon-chain/sync/rpc_status.go b/beacon-chain/sync/rpc_status.go index 58a090dda..6dbbe52bc 100644 --- a/beacon-chain/sync/rpc_status.go +++ b/beacon-chain/sync/rpc_status.go @@ -15,14 +15,14 @@ import ( "github.com/prysmaticlabs/prysm/shared/roughtime" "github.com/prysmaticlabs/prysm/shared/runutil" "github.com/prysmaticlabs/prysm/shared/slotutil" + "github.com/sirupsen/logrus" ) -const statusInterval = 6 * time.Minute // 30 slots. - // maintainPeerStatuses by infrequently polling peers for their latest status. func (r *Service) maintainPeerStatuses() { - ctx := context.Background() - runutil.RunEvery(r.ctx, statusInterval, func() { + // Run twice per epoch. + interval := time.Duration(params.BeaconConfig().SecondsPerSlot*params.BeaconConfig().SlotsPerEpoch/2) * time.Second + runutil.RunEvery(r.ctx, interval, func() { for _, pid := range r.p2p.Peers().Connected() { // If the status hasn't been updated in the recent interval time. lastUpdated, err := r.p2p.Peers().ChainStateLastUpdated(pid) @@ -30,18 +30,31 @@ func (r *Service) maintainPeerStatuses() { // Peer has vanished; nothing to do continue } - if roughtime.Now().After(lastUpdated.Add(statusInterval)) { - if err := r.sendRPCStatusRequest(ctx, pid); err != nil { + if roughtime.Now().After(lastUpdated.Add(interval)) { + if err := r.sendRPCStatusRequest(r.ctx, pid); err != nil { log.WithField("peer", pid).WithError(err).Error("Failed to request peer status") } } } - // If our head slot is not in the latest epoch, check peers to determine if we need to - // resync with the network. - currentSlot := uint64(roughtime.Now().Unix()-r.chain.GenesisTime().Unix()) / params.BeaconConfig().SecondsPerSlot - for !r.initialSync.Syncing() && helpers.SlotToEpoch(r.chain.HeadSlot()) < helpers.SlotToEpoch(currentSlot) { - _, highestEpoch, _ := r.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, helpers.SlotToEpoch(r.chain.HeadSlot())) + }) +} + +// resyncIfBehind checks periodically to see if we are in normal sync but have fallen behind our peers by more than an epoch, +// in which case we attempt a resync using the initial sync method to catch up. +func (r *Service) resyncIfBehind() { + // Run sixteen times per epoch. + interval := time.Duration(params.BeaconConfig().SecondsPerSlot*params.BeaconConfig().SlotsPerEpoch/16) * time.Second + runutil.RunEvery(r.ctx, interval, func() { + currentEpoch := uint64(roughtime.Now().Unix()-r.chain.GenesisTime().Unix()) / (params.BeaconConfig().SecondsPerSlot * params.BeaconConfig().SlotsPerEpoch) + syncedEpoch := helpers.SlotToEpoch(r.chain.HeadSlot()) + if !r.initialSync.Syncing() && syncedEpoch < currentEpoch-1 { + _, highestEpoch, _ := r.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, syncedEpoch) if helpers.StartSlot(highestEpoch) > r.chain.HeadSlot() { + log.WithFields(logrus.Fields{ + "currentEpoch": currentEpoch, + "syncedEpoch": syncedEpoch, + "peersEpoch": highestEpoch, + }).Info("Fallen behind peers; reverting to initial sync to catch up") numberOfTimesResyncedCounter.Inc() r.clearPendingSlots() if err := r.initialSync.Resync(); err != nil { diff --git a/beacon-chain/sync/service.go b/beacon-chain/sync/service.go index bfbe2a432..00913cd82 100644 --- a/beacon-chain/sync/service.go +++ b/beacon-chain/sync/service.go @@ -88,6 +88,7 @@ func (r *Service) Start() { r.p2p.AddDisconnectionHandler(r.removeDisconnectedPeerStatus) r.processPendingBlocksQueue() r.maintainPeerStatuses() + r.resyncIfBehind() } // Stop the regular sync service.