Document why we hash downloaded blocks for both sync algs (#2927)

## Proposed Changes Initially the idea was to remove hashing of blocks in backfill sync. After considering it more, we conclude that we need to do it in both (forward and backfill) anyway. But since we forgot why we were doing it in the first place, this PR documents this logic. Future us should find it useful Co-authored-by: Divma <26765164+divagant-martian@users.noreply.github.com>
2024-12-25 21:17:17 +00:00 · 2022-01-26 23:14:22 +00:00 · 2022-01-26 23:14:22 +00:00 · 9964f5afe5
commit 9964f5afe5
parent 150931950d
3 changed files with 47 additions and 10 deletions
--- a/beacon_node/network/src/sync/backfill_sync/mod.rs
+++ b/beacon_node/network/src/sync/backfill_sync/mod.rs
@ -54,6 +54,13 @@ impl BatchConfig for BackFillBatchConfig {
    fn max_batch_processing_attempts() -> u8 {
        MAX_BATCH_PROCESSING_ATTEMPTS
    }
+    fn batch_attempt_hash<T: EthSpec>(blocks: &[SignedBeaconBlock<T>]) -> u64 {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+        let mut hasher = DefaultHasher::new();
+        blocks.hash(&mut hasher);
+        hasher.finish()
+    }
 }

 /// Return type when attempting to start the backfill sync process.
@ -119,7 +126,7 @@ pub struct BackFillSync<T: BeaconChainTypes> {
    /// Batches validated by this chain.
    validated_batches: u64,

-    /// We keep track of peer that are participating in the backfill sync. Unlike RangeSync,
+    /// We keep track of peers that are participating in the backfill sync. Unlike RangeSync,
    /// BackFillSync uses all synced peers to download the chain from. If BackFillSync fails, we don't
    /// want to penalize all our synced peers, so we use this variable to keep track of peers that
    /// have participated and only penalize these peers if backfill sync fails.
@ -539,7 +546,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
                "error" => %e, "batch" => self.processing_target);
            // This is unlikely to happen but it would stall syncing since the batch now has no
            // blocks to continue, and the chain is expecting a processing result that won't
-            // arrive.  To mitigate this, (fake) fail this processing so that the batch is
+            // arrive. To mitigate this, (fake) fail this processing so that the batch is
            // re-downloaded.
            self.on_batch_process_result(
                network,
@ -795,7 +802,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
                    for attempt in batch.attempts() {
                        // The validated batch has been re-processed
                        if attempt.hash != processed_attempt.hash {
-                            // The re-downloaded version was different
+                            // The re-downloaded version was different.
                            if processed_attempt.peer_id != attempt.peer_id {
                                // A different peer sent the correct batch, the previous peer did not
                                // We negatively score the original peer.
--- a/beacon_node/network/src/sync/manager.rs
+++ b/beacon_node/network/src/sync/manager.rs
@ -131,7 +131,7 @@ pub enum SyncRequestType {
    RangeSync(Epoch, ChainId),
 }

-/// The result of processing a multiple blocks (a chain segment).
+/// The result of processing multiple blocks (a chain segment).
 #[derive(Debug)]
 pub enum BatchProcessResult {
    /// The batch was completed successfully. It carries whether the sent batch contained blocks.
--- a/beacon_node/network/src/sync/range_sync/batch.rs
+++ b/beacon_node/network/src/sync/range_sync/batch.rs
@ -19,6 +19,34 @@ pub trait BatchConfig {
    fn max_batch_download_attempts() -> u8;
    /// The max batch processing attempts.
    fn max_batch_processing_attempts() -> u8;
+    /// Hashing function of a batch's attempt. Used for scoring purposes.
+    ///
+    /// When a batch fails processing, it is possible that the batch is wrong (faulty or
+    /// incomplete) or that a previous one is wrong. For this reason we need to re-download and
+    /// re-process the batches awaiting validation and the current one. Consider this scenario:
+    ///
+    /// ```ignore
+    /// BatchA BatchB BatchC BatchD
+    /// -----X Empty  Empty  Y-----
+    /// ```
+    ///
+    /// BatchA declares that it refers X, but BatchD declares that it's first block is Y. There is no
+    /// way to know if BatchD is faulty/incomplete or if batches B and/or C are missing blocks. It is
+    /// also possible that BatchA belongs to a different chain to the rest starting in some block
+    /// midway in the batch's range. For this reason, the four batches would need to be re-downloaded
+    /// and re-processed.
+    ///
+    /// If batchD was actually good, it will still register two processing attempts for the same set of
+    /// blocks. In this case, we don't want to penalize the peer that provided the first version, since
+    /// it's equal to the successfully processed one.
+    ///
+    /// The function `batch_attempt_hash` provides a way to compare two batch attempts without
+    /// storing the full set of blocks.
+    ///
+    /// Note that simpler hashing functions considered in the past (hash of first block, hash of last
+    /// block, number of received blocks) are not good enough to differentiate attempts. For this
+    /// reason, we hash the complete set of blocks both in RangeSync and BackFillSync.
+    fn batch_attempt_hash<T: EthSpec>(blocks: &[SignedBeaconBlock<T>]) -> u64;
 }

 pub struct RangeSyncBatchConfig {}
@ -30,6 +58,11 @@ impl BatchConfig for RangeSyncBatchConfig {
    fn max_batch_processing_attempts() -> u8 {
        MAX_BATCH_PROCESSING_ATTEMPTS
    }
+    fn batch_attempt_hash<T: EthSpec>(blocks: &[SignedBeaconBlock<T>]) -> u64 {
+        let mut hasher = std::collections::hash_map::DefaultHasher::new();
+        blocks.hash(&mut hasher);
+        hasher.finish()
+    }
 }

 /// Error type of a batch in a wrong state.
@ -300,7 +333,7 @@ impl<T: EthSpec, B: BatchConfig> BatchInfo<T, B> {
    pub fn start_processing(&mut self) -> Result<Vec<SignedBeaconBlock<T>>, WrongState> {
        match self.state.poison() {
            BatchState::AwaitingProcessing(peer, blocks) => {
-                self.state = BatchState::Processing(Attempt::new(peer, &blocks));
+                self.state = BatchState::Processing(Attempt::new::<B, T>(peer, &blocks));
                Ok(blocks)
            }
            BatchState::Poisoned => unreachable!("Poisoned batch"),
@ -386,11 +419,8 @@ pub struct Attempt {
 }

 impl Attempt {
-    #[allow(clippy::ptr_arg)]
-    fn new<T: EthSpec>(peer_id: PeerId, blocks: &Vec<SignedBeaconBlock<T>>) -> Self {
-        let mut hasher = std::collections::hash_map::DefaultHasher::new();
-        blocks.hash(&mut hasher);
-        let hash = hasher.finish();
+    fn new<B: BatchConfig, T: EthSpec>(peer_id: PeerId, blocks: &[SignedBeaconBlock<T>]) -> Self {
+        let hash = B::batch_attempt_hash(blocks);
        Attempt { peer_id, hash }
    }
 }