From f1113540d8dc5ec3c39c251b21dc9be3e221c2b2 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Fri, 15 Dec 2023 14:23:54 +1100 Subject: [PATCH] Fix off-by-one bug in missed block detection (#5012) * Regression test * Fix the bug --- .../beacon_chain/src/validator_monitor.rs | 36 ++++---- .../beacon_chain/tests/validator_monitor.rs | 82 ++++++++++++++++++- 2 files changed, 102 insertions(+), 16 deletions(-) diff --git a/beacon_node/beacon_chain/src/validator_monitor.rs b/beacon_node/beacon_chain/src/validator_monitor.rs index c511a0ff8..8d82a0c06 100644 --- a/beacon_node/beacon_chain/src/validator_monitor.rs +++ b/beacon_node/beacon_chain/src/validator_monitor.rs @@ -442,7 +442,7 @@ impl ValidatorMonitor { } /// Add some validators to `self` for additional monitoring. - fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) { + pub fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) { let index_opt = self .indices .iter() @@ -602,8 +602,10 @@ impl ValidatorMonitor { let end_slot = current_slot.saturating_sub(MISSED_BLOCK_LAG_SLOTS).as_u64(); - // List of proposers per epoch from the beacon_proposer_cache - let mut proposers_per_epoch: Option> = None; + // List of proposers per epoch from the beacon_proposer_cache, and the epoch at which the + // cache is valid. + let mut proposers_per_epoch: Option<(SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>, Epoch)> = + None; for (prev_slot, slot) in (start_slot.as_u64()..=end_slot) .map(Slot::new) @@ -617,25 +619,30 @@ impl ValidatorMonitor { // Found missed block if block_root == prev_block_root { let slot_epoch = slot.epoch(T::slots_per_epoch()); - let prev_slot_epoch = prev_slot.epoch(T::slots_per_epoch()); if let Ok(shuffling_decision_block) = state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root) { - // Only update the cache if it needs to be initialised or because - // slot is at epoch + 1 - if proposers_per_epoch.is_none() || slot_epoch != prev_slot_epoch { - proposers_per_epoch = self.get_proposers_by_epoch_from_cache( - slot_epoch, - shuffling_decision_block, - ); + // Update the cache if it has not yet been initialised, or if it is + // initialised for a prior epoch. This is an optimisation to avoid bouncing + // the proposer shuffling cache lock when there are lots of missed blocks. + if proposers_per_epoch + .as_ref() + .map_or(true, |(_, cached_epoch)| *cached_epoch != slot_epoch) + { + proposers_per_epoch = self + .get_proposers_by_epoch_from_cache( + slot_epoch, + shuffling_decision_block, + ) + .map(|cache| (cache, slot_epoch)); } // Only add missed blocks for the proposer if it's in the list of monitored validators let slot_in_epoch = slot % T::slots_per_epoch(); if let Some(proposer_index) = proposers_per_epoch - .as_deref() - .and_then(|proposers| proposers.get(slot_in_epoch.as_usize())) + .as_ref() + .and_then(|(proposers, _)| proposers.get(slot_in_epoch.as_usize())) { let i = *proposer_index as u64; if let Some(pub_key) = self.indices.get(&i) { @@ -674,7 +681,8 @@ impl ValidatorMonitor { debug!( self.log, "Could not get proposers from cache"; - "epoch" => ?slot_epoch + "epoch" => ?slot_epoch, + "decision_root" => ?shuffling_decision_block, ); } } diff --git a/beacon_node/beacon_chain/tests/validator_monitor.rs b/beacon_node/beacon_chain/tests/validator_monitor.rs index 6fe074068..d9ff57b1b 100644 --- a/beacon_node/beacon_chain/tests/validator_monitor.rs +++ b/beacon_node/beacon_chain/tests/validator_monitor.rs @@ -1,9 +1,9 @@ -use lazy_static::lazy_static; - use beacon_chain::test_utils::{ AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType, }; use beacon_chain::validator_monitor::{ValidatorMonitorConfig, MISSED_BLOCK_LAG_SLOTS}; +use lazy_static::lazy_static; +use logging::test_logger; use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; // Should ideally be divisible by 3. @@ -23,6 +23,7 @@ fn get_harness( let harness = BeaconChainHarness::builder(MainnetEthSpec) .default_spec() .keypairs(KEYPAIRS[0..validator_count].to_vec()) + .logger(test_logger()) .fresh_ephemeral_store() .mock_execution_layer() .validator_monitor_config(ValidatorMonitorConfig { @@ -39,6 +40,83 @@ fn get_harness( harness } +// Regression test for off-by-one caching issue in missed block detection. +#[tokio::test] +async fn missed_blocks_across_epochs() { + let slots_per_epoch = E::slots_per_epoch(); + let all_validators = (0..VALIDATOR_COUNT).collect::>(); + + let harness = get_harness(VALIDATOR_COUNT, vec![]); + let validator_monitor = &harness.chain.validator_monitor; + let mut genesis_state = harness.get_current_state(); + let genesis_state_root = genesis_state.update_tree_hash_cache().unwrap(); + let genesis_block_root = harness.head_block_root(); + + // Skip a slot in the first epoch (to prime the cache inside the missed block function) and then + // at a different offset in the 2nd epoch. The missed block in the 2nd epoch MUST NOT reuse + // the cache from the first epoch. + let first_skip_offset = 3; + let second_skip_offset = slots_per_epoch / 2; + assert_ne!(first_skip_offset, second_skip_offset); + let first_skip_slot = Slot::new(first_skip_offset); + let second_skip_slot = Slot::new(slots_per_epoch + second_skip_offset); + let slots = (1..2 * slots_per_epoch) + .map(Slot::new) + .filter(|slot| *slot != first_skip_slot && *slot != second_skip_slot) + .collect::>(); + + let (block_roots_by_slot, state_roots_by_slot, _, head_state) = harness + .add_attested_blocks_at_slots(genesis_state, genesis_state_root, &slots, &all_validators) + .await; + + // Prime the proposer shuffling cache. + let mut proposer_shuffling_cache = harness.chain.beacon_proposer_cache.lock(); + for epoch in [0, 1].into_iter().map(Epoch::new) { + let start_slot = epoch.start_slot(slots_per_epoch) + 1; + let state = harness + .get_hot_state(state_roots_by_slot[&start_slot]) + .unwrap(); + let decision_root = state + .proposer_shuffling_decision_root(genesis_block_root) + .unwrap(); + proposer_shuffling_cache + .insert( + epoch, + decision_root, + state + .get_beacon_proposer_indices(&harness.chain.spec) + .unwrap(), + state.fork(), + ) + .unwrap(); + } + drop(proposer_shuffling_cache); + + // Monitor the validator that proposed the block at the same offset in the 0th epoch as the skip + // in the 1st epoch. + let innocent_proposer_slot = Slot::new(second_skip_offset); + let innocent_proposer = harness + .get_block(block_roots_by_slot[&innocent_proposer_slot]) + .unwrap() + .message() + .proposer_index(); + + let mut vm_write = validator_monitor.write(); + + // Call `process_` once to update validator indices. + vm_write.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec); + // Start monitoring the innocent validator. + vm_write.add_validator_pubkey(KEYPAIRS[innocent_proposer as usize].pk.compress()); + // Check for missed blocks. + vm_write.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec); + + // My client is innocent, your honour! + assert_eq!( + vm_write.get_monitored_validator_missed_block_count(innocent_proposer), + 0 + ); +} + #[tokio::test] async fn produces_missed_blocks() { let validator_count = 16;