From f2a8c6229c47c8542d57e60dd55c3de8539792e9 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 23 Aug 2021 00:59:14 +0000 Subject: [PATCH] Metrics and DEBG log for late gossip blocks (#2533) ## Issue Addressed Which issue # does this PR address? ## Proposed Changes - Add a counter metric to log when a block is received late from gossip. - Also push a `DEBG` log for the above condition. - Use Debug (`?`) instead of Display (`%`) for a bunch of logs in the beacon processor, so we don't have to deal with concatenated block roots. - Add new ERRO and CRIT to HTTP API to alert users when they're publishing late blocks. ## Additional Info NA --- beacon_node/beacon_chain/src/beacon_chain.rs | 6 ++- beacon_node/http_api/src/lib.rs | 27 +++++++----- beacon_node/http_api/src/metrics.rs | 8 ++++ .../beacon_processor/worker/gossip_methods.rs | 44 ++++++++++++------- beacon_node/network/src/metrics.rs | 4 ++ 5 files changed, 60 insertions(+), 29 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 5d5741226..2103542de 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2932,6 +2932,7 @@ impl BeaconChain { .beacon_state .previous_epoch() .start_slot(T::EthSpec::slots_per_epoch()); + let head_proposer_index = new_head.beacon_block.message().proposer_index(); // Update the snapshot that stores the head of the chain at the time it received the // block. @@ -2962,9 +2963,10 @@ impl BeaconChain { debug!( self.log, "Delayed head block"; - "delay" => ?block_delay, - "root" => ?beacon_block_root, + "block_root" => ?beacon_block_root, + "proposer_index" => head_proposer_index, "slot" => head_slot, + "block_delay" => ?block_delay, ); } diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 32a10ed71..66c87efb8 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -895,7 +895,10 @@ pub fn serve( info!( log, "Valid block from HTTP API"; - "root" => format!("{}", root) + "block_delay" => ?delay, + "root" => format!("{}", root), + "proposer_index" => block.message().proposer_index(), + "slot" => block.slot(), ); // Notify the validator monitor. @@ -917,25 +920,25 @@ pub fn serve( // // Check to see the thresholds are non-zero to avoid logging errors with small // slot times (e.g., during testing) - let crit_threshold = chain.spec.seconds_per_slot / 3; - let warn_threshold = chain.spec.seconds_per_slot / 6; - if crit_threshold > 0 && delay.as_secs() > crit_threshold { + let crit_threshold = chain.slot_clock.unagg_attestation_production_delay(); + let error_threshold = crit_threshold / 2; + if delay >= crit_threshold { crit!( log, "Block was broadcast too late"; - "root" => ?root, - "slot" => block.slot(), - "delay_ms" => delay.as_millis(), "msg" => "system may be overloaded, block likely to be orphaned", + "delay_ms" => delay.as_millis(), + "slot" => block.slot(), + "root" => ?root, ) - } else if warn_threshold > 0 && delay.as_secs() > warn_threshold { - warn!( + } else if delay >= error_threshold { + error!( log, "Block broadcast was delayed"; - "root" => ?root, - "slot" => block.slot(), - "delay_ms" => delay.as_millis(), "msg" => "system may be overloaded, block may be orphaned", + "delay_ms" => delay.as_millis(), + "slot" => block.slot(), + "root" => ?root, ) } diff --git a/beacon_node/http_api/src/metrics.rs b/beacon_node/http_api/src/metrics.rs index 81bfc8f21..1c3ab1f68 100644 --- a/beacon_node/http_api/src/metrics.rs +++ b/beacon_node/http_api/src/metrics.rs @@ -33,4 +33,12 @@ lazy_static::lazy_static! { "http_api_block_broadcast_delay_times", "Time between start of the slot and when the block was broadcast" ); + pub static ref HTTP_API_BLOCK_PUBLISHED_LATE_TOTAL: Result = try_create_int_counter( + "http_api_block_published_late_total", + "The count of times a block was published beyond more than half way to the attestation deadline" + ); + pub static ref HTTP_API_BLOCK_PUBLISHED_VERY_LATE_TOTAL: Result = try_create_int_counter( + "http_api_block_published_very_late_total", + "The count of times a block was published beyond the attestation deadline" + ); } diff --git a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs index 68deb9248..989a4a10a 100644 --- a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs +++ b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs @@ -291,19 +291,33 @@ impl Worker { reprocess_tx: mpsc::Sender>, seen_duration: Duration, ) { + let block_delay = + get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock); // Log metrics to track delay from other nodes on the network. metrics::observe_duration( &metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME, - get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock), + block_delay, ); let verified_block = match self.chain.verify_block_for_gossip(block) { Ok(verified_block) => { + if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() { + metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL); + debug!( + self.log, + "Gossip block arrived late"; + "block_root" => ?verified_block.block_root, + "proposer_index" => verified_block.block.message().proposer_index(), + "slot" => verified_block.block.slot(), + "block_delay" => ?block_delay, + ); + } + info!( self.log, "New block received"; "slot" => verified_block.block.slot(), - "hash" => %verified_block.block_root + "hash" => ?verified_block.block_root ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept); @@ -325,7 +339,7 @@ impl Worker { debug!( self.log, "Unknown parent for gossip block"; - "root" => %block.canonical_root() + "root" => ?block.canonical_root() ); self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block)); return; @@ -392,7 +406,7 @@ impl Worker { self.log, "Block arrived early"; "block_slot" => %block_slot, - "block_root" => %block_root, + "block_root" => ?block_root, "msg" => "if this happens consistently, check system clock" ); @@ -423,7 +437,7 @@ impl Worker { self.log, "Failed to defer block import"; "block_slot" => %block_slot, - "block_root" => %block_root, + "block_root" => ?block_root, "location" => "block gossip" ) } @@ -440,7 +454,7 @@ impl Worker { "Failed to defer block import"; "error" => ?e, "block_slot" => %block_slot, - "block_root" => %block_root, + "block_root" => ?block_root, "location" => "block gossip" ) } @@ -472,7 +486,7 @@ impl Worker { self.log, "Failed to inform block import"; "source" => "gossip", - "block_root" => %block_root, + "block_root" => ?block_root, ) }; @@ -511,7 +525,7 @@ impl Worker { self.log, "Invalid gossip beacon block"; "outcome" => ?other, - "block root" => %block.canonical_root(), + "block root" => ?block.canonical_root(), "block slot" => block.slot() ); self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError); @@ -843,7 +857,7 @@ impl Worker { self.log, "Attestation is not within the last ATTESTATION_PROPAGATION_SLOT_RANGE slots"; "peer_id" => %peer_id, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "type" => ?attestation_type, ); @@ -918,7 +932,7 @@ impl Worker { self.log, "Attestation already known"; "peer_id" => %peer_id, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "type" => ?attestation_type, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -935,7 +949,7 @@ impl Worker { self.log, "Aggregator already known"; "peer_id" => %peer_id, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "type" => ?attestation_type, ); // This is an allowed behaviour. @@ -956,7 +970,7 @@ impl Worker { self.log, "Prior attestation known"; "peer_id" => %peer_id, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "epoch" => %epoch, "validator_index" => validator_index, "type" => ?attestation_type, @@ -980,7 +994,7 @@ impl Worker { self.log, "Validation Index too high"; "peer_id" => %peer_id, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "type" => ?attestation_type, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); @@ -991,7 +1005,7 @@ impl Worker { self.log, "Attestation for unknown block"; "peer_id" => %peer_id, - "block" => %beacon_block_root + "block" => ?beacon_block_root ); if let Some(sender) = reprocess_tx { // We don't know the block, get the sync manager to handle the block lookup, and @@ -1200,7 +1214,7 @@ impl Worker { self.log, "Invalid attestation from network"; "reason" => ?error, - "block" => %beacon_block_root, + "block" => ?beacon_block_root, "peer_id" => %peer_id, "type" => ?attestation_type, ); diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index 7ffce1254..820497855 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -465,6 +465,10 @@ lazy_static! { "beacon_block_gossip_slot_start_delay_time", "Duration between when the block is received and the start of the slot it belongs to.", ); + pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result = try_create_int_counter( + "beacon_block_gossip_arrived_late_total", + "Count of times when a gossip block arrived from the network later than the attestation deadline.", + ); /* * Attestation reprocessing queue metrics.