mirror of
https://gitlab.com/pulsechaincom/lighthouse-pulse.git
synced 2024-12-24 20:47:17 +00:00
Don't return errors on HTTP API for already-known messages (#3341)
## Issue Addressed - Resolves #3266 ## Proposed Changes Return 200 OK rather than an error when a block, attestation or sync message is already known. Presently, we log and return an error, which causes a BN to go "offline" from the VC's perspective; this in turn causes the fallback mechanism to do work to try to avoid and upcheck offline nodes. This can be observed as instability in the `vc_beacon_nodes_available_count` metric. The current behaviour also causes scary logs for the user. There's nothing to *actually* be concerned about when we see duplicate messages; this can happen on fallback systems (see code comments). ## Additional Info NA
This commit is contained in:
parent
052d5cf31f
commit
2de26b20f8
@ -1168,12 +1168,46 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
blocking_json_task(move || {
|
blocking_json_task(move || {
|
||||||
let seen_timestamp = timestamp_now();
|
let seen_timestamp = timestamp_now();
|
||||||
let mut failures = Vec::new();
|
let mut failures = Vec::new();
|
||||||
|
let mut num_already_known = 0;
|
||||||
|
|
||||||
for (index, attestation) in attestations.as_slice().iter().enumerate() {
|
for (index, attestation) in attestations.as_slice().iter().enumerate() {
|
||||||
let attestation = match chain
|
let attestation = match chain
|
||||||
.verify_unaggregated_attestation_for_gossip(attestation, None)
|
.verify_unaggregated_attestation_for_gossip(attestation, None)
|
||||||
{
|
{
|
||||||
Ok(attestation) => attestation,
|
Ok(attestation) => attestation,
|
||||||
|
Err(AttnError::PriorAttestationKnown { .. }) => {
|
||||||
|
num_already_known += 1;
|
||||||
|
|
||||||
|
// Skip to the next attestation since an attestation for this
|
||||||
|
// validator is already known in this epoch.
|
||||||
|
//
|
||||||
|
// There's little value for the network in validating a second
|
||||||
|
// attestation for the same validator since it is either:
|
||||||
|
//
|
||||||
|
// 1. A duplicate.
|
||||||
|
// 2. Slashable.
|
||||||
|
// 3. Invalid.
|
||||||
|
//
|
||||||
|
// We are likely to get duplicates in the case where a VC is using
|
||||||
|
// fallback BNs. If the first BN actually publishes some/all of a
|
||||||
|
// batch of attestations but fails to respond in a timely fashion,
|
||||||
|
// the VC is likely to try publishing the attestations on another
|
||||||
|
// BN. That second BN may have already seen the attestations from
|
||||||
|
// the first BN and therefore indicate that the attestations are
|
||||||
|
// "already seen". An attestation that has already been seen has
|
||||||
|
// been published on the network so there's no actual error from
|
||||||
|
// the perspective of the user.
|
||||||
|
//
|
||||||
|
// It's better to prevent slashable attestations from ever
|
||||||
|
// appearing on the network than trying to slash validators,
|
||||||
|
// especially those validators connected to the local API.
|
||||||
|
//
|
||||||
|
// There might be *some* value in determining that this attestation
|
||||||
|
// is invalid, but since a valid attestation already exists it
|
||||||
|
// appears that this validator is capable of producing valid
|
||||||
|
// attestations and there's no immediate cause for concern.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!(log,
|
error!(log,
|
||||||
"Failure verifying attestation for gossip";
|
"Failure verifying attestation for gossip";
|
||||||
@ -1240,6 +1274,15 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if num_already_known > 0 {
|
||||||
|
debug!(
|
||||||
|
log,
|
||||||
|
"Some unagg attestations already known";
|
||||||
|
"count" => num_already_known
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if failures.is_empty() {
|
if failures.is_empty() {
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
@ -2234,6 +2277,16 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
// identical aggregates, especially if they're using the same beacon
|
// identical aggregates, especially if they're using the same beacon
|
||||||
// node.
|
// node.
|
||||||
Err(AttnError::AttestationAlreadyKnown(_)) => continue,
|
Err(AttnError::AttestationAlreadyKnown(_)) => continue,
|
||||||
|
// If we've already seen this aggregator produce an aggregate, just
|
||||||
|
// skip this one.
|
||||||
|
//
|
||||||
|
// We're likely to see this with VCs that use fallback BNs. The first
|
||||||
|
// BN might time-out *after* publishing the aggregate and then the
|
||||||
|
// second BN will indicate it's already seen the aggregate.
|
||||||
|
//
|
||||||
|
// There's no actual error for the user or the network since the
|
||||||
|
// aggregate has been successfully published by some other node.
|
||||||
|
Err(AttnError::AggregatorAlreadyKnown(_)) => continue,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!(log,
|
error!(log,
|
||||||
"Failure verifying aggregate and proofs";
|
"Failure verifying aggregate and proofs";
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
use beacon_chain::validator_monitor::{get_block_delay_ms, timestamp_now};
|
use beacon_chain::validator_monitor::{get_block_delay_ms, timestamp_now};
|
||||||
use beacon_chain::{BeaconChain, BeaconChainTypes, CountUnrealized};
|
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError, CountUnrealized};
|
||||||
use lighthouse_network::PubsubMessage;
|
use lighthouse_network::PubsubMessage;
|
||||||
use network::NetworkMessage;
|
use network::NetworkMessage;
|
||||||
use slog::{crit, error, info, Logger};
|
use slog::{crit, error, info, warn, Logger};
|
||||||
use slot_clock::SlotClock;
|
use slot_clock::SlotClock;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::mpsc::UnboundedSender;
|
use tokio::sync::mpsc::UnboundedSender;
|
||||||
@ -86,6 +86,27 @@ pub async fn publish_block<T: BeaconChainTypes>(
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Err(BlockError::BlockIsAlreadyKnown) => {
|
||||||
|
info!(
|
||||||
|
log,
|
||||||
|
"Block from HTTP API already known";
|
||||||
|
"block" => ?block.canonical_root(),
|
||||||
|
"slot" => block.slot(),
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Err(BlockError::RepeatProposal { proposer, slot }) => {
|
||||||
|
warn!(
|
||||||
|
log,
|
||||||
|
"Block ignored due to repeat proposal";
|
||||||
|
"msg" => "this can happen when a VC uses fallback BNs. \
|
||||||
|
whilst this is not necessarily an error, it can indicate issues with a BN \
|
||||||
|
or between the VC and BN.",
|
||||||
|
"slot" => slot,
|
||||||
|
"proposer" => proposer,
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let msg = format!("{:?}", e);
|
let msg = format!("{:?}", e);
|
||||||
error!(
|
error!(
|
||||||
|
@ -11,7 +11,7 @@ use beacon_chain::{
|
|||||||
use eth2::types::{self as api_types};
|
use eth2::types::{self as api_types};
|
||||||
use lighthouse_network::PubsubMessage;
|
use lighthouse_network::PubsubMessage;
|
||||||
use network::NetworkMessage;
|
use network::NetworkMessage;
|
||||||
use slog::{error, warn, Logger};
|
use slog::{debug, error, warn, Logger};
|
||||||
use slot_clock::SlotClock;
|
use slot_clock::SlotClock;
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
@ -189,6 +189,24 @@ pub fn process_sync_committee_signatures<T: BeaconChainTypes>(
|
|||||||
|
|
||||||
verified_for_pool = Some(verified);
|
verified_for_pool = Some(verified);
|
||||||
}
|
}
|
||||||
|
// If this validator has already published a sync message, just ignore this message
|
||||||
|
// without returning an error.
|
||||||
|
//
|
||||||
|
// This is likely to happen when a VC uses fallback BNs. If the first BN publishes
|
||||||
|
// the message and then fails to respond in a timely fashion then the VC will move
|
||||||
|
// to the second BN. The BN will then report that this message has already been
|
||||||
|
// seen, which is not actually an error as far as the network or user are concerned.
|
||||||
|
Err(SyncVerificationError::PriorSyncCommitteeMessageKnown {
|
||||||
|
validator_index,
|
||||||
|
slot,
|
||||||
|
}) => {
|
||||||
|
debug!(
|
||||||
|
log,
|
||||||
|
"Ignoring already-known sync message";
|
||||||
|
"slot" => slot,
|
||||||
|
"validator_index" => validator_index,
|
||||||
|
);
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!(
|
error!(
|
||||||
log,
|
log,
|
||||||
@ -283,6 +301,16 @@ pub fn process_signed_contribution_and_proofs<T: BeaconChainTypes>(
|
|||||||
// If we already know the contribution, don't broadcast it or attempt to
|
// If we already know the contribution, don't broadcast it or attempt to
|
||||||
// further verify it. Return success.
|
// further verify it. Return success.
|
||||||
Err(SyncVerificationError::SyncContributionAlreadyKnown(_)) => continue,
|
Err(SyncVerificationError::SyncContributionAlreadyKnown(_)) => continue,
|
||||||
|
// If we've already seen this aggregator produce an aggregate, just
|
||||||
|
// skip this one.
|
||||||
|
//
|
||||||
|
// We're likely to see this with VCs that use fallback BNs. The first
|
||||||
|
// BN might time-out *after* publishing the aggregate and then the
|
||||||
|
// second BN will indicate it's already seen the aggregate.
|
||||||
|
//
|
||||||
|
// There's no actual error for the user or the network since the
|
||||||
|
// aggregate has been successfully published by some other node.
|
||||||
|
Err(SyncVerificationError::AggregatorAlreadyKnown(_)) => continue,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!(
|
error!(
|
||||||
log,
|
log,
|
||||||
|
Loading…
Reference in New Issue
Block a user