lighthouse-pulse/beacon_node/execution_layer/src/metrics.rs

84 lines
4.2 KiB
Rust
Raw Normal View History

pub use lighthouse_metrics::*;
/// Label value `"hit"`.
/// NOTE(review): presumably used with the `event` label of
/// `EXECUTION_LAYER_PRE_PREPARED_PAYLOAD_ID`, whose help text speaks of hits or misses — confirm at call sites.
pub const HIT: &str = "hit";
/// Label value `"miss"`; counterpart of [`HIT`].
pub const MISS: &str = "miss";
/// Label value `"get_payload"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const GET_PAYLOAD: &str = "get_payload";
/// Label value `"get_blinded_payload"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const GET_BLINDED_PAYLOAD: &str = "get_blinded_payload";
Improve debugging experience for builder proposals (#3725) ## Issue Addressed NA ## Proposed Changes This PR sets out to improve the logging/metrics experience when interacting with the builder. Namely, it: - Adds/changes metrics (see "Metrics Changes" section). - Adds new logs which show the duration of requests to the builder/local EL. - Refactors existing logs for consistency and so that the `parent_hash` is included in all relevant logs (we can grep for this field when trying to trace the flow of block production). Additionally, when I was implementing this PR I noticed that we skip some verification of the builder payload in the scenario where the builder returns `Ok` but the local EL returns with `Err`. Namely, we were skipping the bid signature and other values like parent hash and prev randao. In this PR I've changed it so we *always* check these values and reject the bid if they're incorrect. With these changes, we'll sometimes choose to skip a proposal rather than propose something invalid -- that's the only side-effect to the changes that I can see. ## Metrics Changes - Changed: `execution_layer_request_times`: - `method = "get_blinded_payload_local"`: time taken to get a payload from a local EE. - `method = "get_blinded_payload_builder"`: time taken to get a blinded payload from a builder. - `method = "post_blinded_payload_builder"`: time taken to get a builder to reveal a payload they've previously supplied us. - `execution_layer_get_payload_outcome` - `outcome = "success"`: we successfully produced a payload from a builder or local EE. - `outcome = "failure"`: we were unable to get a payload from a builder or local EE. - New: `execution_layer_builder_reveal_payload_outcome` - `outcome = "success"`: a builder revealed a payload from a signed, blinded block. - `outcome = "failure"`: the builder did not reveal the payload. - New: `execution_layer_get_payload_source` - `source = "builder"`: we used a payload from a builder to produce a block. 
- `source = "local"`: we used a payload from a local EE to produce a block. - New: `execution_layer_get_payload_builder_rejections` has a `reason` field to describe why we rejected a payload from a builder. - New: `execution_layer_payload_bids` tracks the bid (in gwei) from the builder or local EE (local EE not yet supported, waiting on EEs to expose the value). Can only record values that fit inside an i64 (roughly 9 billion ETH). ## Additional Info NA
2022-11-29 05:51:42 +00:00
/// `method` label value for `execution_layer_request_times`: per the accompanying
/// change note, the time taken to get a payload from a local EE.
pub const GET_BLINDED_PAYLOAD_LOCAL: &str = "get_blinded_payload_local";
/// `method` label value for `execution_layer_request_times`: per the accompanying
/// change note, the time taken to get a blinded payload from a builder.
pub const GET_BLINDED_PAYLOAD_BUILDER: &str = "get_blinded_payload_builder";
/// `method` label value for `execution_layer_request_times`: per the accompanying
/// change note, the time taken for a builder to reveal a previously supplied payload.
pub const POST_BLINDED_PAYLOAD_BUILDER: &str = "post_blinded_payload_builder";
/// Label value `"new_payload"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const NEW_PAYLOAD: &str = "new_payload";
/// Label value `"forkchoice_updated"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const FORKCHOICE_UPDATED: &str = "forkchoice_updated";
/// Label value `"get_terminal_pow_block_hash"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const GET_TERMINAL_POW_BLOCK_HASH: &str = "get_terminal_pow_block_hash";
/// Label value `"is_valid_terminal_pow_block_hash"`.
/// NOTE(review): presumably a `method` label value — confirm at call sites.
pub const IS_VALID_TERMINAL_POW_BLOCK_HASH: &str = "is_valid_terminal_pow_block_hash";
Improve debugging experience for builder proposals (#3725) ## Issue Addressed NA ## Proposed Changes This PR sets out to improve the logging/metrics experience when interacting with the builder. Namely, it: - Adds/changes metrics (see "Metrics Changes" section). - Adds new logs which show the duration of requests to the builder/local EL. - Refactors existing logs for consistency and so that the `parent_hash` is included in all relevant logs (we can grep for this field when trying to trace the flow of block production). Additionally, when I was implementing this PR I noticed that we skip some verification of the builder payload in the scenario where the builder returns `Ok` but the local EL returns with `Err`. Namely, we were skipping the bid signature and other values like parent hash and prev randao. In this PR I've changed it so we *always* check these values and reject the bid if they're incorrect. With these changes, we'll sometimes choose to skip a proposal rather than propose something invalid -- that's the only side-effect to the changes that I can see. ## Metrics Changes - Changed: `execution_layer_request_times`: - `method = "get_blinded_payload_local"`: time taken to get a payload from a local EE. - `method = "get_blinded_payload_builder"`: time taken to get a blinded payload from a builder. - `method = "post_blinded_payload_builder"`: time taken to get a builder to reveal a payload they've previously supplied us. - `execution_layer_get_payload_outcome` - `outcome = "success"`: we successfully produced a payload from a builder or local EE. - `outcome = "failure"`: we were unable to get a payload from a builder or local EE. - New: `execution_layer_builder_reveal_payload_outcome` - `outcome = "success"`: a builder revealed a payload from a signed, blinded block. - `outcome = "failure"`: the builder did not reveal the payload. - New: `execution_layer_get_payload_source` - `source = "builder"`: we used a payload from a builder to produce a block. 
- `source = "local"`: we used a payload from a local EE to produce a block. - New: `execution_layer_get_payload_builder_rejections` has a `reason` field to describe why we rejected a payload from a builder. - New: `execution_layer_payload_bids` tracks the bid (in gwei) from the builder or local EE (local EE not yet supported, waiting on EEs to expose the value). Can only record values that fit inside an i64 (roughly 9 billion ETH). ## Additional Info NA
2022-11-29 05:51:42 +00:00
/// Label value `"local"`.
/// NOTE(review): presumably the `source` label value for payloads from a local EE — confirm at call sites.
pub const LOCAL: &str = "local";
/// Label value `"builder"`.
/// NOTE(review): presumably the `source` label value for payloads from a builder — confirm at call sites.
pub const BUILDER: &str = "builder";
/// Label value `"success"`.
/// NOTE(review): presumably an `outcome` label value — confirm at call sites.
pub const SUCCESS: &str = "success";
/// Label value `"failure"`; counterpart of [`SUCCESS`].
pub const FAILURE: &str = "failure";
lazy_static::lazy_static! {
/// Counter registered as `execution_layer_proposer_inserted`; per its help text it
/// counts the times a new proposer becomes known. Held as a `Result`, so users of
/// this static must handle the `Err` case.
pub static ref EXECUTION_LAYER_PROPOSER_INSERTED: Result<IntCounter> =
    try_create_int_counter(
        "execution_layer_proposer_inserted",
        "Count of times a new proposer is known",
    );
/// Counter registered as `execution_layer_proposer_data_updated`; per its help text
/// it counts the times new proposer data is supplied. Held as a `Result`, so users
/// of this static must handle the `Err` case.
pub static ref EXECUTION_LAYER_PROPOSER_DATA_UPDATED: Result<IntCounter> =
    try_create_int_counter(
        "execution_layer_proposer_data_updated",
        "Count of times new proposer data is supplied",
    );
Improve debugging experience for builder proposals (#3725) ## Issue Addressed NA ## Proposed Changes This PR sets out to improve the logging/metrics experience when interacting with the builder. Namely, it: - Adds/changes metrics (see "Metrics Changes" section). - Adds new logs which show the duration of requests to the builder/local EL. - Refactors existing logs for consistency and so that the `parent_hash` is included in all relevant logs (we can grep for this field when trying to trace the flow of block production). Additionally, when I was implementing this PR I noticed that we skip some verification of the builder payload in the scenario where the builder returns `Ok` but the local EL returns with `Err`. Namely, we were skipping the bid signature and other values like parent hash and prev randao. In this PR I've changed it so we *always* check these values and reject the bid if they're incorrect. With these changes, we'll sometimes choose to skip a proposal rather than propose something invalid -- that's the only side-effect to the changes that I can see. ## Metrics Changes - Changed: `execution_layer_request_times`: - `method = "get_blinded_payload_local"`: time taken to get a payload from a local EE. - `method = "get_blinded_payload_builder"`: time taken to get a blinded payload from a builder. - `method = "post_blinded_payload_builder"`: time taken to get a builder to reveal a payload they've previously supplied us. - `execution_layer_get_payload_outcome` - `outcome = "success"`: we successfully produced a payload from a builder or local EE. - `outcome = "failure"`: we were unable to get a payload from a builder or local EE. - New: `execution_layer_builder_reveal_payload_outcome` - `outcome = "success"`: a builder revealed a payload from a signed, blinded block. - `outcome = "failure"`: the builder did not reveal the payload. - New: `execution_layer_get_payload_source` - `source = "builder"`: we used a payload from a builder to produce a block. 
- `source = "local"`: we used a payload from a local EE to produce a block. - New: `execution_layer_get_payload_builder_rejections` has a `reason` field to describe why we rejected a payload from a builder. - New: `execution_layer_payload_bids` tracks the bid (in gwei) from the builder or local EE (local EE not yet supported, waiting on EEs to expose the value). Can only record values that fit inside an i64 (roughly 9 billion ETH). ## Additional Info NA
2022-11-29 05:51:42 +00:00
pub static ref EXECUTION_LAYER_REQUEST_TIMES: Result<HistogramVec> =
try_create_histogram_vec_with_buckets(
"execution_layer_request_times",
"Duration of calls to ELs",
Improve debugging experience for builder proposals (#3725) ## Issue Addressed NA ## Proposed Changes This PR sets out to improve the logging/metrics experience when interacting with the builder. Namely, it: - Adds/changes metrics (see "Metrics Changes" section). - Adds new logs which show the duration of requests to the builder/local EL. - Refactors existing logs for consistency and so that the `parent_hash` is include in all relevant logs (we can grep for this field when trying to trace the flow of block production). Additionally, when I was implementing this PR I noticed that we skip some verification of the builder payload in the scenario where the builder return `Ok` but the local EL returns with `Err`. Namely, we were skipping the bid signature and other values like parent hash and prev randao. In this PR I've changed it so we *always* check these values and reject the bid if they're incorrect. With these changes, we'll sometimes choose to skip a proposal rather than propose something invalid -- that's the only side-effect to the changes that I can see. ## Metrics Changes - Changed: `execution_layer_request_times`: - `method = "get_blinded_payload_local"`: time taken to get a payload from a local EE. - `method = "get_blinded_payload_builder"`: time taken to get a blinded payload from a builder. - `method = "post_blinded_payload_builder"`: time taken to get a builder to reveal a payload they've previously supplied us. - `execution_layer_get_payload_outcome` - `outcome = "success"`: we successfully produced a payload from a builder or local EE. - `outcome = "failure"`: we were unable to get a payload from a builder or local EE. - New: `execution_layer_builder_reveal_payload_outcome` - `outcome = "success"`: a builder revealed a payload from a signed, blinded block. - `outcome = "failure"`: the builder did not reveal the payload. - New: `execution_layer_get_payload_source` - `type = "builder"`: we used a payload from a builder to produce a block. 
- `type = "local"`: we used a payload from a local EE to produce a block. - New: `execution_layer_get_payload_builder_rejections` has a `reason` field to describe why we rejected a payload from a builder. - New: `execution_layer_payload_bids` tracks the bid (in gwei) from the builder or local EE (local EE not yet supported, waiting on EEs to expose the value). Can only record values that fit inside an i64 (roughly 9 million ETH). ## Additional Info NA
2022-11-29 05:51:42 +00:00
decimal_buckets(-2, 1),
&["method"]
);
/// Histogram registered as `execution_layer_payload_attributes_lookahead`; per its
/// help text it tracks the duration between an fcU call carrying `PayloadAttributes`
/// and the moment the block should be produced. No explicit buckets are passed here.
pub static ref EXECUTION_LAYER_PAYLOAD_ATTRIBUTES_LOOKAHEAD: Result<Histogram> =
    try_create_histogram(
        "execution_layer_payload_attributes_lookahead",
        "Duration between an fcU call with PayloadAttributes and when the block should be produced",
    );
/// Counter vector registered as `execution_layer_pre_prepared_payload_id`, keyed by
/// a single `event` label; per its help text it records hits or misses for already
/// having a payload id prepared before payload production.
pub static ref EXECUTION_LAYER_PRE_PREPARED_PAYLOAD_ID: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_pre_prepared_payload_id",
        "Indicates hits or misses for already having prepared a payload id before payload production",
        &["event"]
    );
Separate execution payloads in the DB (#3157) ## Proposed Changes Reduce post-merge disk usage by not storing finalized execution payloads in Lighthouse's database. :warning: **This is achieved in a backwards-incompatible way for networks that have already merged** :warning:. Kiln users and shadow fork enjoyers will be unable to downgrade after running the code from this PR. The upgrade migration may take several minutes to run, and can't be aborted after it begins. The main changes are: - New column in the database called `ExecPayload`, keyed by beacon block root. - The `BeaconBlock` column now stores blinded blocks only. - Lots of places that previously used full blocks now use blinded blocks, e.g. analytics APIs, block replay in the DB, etc. - On finalization: - `prune_abandoned_forks` deletes non-canonical payloads whilst deleting non-canonical blocks. - `migrate_db` deletes finalized canonical payloads whilst deleting finalized states. - Conversions between blinded and full blocks are implemented in a compositional way, duplicating some work from Sean's PR #3134. - The execution layer has a new `get_payload_by_block_hash` method that reconstructs a payload using the EE's `eth_getBlockByHash` call. - I've tested manually that it works on Kiln, using Geth and Nethermind. - This isn't necessarily the most efficient method, and new engine APIs are being discussed to improve this: https://github.com/ethereum/execution-apis/pull/146. - We're depending on the `ethers` master branch, due to lots of recent changes. We're also using a workaround for https://github.com/gakonst/ethers-rs/issues/1134. - Payload reconstruction is used in the HTTP API via `BeaconChain::get_block`, which is now `async`. Due to the `async` fn, the `blocking_json` wrapper has been removed. - Payload reconstruction is used in network RPC to serve blocks-by-{root,range} responses. 
Here the `async` adjustment is messier, although I think I've managed to come up with a reasonable compromise: the handlers take the `SendOnDrop` by value so that they can drop it on _task completion_ (after the `fn` returns). Still, this is introducing disk reads onto core executor threads, which may have a negative performance impact (thoughts appreciated). ## Additional Info - [x] For performance it would be great to remove the cloning of full blocks when converting them to blinded blocks to write to disk. I'm going to experiment with a `put_block` API that takes the block by value, breaks it into a blinded block and a payload, stores the blinded block, and then re-assembles the full block for the caller. - [x] We should measure the latency of blocks-by-root and blocks-by-range responses. - [x] We should add integration tests that stress the payload reconstruction (basic tests done, issue for more extensive tests: https://github.com/sigp/lighthouse/issues/3159) - [x] We should (manually) test the schema v9 migration from several prior versions, particularly as blocks have changed on disk and some migrations rely on being able to load blocks. Co-authored-by: Paul Hauner <paul@paulhauner.com>
2022-05-12 00:42:17 +00:00
/// Histogram registered as `execution_layer_get_payload_by_block_hash_time`; per its
/// help text it measures the time to reconstruct a payload from the EE using
/// `eth_getBlockByHash`. No explicit buckets are passed here.
pub static ref EXECUTION_LAYER_GET_PAYLOAD_BY_BLOCK_HASH: Result<Histogram> =
    try_create_histogram(
        "execution_layer_get_payload_by_block_hash_time",
        "Time to reconstruct a payload from the EE using eth_getBlockByHash",
    );
Verify execution block hashes during finalized sync (#3794) ## Issue Addressed Recent discussions with other client devs about optimistic sync have revealed a conceptual issue with the optimisation implemented in #3738. In designing that feature I failed to consider that the execution node checks the `blockHash` of the execution payload before responding with `SYNCING`, and that omitting this check entirely results in a degradation of the full node's validation. A node omitting the `blockHash` checks could be tricked by a supermajority of validators into following an invalid chain, something which is ordinarily impossible. ## Proposed Changes I've added verification of the `payload.block_hash` in Lighthouse. In case of failure we log a warning and fall back to verifying the payload with the execution client. I've used our existing dependency on `ethers_core` for RLP support, and a new dependency on Parity's `triehash` crate for the Merkle patricia trie. Although the `triehash` crate is currently unmaintained it seems like our best option at the moment (it is also used by Reth, and requires vastly less boilerplate than Parity's generic `trie-root` library). Block hash verification is pretty quick, about 500us per block on my machine (mainnet). The optimistic finalized sync feature can be disabled using `--disable-optimistic-finalized-sync` which forces full verification with the EL. ## Additional Info This PR also introduces a new dependency on our [`metastruct`](https://github.com/sigp/metastruct) library, which was perfectly suited to the RLP serialization method. There will likely be changes as `metastruct` grows, but I think this is a good way to start dogfooding it. I took inspiration from some Parity and Reth code while writing this, and have preserved the relevant license headers on the files containing code that was copied and modified.
2023-01-09 03:11:59 +00:00
/// Histogram registered as `execution_layer_verify_block_hash_time`; per its help
/// text it measures the time to verify the execution block hash in Lighthouse,
/// without the EL. Uses ten hand-picked bucket bounds from `10e-6` up to `500e-3`.
/// NOTE(review): bounds are presumably in seconds (10µs–500ms) — confirm.
pub static ref EXECUTION_LAYER_VERIFY_BLOCK_HASH: Result<Histogram> =
    try_create_histogram_with_buckets(
        "execution_layer_verify_block_hash_time",
        "Time to verify the execution block hash in Lighthouse, without the EL",
        Ok(vec![
            10e-6, 50e-6, 100e-6, 500e-6, 1e-3, 5e-3, 10e-3, 50e-3, 100e-3, 500e-3,
        ]),
    );
/// Counter vector registered as `execution_layer_payload_status`, keyed by two
/// labels, `method` and `status`; per its help text it indicates the payload status
/// returned for a particular method.
pub static ref EXECUTION_LAYER_PAYLOAD_STATUS: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_payload_status",
        "Indicates the payload status returned for a particular method",
        &["method", "status"]
    );
Improve debugging experience for builder proposals (#3725) ## Issue Addressed NA ## Proposed Changes This PR sets out to improve the logging/metrics experience when interacting with the builder. Namely, it: - Adds/changes metrics (see "Metrics Changes" section). - Adds new logs which show the duration of requests to the builder/local EL. - Refactors existing logs for consistency and so that the `parent_hash` is included in all relevant logs (we can grep for this field when trying to trace the flow of block production). Additionally, when I was implementing this PR I noticed that we skip some verification of the builder payload in the scenario where the builder returns `Ok` but the local EL returns with `Err`. Namely, we were skipping the bid signature and other values like parent hash and prev randao. In this PR I've changed it so we *always* check these values and reject the bid if they're incorrect. With these changes, we'll sometimes choose to skip a proposal rather than propose something invalid -- that's the only side-effect to the changes that I can see. ## Metrics Changes - Changed: `execution_layer_request_times`: - `method = "get_blinded_payload_local"`: time taken to get a payload from a local EE. - `method = "get_blinded_payload_builder"`: time taken to get a blinded payload from a builder. - `method = "post_blinded_payload_builder"`: time taken to get a builder to reveal a payload they've previously supplied us. - `execution_layer_get_payload_outcome` - `outcome = "success"`: we successfully produced a payload from a builder or local EE. - `outcome = "failure"`: we were unable to get a payload from a builder or local EE. - New: `execution_layer_builder_reveal_payload_outcome` - `outcome = "success"`: a builder revealed a payload from a signed, blinded block. - `outcome = "failure"`: the builder did not reveal the payload. - New: `execution_layer_get_payload_source` - `source = "builder"`: we used a payload from a builder to produce a block. 
- `source = "local"`: we used a payload from a local EE to produce a block. - New: `execution_layer_get_payload_builder_rejections` has a `reason` field to describe why we rejected a payload from a builder. - New: `execution_layer_payload_bids` tracks the bid (in gwei) from the builder or local EE (local EE not yet supported, waiting on EEs to expose the value). Can only record values that fit inside an i64 (roughly 9 billion ETH). ## Additional Info NA
2022-11-29 05:51:42 +00:00
/// Counter vector registered as `execution_layer_get_payload_outcome`, keyed by a
/// single `outcome` label; per its help text it records the success/failure outcomes
/// from calling `get_payload`.
pub static ref EXECUTION_LAYER_GET_PAYLOAD_OUTCOME: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_get_payload_outcome",
        "The success/failure outcomes from calling get_payload",
        &["outcome"]
    );
/// Counter vector registered as `execution_layer_builder_reveal_payload_outcome`,
/// keyed by a single `outcome` label; per its help text it records the
/// success/failure outcomes from a builder un-blinding a payload.
pub static ref EXECUTION_LAYER_BUILDER_REVEAL_PAYLOAD_OUTCOME: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_builder_reveal_payload_outcome",
        "The success/failure outcomes from a builder un-blinding a payload",
        &["outcome"]
    );
/// Counter vector registered as `execution_layer_get_payload_source`, keyed by a
/// single `source` label; per its help text it records the source of each payload
/// returned from `get_payload`.
pub static ref EXECUTION_LAYER_GET_PAYLOAD_SOURCE: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_get_payload_source",
        "The source of each payload returned from get_payload",
        &["source"]
    );
/// Counter vector registered as `execution_layer_get_payload_builder_rejections`,
/// keyed by a single `reason` label; per its help text it records the reasons why a
/// payload from a builder was rejected.
pub static ref EXECUTION_LAYER_GET_PAYLOAD_BUILDER_REJECTIONS: Result<IntCounterVec> =
    try_create_int_counter_vec(
        "execution_layer_get_payload_builder_rejections",
        "The reasons why a payload from a builder was rejected",
        &["reason"]
    );
/// Gauge vector registered as `execution_layer_payload_bids`, keyed by a single
/// `source` label; per its help text it holds the gwei bid value of payloads
/// received by local EEs or builders, limited to values representable in an `i64`.
pub static ref EXECUTION_LAYER_PAYLOAD_BIDS: Result<IntGaugeVec> =
    try_create_int_gauge_vec(
        "execution_layer_payload_bids",
        "The gwei bid value of payloads received by local EEs or builders. Only shows values up to i64::max_value.",
        &["source"]
    );
}