From 375e2b49b3696115ac0b6bb6defef365a46374ec Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Fri, 1 Apr 2022 07:16:25 +0000 Subject: [PATCH] Conserve disk space by raising default SPRP (#3137) ## Proposed Changes Increase the default `--slots-per-restore-point` to 8192 for a 4x reduction in freezer DB disk usage. Existing nodes that use the previous default of 2048 will be left unchanged. Newly synced nodes (with or without checkpoint sync) will use the new 8192 default. Long-term we could do away with the freezer DB entirely for validator-only nodes, but this change is much simpler and grants us some extra space in the short term. We can also roll it out gradually across our nodes by purging databases one by one, while keeping the Ansible config the same. ## Additional Info We ignore a change from 2048 to 8192 if the user hasn't set the 8192 explicitly. We fire a debug log in the case where we do ignore: ``` DEBG Ignoring slots-per-restore-point config in favour of on-disk value, on_disk: 2048, config: 8192 ``` --- beacon_node/http_api/src/database.rs | 2 ++ beacon_node/src/config.rs | 17 ++++++++---- beacon_node/store/src/config.rs | 6 ++++- beacon_node/store/src/hot_cold_store.rs | 35 ++++++++++++++++++++++--- book/src/advanced_database.md | 24 ++++++++++++----- book/src/api-lighthouse.md | 6 +++++ common/eth2/src/lighthouse.rs | 3 ++- database_manager/src/lib.rs | 4 ++- lighthouse/tests/beacon_node.rs | 34 ++++++++++++++++++++++++ 9 files changed, 114 insertions(+), 17 deletions(-) diff --git a/beacon_node/http_api/src/database.rs b/beacon_node/http_api/src/database.rs index e91188334..3a7c81ad8 100644 --- a/beacon_node/http_api/src/database.rs +++ b/beacon_node/http_api/src/database.rs @@ -9,10 +9,12 @@ pub fn info( ) -> Result { let store = &chain.store; let split = store.get_split_info(); + let config = store.get_config().clone(); let anchor = store.get_anchor_info(); Ok(DatabaseInfo { schema_version: CURRENT_SCHEMA_VERSION.as_u64(), + config, split, anchor, }) diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 7f45ad355..461f230d2 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -284,7 +284,9 @@ pub fn get_config( client_config.freezer_db_path = Some(PathBuf::from(freezer_dir)); } - client_config.store.slots_per_restore_point = get_slots_per_restore_point::(cli_args)?; + let (sprp, sprp_explicit) = get_slots_per_restore_point::(cli_args)?; + client_config.store.slots_per_restore_point = sprp; + client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit; if let Some(block_cache_size) = cli_args.value_of("block-cache-size") { client_config.store.block_cache_size = block_cache_size @@ -813,15 +815,20 @@ pub fn get_data_dir(cli_args: &ArgMatches) -> PathBuf { } /// Get the `slots_per_restore_point` value to use for the database. -pub fn get_slots_per_restore_point(cli_args: &ArgMatches) -> Result { +/// +/// Return `(sprp, set_explicitly)` where `set_explicitly` is `true` if the user provided the value. +pub fn get_slots_per_restore_point( + cli_args: &ArgMatches, +) -> Result<(u64, bool), String> { if let Some(slots_per_restore_point) = clap_utils::parse_optional(cli_args, "slots-per-restore-point")? { - Ok(slots_per_restore_point) + Ok((slots_per_restore_point, true)) } else { - Ok(std::cmp::min( + let default = std::cmp::min( E::slots_per_historical_root() as u64, store::config::DEFAULT_SLOTS_PER_RESTORE_POINT, - )) + ); + Ok((default, false)) } } diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index 208776c1e..4268ec2e9 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -4,7 +4,8 @@ use ssz::{Decode, Encode}; use ssz_derive::{Decode, Encode}; use types::{EthSpec, MinimalEthSpec}; -pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048; +pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048; +pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192; pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 5; /// Database configuration parameters. @@ -12,6 +13,8 @@ pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 5; pub struct StoreConfig { /// Number of slots to wait between storing restore points in the freezer database. pub slots_per_restore_point: u64, + /// Flag indicating whether the `slots_per_restore_point` was set explicitly by the user. + pub slots_per_restore_point_set_explicitly: bool, /// Maximum number of blocks to store in the in-memory block cache. pub block_cache_size: usize, /// Whether to compact the database on initialization. @@ -36,6 +39,7 @@ impl Default for StoreConfig { Self { // Safe default for tests, shouldn't ever be read by a CLI node. slots_per_restore_point: MinimalEthSpec::slots_per_historical_root() as u64, + slots_per_restore_point_set_explicitly: false, block_cache_size: DEFAULT_BLOCK_CACHE_SIZE, compact_on_init: false, compact_on_prune: true, diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 153226f9a..2c31f7cf2 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1,7 +1,10 @@ use crate::chunked_vector::{ store_updated_vector, BlockRoots, HistoricalRoots, RandaoMixes, StateRoots, }; -use crate::config::{OnDiskStoreConfig, StoreConfig}; +use crate::config::{ + OnDiskStoreConfig, StoreConfig, DEFAULT_SLOTS_PER_RESTORE_POINT, + PREV_DEFAULT_SLOTS_PER_RESTORE_POINT, +}; use crate::forwards_iter::{HybridForwardsBlockRootsIterator, HybridForwardsStateRootsIterator}; use crate::impls::beacon_state::{get_full_state, store_full_state}; use crate::iter::{ParentRootBlockIterator, StateRootsIterator}; @@ -150,7 +153,7 @@ impl HotColdDB, LevelDB> { ) -> Result, Error> { Self::verify_slots_per_restore_point(config.slots_per_restore_point)?; - let db = Arc::new(HotColdDB { + let mut db = HotColdDB { split: RwLock::new(Split::default()), anchor_info: RwLock::new(None), cold_db: LevelDB::open(cold_path)?, @@ -160,10 +163,31 @@ impl HotColdDB, LevelDB> { spec, log, _phantom: PhantomData, - }); + }; + + // Allow the slots-per-restore-point value to stay at the previous default if the config + // uses the new default. Don't error on a failed read because the config itself may need + // migrating. + if let Ok(Some(disk_config)) = db.load_config() { + if !db.config.slots_per_restore_point_set_explicitly + && disk_config.slots_per_restore_point == PREV_DEFAULT_SLOTS_PER_RESTORE_POINT + && db.config.slots_per_restore_point == DEFAULT_SLOTS_PER_RESTORE_POINT + { + debug!( + db.log, + "Ignoring slots-per-restore-point config in favour of on-disk value"; + "config" => db.config.slots_per_restore_point, + "on_disk" => disk_config.slots_per_restore_point, + ); + + // Mutate the in-memory config so that it's compatible. + db.config.slots_per_restore_point = PREV_DEFAULT_SLOTS_PER_RESTORE_POINT; + } + } // Ensure that the schema version of the on-disk database matches the software. // If the version is mismatched, an automatic migration will be attempted. + let db = Arc::new(db); if let Some(schema_version) = db.load_schema_version()? { debug!( db.log, @@ -1108,6 +1132,11 @@ impl, Cold: ItemStore> HotColdDB .map_or(self.spec.genesis_slot, |anchor| anchor.oldest_block_slot) } + /// Return the in-memory configuration used by the database. + pub fn get_config(&self) -> &StoreConfig { + &self.config + } + /// Load previously-stored config from disk. fn load_config(&self) -> Result, Error> { self.hot_db.get(&CONFIG_KEY) diff --git a/book/src/advanced_database.md b/book/src/advanced_database.md index 02a344c74..178936cf6 100644 --- a/book/src/advanced_database.md +++ b/book/src/advanced_database.md @@ -23,27 +23,39 @@ states to slow down dramatically. A lower _slots per restore point_ value (SPRP) frequent restore points, while a higher SPRP corresponds to less frequent. The table below shows some example values. -| Use Case | SPRP | Yearly Disk Usage | Load Historical State | -| ---------------------- | -------------- | ----------------- | --------------------- | -| Block explorer/analysis | 32 | 1.4 TB | 155 ms | -| Default | 2048 | 23.1 GB | 10.2 s | -| Validator only | 8192 | 5.7 GB | 41 s | +| Use Case | SPRP | Yearly Disk Usage | Load Historical State | +| ---------------------- | -------------- | ----------------- | --------------------- | +| Block explorer/analysis | 32 | 1.4 TB | 155 ms | +| Hobbyist (prev. default) | 2048 | 23.1 GB | 10.2 s | +| Validator only (default) | 8192 | 5.7 GB | 41 s | As you can see, it's a high-stakes trade-off! The relationships to disk usage and historical state load time are both linear – doubling SPRP halves disk usage and doubles load time. The minimum SPRP is 32, and the maximum is 8192. +The default value is 8192 for databases synced from scratch using Lighthouse v2.2.0 or later, or +2048 for prior versions. Please see the section on [Defaults](#defaults) below. + The values shown in the table are approximate, calculated using a simple heuristic: each `BeaconState` consumes around 18MB of disk space, and each block replayed takes around 5ms. The **Yearly Disk Usage** column shows the approx size of the freezer DB _alone_ (hot DB not included), and the **Load Historical State** time is the worst-case load time for a state in the last slot before a restore point. +### Defaults + +As of Lighthouse v2.2.0, the default slots-per-restore-point value has been increased from 2048 +to 8192 in order to conserve disk space. Existing nodes will continue to use SPRP=2048 unless +re-synced. Note that it is currently not possible to change the SPRP without re-syncing, although +fast re-syncing may be achieved with [Checkpoint Sync](./checkpoint-sync.md). + +### CLI Configuration + To configure your Lighthouse node's database with a non-default SPRP, run your Beacon Node with the `--slots-per-restore-point` flag: ```bash -lighthouse beacon_node --slots-per-restore-point 8192 +lighthouse beacon_node --slots-per-restore-point 32 ``` ## Glossary diff --git a/book/src/api-lighthouse.md b/book/src/api-lighthouse.md index ea282cf2b..f5c4542b9 100644 --- a/book/src/api-lighthouse.md +++ b/book/src/api-lighthouse.md @@ -366,6 +366,12 @@ curl "http://localhost:5052/lighthouse/database/info" | jq ```json { "schema_version": 5, + "config": { + "slots_per_restore_point": 2048, + "block_cache_size": 5, + "compact_on_init": false, + "compact_on_prune": true + }, "split": { "slot": "2034912", "state_root": "0x11c8516aa7d4d1613e84121e3a557ceca34618b4c1a38f05b66ad045ff82b33b" diff --git a/common/eth2/src/lighthouse.rs b/common/eth2/src/lighthouse.rs index a2e4a66c4..91e6a5558 100644 --- a/common/eth2/src/lighthouse.rs +++ b/common/eth2/src/lighthouse.rs @@ -14,7 +14,7 @@ use reqwest::IntoUrl; use serde::{Deserialize, Serialize}; use ssz::four_byte_option_impl; use ssz_derive::{Decode, Encode}; -use store::{AnchorInfo, Split}; +use store::{AnchorInfo, Split, StoreConfig}; pub use attestation_performance::{ AttestationPerformance, AttestationPerformanceQuery, AttestationPerformanceStatistics, @@ -334,6 +334,7 @@ impl Eth1Block { #[derive(Debug, Serialize, Deserialize)] pub struct DatabaseInfo { pub schema_version: u64, + pub config: StoreConfig, pub split: Split, pub anchor: Option, } diff --git a/database_manager/src/lib.rs b/database_manager/src/lib.rs index eaf94d532..6717bb0f4 100644 --- a/database_manager/src/lib.rs +++ b/database_manager/src/lib.rs @@ -100,7 +100,9 @@ fn parse_client_config( client_config.freezer_db_path = Some(freezer_dir); } - client_config.store.slots_per_restore_point = get_slots_per_restore_point::(cli_args)?; + let (sprp, sprp_explicit) = get_slots_per_restore_point::(cli_args)?; + client_config.store.slots_per_restore_point = sprp; + client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit; Ok(client_config) } diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 4d596ce70..3088fa423 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -804,6 +804,40 @@ fn slots_per_restore_point_flag() { .run_with_zero_port() .with_config(|config| assert_eq!(config.store.slots_per_restore_point, 64)); } +#[test] +fn slots_per_restore_point_update_prev_default() { + use beacon_node::beacon_chain::store::config::{ + DEFAULT_SLOTS_PER_RESTORE_POINT, PREV_DEFAULT_SLOTS_PER_RESTORE_POINT, + }; + + CommandLineTest::new() + .flag("slots-per-restore-point", Some("2048")) + .run_with_zero_port() + .with_config_and_dir(|config, dir| { + // Check that 2048 is the previous default. + assert_eq!( + config.store.slots_per_restore_point, + PREV_DEFAULT_SLOTS_PER_RESTORE_POINT + ); + + // Restart the BN with the same datadir and the new default SPRP. It should + // allow this. + CommandLineTest::new() + .flag("datadir", Some(&dir.path().display().to_string())) + .flag("zero-ports", None) + .run_with_no_datadir() + .with_config(|config| { + // The dumped config will have the new default 8192 value, but the fact that + // the BN started and ran (with the same datadir) means that the override + // was successful. + assert_eq!( + config.store.slots_per_restore_point, + DEFAULT_SLOTS_PER_RESTORE_POINT + ); + }); + }) +} + #[test] fn block_cache_size_flag() { CommandLineTest::new()