diff --git a/Cargo.lock b/Cargo.lock index 14bbe131..9412ce53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5079,6 +5079,7 @@ dependencies = [ "criterion", "ctor", "futures", + "itertools 0.14.0", "log", "minicbor", "minicbor-serde", diff --git a/src/core/config/mod.rs b/src/core/config/mod.rs index 85a06213..0bb2da4e 100644 --- a/src/core/config/mod.rs +++ b/src/core/config/mod.rs @@ -1288,19 +1288,20 @@ pub struct Config { #[serde(default = "default_rocksdb_stats_level")] pub rocksdb_stats_level: u8, - /// Erases data no longer reachable in the current schema. The developers - /// expect this to be set to true which simplifies the schema and prevents - /// accumulation of old schemas remaining in the codebase forever. If this - /// is set to false, old columns which are not described in the current - /// schema will be ignored rather than erased, leaking their space. + /// Ignores the list of dropped columns set by developers. /// - /// This can be set to false when moving between versions in ways which are - /// not recommended or otherwise forbidden, or for diagnostic and - /// development purposes; requiring preservation across such movements. + /// This should be set to true when knowingly moving between versions in + /// ways which are not recommended or otherwise forbidden, or for + /// diagnostic and development purposes; requiring preservation across such + /// movements. /// - /// default: true - #[serde(default = "true_fn")] - pub rocksdb_drop_missing_columns: bool, + /// The developers' list of dropped columns is meant to safely reduce space + /// by erasing data no longer in use. If this is set to true, that storage + /// will not be reclaimed as intended. 
+ /// + /// default: false + #[serde(default)] + pub rocksdb_never_drop_columns: bool, /// This is a password that can be configured that will let you login to the /// server bot account (currently `@conduit`) for emergency troubleshooting diff --git a/src/database/Cargo.toml b/src/database/Cargo.toml index 89c63ca9..f1bd0bfa 100644 --- a/src/database/Cargo.toml +++ b/src/database/Cargo.toml @@ -57,6 +57,7 @@ async-channel.workspace = true const-str.workspace = true ctor.workspace = true futures.workspace = true +itertools.workspace = true log.workspace = true minicbor.workspace = true minicbor-serde.workspace = true diff --git a/src/database/engine/open.rs b/src/database/engine/open.rs index 5d63dcc8..315ec636 100644 --- a/src/database/engine/open.rs +++ b/src/database/engine/open.rs @@ -4,15 +4,12 @@ use std::{ sync::{Arc, atomic::AtomicU32}, }; +use itertools::Itertools; use rocksdb::{ColumnFamilyDescriptor, Options}; -use tuwunel_core::{Result, debug, implement, info, warn}; +use tuwunel_core::{Result, debug, debug_warn, implement, info, warn}; use super::{ - Db, Engine, - cf_opts::cf_options, - context, - db_opts::db_options, - descriptor::{self, Descriptor}, + Db, Engine, cf_opts::cf_options, context, db_opts::db_options, descriptor::Descriptor, repair::repair, }; use crate::{Context, or_else}; @@ -31,7 +28,7 @@ pub(crate) async fn open(ctx: Arc, desc: &[Descriptor]) -> Result, desc: &[Descriptor]) -> Result, db_opts: &Options, desc: &[Descriptor], -) -> Result> { +) -> Result<(Vec, Vec)> { let server = &ctx.server; let config = &server.config; let path = &config.database_path; let existing = Self::discover_cfs(path, db_opts); - let creating = desc - .iter() - .filter(|desc| !existing.contains(desc.name)); - + // Found columns which are not described. let missing = existing .iter() .filter(|&name| name != "default") .filter(|&name| !desc.iter().any(|desc| desc.name == name)); + // Described columns which are not found. 
+ let creating = desc + .iter() + .filter(|desc| !desc.dropped) + .filter(|desc| !existing.contains(desc.name)); + + // Found columns which are described as dropped. + let dropping = desc + .iter() + .filter(|desc| desc.dropped) + .filter(|desc| existing.contains(desc.name)) + .filter(|_| !config.rocksdb_never_drop_columns); + + // Described dropped columns which are no longer found. + let dropped = desc + .iter() + .filter(|desc| desc.dropped) + .filter(|desc| !existing.contains(desc.name)); + debug!( existing = existing.len(), described = desc.len(), missing = missing.clone().count(), + dropped = dropped.clone().count(), creating = creating.clone().count(), + dropping = dropping.clone().count(), "Discovered database columns" ); missing.clone().for_each(|name| { - debug!("Found unrecognized column {name:?} in existing database."); + debug_warn!("Found undescribed column {name:?} in existing database."); + }); + + dropped.map(|desc| desc.name).for_each(|name| { + debug!("Previously dropped column {name:?} no longer found in database."); }); creating.map(|desc| desc.name).for_each(|name| { debug!("Creating new column {name:?} not previously found in existing database."); }); - let missing_descriptors = missing + dropping .clone() - .filter(|_| config.rocksdb_drop_missing_columns) - .map(|_| descriptor::DROPPED); + .map(|desc| desc.name) + .for_each(|name| { + warn!( + "Column {name:?} has been scheduled for deletion. Storage may not appear \ + reclaimed until further restart or compaction." 
+ ); + }); - let cfopts: Vec<_> = desc - .iter() - .copied() - .chain(missing_descriptors) - .map(|ref desc| cf_options(ctx, db_opts.clone(), desc)) - .collect::>()?; + let dropping_names: Vec<_> = dropping + .clone() + .map(|desc| desc.name) + .map(ToOwned::to_owned) + .collect(); let cfds: Vec<_> = desc .iter() - .map(|desc| desc.name) - .map(ToOwned::to_owned) - .chain(missing.cloned()) - .zip(cfopts.into_iter()) - .map(|(name, opts)| ColumnFamilyDescriptor::new(name, opts)) - .collect(); + .filter(|desc| !desc.dropped) + .chain(dropping) + .copied() + .inspect(|desc| debug!(name = desc.name, "Described column")) + .map(|desc| Ok((desc.name.to_owned(), cf_options(ctx, db_opts.clone(), &desc)?))) + .map_ok(|(name, opts)| ColumnFamilyDescriptor::new(name, opts)) + .collect::>()?; - Ok(cfds) + Ok((cfds, dropping_names)) } #[implement(Engine)] diff --git a/src/database/maps.rs b/src/database/maps.rs index d0ef6c22..2d77526d 100644 --- a/src/database/maps.rs +++ b/src/database/maps.rs @@ -16,6 +16,7 @@ pub(super) fn open(engine: &Arc) -> Result { open_list(engine, MAP #[tracing::instrument(name = "maps", level = "debug", skip_all)] pub(super) fn open_list(engine: &Arc, maps: &[Descriptor]) -> Result { maps.iter() + .filter(|desc| !desc.dropped) .map(|desc| Ok((desc.name, Map::open(engine, desc.name)?))) .collect() } @@ -165,6 +166,10 @@ pub(super) static MAPS: &[Descriptor] = &[ name: "roomid_joinedcount", ..descriptor::RANDOM_SMALL }, + Descriptor { + name: "roomid_maxremotepowerlevel", + ..descriptor::RANDOM_SMALL + }, Descriptor { name: "roomid_pduleaves", ..descriptor::RANDOM_SMALL @@ -447,8 +452,4 @@ pub(super) static MAPS: &[Descriptor] = &[ name: "userroomid_notificationcount", ..descriptor::RANDOM }, - Descriptor { - name: "roomid_maxremotepowerlevel", - ..descriptor::RANDOM_SMALL - }, ]; diff --git a/src/database/mod.rs b/src/database/mod.rs index a09b1afa..434b8435 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1,4 +1,4 @@ 
-#![type_length_limit = "8192"] +#![type_length_limit = "65536"] extern crate rust_rocksdb as rocksdb; diff --git a/tuwunel-example.toml b/tuwunel-example.toml index ead806d9..7e323e99 100644 --- a/tuwunel-example.toml +++ b/tuwunel-example.toml @@ -1091,17 +1091,18 @@ # #rocksdb_stats_level = 1 -# Erases data no longer reachable in the current schema. The developers -# expect this to be set to true which simplifies the schema and prevents -# accumulation of old schemas remaining in the codebase forever. If this -# is set to false, old columns which are not described in the current -# schema will be ignored rather than erased, leaking their space. +# Ignores the list of dropped columns set by developers. # -# This can be set to false when moving between versions in ways which are -# not recommended or otherwise forbidden, or for diagnostic and -# development purposes; requiring preservation across such movements. +# This should be set to true when knowingly moving between versions in +# ways which are not recommended or otherwise forbidden, or for +# diagnostic and development purposes; requiring preservation across such +# movements. # -#rocksdb_drop_missing_columns = true +# The developers' list of dropped columns is meant to safely reduce space +# by erasing data no longer in use. If this is set to true, that storage +# will not be reclaimed as intended. +# +#rocksdb_never_drop_columns = false # This is a password that can be configured that will let you login to the # server bot account (currently `@conduit`) for emergency troubleshooting