Improve config options for missing and dropping db columns.

Implement actual drop functionality.

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk
2025-11-02 21:33:12 +00:00
parent abded2d442
commit 7320d0a40b
7 changed files with 90 additions and 54 deletions

1
Cargo.lock generated
View File

@@ -5079,6 +5079,7 @@ dependencies = [
"criterion",
"ctor",
"futures",
"itertools 0.14.0",
"log",
"minicbor",
"minicbor-serde",

View File

@@ -1288,19 +1288,20 @@ pub struct Config {
#[serde(default = "default_rocksdb_stats_level")]
pub rocksdb_stats_level: u8,
/// Erases data no longer reachable in the current schema. The developers
/// expect this to be set to true which simplifies the schema and prevents
/// accumulation of old schemas remaining in the codebase forever. If this
/// is set to false, old columns which are not described in the current
/// schema will be ignored rather than erased, leaking their space.
/// Ignores the list of dropped columns set by developers.
///
/// This can be set to false when moving between versions in ways which are
/// not recommended or otherwise forbidden, or for diagnostic and
/// development purposes; requiring preservation across such movements.
/// This should be set to true when knowingly moving between versions in
/// ways which are not recommended or otherwise forbidden, or for
/// diagnostic and development purposes; requiring preservation across such
/// movements.
///
/// default: true
#[serde(default = "true_fn")]
pub rocksdb_drop_missing_columns: bool,
/// The developers' list of dropped columns is meant to safely reduce space
/// by erasing data no longer in use. If this is set to true, that storage
/// will not be reclaimed as intended.
///
/// default: false
#[serde(default)]
pub rocksdb_never_drop_columns: bool,
/// This is a password that can be configured that will let you login to the
/// server bot account (currently `@conduit`) for emergency troubleshooting

View File

@@ -57,6 +57,7 @@ async-channel.workspace = true
const-str.workspace = true
ctor.workspace = true
futures.workspace = true
itertools.workspace = true
log.workspace = true
minicbor.workspace = true
minicbor-serde.workspace = true

View File

@@ -4,15 +4,12 @@ use std::{
sync::{Arc, atomic::AtomicU32},
};
use itertools::Itertools;
use rocksdb::{ColumnFamilyDescriptor, Options};
use tuwunel_core::{Result, debug, implement, info, warn};
use tuwunel_core::{Result, debug, debug_warn, implement, info, warn};
use super::{
Db, Engine,
cf_opts::cf_options,
context,
db_opts::db_options,
descriptor::{self, Descriptor},
Db, Engine, cf_opts::cf_options, context, db_opts::db_options, descriptor::Descriptor,
repair::repair,
};
use crate::{Context, or_else};
@@ -31,7 +28,7 @@ pub(crate) async fn open(ctx: Arc<Context>, desc: &[Descriptor]) -> Result<Arc<S
&ctx.row_cache.lock().expect("row cache locked"),
)?;
let cfds = Self::configure_cfds(&ctx, &db_opts, desc)?;
let (cfds, dropped) = Self::configure_cfds(&ctx, &db_opts, desc)?;
let num_cfds = cfds.len();
debug!("Configured {num_cfds} column descriptors...");
@@ -50,6 +47,13 @@ pub(crate) async fn open(ctx: Arc<Context>, desc: &[Descriptor]) -> Result<Arc<S
}
.or_else(or_else)?;
if !config.rocksdb_read_only && !config.rocksdb_secondary {
for name in &dropped {
debug!("Deleting dropped column {name:?} ...");
db.drop_cf(name).or_else(or_else)?;
}
}
info!(
columns = num_cfds,
sequence = %db.latest_sequence_number(),
@@ -74,59 +78,86 @@ fn configure_cfds(
ctx: &Arc<Context>,
db_opts: &Options,
desc: &[Descriptor],
) -> Result<Vec<ColumnFamilyDescriptor>> {
) -> Result<(Vec<ColumnFamilyDescriptor>, Vec<String>)> {
let server = &ctx.server;
let config = &server.config;
let path = &config.database_path;
let existing = Self::discover_cfs(path, db_opts);
let creating = desc
.iter()
.filter(|desc| !existing.contains(desc.name));
// Found columns which are not described.
let missing = existing
.iter()
.filter(|&name| name != "default")
.filter(|&name| !desc.iter().any(|desc| desc.name == name));
// Described columns which are not found.
let creating = desc
.iter()
.filter(|desc| !desc.dropped)
.filter(|desc| !existing.contains(desc.name));
// Found columns which are described as dropped.
let dropping = desc
.iter()
.filter(|desc| desc.dropped)
.filter(|desc| existing.contains(desc.name))
.filter(|_| !config.rocksdb_never_drop_columns);
// Described dropped columns which are no longer found.
let dropped = desc
.iter()
.filter(|desc| desc.dropped)
.filter(|desc| !existing.contains(desc.name));
debug!(
existing = existing.len(),
described = desc.len(),
missing = missing.clone().count(),
dropped = dropped.clone().count(),
creating = creating.clone().count(),
dropping = dropping.clone().count(),
"Discovered database columns"
);
missing.clone().for_each(|name| {
debug!("Found unrecognized column {name:?} in existing database.");
debug_warn!("Found undescribed column {name:?} in existing database.");
});
dropped.map(|desc| desc.name).for_each(|name| {
debug!("Previously dropped column {name:?} no longer found in database.");
});
creating.map(|desc| desc.name).for_each(|name| {
debug!("Creating new column {name:?} not previously found in existing database.");
});
let missing_descriptors = missing
dropping
.clone()
.filter(|_| config.rocksdb_drop_missing_columns)
.map(|_| descriptor::DROPPED);
.map(|desc| desc.name)
.for_each(|name| {
warn!(
"Column {name:?} has been scheduled for deletion. Storage may not appear \
reclaimed until further restart or compaction."
);
});
let cfopts: Vec<_> = desc
.iter()
.copied()
.chain(missing_descriptors)
.map(|ref desc| cf_options(ctx, db_opts.clone(), desc))
.collect::<Result<_>>()?;
let dropping_names: Vec<_> = dropping
.clone()
.map(|desc| desc.name)
.map(ToOwned::to_owned)
.collect();
let cfds: Vec<_> = desc
.iter()
.map(|desc| desc.name)
.map(ToOwned::to_owned)
.chain(missing.cloned())
.zip(cfopts.into_iter())
.map(|(name, opts)| ColumnFamilyDescriptor::new(name, opts))
.collect();
.filter(|desc| !desc.dropped)
.chain(dropping)
.copied()
.inspect(|desc| debug!(name = desc.name, "Described column"))
.map(|desc| Ok((desc.name.to_owned(), cf_options(ctx, db_opts.clone(), &desc)?)))
.map_ok(|(name, opts)| ColumnFamilyDescriptor::new(name, opts))
.collect::<Result<_>>()?;
Ok(cfds)
Ok((cfds, dropping_names))
}
#[implement(Engine)]

View File

@@ -16,6 +16,7 @@ pub(super) fn open(engine: &Arc<Engine>) -> Result<Maps> { open_list(engine, MAP
/// Opens a [`Map`] for each descriptor in `maps` that is not flagged as
/// dropped, collecting the results keyed by the descriptor's name.
///
/// Descriptors with `dropped` set are skipped entirely; no map is opened for
/// them. The first failure returned by `Map::open` is propagated to the
/// caller and ends the collection.
#[tracing::instrument(name = "maps", level = "debug", skip_all)]
pub(super) fn open_list(engine: &Arc<Engine>, maps: &[Descriptor]) -> Result<Maps> {
    // Only columns still live in the schema get an open handle.
    let live = maps.iter().filter(|entry| !entry.dropped);

    let opened = live.map(|entry| -> Result<_> {
        let name = entry.name;
        let map = Map::open(engine, name)?;
        Ok((name, map))
    });

    opened.collect()
}
@@ -165,6 +166,10 @@ pub(super) static MAPS: &[Descriptor] = &[
name: "roomid_joinedcount",
..descriptor::RANDOM_SMALL
},
Descriptor {
name: "roomid_maxremotepowerlevel",
..descriptor::RANDOM_SMALL
},
Descriptor {
name: "roomid_pduleaves",
..descriptor::RANDOM_SMALL
@@ -447,8 +452,4 @@ pub(super) static MAPS: &[Descriptor] = &[
name: "userroomid_notificationcount",
..descriptor::RANDOM
},
Descriptor {
name: "roomid_maxremotepowerlevel",
..descriptor::RANDOM_SMALL
},
];

View File

@@ -1,4 +1,4 @@
#![type_length_limit = "8192"]
#![type_length_limit = "65536"]
extern crate rust_rocksdb as rocksdb;

View File

@@ -1091,17 +1091,18 @@
#
#rocksdb_stats_level = 1
# Erases data no longer reachable in the current schema. The developers
# expect this to be set to true which simplifies the schema and prevents
# accumulation of old schemas remaining in the codebase forever. If this
# is set to false, old columns which are not described in the current
# schema will be ignored rather than erased, leaking their space.
# Ignores the list of dropped columns set by developers.
#
# This can be set to false when moving between versions in ways which are
# not recommended or otherwise forbidden, or for diagnostic and
# development purposes; requiring preservation across such movements.
# This should be set to true when knowingly moving between versions in
# ways which are not recommended or otherwise forbidden, or for
# diagnostic and development purposes; requiring preservation across such
# movements.
#
#rocksdb_drop_missing_columns = true
# The developers' list of dropped columns is meant to safely reduce space
# by erasing data no longer in use. If this is set to true, that storage
# will not be reclaimed as intended.
#
#rocksdb_never_drop_columns = false
# This is a password that can be configured that will let you login to the
# server bot account (currently `@conduit`) for emergency troubleshooting