Mitigate issues from low RLIMIT_NPROC defaults. (fixes #337)

Signed-off-by: Jason Volk <jason@zemos.net>
2026-02-26 00:39:50 +00:00
parent 081394c313
commit 9e09162817
4 changed files with 45 additions and 8 deletions
--- a/src/core/utils/sys/limits.rs
+++ b/src/core/utils/sys/limits.rs
@@ -28,6 +28,28 @@ pub fn maximize_fd_limit() -> Result {
 #[cfg(not(unix))]
 pub fn maximize_fd_limit() -> Result { Ok(()) }

+#[cfg(unix)]
+/// Raises the soft `RLIMIT_NPROC` to the hard limit if it is lower, propagating
+/// any error and panicking if the re-read limits do not match. Like fd limits,
+/// some distributions ship low defaults (~1024 has been observed); these are
+/// rarely reached, but are problematic on large systems (32+ cores).
+pub fn maximize_thread_limit() -> Result {
+	use nix::sys::resource::setrlimit;
+
+	let (soft, hard) = max_threads()?;
+	if soft >= hard {
+		return Ok(());
+	}
+	let raised = hard.try_into()?;
+	setrlimit(Resource::RLIMIT_NPROC, raised, raised)?;
+	assert_eq!((hard, hard), max_threads()?, "getrlimit != setrlimit");
+	debug!(to = hard, from = soft, "Raised RLIMIT_NPROC");
+
+	Ok(())
+}
+
+#[cfg(not(unix))]
+pub fn maximize_thread_limit() -> Result { Ok(()) } // no-op on non-unix targets
+
 #[cfg(unix)]
 pub fn max_file_descriptors() -> Result<(usize, usize)> {
 	getrlimit(Resource::RLIMIT_NOFILE)
--- a/src/database/pool/configure.rs
+++ b/src/database/pool/configure.rs
@@ -1,7 +1,7 @@
 use std::{path::PathBuf, sync::Arc};

 use tuwunel_core::{
-	Server, debug,
+	Server, at, debug,
 	debug::INFO_SPAN_LEVEL,
 	debug_info, debug_warn, expected, info, is_equal_to,
 	utils::{
@@ -12,7 +12,7 @@ use tuwunel_core::{
 		stream::{AMPLIFICATION_LIMIT, WIDTH_LIMIT},
 		sys::{
 			compute::{available_parallelism, cores_available, is_core_available},
-			storage,
+			max_threads, storage,
 		},
 	},
 };
@@ -116,6 +116,13 @@ pub(super) fn configure(server: &Arc<Server>) -> (Vec<usize>, Vec<usize>, Vec<us
 		})
 		.collect();

+	// Query getrlimit(2) to impose any additional restriction; use a third of the
+	// soft limit to leave room for other threads in the process.
+	let max_threads = max_threads()
+		.map(at!(0))
+		.unwrap_or(usize::MAX)
+		.saturating_div(3);
+
 	// Determine an ideal max worker count based on true capacity. As stated prior
 	// the true value is rarely attainable in any thread-worker model, and clamped.
 	let max_workers = devices
@@ -126,6 +133,7 @@ pub(super) fn configure(server: &Arc<Server>) -> (Vec<usize>, Vec<usize>, Vec<us
 		.chain(default_worker_count.into_iter())
 		.fold(0_usize, usize::saturating_add)
 		.min(config.db_pool_max_workers)
+		.clamp(WORKER_LIMIT.0, max_threads)
 		.clamp(WORKER_LIMIT.0, WORKER_LIMIT.1);

 	// Tamper for the total number of workers by reducing the count for each group.
--- a/src/main/runtime.rs
+++ b/src/main/runtime.rs
@@ -16,7 +16,7 @@ use tuwunel_core::{
 	Result, debug, is_true,
 	utils::sys::{
 		compute::{nth_core_available, set_affinity},
-		thread_usage, usage,
+		max_threads,
 	},
 };

@@ -50,13 +50,19 @@ pub fn new(args: Option<&Args>) -> Result<Runtime> {
 		.set(args.gc_muzzy)
 		.expect("set GC_MUZZY from program argument");

+	let max_blocking_threads = max_threads()
+		.expect("obtained RLIMIT_NPROC or default")
+		.0
+		.saturating_div(3)
+		.clamp(WORKER_MIN, MAX_BLOCKING_THREADS);
+
 	let mut builder = Builder::new_multi_thread();
 	builder
 		.enable_io()
 		.enable_time()
 		.thread_name(WORKER_NAME)
 		.worker_threads(args.worker_threads.max(WORKER_MIN))
-		.max_blocking_threads(MAX_BLOCKING_THREADS)
+		.max_blocking_threads(max_blocking_threads)
 		.thread_keep_alive(Duration::from_secs(WORKER_KEEPALIVE))
 		.global_queue_interval(args.global_event_interval)
 		.event_interval(args.kernel_event_interval)
@@ -110,7 +116,7 @@ pub fn shutdown(server: &Arc<Server>, runtime: Runtime) -> Result {
 		tuwunel_core::event!(LEVEL, ?runtime_metrics, "Final runtime metrics.");
 	}

-	if let Ok(resource_usage) = usage() {
+	if let Ok(resource_usage) = tuwunel_core::utils::sys::usage() {
 		tuwunel_core::event!(LEVEL, ?resource_usage, "Final resource usage.");
 	}

@@ -208,7 +214,7 @@ fn set_worker_mallctl(_: usize) {}
 )]
 fn thread_stop() {
 	if cfg!(any(tokio_unstable, not(feature = "release_max_log_level")))
-		&& let Ok(resource_usage) = thread_usage()
+		&& let Ok(resource_usage) = tuwunel_core::utils::sys::thread_usage()
 	{
 		tuwunel_core::debug!(?resource_usage, "Thread resource usage.");
 	}
--- a/src/main/server.rs
+++ b/src/main/server.rs
@@ -51,8 +51,9 @@ pub fn new(args: Option<&Args>, runtime: Option<&runtime::Handle>) -> Result<Arc
 	#[cfg(feature = "sentry_telemetry")]
 	let sentry_guard = crate::sentry::init(&config);

-	sys::maximize_fd_limit()
-		.expect("Unable to increase maximum soft and hard file descriptor limit");
+	sys::maximize_fd_limit().expect("Unable to increase maximum file descriptor limit");
+
+	sys::maximize_thread_limit().expect("Unable to increase maximum thread count limit");

 	let (_old_width, _new_width) = stream::set_width(config.stream_width_default);
 	let (_old_amp, _new_amp) = stream::set_amplification(config.stream_amplification);