Mitigate issues from low RLIMIT_NPROC defaults. (fixes #337)

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk
2026-02-26 00:39:50 +00:00
parent 081394c313
commit 9e09162817
4 changed files with 45 additions and 8 deletions

View File

@@ -28,6 +28,28 @@ pub fn maximize_fd_limit() -> Result {
#[cfg(not(unix))]
pub fn maximize_fd_limit() -> Result { Ok(()) }
#[cfg(unix)]
/// Raise the soft RLIMIT_NPROC up to the hard limit.
///
/// Some distributions ship with very low default thread-count limits, much
/// like low default file-descriptor limits. Unlike fd limits the thread limit
/// is rarely hit, but on large systems (32+ cores) defaults as low as ~1024
/// have been observed and are problematic.
pub fn maximize_thread_limit() -> Result {
	use nix::sys::resource::setrlimit;

	let (soft_limit, hard_limit) = max_threads()?;

	// Nothing to do when the soft limit already meets the hard limit.
	if soft_limit >= hard_limit {
		return Ok(());
	}

	let raised = hard_limit.try_into()?;
	setrlimit(Resource::RLIMIT_NPROC, raised, raised)?;

	// NOTE(review): this panics (even in release) if the kernel reports a
	// different limit than was just set — confirm that is intended.
	assert_eq!((hard_limit, hard_limit), max_threads()?, "getrlimit != setrlimit");

	debug!(to = hard_limit, from = soft_limit, "Raised RLIMIT_NPROC");

	Ok(())
}
#[cfg(not(unix))]
/// No-op on non-Unix targets: there is no RLIMIT_NPROC to raise.
pub fn maximize_thread_limit() -> Result { Ok(()) }
#[cfg(unix)]
pub fn max_file_descriptors() -> Result<(usize, usize)> {
getrlimit(Resource::RLIMIT_NOFILE)

View File

@@ -1,7 +1,7 @@
use std::{path::PathBuf, sync::Arc};
use tuwunel_core::{
Server, debug,
Server, at, debug,
debug::INFO_SPAN_LEVEL,
debug_info, debug_warn, expected, info, is_equal_to,
utils::{
@@ -12,7 +12,7 @@ use tuwunel_core::{
stream::{AMPLIFICATION_LIMIT, WIDTH_LIMIT},
sys::{
compute::{available_parallelism, cores_available, is_core_available},
storage,
max_threads, storage,
},
},
};
@@ -116,6 +116,13 @@ pub(super) fn configure(server: &Arc<Server>) -> (Vec<usize>, Vec<usize>, Vec<us
})
.collect();
// Query getrlimit(2) to impose any additional restriction, divide to leave room
// for other threads in the process.
let max_threads = max_threads()
.map(at!(0))
.unwrap_or(usize::MAX)
.saturating_div(3);
// Determine an ideal max worker count based on true capacity. As stated prior
// the true value is rarely attainable in any thread-worker model, and clamped.
let max_workers = devices
@@ -126,6 +133,7 @@ pub(super) fn configure(server: &Arc<Server>) -> (Vec<usize>, Vec<usize>, Vec<us
.chain(default_worker_count.into_iter())
.fold(0_usize, usize::saturating_add)
.min(config.db_pool_max_workers)
.clamp(WORKER_LIMIT.0, max_threads)
.clamp(WORKER_LIMIT.0, WORKER_LIMIT.1);
// Tamper for the total number of workers by reducing the count for each group.

View File

@@ -16,7 +16,7 @@ use tuwunel_core::{
Result, debug, is_true,
utils::sys::{
compute::{nth_core_available, set_affinity},
thread_usage, usage,
max_threads,
},
};
@@ -50,13 +50,19 @@ pub fn new(args: Option<&Args>) -> Result<Runtime> {
.set(args.gc_muzzy)
.expect("set GC_MUZZY from program argument");
let max_blocking_threads = max_threads()
.expect("obtained RLIMIT_NPROC or default")
.0
.saturating_div(3)
.clamp(WORKER_MIN, MAX_BLOCKING_THREADS);
let mut builder = Builder::new_multi_thread();
builder
.enable_io()
.enable_time()
.thread_name(WORKER_NAME)
.worker_threads(args.worker_threads.max(WORKER_MIN))
.max_blocking_threads(MAX_BLOCKING_THREADS)
.max_blocking_threads(max_blocking_threads)
.thread_keep_alive(Duration::from_secs(WORKER_KEEPALIVE))
.global_queue_interval(args.global_event_interval)
.event_interval(args.kernel_event_interval)
@@ -110,7 +116,7 @@ pub fn shutdown(server: &Arc<Server>, runtime: Runtime) -> Result {
tuwunel_core::event!(LEVEL, ?runtime_metrics, "Final runtime metrics.");
}
if let Ok(resource_usage) = usage() {
if let Ok(resource_usage) = tuwunel_core::utils::sys::usage() {
tuwunel_core::event!(LEVEL, ?resource_usage, "Final resource usage.");
}
@@ -208,7 +214,7 @@ fn set_worker_mallctl(_: usize) {}
)]
fn thread_stop() {
if cfg!(any(tokio_unstable, not(feature = "release_max_log_level")))
&& let Ok(resource_usage) = thread_usage()
&& let Ok(resource_usage) = tuwunel_core::utils::sys::thread_usage()
{
tuwunel_core::debug!(?resource_usage, "Thread resource usage.");
}

View File

@@ -51,8 +51,9 @@ pub fn new(args: Option<&Args>, runtime: Option<&runtime::Handle>) -> Result<Arc
#[cfg(feature = "sentry_telemetry")]
let sentry_guard = crate::sentry::init(&config);
sys::maximize_fd_limit()
.expect("Unable to increase maximum soft and hard file descriptor limit");
sys::maximize_fd_limit().expect("Unable to increase maximum file descriptor limit");
sys::maximize_thread_limit().expect("Unable to increase maximum thread count limit");
let (_old_width, _new_width) = stream::set_width(config.stream_width_default);
let (_old_amp, _new_amp) = stream::set_amplification(config.stream_amplification);