diff --git a/.claude/rust-guidelines.txt b/.claude/rust-guidelines.txt new file mode 100644 index 0000000..31c512e --- /dev/null +++ b/.claude/rust-guidelines.txt @@ -0,0 +1,2436 @@ + + +# Pragmatic Rust Guidelines + +This file contains all guidelines concatenated for easy reference. + +--- + + +# AI Guidelines + + + +## Design with AI use in Mind (M-DESIGN-FOR-AI) { #M-DESIGN-FOR-AI } + +To maximize the utility you get from letting agents work in your code base. +0.1 + +As a general rule, making APIs easier to use for humans also makes them easier to use by AI. +If you follow the guidelines in this book, you should be in good shape. + +Rust's strong type system is a boon for agents, as their lack of genuine understanding can often be +counterbalanced by comprehensive compiler checks, which Rust provides in abundance. + +With that said, there are a few guidelines which are particularly important to help make AI coding in Rust more effective: + +* **Create Idiomatic Rust API Patterns**. The more your APIs, whether public or internal, look and feel like the majority of +Rust code in the world, the better it is for AI. Follow the [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/checklist.html) +along with the guidelines from [Library / UX](../libs/ux). + +* **Provide Thorough Docs**. Agents love good detailed docs. Include docs for all of your modules and public items in your crate. +Assume the reader has a solid, but not expert, level of understanding of Rust, and that the reader understands the standard library. +Follow +[C-CRATE-DOC](https://rust-lang.github.io/api-guidelines/checklist.html#c-crate-doc), +[C-FAILURE](https://rust-lang.github.io/api-guidelines/checklist.html#c-failure), +[C-LINK](https://rust-lang.github.io/api-guidelines/checklist.html#c-link), and +[M-MODULE-DOCS](../docs/#M-MODULE-DOCS) +[M-CANONICAL-DOCS](../docs/#M-CANONICAL-DOCS). + +* **Provide Thorough Examples**. 
Your documentation should have directly usable examples, the repository should include more elaborate ones. +Follow +[C-EXAMPLE](https://rust-lang.github.io/api-guidelines/checklist.html#c-example) +[C-QUESTION-MARK](https://rust-lang.github.io/api-guidelines/checklist.html#c-question-mark). + +* **Use Strong Types**. Avoid [primitive obsession](https://refactoring.guru/smells/primitive-obsession) by using strong types with strict well-documented semantics. +Follow +[C-NEWTYPE](https://rust-lang.github.io/api-guidelines/checklist.html#c-newtype). + +* **Make Your APIs Testable**. Design APIs which allow your customers to test their use of your API in unit tests. This might involve introducing some mocks, fakes, +or cargo features. AI agents need to be able to iterate quickly to prove that the code they are writing that calls your API is working +correctly. + +* **Ensure Test Coverage**. Your own code should have good test coverage over observable behavior. +This enables agents to work in a mostly hands-off mode when refactoring. + + +--- + + +# Application Guidelines + + + +## Applications may use Anyhow or Derivatives (M-APP-ERROR) { #M-APP-ERROR } + +To simplify application-level error handling. +0.1 + +> Note, this guideline is primarily a relaxation and clarification of [M-ERRORS-CANONICAL-STRUCTS]. + +Applications, and crates in your own repository exclusively used from your application, may use [anyhow](https://github.com/dtolnay/anyhow), +[eyre](https://github.com/eyre-rs/eyre) or similar application-level error crates instead of implementing their own types. + +For example, in your application crates you may just re-export and use eyre's common `Result` type, which should be able to automatically +handle all third party library errors, in particular the ones following +[M-ERRORS-CANONICAL-STRUCTS]. 
+ +```rust,ignore +use eyre::Result; + +fn start_application() -> Result<()> { + start_server()?; + Ok(()) +} +``` + +Once you selected your application error crate you should switch all application-level errors to that type, and you should not mix multiple +application-level error types. + +Libraries (crates used by more than one crate) should always follow [M-ERRORS-CANONICAL-STRUCTS] instead. + +[M-ERRORS-CANONICAL-STRUCTS]: ../libs/ux/#M-ERRORS-CANONICAL-STRUCTS + + + +## Use Mimalloc for Apps (M-MIMALLOC-APPS) { #M-MIMALLOC-APPS } + +To get significant performance for free. +0.1 + +Applications should set [mimalloc](https://crates.io/crates/mimalloc) as their global allocator. This usually results in notable performance +increases along allocating hot paths; we have seen up to 25% benchmark improvements. + +Changing the allocator only takes a few lines of code. Add mimalloc to your `Cargo.toml` like so: + +```toml +[dependencies] +mimalloc = { version = "0.1" } # Or later version if available +``` + +Then use it from your `main.rs`: + +```rust,ignore +use mimalloc::MiMalloc; + +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; +``` + + +--- + + +# Documentation + + + +## Documentation Has Canonical Sections (M-CANONICAL-DOCS) { #M-CANONICAL-DOCS } + +To follow established and expected Rust best practices. +1.0 + +Public library items must contain the canonical doc sections. The summary sentence must always be present. Extended documentation and examples +are strongly encouraged. The other sections must be present when applicable. + +```rust +/// Summary sentence < 15 words. +/// +/// Extended documentation in free form. +/// +/// # Examples +/// One or more examples that show API usage like so. 
+/// +/// # Errors +/// If fn returns `Result`, list known error conditions +/// +/// # Panics +/// If fn may panic, list when this may happen +/// +/// # Safety +/// If fn is `unsafe` or may otherwise cause UB, this section must list +/// all conditions a caller must uphold. +/// +/// # Abort +/// If fn may abort the process, list when this may happen. +pub fn foo() {} +``` + +In contrast to other languages, you should not create a table of parameters. Instead parameter use is explained in plain text. In other words, do not + +```rust,ignore +/// Copies a file. +/// +/// # Parameters +/// - src: The source. +/// - dst: The destination. +fn copy(src: File, dst: File) {} +``` + +but instead: + +```rust,ignore +/// Copies a file from `src` to `dst`. +fn copy(src: File, dst: File) {} +``` + +### Related Reading + +- Function docs include error, panic, and safety considerations ([C-FAILURE](https://rust-lang.github.io/api-guidelines/documentation.html#c-failure)) + + + +## Mark `pub use` Items with `#[doc(inline)]` (M-DOC-INLINE) { #M-DOC-INLINE } + +To make re-exported items 'fit in' with their non re-exported siblings. +1.0 + +When publicly re-exporting crate items via `pub use foo::Foo` or `pub use foo::*`, they show up in an opaque re-export block. In most cases, this is not +helpful to the reader: + +![TEXT](M-DOC-INLINE_BAD.png) + +Instead, you should annotate them with `#[doc(inline)]` at the `use` site, for them to be inlined organically: + +```rust,edition2021,ignore +# pub(crate) mod foo { pub struct Foo; } +#[doc(inline)] +pub use foo::*; + +// or + +#[doc(inline)] +pub use foo::Foo; +``` + +![TEXT](M-DOC-INLINE_GOOD.png) + +This does not apply to `std` or 3rd party types; these should always be re-exported without inlining to make it clear they are external. + +> ### Still avoid glob exports +> +> The `#[doc(inline)]` trick above does not change [M-NO-GLOB-REEXPORTS]; you generally should not re-export items via wildcards. 
+ +[M-NO-GLOB-REEXPORTS]: ../libs/resilience/#M-NO-GLOB-REEXPORTS + + + +## First Sentence is One Line; Approx. 15 Words (M-FIRST-DOC-SENTENCE) { #M-FIRST-DOC-SENTENCE } + +To make API docs easily skimmable. +1.0 + +When you document your item, the first sentence becomes the "summary sentence" that is extracted and shown in the module summary: + +```rust +/// This is the summary sentence, shown in the module summary. +/// +/// This is other documentation. It is only shown in that item's detail view. +/// Sentences here can be as long as you like and it won't cause any issues. +fn some_item() { } +``` + +Since Rust API documentation is rendered with a fixed max width, there is a naturally preferred sentence length you should not +exceed to keep things tidy on most screens. + +If you keep things in a line, your docs will become easily skimmable. Compare, for example, the standard library: + +![TEXT](M-FIRST-DOC-SENTENCE_GOOD.png) + +Otherwise, you might end up with _widows_ and a generally unpleasant reading flow: + +![TEXT](M-FIRST-DOC-SENTENCE_BAD.png) + +As a rule of thumb, the first sentence should not exceed **15 words**. + + + +## Has Comprehensive Module Documentation (M-MODULE-DOCS) { #M-MODULE-DOCS } + +To allow for better API docs navigation. +1.1 + +Any public library module must have `//!` module documentation, and the first sentence must follow [M-DOC-FIRST-SENTENCE]. + +```rust,edition2021,ignore +pub mod ffi { + //! Contains FFI abstractions. 
+ + pub struct String {} +} +``` + +The rest of the module documentation should be comprehensive, i.e., cover the most relevant technical aspects of the contained items, including + +- what the module contains +- when it should be used, possibly when not +- examples +- subsystem specifications (e.g., `std::fmt` [also describes its formatting language](https://doc.rust-lang.org/stable/std/fmt/index.html#formatting-parameters)) +- observable side effects, including what guarantees are made about these, if any +- relevant implementation details, e.g., the used system APIs + + Great examples include: + +- [`std::fmt`](https://doc.rust-lang.org/stable/std/fmt/index.html) +- [`std::pin`](https://doc.rust-lang.org/stable/std/pin/index.html) +- [`std::option`](https://doc.rust-lang.org/stable/std/option/index.html) + +This does not mean every module should contain all of these items. But if there is something to say about the interaction of the contained types, +their module documentation is the right place. + +[M-DOC-FIRST-SENTENCE]: ./#M-FIRST-DOC-SENTENCE + + +--- + + +# FFI Guidelines + + + +## Isolate DLL State Between FFI Libraries (M-ISOLATE-DLL-STATE) { #M-ISOLATE-DLL-STATE } + +To prevent data corruption and undefined behavior. +0.1 + +When loading multiple Rust-based dynamic libraries (DLLs) within one application, you may only share 'portable' state between these libraries. +Likewise, when authoring such libraries, you must only accept or provide 'portable' data from foreign DLLs. + +Portable here means data that is safe and consistent to process regardless of its origin. By definition, this is a subset of FFI-safe types. +A type is portable if it is `#[repr(C)]` (or similarly well-defined), and _all_ of the following: + +- It must not have any interaction with any `static` or thread local. +- It must not have any interaction with any `TypeId`.
+- It must not contain any value, pointer or reference to any non-portable data (it is valid to point into portable data within non-portable data, such as + sharing a reference to an ASCII string held in a `Box`). + +_Interaction_ means any computational relationship, and therefore also relates to how the type is used. Sending a `u128` between DLLs is OK, using it to +exchange a transmuted `TypeId` isn't. + +The underlying issue stems from the Rust compiler treating each DLL as an entirely new compilation artifact, akin to a standalone application. This means each DLL: + +- has its own set of `static` and thread-local variables, +- the type layout of any `#[repr(Rust)]` type (the default) can differ between compilations, +- has its own set of unique type IDs, differing from any other DLL. + +Notably, this affects: + +- ⚠️ any allocated instance, e.g., `String`, `Vec`, `Box`, ... +- ⚠️ any library relying on other statics, e.g., `tokio`, `log`, +- ⚠️ any struct not `#[repr(C)]`, +- ⚠️ any data structure relying on consistent `TypeId`. + +In practice, transferring any of the above between libraries leads to data loss, state corruption, and usually undefined behavior. + +Take particular note that this may also apply to types and methods that are invisible at the FFI boundary: + +```rust,ignore +/// A method in DLL1 that wants to use a common service from DLL2 +#[ffi_function] +fn use_common_service(common: &CommonService) { + // This has at least two issues: + // - `CommonService`, or ANY type nested deep within might have + // a different type layout in DLL2, leading to immediate + // undefined behavior (UB) ⚠️ + // - `do_work()` here looks like it will be invoked in DLL2, but + // the code executed will actually come from DLL1. 
This means that + // `do_work()` invoked here will see a data structure coming from + // DLL2, but will use statics from DLL1 ⚠️ + common.do_work(); +} +``` + + +--- + + +# Library Guidelines + + +--- + + +# Performance Guidelines + + + +## Identify, Profile, Optimize the Hot Path Early (M-HOTPATH) { #M-HOTPATH } + +To end up with high performance code. +0.1 + +You should, early in the development process, identify if your crate is performance or COGS relevant. If it is: + +- identify hot paths and create benchmarks around them, +- regularly run a profiler collecting CPU and allocation insights, +- document or communicate the most performance sensitive areas. + +For benchmarks we recommend [criterion](https://crates.io/crates/criterion) or [divan](https://crates.io/crates/divan). +If possible, benchmarks should not only measure elapsed wall time, but also used CPU time over all threads (this unfortunately +requires manual work and is not supported out of the box by the common benchmark utils). + +Profiling Rust on Windows works out of the box with [Intel VTune](https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler.html) +and [Superluminal](https://superluminal.eu/). However, to gain meaningful CPU insights you should enable debug symbols for benchmarks in your `Cargo.toml`: + +```toml +[profile.bench] +debug = 1 +``` + +Documenting the most performance sensitive areas helps other contributors make better decisions. This can be as simple as +sharing screenshots of your latest profiling hot spots. + +### Further Reading + +- [Performance Tips](https://cheats.rs/#performance-tips) + +> ### How much faster? +> +> Some of the most common 'language related' issues we have seen include: +> +> - frequent re-allocations, esp.
cloned, growing or `format!` assembled strings, +> - short lived allocations over bump allocations or similar, +> - memory copy overhead that comes from cloning Strings and collections, +> - repeated re-hashing of equal data structures +> - the use of Rust's default hasher where collision resistance wasn't an issue +> +> Anecdotally, we have seen ~15% benchmark gains on hot paths where only some of these `String` problems were +> addressed, and it appears that up to 50% could be achieved in highly optimized versions. + + + +## Optimize for Throughput, Avoid Empty Cycles (M-THROUGHPUT) { #M-THROUGHPUT } + +To ensure COGS savings at scale. +0.1 + +You should optimize your library for throughput, and one of your key metrics should be _items per CPU cycle_. + +This does not mean to neglect latency—after all you can scale for throughput, but not for latency. However, +in most cases you should not pay for latency with _empty cycles_ that come with single-item processing, contended locks and frequent task switching. + +Ideally, you should + +- partition reasonable chunks of work ahead of time, +- let individual threads and tasks deal with their slice of work independently, +- sleep or yield when no work is present, +- design your own APIs for batched operations, +- perform work via batched APIs where available, +- yield within long individual items, or between chunks of batches (see [M-YIELD-POINTS]), +- exploit CPU caches, temporal and spatial locality. + +You should not: + +- hot spin to receive individual items faster, +- perform work on individual items if batching is possible, +- do work stealing or similar to balance individual items. + +Shared state should only be used if the cost of sharing is less than the cost of re-computation. + +[M-YIELD-POINTS]: ./#M-YIELD-POINTS + + + +## Long-Running Tasks Should Have Yield Points. (M-YIELD-POINTS) { #M-YIELD-POINTS } + +To ensure you don't starve other tasks of CPU time. 
+0.2 + +If you perform long-running computations, they should contain `yield_now().await` points. + +Your future might be executed in a runtime that cannot work around blocking or long-running tasks. Even then, such tasks are +considered bad design and cause runtime overhead. If your complex task performs I/O regularly it will simply utilize these await points to preempt itself: + +```rust, ignore +async fn process_items(items: &[Item]) { + // Keep processing items, the runtime will preempt you automatically. + for i in items { + read_item(i).await; + } +} +``` + +If your task performs long-running CPU operations without intermixed I/O, it should instead cooperatively yield at regular intervals, to not starve concurrent operations: + +```rust, ignore +async fn process_items(zip_file: File) { + let items = zip_file.read().await; + for i in items { + decompress(i); + yield_now().await; + } +} +``` + +If the number and duration of your individual operations are unpredictable you should use APIs such as `has_budget_remaining()` and +related APIs to query your hosting runtime. + +> ### Yield how often? +> +> In a thread-per-core model the overhead of task switching must be balanced against the systemic effects of starving unrelated tasks. +> +> Under the assumption that runtime task switching takes 100's of ns, in addition to the overhead of lost CPU caches, +> continuous execution in between should be long enough that the switching cost becomes negligible (<1%). +> +> Thus, performing 10 - 100μs of CPU-bound work between yield points would be a good starting point. + + +--- + + +# Safety Guidelines + + + +## Unsafe Implies Undefined Behavior (M-UNSAFE-IMPLIES-UB) { #M-UNSAFE-IMPLIES-UB } + +To ensure semantic consistency and prevent warning fatigue. +1.0 + +The marker `unsafe` may only be applied to functions and traits if misuse implies the risk of undefined behavior (UB). +It must not be used to mark functions that are dangerous to call for other reasons.
+ +```rust +// Valid use of unsafe +unsafe fn print_string(x: *const String) { } + +// Invalid use of unsafe +unsafe fn delete_database() { } +``` + + + +## Unsafe Needs Reason, Should be Avoided (M-UNSAFE) { #M-UNSAFE } + +To prevent undefined behavior, attack surface, and similar 'happy little accidents'. +0.2 + +You must have a valid reason to use `unsafe`. The only valid reasons are + +1) novel abstractions, e.g., a new smart pointer or allocator, +1) performance, e.g., attempting to call `.get_unchecked()`, +1) FFI and platform calls, e.g., calling into C or the kernel, ... + +Unsafe code lowers the guardrails used by the compiler, transferring some of the compiler's responsibilities +to the programmer. Correctness of the resulting code relies primarily on catching all mistakes in code review, +which is error-prone. Mistakes in unsafe code may introduce high-severity security vulnerabilities. + +You must not use ad-hoc `unsafe` to + +- shorten a performant and safe Rust program, e.g., 'simplify' enum casts via `transmute`, +- bypass `Send` and similar bounds, e.g., by doing `unsafe impl Send ...`, +- bypass lifetime requirements via `transmute` and similar. + +Ad-hoc here means `unsafe` embedded in otherwise unrelated code. It is of course permissible to create properly designed, sound abstractions doing these things. + +In any case, `unsafe` must follow the guidelines outlined below. + +### Novel Abstractions + +- [ ] Verify there is no established alternative. If there is, prefer that. +- [ ] Your abstraction must be minimal and testable. +- [ ] It must be hardened and tested against ["adversarial code"](https://cheats.rs/#adversarial-code), esp. + - If they accept closures they must become invalid (e.g., poisoned) if the closure panics + - They must assume any safe trait is misbehaving, esp. `Deref`, `Clone` and `Drop`. 
+- [ ] Any use of `unsafe` must be accompanied by plain-text reasoning outlining its safety +- [ ] It must pass [Miri](https://github.com/rust-lang/miri), including adversarial test cases +- [ ] It must follow all other [unsafe code guidelines](https://rust-lang.github.io/unsafe-code-guidelines/) + +### Performance + +- [ ] Using `unsafe` for performance reasons should only be done after benchmarking +- [ ] Any use of `unsafe` must be accompanied by plain-text reasoning outlining its safety. This applies to both + calling `unsafe` methods, as well as providing `_unchecked` ones. +- [ ] The code in question must pass [Miri](https://github.com/rust-lang/miri) +- [ ] You must follow the [unsafe code guidelines](https://rust-lang.github.io/unsafe-code-guidelines/) + +### FFI + +- [ ] We recommend you use an established interop library to avoid `unsafe` constructs +- [ ] You must follow the [unsafe code guidelines](https://rust-lang.github.io/unsafe-code-guidelines/) +- [ ] You must document your generated bindings to make it clear which call patterns are permissible + +### Further Reading + +- [Nomicon](https://doc.rust-lang.org/nightly/nomicon/) +- [Unsafe Code Guidelines](https://rust-lang.github.io/unsafe-code-guidelines/) +- [Miri](https://github.com/rust-lang/miri) +- ["Adversarial code"](https://cheats.rs/#adversarial-code) + + + +## All Code Must be Sound (M-UNSOUND) { #M-UNSOUND } + +To prevent unexpected runtime behavior, leading to potential bugs and incompatibilities. +1.0 + +Unsound code is seemingly _safe_ code that may produce undefined behavior when called from other safe code, or on its own accord. + +> ### Meaning of 'Safe' +> +> The terms _safe_ and `unsafe` are technical terms in Rust. +> +> A function is _safe_, if its signature does not mark it `unsafe`. That said, _safe_ functions can still be dangerous +> (e.g., `delete_database()`), and `unsafe` ones are, when properly used, usually quite benign (e.g.,`vec.get_unchecked()`). 
+> +> A function is therefore _unsound_ if it appears _safe_ (i.e., it is not marked `unsafe`), but if _any_ of its calling +> modes would cause undefined behavior. This is to be interpreted in the strictest sense. Even if causing undefined +> behavior is only a 'remote, theoretical possibility' requiring 'weird code', the function is unsound. +> +> Also see [Unsafe, Unsound, Undefined](https://cheats.rs/#unsafe-unsound-undefined). + +```rust +// "Safely" converts types +fn unsound_ref<T>(x: &T) -> &u128 { + unsafe { std::mem::transmute(x) } +} + +// "Clever trick" to work around missing `Send` bounds. +struct AlwaysSend<T>(T); +unsafe impl<T> Send for AlwaysSend<T> {} +unsafe impl<T> Sync for AlwaysSend<T> {} +``` + +Unsound abstractions are never permissible. If you cannot safely encapsulate something, you must expose `unsafe` functions instead, and document proper behavior. + +
+ +No Exceptions + +While you may break most guidelines if you have a good enough reason, there are no exceptions in this case: unsound code is never acceptable. + +
+ +> ### It's the Module Boundaries +> +> Note that soundness boundaries equal module boundaries! It is perfectly fine, in an otherwise safe abstraction, +> to have safe functions that rely on behavior guaranteed elsewhere **in the same module**. +> +> ```rust +> struct MyDevice(*const u8); +> +> impl MyDevice { +> fn new() -> Self { +> // Properly initializes instance ... +> # todo!() +> } +> +> fn get(&self) -> u8 { +> // It is perfectly fine to rely on `self.0` being valid, despite this +> // function in-and-by itself being unable to validate that. +> unsafe { *self.0 } +> } +> } +> +> ``` + + +--- + + +# Universal Guidelines + + + +## Names are Free of Weasel Words (M-CONCISE-NAMES) { #M-CONCISE-NAMES } + +To improve readability. +1.0 + +Symbol names, especially types and traits names, should be free of weasel words that do not meaningfully +add information. Common offenders include `Service`, `Manager`, and `Factory`. For example: + +While your library may very well contain or communicate with a booking service—or even hold an `HttpClient` +instance named `booking_service`—one should rarely encounter a `BookingService` _type_ in code. + +An item handling many bookings can just be called `Bookings`. If it does anything more specific, then that quality +should be appended instead. It submits these items elsewhere? Calling it `BookingDispatcher` would be more helpful. + +The same is true for `Manager`s. Every code manages _something_, so that moniker is rarely useful. With rare +exceptions, life cycle issues should likewise not be made the subject of some manager. Items are created in whatever +way they are needed, their disposal is governed by `Drop`, and only `Drop`. + +Regarding factories, at least the term should be avoided. While the concept `FooFactory` has its use, its canonical +Rust name is `Builder` (compare [M-INIT-BUILDER](../libs/ux/#M-INIT-BUILDER)). A builder that can produce items repeatedly is still a builder. 
+ +In addition, accepting factories (builders) as parameters is an unidiomatic import of OO concepts into Rust. If +repeatable instantiation is required, functions should ask for an `impl Fn() -> Foo` over a `FooBuilder` or +similar. In contrast, standalone builders have their use, but primarily to reduce parametric permutation complexity +around optional values (again, [M-INIT-BUILDER](../libs/ux/#M-INIT-BUILDER)). + + + +## Magic Values are Documented (M-DOCUMENTED-MAGIC) { #M-DOCUMENTED-MAGIC } + +To ensure maintainability and prevent misunderstandings when refactoring. +1.0 + +Hardcoded _magic_ values in production code must be accompanied by a comment. The comment should outline: + +- why this value was chosen, +- non-obvious side effects if that value is changed, +- external systems that interact with this constant. + +You should prefer named constants over inline values. + +```rust, ignore +// Bad: it's relatively obvious that this waits for a day, but not why +wait_timeout(60 * 60 * 24).await // Wait at most a day + +// Better +wait_timeout(60 * 60 * 24).await // Large enough value to ensure the server + // can finish. Setting this too low might + // make us abort a valid request. Based on + // `api.foo.com` timeout policies. + +// Best + +/// How long we wait for the server. +/// +/// Large enough value to ensure the server +/// can finish. Setting this too low might +/// make us abort a valid request. Based on +/// `api.foo.com` timeout policies. +const UPSTREAM_SERVER_TIMEOUT: Duration = Duration::from_secs(60 * 60 * 24); +``` + + + +## Lint Overrides Should Use `#[expect]` (M-LINT-OVERRIDE-EXPECT) { #M-LINT-OVERRIDE-EXPECT } + +To prevent the accumulation of outdated lints. +1.0 + +When overriding project-global lints inside a submodule or item, you should do so via `#[expect]`, not `#[allow]`. + +Expected lints emit a warning if the marked warning was not encountered, thus preventing the accumulation of stale lints. 
+That said, `#[allow]` lints are still useful when applied to generated code, and can appear in macros. + +Overrides should be accompanied by a `reason`: + +```rust,edition2021 +#[expect(clippy::unused_async, reason = "API fixed, will use I/O later")] +pub async fn ping_server() { + // Stubbed out for now +} +``` + + + +## Use Structured Logging with Message Templates (M-LOG-STRUCTURED) { #M-LOG-STRUCTURED } + +To minimize the cost of logging and to improve filtering capabilities. +0.1 + +Logging should use structured events with named properties and message templates following +the [message templates](https://messagetemplates.org/) specification. + +> **Note:** Examples use the [`tracing`](https://docs.rs/tracing/) crate's `event!` macro, +but these principles apply to any logging API that supports structured logging (e.g., `log`, +`slog`, custom telemetry systems). + +### Avoid String Formatting + +String formatting allocates memory at runtime. Message templates defer formatting until viewing time. +We recommend that message template includes all named properties for easier inspection at viewing time. + +```rust,ignore +// Bad: String formatting causes allocations +tracing::info!("file opened: {}", path); +tracing::info!(format!("file opened: {}", path)); + +// Good: Message templates with named properties +event!( + name: "file.open.success", + Level::INFO, + file.path = path.display(), + "file opened: {{file.path}}", +); +``` + +> **Note**: Use the `{{property}}` syntax in message templates which preserves the literal text +> while escaping Rust's format syntax. String formatting is deferred until logs are viewed. 
+ +### Name Your Events + +Use hierarchical dot-notation: `<component>.<operation>.<state>` + +```rust,ignore +// Bad: Unnamed events +event!( + Level::INFO, + file.path = file_path, + "file {{file.path}} processed successfully", +); + +// Good: Named events +event!( + name: "file.processing.success", // event identifier + Level::INFO, + file.path = file_path, + "file {{file.path}} processed successfully", +); +``` + +Named events enable grouping and filtering across log entries. + +### Follow OpenTelemetry Semantic Conventions + +Use [OTel semantic conventions](https://opentelemetry.io/docs/specs/semconv/) for common attributes if needed. +This enables standardization and interoperability. + +```rust,ignore +event!( + name: "file.write.success", + Level::INFO, + file.path = path.display(), // Standard OTel name + file.size = bytes_written, // Standard OTel name + file.directory = dir_path, // Standard OTel name + file.extension = extension, // Standard OTel name + file.operation = "write", // Custom name + "{{file.operation}} {{file.size}} bytes to {{file.path}} in {{file.directory}} extension={{file.extension}}", +); +``` + +Common conventions: + +- HTTP: `http.request.method`, `http.response.status_code`, `url.scheme`, `url.path`, `server.address` +- File: `file.path`, `file.directory`, `file.name`, `file.extension`, `file.size` +- Database: `db.system.name`, `db.namespace`, `db.operation.name`, `db.query.text` +- Errors: `error.type`, `error.message`, `exception.type`, `exception.stacktrace` + +### Redact Sensitive Data + +Do not log plain sensitive data as this might lead to privacy and security incidents.
+ +```rust,ignore +// Bad: Logs potentially sensitive data +event!( + name: "file.operation.started", + Level::INFO, + user.email = user.email, // Sensitive data + file.name = "license.txt", + "reading file {{file.name}} for user {{user.email}}", +); + +// Good: Redact sensitive parts +event!( + name: "file.operation.started", + Level::INFO, + user.email.redacted = redact_email(user.email), + file.name = "license.txt", + "reading file {{file.name}} for user {{user.email.redacted}}", +); +``` + +Sensitive data includes email addresses, file paths revealing user identity, filenames containing secrets or tokens, +file contents with PII, temporary file paths with session IDs and more. Consider using the [`data_privacy`](https://crates.io/crates/data_privacy) crate for consistent redaction. + +### Further Reading + +- [Message Templates Specification](https://messagetemplates.org/) +- [OpenTelemetry Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/) +- [OWASP Logging Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Logging_Cheat_Sheet.html) + + + +## Panic Means 'Stop the Program' (M-PANIC-IS-STOP) { #M-PANIC-IS-STOP } + +To ensure soundness and predictability. +1.0 + +Panics are not exceptions. Instead, they suggest immediate program termination. + +Although your code must be [panic-safe](https://doc.rust-lang.org/nomicon/exception-safety.html) (i.e., a survived panic may not lead to +inconsistent state), invoking a panic means _this program should stop now_. It is not valid to: + +- use panics to communicate (errors) upstream, +- use panics to handle self-inflicted error conditions, +- assume panics will be caught, even by your own code. + +For example, if the application calling you is compiled with a `Cargo.toml` containing + +```toml +[profile.release] +panic = "abort" +``` + +then any invocation of panic will cause an otherwise functioning program to needlessly abort. 
Valid reasons to panic are: + +- when encountering a programming error, e.g., `x.expect("must never happen")`, +- anything invoked from const contexts, e.g., `const { foo.unwrap() }`, +- when user requested, e.g., providing an `unwrap()` method yourself, +- when encountering a poison, e.g., by calling `unwrap()` on a lock result (a poisoned lock signals another thread has panicked already). + +Any of those are directly or indirectly linked to programming errors. + + + +## Detected Programming Bugs are Panics, Not Errors (M-PANIC-ON-BUG) { #M-PANIC-ON-BUG } + +To avoid impossible error handling code and ensure runtime consistency. +1.0 + +As an extension of [M-PANIC-IS-STOP] above, when an unrecoverable programming error has been +detected, libraries and applications must panic, i.e., request program termination. + +In these cases, no `Error` type should be introduced or returned, as any such error could not be acted upon at runtime. + +Contract violations, i.e., the breaking of invariants either within a library or by a caller, are programming errors and must therefore panic. + +However, what constitutes a violation is situational. APIs are not expected to go out of their way to detect them, as such +checks can be impossible or expensive. Encountering `must_be_even == 3` during an already existing check clearly warrants +a panic, while a function `parse(&str)` clearly must return a `Result`. If in doubt, we recommend you take inspiration from the standard library. + +```rust, ignore +// Generally, a function with bad parameters must either +// - Ignore a parameter and/or return the wrong result +// - Signal an issue via Result or similar +// - Panic +// If in this `divide_by` we see that y == 0, panicking is +// the correct approach. +fn divide_by(x: u32, y: u32) -> u32 { ... } + +// However, it can also be permissible to omit such checks +// and return an unspecified (but not an undefined) result. +fn divide_by_fast(x: u32, y: u32) -> u32 { ... 
} + +// Here, passing an invalid URI is not a contract violation. +// Since parsing is inherently fallible, a Result must be returned. +fn parse_uri(s: &str) -> Result<Uri, ParseError> { }; + +``` + +> ### Make it 'Correct by Construction' +> +> While panicking on a detected programming error is the 'least bad option', your panic might still ruin someone's day. +> For any user input or calling sequence that would otherwise panic, you should also explore if you can use the type +> system to avoid panicking code paths altogether. + +[M-PANIC-IS-STOP]: ../universal/#M-PANIC-IS-STOP + + + +## Public Types are Debug (M-PUBLIC-DEBUG) { #M-PUBLIC-DEBUG } + +To simplify debugging and prevent leaking sensitive data. +1.0 + +All public types exposed by a crate should implement `Debug`. Most types can do so via `#[derive(Debug)]`: + +```rust +#[derive(Debug)] +struct Endpoint(String); +``` + +Types designed to hold sensitive data should also implement `Debug`, but do so via a custom implementation. +This implementation must employ unit tests to ensure sensitive data isn't actually leaked, and will not be in the future. + +```rust +use std::fmt::{Debug, Formatter}; + +struct UserSecret(String); + +impl Debug for UserSecret { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "UserSecret(...)") + } +} + +#[test] +fn test() { + let key = "552d3454-d0d5-445d-ab9f-ef2ae3a8896a"; + let secret = UserSecret(key.to_string()); + let rendered = format!("{:?}", secret); + + assert!(rendered.contains("UserSecret")); + assert!(!rendered.contains(key)); +} +``` + + + +## Public Types Meant to be Read are Display (M-PUBLIC-DISPLAY) { #M-PUBLIC-DISPLAY } + +To improve usability. +1.0 + +If your type is expected to be read by upstream consumers, be it developers or end users, it should implement `Display`.
This in particular includes: + +- Error types, which are mandated by `std::error::Error` to implement `Display` +- Wrappers around string-like data + +Implementations of `Display` should follow Rust customs; this includes rendering newlines and escape sequences. +The handling of sensitive data outlined in [M-PUBLIC-DEBUG] applies analogously. + +[M-PUBLIC-DEBUG]: ./#M-PUBLIC-DEBUG + + + +## Prefer Regular over Associated Functions (M-REGULAR-FN) { #M-REGULAR-FN } + +To improve readability. +1.0 + +Associated functions should primarily be used for instance creation, not general purpose computation. + +In contrast to some OO languages, regular functions are first-class citizens in Rust and need no module or _class_ to host them. Functionality that +does not clearly belong to a receiver should therefore not reside in a type's `impl` block: + +```rust, ignore +struct Database {} + +impl Database { + // Ok, associated function creates an instance + fn new() -> Self {} + + // Ok, regular method with `&self` as receiver + fn query(&self) {} + + // Not ok, this function is not directly related to `Database`, + // it should therefore not live under `Database` as an associated + // function. + fn check_parameters(p: &str) {} +} + +// As a regular function this is fine +fn check_parameters(p: &str) {} +``` + +Regular functions are more idiomatic, and reduce unnecessary noise on the caller side. Associated trait functions are perfectly idiomatic though: + +```rust +pub trait Default { + fn default() -> Self; +} + +struct Foo; + +impl Default for Foo { + fn default() -> Self { Self } +} +``` + + + +## If in Doubt, Split the Crate (M-SMALLER-CRATES) { #M-SMALLER-CRATES } + +To improve compile times and modularity. +1.0 + +You should err on the side of having too many crates rather than too few, as this leads to dramatic compile time improvements—especially +during the development of these crates—and prevents cyclic component dependencies. 
+ +Essentially, if a submodule can be used independently, its contents should be moved into a separate crate. + +Performing this crate split may cause you to lose access to some `pub(crate)` fields or methods. In many situations, this is a desirable +side-effect and should prompt you to design more flexible abstractions that would give your users similar affordances. + +In some cases, it is desirable to re-join individual crates back into a single _umbrella crate_, such as when dealing with proc macros, or runtimes. +Functionality split for technical reasons (e.g., a `foo_proc` proc macro crate) should always be re-exported. Otherwise, re-exports should be used sparingly. + +> ### Features vs. Crates +> +> As a rule of thumb, crates are for items that can reasonably be used on their own. Features should unlock extra functionality that +> can't live on its own. In the case of umbrella crates, see below, features may also be used to enable constituents (but then that functionality +> was extracted into crates already). +> +> For example, if you defined a `web` crate with the following modules, users only needing client calls would also have to pay for the compilation of server code: +> +> ```text +> web::server +> web::client +> web::protocols +> ``` +> +> Instead, you should introduce individual crates that give users the ability to pick and choose: +> +> ```text +> web_server +> web_client +> web_protocols +> ``` + + + +## Use Static Verification (M-STATIC-VERIFICATION) { #M-STATIC-VERIFICATION } + +To ensure consistency and avoid common issues. +1.0 + +Projects should use the following static verification tools to help maintain the quality of the code. These tools can be +configured to run on a developer's machine during normal work, and should be used as part of check-in gates. + +* [compiler lints](https://doc.rust-lang.org/rustc/lints/index.html) offer many lints to avoid bugs and improve code quality. 
+* [clippy lints](https://doc.rust-lang.org/clippy/) contain hundreds of lints to avoid bugs and improve code quality. +* [rustfmt](https://github.com/rust-lang/rustfmt) ensures consistent source formatting. +* [cargo-audit](https://crates.io/crates/cargo-audit) verifies crate dependencies for security vulnerabilities. +* [cargo-hack](https://crates.io/crates/cargo-hack) validates that all combinations of crate features work correctly. +* [cargo-udeps](https://crates.io/crates/cargo-udeps) detects unused dependencies in Cargo.toml files. +* [miri](https://github.com/rust-lang/miri) validates the correctness of unsafe code. + +### Compiler Lints + +The Rust compiler generally produces exceptionally good diagnostics. In addition to the default set of diagnostics, projects +should explicitly enable the following set of compiler lints: + +```toml +[lints.rust] +ambiguous_negative_literals = "warn" +missing_debug_implementations = "warn" +redundant_imports = "warn" +redundant_lifetimes = "warn" +trivial_numeric_casts = "warn" +unsafe_op_in_unsafe_fn = "warn" +unused_lifetimes = "warn" +``` + +### Clippy Lints + +For clippy, projects should enable all major lint categories, and additionally enable some lints from the `restriction` lint group. 
+Undesired lints (e.g., numeric casts) can be opted back out of on a case-by-case basis: + +```toml +[lints.clippy] +cargo = { level = "warn", priority = -1 } +complexity = { level = "warn", priority = -1 } +correctness = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +perf = { level = "warn", priority = -1 } +style = { level = "warn", priority = -1 } +suspicious = { level = "warn", priority = -1 } +# nursery = { level = "warn", priority = -1 } # optional, might cause more false positives + +# These lints are from the `restriction` lint group and prevent specific +# constructs being used in source code in order to drive up consistency, +# quality, and brevity +allow_attributes_without_reason = "warn" +as_pointer_underscore = "warn" +assertions_on_result_states = "warn" +clone_on_ref_ptr = "warn" +deref_by_slicing = "warn" +disallowed_script_idents = "warn" +empty_drop = "warn" +empty_enum_variants_with_brackets = "warn" +empty_structs_with_brackets = "warn" +fn_to_numeric_cast_any = "warn" +if_then_some_else_none = "warn" +map_err_ignore = "warn" +redundant_type_annotations = "warn" +renamed_function_params = "warn" +semicolon_outside_block = "warn" +string_to_string = "warn" +undocumented_unsafe_blocks = "warn" +unnecessary_safety_comment = "warn" +unnecessary_safety_doc = "warn" +unneeded_field_pattern = "warn" +unused_result_ok = "warn" + +# May cause issues with structured logging otherwise. +literal_string_with_formatting_args = "allow" + +# Define custom opt outs here +# ... +``` + + + +## Follow the Upstream Guidelines (M-UPSTREAM-GUIDELINES) { #M-UPSTREAM-GUIDELINES } + +To avoid repeating mistakes the community has already learned from, and to have a codebase that does not surprise users and contributors. 
+1.0 + +The guidelines in this book complement existing Rust guidelines, in particular: + +- [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/checklist.html) +- [Rust Style Guide](https://doc.rust-lang.org/nightly/style-guide/) +- [Rust Design Patterns](https://rust-unofficial.github.io/patterns//intro.html) +- [Rust Reference - Undefined Behavior](https://doc.rust-lang.org/reference/behavior-considered-undefined.html) + +We recommend you read through these as well, and apply them in addition to this book's items. Pay special attention to the ones below, as they are frequently forgotten: + +- [ ] [C-CONV](https://rust-lang.github.io/api-guidelines/naming.html#ad-hoc-conversions-follow-as_-to_-into_-conventions-c-conv) - Ad-hoc conversions + follow `as_`, `to_`, `into_` conventions +- [ ] [C-GETTER](https://rust-lang.github.io/api-guidelines/naming.html#getter-names-follow-rust-convention-c-getter) - Getter names follow Rust convention +- [ ] [C-COMMON-TRAITS](https://rust-lang.github.io/api-guidelines/interoperability.html#c-common-traits) - Types eagerly implement common traits + - `Copy`, `Clone`, `Eq`, `PartialEq`, `Ord`, `PartialOrd`, `Hash`, `Default`, `Debug` + - `Display` where type wants to be displayed +- [ ] [C-CTOR](https://rust-lang.github.io/api-guidelines/predictability.html?highlight=new#constructors-are-static-inherent-methods-c-ctor) - + Constructors are static, inherent methods + - In particular, have `Foo::new()`, even if you have `Foo::default()` +- [ ] [C-FEATURE](https://rust-lang.github.io/api-guidelines/naming.html#feature-names-are-free-of-placeholder-words-c-feature) - Feature names + are free of placeholder words + + +--- + + +# Libraries / Building Guidelines + + + +## Features are Additive (M-FEATURES-ADDITIVE) { #M-FEATURES-ADDITIVE } + +To prevent compilation breakage in large and complex projects. 
+1.0 + +All library features must be additive, and any combination must work, as long as the feature itself would work on the current platform. This implies: + +- [ ] You must not introduce a `no-std` feature, use a `std` feature instead +- [ ] Adding any feature `foo` must not disable or modify any public item + - Adding enum variants is fine if these enums are `#[non_exhaustive]` +- [ ] Features must not rely on other features to be manually enabled +- [ ] Features must not rely on their parent to skip-enable a feature in one of their children + +Further Reading + +- [Feature Unification](https://doc.rust-lang.org/cargo/reference/features.html#feature-unification) +- [Mutually Exclusive Features](https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features) + + + +## Libraries Work Out of the Box (M-OOBE) { #M-OOBE } + +To be easily adoptable by the Rust ecosystem. +1.0 + +Libraries must _just work_ on all supported platforms, with the exception of libraries that are expressly platform or target specific. + +Rust crates often come with dozens of dependencies, applications with 100's. Users expect `cargo build` and `cargo install` +to _just work_. Consider this installation of `bat` that pulls in ~250 dependencies: + +```text +Compiling writeable v0.5.5 +Compiling strsim v0.11.1 +Compiling litemap v0.7.5 +Compiling crossbeam-utils v0.8.21 +Compiling icu_properties_data v1.5.1 +Compiling ident_case v1.0.1 +Compiling once_cell v1.21.3 +Compiling icu_normalizer_data v1.5.1 +Compiling fnv v1.0.7 +Compiling regex-syntax v0.8.5 +Compiling anstyle v1.0.10 +Compiling vcpkg v0.2.15 +Compiling utf8parse v0.2.2 +Compiling aho-corasick v1.1.3 +Compiling utf16_iter v1.0.5 +Compiling hashbrown v0.15.2 +Building [==> ] 29/251: icu_locid_transform_data, serde, winnow, indexma... +``` + +This compilation, like practically all other applications and libraries, will _just work_. 
+ +While there are tools targeting specific functionality (e.g., a Wayland compositor) or platform crates like +`windows`, unless a crate is _obviously_ platform specific, the expectation is that it will otherwise _just work_. + +This means crates must build, ultimately + +- [ ] on all [Tier 1 platforms](https://doc.rust-lang.org/rustc/platform-support.html),<sup>1</sup> and +- [ ] without any additional prerequisites beyond `cargo` and `rust`.<sup>2</sup> + + + +<sup>1</sup> It is ok to not support Tier 1 platforms "for now", but abstractions must be present so support can easily be extended. This is usually +done by introducing an internal `HAL` ([Hardware Abstraction Layer](https://en.wikipedia.org/wiki/HAL_(software))) module with a `dummy` fallback target.
+<sup>2</sup> A default Rust installation will also have `cc` and a linker present. + +
+ +In particular, non-platform crates must not, by default, require the user to install additional tools, or expect environment variables +to compile. If tools were somehow needed (like the generation of Rust from `.proto` files) these tools should be run as part of the +publishing workflow or earlier, and the resulting artifacts (e.g., `.rs` files) be contained inside the published crate. + +If a dependency is known to be platform specific, the parent must use conditional (platform) compilation or opt-in feature gates. + +> **Libraries are Responsible for Their Dependencies.** +> +> Imagine you author a `Copilot` crate, which in turn uses an `HttpClient`, which in turn depends on a `perl` script to compile. +> +> Then every one of your users, and your users' users, and everyone above, would need to install Perl to compile _their_ crate. In large projects you would +> have 100's of people who don't know or don't care about your library or Perl, encounter a cryptic compilation error, and now have to figure out how to +> install it on their system. +> +> In practical terms, such behavior is largely a self-inflicted death sentence in the open source space, since the moment alternatives +> are available, people will switch to those that _just work_. + + + +## Native `-sys` Crates Compile Without Dependencies (M-SYS-CRATES) { #M-SYS-CRATES } + +To have libraries that 'just work' on all platforms. +0.2 + +If you author a pair of `foo` and `foo-sys` crates wrapping a native `foo.lib`, you are likely to run into the issues described +in [M-OOBE]. + +Follow these steps to produce a crate that _just works_ across platforms: + +- [ ] fully govern the build of `foo.lib` from `build.rs` inside `foo-sys`.
Only use hand-crafted compilation via the + [cc](https://crates.io/crates/cc) crate, do _not_ run Makefiles or external build scripts, as that will require the installation of external dependencies, +- [ ] make all external tools optional, such as `nasm`, +- [ ] embed the upstream source code in your crate, +- [ ] make the embedded sources verifiable (e.g., include Git URL + hash), +- [ ] pre-generate `bindgen` glue if possible, +- [ ] support both static linking, and dynamic linking via [libloading](https://crates.io/crates/libloading). + +Deviations from these points can work, and can be considered on a case-by-case basis: + +If the native build system is available as an _OOBE_ crate, that can be used instead of `cc` invocations. The same applies to external tools. + +Source code might have to be downloaded if it does not fit crates.io size limitations. In any case, only servers with an availability +comparable to crates.io should be used. In addition, the specific hashes of acceptable downloads should be stored in the crate and verified. + +Downloading sources can fail on hermetic build environments, therefore alternative source roots should also be specifiable (e.g., via environment variables). + +[M-OOBE]: ./#M-OOBE + + +--- + + +# Libraries / Interoperability Guidelines + + + +## Don't Leak External Types (M-DONT-LEAK-TYPES) { #M-DONT-LEAK-TYPES } + +To prevent accidental breakage and long-term maintenance cost. +0.1 + +Where possible, you should prefer `std`1 types in public APIs over types coming from external crates. Exceptions should be carefully considered. + +Any type in any public API will become part of that API's contract. Since `std` and constituents are the only crates +shipped by default, and since they come with a permanent stability guarantee, their types are the only ones that come without an interoperability risk. + +A crate that exposes another crate's type is said to _leak_ that type. 
+ +For maximal long term stability your crate should, theoretically, not leak any types. Practically, some leakage +is unavoidable, sometimes even beneficial. We recommend you follow this heuristic: + +- [ ] if you can avoid it, do not leak third-party types +- [ ] if you are part of an umbrella crate,<sup>2</sup> you may freely leak types from sibling crates. +- [ ] behind a relevant feature flag, types may be leaked (e.g., `serde`) +- [ ] without a feature _only_ if they give a _substantial benefit_. Most commonly that is interoperability with significant + other parts of the Rust ecosystem based around these types. + + + +<sup>1</sup> In rare instances, e.g., high performance libraries used from embedded, you might even want to limit yourself to `core` only. + +<sup>2</sup> For example, a `runtime` crate might be the umbrella of `runtime_rt`, `runtime_app` and `runtime_clock`. As users are +expected to only interact with the umbrella, siblings may leak each other's types. + + + + + +## Native Escape Hatches (M-ESCAPE-HATCHES) { #M-ESCAPE-HATCHES } + +To allow users to work around unsupported use cases until alternatives are available. +0.1 + +Types wrapping native handles should provide `unsafe` escape hatches. In interop scenarios your users might have gotten a native handle from somewhere +else, or they might have to pass your wrapped handle over FFI. To enable these use cases you should provide `unsafe` conversion methods. + +```rust +# type HNATIVE = *const u8; +pub struct Handle(HNATIVE); + +impl Handle { + pub fn new() -> Self { + // Safely creates handle via API calls + # todo!() + } + + // Constructs a new Handle from a native handle the user got elsewhere. + // This method should then also document all safety requirements that + // must be fulfilled. + pub unsafe fn from_native(native: HNATIVE) -> Self { + Self(native) + } + + // Various extra methods to permanently or temporarily obtain + // a native handle.
+ pub fn into_native(self) -> HNATIVE { self.0 } + pub fn to_native(&self) -> HNATIVE { self.0 } +} +``` + + + +## Types are Send (M-TYPES-SEND) { #M-TYPES-SEND } + +To enable the use of types in Tokio and behind runtime abstractions. +1.0 + +Public types should be `Send` for compatibility reasons: + +- All futures produced (explicitly or implicitly) must be `Send` +- Most other types should be `Send`, but there might be exceptions + +### Futures + +When declaring a future explicitly you should ensure it is, and remains, `Send`. + +```rust +# use std::future::Future; +# use std::pin::Pin; +# use std::task::{Context, Poll}; +# +struct Foo {} + +impl Future for Foo { + // Explicit implementation of `Future` for your type + # type Output = (); + # + # fn poll(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<<Self as Future>::Output> { todo!() } +} + +// You should assert your type is `Send` +const fn assert_send<T: Send>() {} +const _: () = assert_send::<Foo>(); +``` + +When returning futures implicitly through `async` method calls, you should make sure these are `Send` too. +You do not have to test every single method, but you should at least validate your main entry points. + +```rust,edition2021 +async fn foo() { } + +// TODO: We want this as a macro as well +fn assert_send<T: Send>(_: T) {} +_ = assert_send(foo()); +``` + +### Regular Types + +Most regular types should be `Send`, as they otherwise infect futures turning them `!Send` if held across `.await` points. + +```rust,edition2021 +# use std::rc::Rc; +# async fn read_file(x: &str) {} +# +async fn foo() { + let rc = Rc::new(123); // <-- Holding this across an .await point prevents + read_file("foo.txt").await; // the future from being `Send`. + dbg!(rc); +} +``` + +That said, if the default use of your type is _instantaneous_, and there is no reason for it to be otherwise held across `.await` boundaries, it may be `!Send`.
+ +```rust,edition2021 +# use std::rc::Rc; +# struct Telemetry; impl Telemetry { fn ping(&self, _: u32) {} } +# fn telemetry() -> Telemetry { Telemetry } +# async fn read_file(x: &str) {} +# +async fn foo() { + // Here a hypothetical instance Telemetry is summoned + // and used ad-hoc. It may be ok for Telemetry to be !Send. + telemetry().ping(0); + read_file("foo.txt").await; + telemetry().ping(1); +} +``` + +> ### The Cost of Send +> +> Ideally, there would be abstractions that are `Send` in work-stealing runtimes, and `!Send` in thread-per-core models based on non-atomic +> types like `Rc` and `RefCell` instead. +> +> Practically these abstractions don't exist, preventing Tokio compatibility in the non-atomic case. That in turn means you would have to +> "reinvent the world" to get anything done in a thread-per-core universe. +> +> The good news is, in most cases atomics and uncontended locks only have a measurable impact if accessed more frequently than every 64 words or so. +> +>
+> +> ![TEXT](M-TYPES-SEND.png) +> +>
+> +> Working with a large `Vec` in a hot loop is a bad idea, but doing the occasional uncontended atomic operation from otherwise thread-per-core +> async code has no performance impact, but gives you widespread ecosystem compatibility. + + +--- + + +# Libraries / Resilience Guidelines + + + +## Avoid Statics (M-AVOID-STATICS) { #M-AVOID-STATICS } + +To prevent consistency and correctness issues between crate versions. +1.0 + +Libraries should avoid `static` and thread-local items, if a consistent view of the item is relevant for correctness. +Essentially, any code that would be incorrect if the static _magically_ had another value must not use them. Statics +only used for performance optimizations are ok. + +The fundamental issue with statics in Rust is the secret duplication of state. + +Consider a crate `core` with the following function: + +```rust +# use std::sync::atomic::AtomicUsize; +# use std::sync::atomic::Ordering; +static GLOBAL_COUNTER: AtomicUsize = AtomicUsize::new(0); + +pub fn increase_counter() -> usize { + GLOBAL_COUNTER.fetch_add(1, Ordering::Relaxed) +} +``` + +Now assume you have a crate `main`, calling two libraries `library_a` and `library_b`, each invoking that counter: + +```rust,ignore +// Increase global static counter 2 times +library_a::count_up(); +library_a::count_up(); + +// Increase global static counter 3 more times +library_b::count_up(); +library_b::count_up(); +library_b::count_up(); +``` + +They eventually report their result: + +```rust,ignore +library_a::print_counter(); +library_b::print_counter(); +main::print_counter(); +``` + +At this point, what is _the_ value of said counter; `0`, `2`, `3` or `5`? + +The answer is, possibly any (even multiple!) of the above, depending on the crate's version resolution! + +Under the hood Rust may link to multiple versions of the same crate, independently instantiated, to satisfy declared +dependencies. 
This is especially observable during a crate's `0.x` version timeline, where each `x` constitutes a separate _major_ version. + +If `main`, `library_a` and `library_b` all declared the same version of `core`, e.g. `0.5`, then the reported result will be `5`, since all +crates actually _see_ the same version of `GLOBAL_COUNTER`. + +However, if `library_a` declared `0.4` instead, then it would be linked against a separate version of `core`; thus `main` and `library_b` would +agree on a value of `3`, while `library_a` reported `2`. + +Although `static` items can be useful, they are particularly dangerous before a library's stabilization, and for any state where _secret duplication_ would +cause consistency issues when static and non-static variable use interacts. In addition, statics interfere with unit testing, and are a contention point in +thread-per-core designs. + + + +## I/O and System Calls Are Mockable (M-MOCKABLE-SYSCALLS) { #M-MOCKABLE-SYSCALLS } + +To make otherwise hard-to-evoke edge cases testable. +0.2 + +Any user-facing type doing I/O, or sys calls with side effects, should be mockable to these effects. This includes file and +network access, clocks, entropy sources and seeds, and similar. More generally, any operation that is + +- non-deterministic, +- reliant on external state, +- depending on the hardware or the environment, +- is otherwise fragile or not universally reproducible + +should be mockable. + +> ### Mocking Allocations? +> +> Unless you write kernel code or similar, you can consider allocations to be deterministic, hardware independent and practically +> infallible, thus not covered by this guideline. +> +> However, this does _not_ mean you should expect there to be unlimited memory available. While it is ok to +> accept caller provided input as-is if your library has a _reasonable_ memory complexity, memory-hungry libraries +> and code handling external input should provide bounded and / or chunking operations. 
+ +This guideline has several implications for libraries: they + +- should not perform ad-hoc I/O, i.e., call `read("foo.txt")` +- should not rely on non-mockable I/O and sys calls +- should not create their own I/O or sys call _core_ themselves +- should not offer `MyIoLibrary::default()` constructors + +Instead, libraries performing I/O and sys calls should either accept some I/O _core_ that is mockable already, or provide mocking functionality themselves: + +```rust, ignore +let lib = Library::new_runtime(runtime_io); // mockable I/O functionality passed in +let (lib, mock) = Library::new_mocked(); // supports inherent mocking +``` + +Libraries supporting inherent mocking should implement it as follows: + +```rust, ignore +pub struct Library { + some_core: LibraryCore // Encapsulates syscalls, I/O, ... compare below. +} + +impl Library { + pub fn new() -> Self { ... } + pub fn new_mocked() -> (Self, MockCtrl) { ... } +} +``` + +Behind the scenes, `LibraryCore` is a non-public enum, similar to [M-RUNTIME-ABSTRACTED], that either dispatches +calls to the respective sys call, or to a mocking controller. + +```rust, ignore +// Dispatches calls either to the operating system, or to a +// mocking controller. +enum LibraryCore { + Native, + + #[cfg(feature = "test-util")] + Mocked(mock::MockCtrl) +} + +impl LibraryCore { + // Some function you'd forward to the operating system. + fn random_u32(&self) { + match self { + Self::Native => unsafe { os_random_u32() } + Self::Mocked(m) => m.random_u32() + } + } +} + + +#[cfg(feature = "test-util")] +mod mock { + // This follows the M-SERVICES-CLONE pattern, so both `LibraryCore` and + // the user can hold on to the same `MockCtrl` instance. + pub struct MockCtrl { + inner: Arc<MockCtrlInner> + } + + // Implement required logic accordingly, usually forwarding to + // `MockCtrlInner` below. + impl MockCtrl { + pub fn set_next_u32(&self, x: u32) { ... } + pub fn random_u32(&self) { ...
} + } + + // Contains actual logic, e.g., the next random number we should return. + struct MockCtrlInner { + next_call: u32 + } +} +``` + +Runtime-aware libraries already built on top of the [M-RUNTIME-ABSTRACTED] pattern should extend their runtime enum instead: + +```rust, ignore +enum Runtime { + #[cfg(feature="tokio")] + Tokio(tokio::Tokio), + + #[cfg(feature="smol")] + Smol(smol::Smol), + + #[cfg(feature="test-util")] + Mock(mock::MockCtrl) +} +``` + +As indicated above, most libraries supporting mocking should not accept mock controllers, but return them in a tuple, +with the first element being the library instance, the second the mock controller. This is to prevent state ambiguity if multiple +instances shared a single controller: + +```rust, ignore +impl Library { + pub fn new_mocked() -> (Self, MockCtrl) { ... } // good + pub fn new_mocked_bad(&mut MockCtrl) -> Self { ... } // prone to misuse +} +``` + +[M-RUNTIME-ABSTRACTED]: ../ux/#M-RUNTIME-ABSTRACTED + + + +## Don't Glob Re-Export Items (M-NO-GLOB-REEXPORTS) { #M-NO-GLOB-REEXPORTS } + +To prevent accidentally leaking unintended types. +1.0 + +Don't `pub use foo::*` from other modules, especially not from other crates. You might accidentally export more than you want, +and globs are hard to review in PRs. Re-export items individually instead: + +```rust,ignore +pub use foo::{A, B, C}; +``` + +Glob exports are permissible for technical reasons, like doing platform specific re-exports from a set of HAL (hardware abstraction layer) modules: + +```rust,ignore +#[cfg(target_os = "windows")] +mod windows { /* ... */ } + +#[cfg(target_os = "linux")] +mod linux { /* ... */ } + +// Acceptable use of glob re-exports, this is a common pattern +// and it is clear everything is just forwarded from a single +// platform.
+ +#[cfg(target_os = "windows")] +pub use windows::*; + +#[cfg(target_os = "linux")] +pub use linux::*; +``` + + + +## Use the Proper Type Family (M-STRONG-TYPES) { #M-STRONG-TYPES } + +To have and maintain the right data and safety variants, at the right time. +1.0 + +Use the appropriate `std` type for your task. In general you should use the strongest type available, as early as possible in your API flow. Common offenders are + +| Do not use ... | use instead ... | Explanation | +| --- | --- | --- | +| `String`* | `PathBuf`* | Anything dealing with the OS should be `Path`-like | + +That said, you should also follow common Rust `std` conventions. Purely numeric types at public API boundaries (e.g., `window_size()`) are expected to +be regular numbers, not `Saturating`, `NonZero`, or similar. + + + +* Including their siblings, e.g., `&str`, `Path`, ... + + + + + +## Test Utilities are Feature Gated (M-TEST-UTIL) { #M-TEST-UTIL } + +To prevent production builds from accidentally bypassing safety checks. +0.2 + +Testing functionality must be guarded behind a feature flag. This includes + +- mocking functionality ([M-MOCKABLE-SYSCALLS]), +- the ability to inspect sensitive data, +- safety check overrides, +- fake data generation. + +We recommend you use a single flag only, named `test-util`. In any case, the feature(s) must clearly communicate they are for testing purposes. + +```rust, ignore +impl HttpClient { + pub fn get() { ... } + + #[cfg(feature = "test-util")] + pub fn bypass_certificate_checks() { ... } +} +``` + +[M-MOCKABLE-SYSCALLS]: ./#M-MOCKABLE-SYSCALLS + + +--- + + +# Libraries / UX Guidelines + + + +## Avoid Smart Pointers and Wrappers in APIs (M-AVOID-WRAPPERS) { #M-AVOID-WRAPPERS } + +To reduce cognitive load and improve API ergonomics. +1.0 + +As a specialization of [M-ABSTRACTIONS-DONT-NEST], generic wrappers and smart pointers like +`Rc`, `Arc`, `Box`, or `RefCell` should be avoided in public APIs. 
+ +From a user perspective these are mostly implementation details, and introduce infectious complexity that users have to +resolve. In fact, these might even be impossible to resolve once multiple crates disagree about the required type of wrapper. + +If wrappers are needed internally, they should be hidden behind a clean API that uses simple types like `&T`, `&mut T`, or `T` directly. Compare: + +```rust,ignore +// Good: simple API +pub fn process_data(data: &Data) -> State { ... } +pub fn store_config(config: Config) -> Result<(), Error> { ... } + +// Bad: Exposing implementation details +pub fn process_shared(data: Arc>) -> Box { ... } +pub fn initialize(config: Rc>) -> Arc { ... } +``` + +Smart pointers in APIs are acceptable when: + +- The smart pointer is fundamental to the API's purpose (e.g., a new container lib) + +- The smart pointer, based on benchmarks, significantly improves performance and the complexity is justified. + +[M-ABSTRACTIONS-DONT-NEST]: ./#M-ABSTRACTIONS-DONT-NEST + + + +## Prefer Types over Generics, Generics over Dyn Traits (M-DI-HIERARCHY) { #M-DI-HIERARCHY } + +To prevent patterns that don't compose, and design lock-in. +0.1 + +When asking for async dependencies, prefer concrete types over generics, and generics over `dyn Trait`. + +It is easy to accidentally deviate from this pattern when porting code from languages like C# that heavily rely on interfaces. +Consider you are porting a service called `Database` from C# to Rust and, inspired by the original `IDatabase` interface, you naively translate it into: + +```rust,ignore +trait Database { + async fn update_config(&self, file: PathBuf); + async fn store_object(&self, id: Id, obj: Object); + async fn load_object(&self, id: Id) -> Object; +} + +impl Database for MyDatabase { ... } + +// Intended to be used like this: +async fn start_service(b: Rc) { ... 
} +``` + +Apart from not feeling idiomatic, this approach precludes other Rust constructs that conflict with object safety, +can cause issues with asynchronous code, and exposes wrappers (compare [M-AVOID-WRAPPERS]). + +Instead, when more than one implementation is needed, this _design escalation ladder_ should be followed: + +If the other implementation is only concerned with providing a _sans-io_ implementation for testing, implement your type as an +enum, following [M-MOCKABLE-SYSCALLS] instead. + +If users are expected to provide custom implementations, you should introduce one or more traits, and implement them for your own types +_on top_ of your inherent functions. Each trait should be relatively narrow, e.g., `StoreObject`, `LoadObject`. If eventually a single +trait is needed it should be made a subtrait, e.g., `trait DataAccess: StoreObject + LoadObject {}`. + +Code working with these traits should ideally accept them as generic type parameters as long as their use does not contribute to significant nesting +(compare [M-ABSTRACTIONS-DONT-NEST]). + +```rust,ignore +// Good, generic does not have infectious impact, uses only most specific trait +async fn read_database(x: impl LoadObject) { ... } + +// Acceptable, unless further nesting makes this excessive. +struct MyService { + db: T, +} +``` + +Once generics become a nesting problem, `dyn Trait` can be considered. Even in this case, visible wrapping should be avoided, and custom wrappers should be preferred. + +```rust +# use std::sync::Arc; +# trait DataAccess { +# fn foo(&self); +# } +// This allows you to expand or change `DynamicDataAccess` later. You can also +// implement `DataAccess` for `DynamicDataAccess` if needed, and use it with +// regular generic functions. 
+struct DynamicDataAccess(Arc<dyn DataAccess>); + +impl DynamicDataAccess { + fn new<T: DataAccess + 'static>(db: T) -> Self { + Self(Arc::new(db)) + } +} + +struct MyService { + db: DynamicDataAccess, +} +``` + +The generic wrapper can also be combined with the enum approach from [M-MOCKABLE-SYSCALLS]: + +```rust,ignore +enum DataAccess { + MyDatabase(MyDatabase), + Mock(mock::MockCtrl), + Dynamic(DynamicDataAccess) +} + +async fn read_database(x: &DataAccess) { ... } +``` + +[M-AVOID-WRAPPERS]: ./#M-AVOID-WRAPPERS +[M-MOCKABLE-SYSCALLS]: ../resilience/#M-MOCKABLE-SYSCALLS +[M-ABSTRACTIONS-DONT-NEST]: ./#M-ABSTRACTIONS-DONT-NEST + + + +## Errors are Canonical Structs (M-ERRORS-CANONICAL-STRUCTS) { #M-ERRORS-CANONICAL-STRUCTS } + +To harmonize the behavior of error types, and provide consistent error handling. +1.0 + +Errors should be a situation-specific `struct` that contains a [`Backtrace`](https://doc.rust-lang.org/stable/std/backtrace/struct.Backtrace.html), +a possible upstream error cause, and helper methods. + +Simple crates usually expose a single error type `Error`, complex crates may expose multiple types, for example +`AccessError` and `ConfigurationError`. Error types should provide helper methods for additional information that allows callers to handle the error. 
+ +A simple error might look like so: + +```rust +# use std::backtrace::Backtrace; +# use std::fmt::Display; +# use std::fmt::Formatter; +pub struct ConfigurationError { + backtrace: Backtrace, +} + +impl ConfigurationError { + pub(crate) fn new() -> Self { + Self { backtrace: Backtrace::capture() } + } +} + +// Impl Debug + Display +``` + +Where appropriate, error types should provide contextual error information, for example: + +```rust,ignore +# use std::backtrace::Backtrace; +# #[derive(Debug)] +# pub struct ConfigurationError { +# backtrace: Backtrace, +# } +impl ConfigurationError { + pub fn config_file(&self) -> &Path { } +} +``` + +If your API does mixed operations, or depends on various upstream libraries, store an `ErrorKind`. +Error kinds, and more generally enum-based errors, should not be used to avoid creating separate public error types when there is otherwise no error overlap: + +```rust, ignore +// Prefer this +fn download_iso() -> Result<(), DownloadError> {} +fn start_vm() -> Result<(), VmError> {} + +// Over that +fn download_iso() -> Result<(), GlobalEverythingErrorEnum> {} +fn start_vm() -> Result<(), GlobalEverythingErrorEnum> {} + +// However, not every function warrants a new error type. Errors +// should be general enough to be reused. +fn parse_json() -> Result<(), ParseError> {} +fn parse_toml() -> Result<(), ParseError> {} +``` + +If you do use an inner `ErrorKind`, that enum should not be exposed directly for future-proofing reasons, +as otherwise you would expose your callers to _all_ possible failure modes, even the ones you consider internal +and unhandleable. 
Instead, expose various `is_xxx()` methods as shown below: + +```rust +# use std::backtrace::Backtrace; +# use std::fmt::Display; +# use std::fmt::Formatter; +#[derive(Debug)] +pub(crate) enum ErrorKind { + Io(std::io::Error), + Protocol +} + +#[derive(Debug)] +pub struct HttpError { + kind: ErrorKind, + backtrace: Backtrace, +} + +impl HttpError { + pub fn is_io(&self) -> bool { matches!(self.kind, ErrorKind::Io(_)) } + pub fn is_protocol(&self) -> bool { matches!(self.kind, ErrorKind::Protocol) } +} +``` + +Most upstream errors don't provide a backtrace. You should capture one when creating an `Error` instance, either via one of +your `Error::new()` flavors, or when implementing `From for Error {}`. + +Error structs must properly implement `Display` that renders as follows: + +```rust,ignore +impl Display for MyError { + // Print a summary sentence what happened. + // Print `self.backtrace`. + // Print any additional upstream 'cause' information you might have. +# fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +# todo!() +# } +} +``` + +Errors must also implement `std::error::Error`: + +```rust,ignore +impl std::error::Error for MyError { } +``` + +Lastly, if you happen to emit lots of errors from your crate, consider creating a private `bail!()` helper macro to simplify error instantiation. + +> ### When You Get Backtraces +> +> Backtraces are an invaluable debug tool in complex or async code, since errors might _travel_ far through a callstack before being surfaced. +> +> That said, they are a _development_ tool, not a _runtime_ diagnostic, and by default `Backtrace::capture()` will **not** capture +> backtraces, as they have a large overhead, e.g., 4μs per capture on the author's PC. +> +> Instead, Rust evaluates a [set of environment variables](https://doc.rust-lang.org/stable/std/backtrace/index.html#environment-variables), such as +> `RUST_BACKTRACE`, and only walks the call frame when explicitly asked. 
Otherwise it captures an empty trace, at the cost of only a few CPU instructions. + + + +## Essential Functionality Should be Inherent (M-ESSENTIAL-FN-INHERENT) { #M-ESSENTIAL-FN-INHERENT } + +To make essential functionality easily discoverable. +1.0 + +Types should implement core functionality inherently. Trait implementations should forward to inherent functions, and not replace them. Instead of this + +```rust +# trait Download { +# fn download_file(&self, url: impl AsRef); +# } +struct HttpClient {} + +// Offloading essential functionality into traits means users +// will have to figure out what other traits to `use` to +// actually use this type. +impl Download for HttpClient { + fn download_file(&self, url: impl AsRef) { + // ... logic to download a file + } +} +``` + +do this: + +```rust +# trait Download { +# fn download_file(&self, url: impl AsRef); +# } +struct HttpClient {} + +impl HttpClient { + fn download_file(&self, url: impl AsRef) { + // ... logic to download a file + } +} + +// Forward calls to inherent impls. `HttpClient` can be used +impl Download for HttpClient { + fn download_file(&self, url: impl AsRef) { + Self::download_file(self, url) + } +} +``` + + + +## Accept `impl AsRef<>` Where Feasible (M-IMPL-ASREF) { #M-IMPL-ASREF } + +To give users flexibility calling in with their own types. +1.0 + +In **function** signatures, accept `impl AsRef` for types that have a +[clear reference hierarchy](https://doc.rust-lang.org/stable/std/convert/trait.AsRef.html#implementors), where you +do not need to take ownership, or where object creation is relatively cheap. + +| Instead of ... | accept ... | +| --- | --- | +| `&str`, `String` | `impl AsRef` | +| `&Path`, `PathBuf` | `impl AsRef` | +| `&[u8]`, `Vec` | `impl AsRef<[u8]>` | + +```rust,ignore +# use std::path::Path; +// Definitely use `AsRef`, the function does not need ownership. 
+fn print(x: impl AsRef<str>) {} +fn read_file(x: impl AsRef<Path>) {} +fn send_network(x: impl AsRef<[u8]>) {} + +// Further analysis needed. In these cases the function wants +// ownership of some `String` or `Vec<u8>`. If those are +// "low frequency, low volume" functions `AsRef` has better ergonomics, +// otherwise accepting a `String` or `Vec<u8>` will have better +// performance. +fn new_instance(x: impl AsRef<str>) -> HoldsString {} +fn send_to_other_thread(x: impl AsRef<[u8]>) {} +``` + +In contrast, **types** should generally not be infected by these bounds: + +```rust,ignore +// Generally not ok. There might be exceptions for performance +// reasons, but those should not be user visible. +struct User<T: AsRef<str>> { + name: T +} + +// Better +struct User { + name: String +} +``` + + + +## Accept `impl 'IO'` Where Feasible ('Sans IO') (M-IMPL-IO) { #M-IMPL-IO } + +To untangle business logic from I/O logic, and have N*M composability. +0.1 + +Functions and types that only need to perform one-shot I/O during initialization should be written "[sans-io](https://www.firezone.dev/blog/sans-io)", +and accept some `impl T`, where `T` is the appropriate I/O trait, effectively outsourcing I/O work to another type: + +```rust,ignore +// Bad, caller must provide a File to parse the given data. If this +// data comes from the network, it'd have to be written to disk first. +fn parse_data(file: File) {} +``` + +```rust +// Much better, accepts +// - Files, +// - TcpStreams, +// - Stdin, +// - &[u8], +// - UnixStreams, +// ... and many more. +fn parse_data(data: impl std::io::Read) {} +``` + +Synchronous functions should use [`std::io::Read`](https://doc.rust-lang.org/std/io/trait.Read.html) and +[`std::io::Write`](https://doc.rust-lang.org/std/io/trait.Write.html). Asynchronous _functions_ targeting more than one runtime should use +[`futures::io::AsyncRead`](https://docs.rs/futures/latest/futures/io/trait.AsyncRead.html) and similar. 
+_Types_ that need to perform runtime-specific, continuous I/O should follow [M-RUNTIME-ABSTRACTED] instead. + +[M-RUNTIME-ABSTRACTED]: ./#M-RUNTIME-ABSTRACTED + + + +## Accept `impl RangeBounds<>` Where Feasible (M-IMPL-RANGEBOUNDS) { #M-IMPL-RANGEBOUNDS } + +To give users flexibility and clarity when specifying ranges. +1.0 + +Functions that accept a range of numbers must use a `Range` type or trait over hand-rolled parameters: + +```rust,ignore +// Bad +fn select_range(low: usize, high: usize) {} +fn select_range(range: (usize, usize)) {} +``` + +In addition, functions that can work on arbitrary ranges, should accept `impl RangeBounds` rather than `Range`. + +```rust +# use std::ops::{RangeBounds, Range}; +// Callers must call with `select_range(1..3)` +fn select_range(r: Range) {} + +// Callers may call as +// select_any(1..3) +// select_any(1..) +// select_any(..) +fn select_any(r: impl RangeBounds) {} +``` + + + +## Complex Type Construction has Builders (M-INIT-BUILDER) { #M-INIT-BUILDER } + +To future-proof type construction in complex scenarios. +0.3 + +Types that could support 4 or more arbitrary initialization permutations should provide builders. In other words, types with up to +2 optional initialization parameters can be constructed via inherent methods: + +```rust +# struct A; +# struct B; +struct Foo; + +// Supports 2 optional construction parameters, inherent methods ok. +impl Foo { + pub fn new() -> Self { Self } + pub fn with_a(a: A) -> Self { Self } + pub fn with_b(b: B) -> Self { Self } + pub fn with_a_b(a: A, b: B) -> Self { Self } +} +``` + +Beyond that, types should provide a builder: + +```rust, ignore +# struct A; +# struct B; +# struct C; +# struct Foo; +# struct FooBuilder; +impl Foo { + pub fn new() -> Self { ... } + pub fn builder() -> FooBuilder { ... } +} + +impl FooBuilder { + pub fn a(mut self, a: A) -> Self { ... } + pub fn b(mut self, b: B) -> Self { ... } + pub fn c(mut self, c: C) -> Self { ... 
} + pub fn build(self) -> Foo { ... } +} + +``` + +The proper name for a builder that builds `Foo` is `FooBuilder`. Its methods must be chainable, with the final method called +`.build()`. The buildable struct must have a shortcut `Foo::builder()`, while the builder itself should _not_ have a public +`FooBuilder::new()`. Builder methods that set a value `x` are called `x()`, not `set_x()` or similar. + +### Builders and Required Parameters + +Required parameters should be passed when creating the builder, not as setter methods. For builders with multiple required +parameters, encapsulate them into a parameters struct and use the `deps: impl Into` pattern to provide flexibility: + +> **Note:** A dedicated deps struct is not required if the builder has no required parameters or only a single simple parameter. However, +> for backward compatibility and API evolution, it's preferable to use a dedicated struct for deps even in simple cases, as it makes it +> easier to add new required parameters in the future without breaking existing code. + +```rust, ignore +#[derive(Debug, Clone)] +pub struct FooDeps { + pub logger: Logger, + pub config: Config, +} + +impl From<(Logger, Config)> for FooDeps { ... } +impl From for FooDeps { ... } // In case we could use default Config instance + +impl Foo { + pub fn builder(deps: impl Into) -> FooBuilder { ... 
} +} +``` + +This pattern allows for convenient usage: + +- `Foo::builder(logger)` - when only the logger is needed +- `Foo::builder((logger, config))` - when both parameters are needed +- `Foo::builder(FooDeps { logger, config })` - explicit struct construction + +Alternatively, you can use [`fundle`](https://docs.rs/fundle) to simplify the creation of `FooDeps`: + +```rust, ignore +#[derive(Debug, Clone)] +#[fundle::deps] +pub struct FooDeps { + pub logger: Logger, + pub config: Config, +} +``` + +This pattern enables "dependency injection", see [these docs](https://docs.rs/fundle/latest/fundle/attr.deps.html) for more details. + +### Runtime-Specific Builders + +For types that are runtime-specific or require runtime-specific configuration, provide dedicated builder creation methods that accept the appropriate runtime parameters: + +```rust, ignore +#[cfg(feature="smol")] +#[derive(Debug, Clone)] +pub struct SmolDeps { + pub clock: Clock, + pub io_context: Context, +} + +#[cfg(feature="tokio")] +#[derive(Debug, Clone)] +pub struct TokioDeps { + pub clock: Clock, +} + +impl Foo { + #[cfg(feature="smol")] + pub fn builder_smol(deps: impl Into) -> FooBuilder { ... } + + #[cfg(feature="tokio")] + pub fn builder_tokio(deps: impl Into) -> FooBuilder { ... } +} +``` + +This approach ensures type safety at compile time and makes the runtime dependency explicit in the API surface. The resulting +builder methods follow the pattern `builder_{runtime}(deps)` where `{runtime}` indicates the specific runtime or execution environment. + +### Further Reading + +- [Builder pattern in Rust: self vs. &mut self, and method vs. associated function](https://users.rust-lang.org/t/builder-pattern-in-rust-self-vs-mut-self-and-method-vs-associated-function/72892) +- [fundle](https://docs.rs/fundle) + + + +## Complex Type Initialization Hierarchies are Cascaded (M-INIT-CASCADED) { #M-INIT-CASCADED } + +To prevent misuse and accidental parameter mix ups. 
+1.0 + +Types that require 4+ parameters should cascade their initialization via helper types. + +```rust, ignore +# struct Deposit; +impl Deposit { + // Easy to confuse parameters and signature generally unwieldy. + pub fn new(bank_name: &str, customer_name: &str, currency_name: &str, currency_amount: u64) -> Self { } +} +``` + +Instead of providing a long parameter list, parameters should be grouped semantically. When applying this guideline, +also check if [C-NEWTYPE] is applicable: + +```rust, ignore +# struct Deposit; +# struct Account; +# struct Currency +impl Deposit { + // Better, signature cleaner + pub fn new(account: Account, amount: Currency) -> Self { } +} + +impl Account { + pub fn new_ok(bank: &str, customer: &str) -> Self { } + pub fn new_even_better(bank: Bank, customer: Customer) -> Self { } +} +``` + +[C-NEWTYPE]: https://rust-lang.github.io/api-guidelines/type-safety.html#c-newtype + + + +## Services are Clone (M-SERVICES-CLONE) { #M-SERVICES-CLONE } + +To avoid composability issues when sharing common services. +1.0 + +Heavyweight _service_ types and 'thread singletons' should implement shared-ownership `Clone` semantics, including any type you expect to be used from your `Application::init`. + +Per thread, users should essentially be able to create a single resource handler instance, and have it reused by other handlers on the same thread: + +```rust,ignore +impl ThreadLocal for MyThreadState { + fn init(...) -> Self { + + // Create common service instance possibly used by many. + let common = ServiceCommon::new(); + + // Users can freely pass `common` here multiple times + let service_1 = ServiceA::new(&common); + let service_2 = ServiceA::new(&common); + + Self { ... 
} + } +} +``` + +Services then simply clone their dependency and store a new _handle_, as if `ServiceCommon` were a shared-ownership smart pointer: + +```rust,ignore +impl ServiceA { + pub fn new(common: &ServiceCommon) -> Self { + // If we only need to access `common` from `new` we don't have + // to store it. Otherwise, make a clone we store in `Self`. + let common = common.clone(); + } +} +``` + +Under the hood this `Clone` should **not** create a fat copy of the entire service. Instead, it should follow the `Arc` pattern: + +```rust, ignore +// Actual service containing core logic and data. +struct ServiceCommonInner {} + +#[derive(Clone)] +pub ServiceCommon { + inner: Arc +} + +impl ServiceCommon { + pub fn new() { + Self { inner: Arc::new(ServiceCommonInner::new()) } + } + + // Method forwards ... + pub fn foo(&self) { self.inner.foo() } + pub fn bar(&self) { self.inner.bar() } +} +``` + + + +## Abstractions Don't Visibly Nest (M-SIMPLE-ABSTRACTIONS) { #M-SIMPLE-ABSTRACTIONS } + +To prevent cognitive load and a bad out of the box UX. +0.1 + +When designing your public types and primary API surface, avoid exposing nested or complex parametrized types to your users. + +While powerful, type parameters introduce a cognitive load, even more so if the involved traits are crate-specific. Type parameters +become infectious to user code holding on to these types in their fields, often come with complex trait hierarchies on their own, and +might cause confusing error messages. + +From the perspective of a user authoring `Foo`, where the other structs come from your crate: + +```rust,ignore +struct Foo { + service: Service // Great + service: Service // Acceptable + service: Service> // Bad + + list: List> // Great, `List` is simple container, + // other types user provided. + + matrix: Matrix4x4 // Great + matrix: Matrix4x4 // Still ok + matrix: Matrix, Const<4>, ArrayStorage> // ?!? 
+} +``` + +_Visible_ type parameters should be avoided in _service-like_ types (i.e., types mainly instantiated once per thread / application that are often passed +as dependencies), in particular if the nestee originates from the same crate as the service. + +Containers, smart pointers and similar data structures obviously must expose a type parameter, e.g., `List<T>` above. Even then, care should +be taken to limit the number and nesting of parameters. + +To decide whether type parameter nesting should be avoided, consider these factors: + +- Will the type be **named** by your users? + - Service-level types are always expected to be named (e.g., `Library`), + - Utility types, such as the many [`std::iter`](https://doc.rust-lang.org/stable/std/iter/index.html) types like `Chain`, `Cloned`, `Cycle`, are not + expected to be named. +- Does the type primarily compose with non-user types? +- Do the used type parameters have complex bounds? +- Do the used type parameters affect inference in other types or functions? + +The more of these factors apply, the bigger the cognitive burden. + +As a rule of thumb, primary service API types should not nest _of their own volition_, and if they do, only 1 level deep. In other words, these +APIs should not require users having to deal with a `Foo<Bar<FooBar>>`. However, if `Foo<T>` users want to bring their own `A<B<C>>` as `T` they +should be free to do so. + +> ### Type Magic for Better UX? +> +> The guideline above is written with 'bread-and-butter' types in mind you might create during _normal_ development activity. Its intention is to +> reduce friction users encounter when working with your code. +> +> However, when designing API patterns and ecosystems at large, there might be valid reasons to introduce intricate type magic to overall _lower_ +> the cognitive friction involved, [Bevy's ECS](https://docs.rs/bevy_ecs/latest/bevy_ecs/) or +> [Axum's request handlers](https://docs.rs/axum/latest/axum/handler/trait.Handler.html) come to mind. 
+> +> The threshold where this pays off is high though. If there is any doubt about the utility of your creative use of generics, your users might be +> better off without them. + + +--- diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..fb2b808 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,68 @@ +--- +name: Bug Report +about: Report a bug to help us improve Marathon +title: '[BUG] ' +labels: bug +assignees: '' +--- + +## Bug Description + +A clear and concise description of what the bug is. + +## Minimal, Complete, Verifiable Example (MCVE) + +Please provide the **smallest possible code example** that demonstrates the bug. This helps us reproduce and fix the issue faster. + +### Minimal Code Example + +```rust +// Paste your minimal reproducible code here +// Remove anything not necessary to demonstrate the bug +``` + +### Steps to Reproduce + +1. +2. +3. +4. + +### Expected Behavior + +What you expected to happen: + +### Actual Behavior + +What actually happened: + +## Environment + +- **OS**: [e.g., macOS 15.0, iOS 18.2] +- **Rust Version**: [e.g., 1.85.0 - run `rustc --version`] +- **Marathon Version/Commit**: [e.g., v0.1.0 or commit hash] +- **Platform**: [Desktop / iOS Simulator / iOS Device] + +## Logs/Stack Traces + +If applicable, paste any error messages or stack traces here: + +``` +paste logs here +``` + +## Screenshots/Videos + +If applicable, add screenshots or videos to help explain the problem. + +## Additional Context + +Add any other context about the problem here. For example: +- Does it happen every time or intermittently? +- Did this work in a previous version? +- Are you running multiple instances? +- Any relevant configuration or network setup? + +## Possible Solution + +If you have ideas about what might be causing the issue or how to fix it, please share them here. 
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..34e9fc3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: true +contact_links: + - name: Question or Discussion + url: https://github.com/r3t-studios/marathon/discussions + about: Ask questions or discuss ideas with the community + - name: Security Vulnerability + url: https://github.com/r3t-studios/marathon/security/policy + about: Please report security issues privately (see SECURITY.md) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..d949d93 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,72 @@ +--- +name: Feature Request +about: Suggest a new feature or enhancement for Marathon +title: '[FEATURE] ' +labels: enhancement +assignees: '' +--- + +## Problem Statement + +**Is your feature request related to a problem? Please describe.** + +A clear and concise description of what the problem is. For example: +- "I'm always frustrated when..." +- "It's difficult to..." +- "Users need to be able to..." + +## Feature Request (Given-When-Then Format) + +Please describe your feature request using the Given-When-Then format to make the behavior clear: + +### Scenario 1: [Brief scenario name] + +**Given** [initial context or preconditions] +**When** [specific action or event] +**Then** [expected outcome] + +**Example:** +- **Given** I am editing a collaborative document with 3 other peers +- **When** I lose network connectivity for 5 minutes +- **Then** my local changes should be preserved and sync automatically when I reconnect + +### Scenario 2: [Additional scenario if needed] + +**Given** [initial context] +**When** [action] +**Then** [outcome] + +## Alternatives Considered + +**Describe alternatives you've considered.** + +Have you thought of other ways to solve this problem? 
What are the pros and cons of different approaches? + +## Technical Considerations + +**Do you have thoughts on implementation?** + +If you have ideas about how this could be implemented technically, share them here. For example: +- Which modules might be affected +- Potential challenges or dependencies +- Performance implications +- Breaking changes required + +## Additional Context + +Add any other context, mockups, screenshots, or examples from other projects that illustrate the feature. + +## Priority/Impact + +How important is this feature to you or your use case? +- [ ] Critical - blocking current work +- [ ] High - would significantly improve workflow +- [ ] Medium - nice to have +- [ ] Low - minor improvement + +## Willingness to Contribute + +- [ ] I'm willing to implement this feature +- [ ] I can help test this feature +- [ ] I can help with documentation +- [ ] I'm just suggesting the idea diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..dc89f0b --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,117 @@ +## Description + + + +## Related Issues + + +Fixes # +Relates to # + +## Type of Change + + + +- [ ] Bug fix (non-breaking change that fixes an issue) +- [ ] New feature (non-breaking change that adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update +- [ ] Refactoring (no functional changes) +- [ ] Performance improvement +- [ ] Test coverage improvement + +## Changes Made + + + +- +- +- + +## Testing Performed + + + +- [ ] All existing tests pass (`cargo nextest run`) +- [ ] Added new tests for new functionality +- [ ] Tested manually on desktop +- [ ] Tested manually on iOS (if applicable) +- [ ] Tested with multiple instances +- [ ] Tested edge cases and error conditions + +### Test Details + + + +**Desktop:** +- + +**iOS:** (if applicable) +- + +**Multi-instance:** (if 
applicable) +- + +## Documentation + + + +- [ ] Updated relevant documentation in `/docs` +- [ ] Updated README.md (if public API changed) +- [ ] Added doc comments to new public APIs +- [ ] Updated CHANGELOG.md + +## Code Quality + + + +- [ ] Code follows project style guidelines +- [ ] Ran `cargo +nightly fmt` +- [ ] Ran `cargo clippy` and addressed warnings +- [ ] No new compiler warnings +- [ ] Added meaningful variable/function names + +## AI Usage + + + + +- [ ] No AI assistance used +- [ ] Used AI tools (brief description below) + + + + + + +## Breaking Changes + + + +**Does this PR introduce breaking changes?** +- [ ] No +- [ ] Yes (describe below) + + + + + +## Screenshots/Videos + + + +## Checklist + + + +- [ ] My code follows the project's coding standards +- [ ] I have tested my changes thoroughly +- [ ] I have updated relevant documentation +- [ ] I have added tests that prove my fix/feature works +- [ ] All tests pass locally +- [ ] I have read and followed the [CONTRIBUTING.md](../CONTRIBUTING.md) guidelines +- [ ] I understand and accept the [AI_POLICY.md](../AI_POLICY.md) + +## Additional Notes + + diff --git a/AI_POLICY.md b/AI_POLICY.md new file mode 100644 index 0000000..7df94af --- /dev/null +++ b/AI_POLICY.md @@ -0,0 +1,136 @@ +# AI and Machine Learning Usage Policy + +## Core Principle: Human Accountability + +Every contribution to Marathon must have a human who: +- **Made the decisions** about what to build and how to build it +- **Understands the code, design, or content** they're submitting +- **Takes responsibility** for the outcome and any issues that arise +- **Can be held accountable** for the contribution + +AI and ML tools are welcome as assistants, but they cannot: +- Make architectural or design decisions +- Choose between technical trade-offs +- Take responsibility for bugs or issues +- Be credited as contributors + +## Context: Pragmatism at a Small Scale + +We're a tiny studio with limited resources. 
We can't afford large teams, professional translators, or extensive QA departments. **Machine learning tools help us punch above our weight class** - they let us move faster, support more languages, and catch bugs we'd otherwise miss. + +We use these tools not to replace human judgment, but to stretch our small team's capacity. This is about working **smart with what we have**, not taking shortcuts that compromise quality or accountability. + +We're using ethical and responsible machine learning as much as possible while ensuring that we are not erasing human contributions while we are resource-constrained. + +## The Blurry Line + +**Here's the honest truth:** The line between "generative AI" and "assistive AI" is fuzzy and constantly shifting. Is IDE autocomplete assistive? What about when it suggests entire functions? What about pair-programming with an LLM? + +**We don't have perfect answers.** What we do have is a principle: **a human must make the decisions and be accountable.** + +If you're unsure whether your use of AI crosses a line, ask yourself: +- **"Do I understand what this code does and why?"** +- **"Did I decide this was the right approach, or did the AI?"** +- **"Can I maintain and debug this?"** +- **"Am I comfortable being accountable for this?"** + +If you answer "yes" to those questions, you're probably fine. If you're still uncertain, open a discussion - we'd rather have the conversation than enforce rigid rules that don't match reality. 
+ +## What This Looks Like in Practice + +### Acceptable Use + +**"I used Claude/Copilot to help write this function, I reviewed it, I understand it, and I'm responsible for it."** +- You directed the tool +- You reviewed and understood the output +- You made the decision to use this approach +- You take responsibility for the result + +**"I directed an LLM to implement my design, then verified it meets requirements."** +- You designed the solution +- You used AI to speed up implementation +- You verified correctness +- You own the outcome + +**"I used machine translation as a starting point, then reviewed and corrected the output."** +- You acknowledge the limitations of automated translation +- You applied human judgment to the result +- You ensure accuracy and appropriateness + +### Not Acceptable + +**"Claude wrote this, I pasted it in, seems fine."** +- No understanding of the code +- No verification of correctness +- Cannot maintain or debug +- Cannot explain design decisions + +**"I asked an LLM what architecture to use and implemented its suggestion."** +- The AI made the architectural decision +- No human judgment about trade-offs +- No accountability for the choice + +**"I'm submitting this AI-generated documentation without reviewing it."** +- No verification of accuracy +- No human oversight +- Cannot vouch for quality + +## Why This Matters + +Marathon itself was largely written with AI assistance under human direction. **That's fine!** What matters is: + +1. **A human made every architectural decision** +2. **A human is accountable for every line of code** +3. **A human can explain why things work the way they do** +4. **Humans take credit AND responsibility** + +Think of AI like a compiler, a library, or a really capable intern - it's a tool that amplifies human capability, but **the human is always the one making decisions and being accountable**. + +## For Contributors + +We don't care what tools you use to be productive. 
We care that: +- **You made the decisions** (not the AI) +- **You understand what you're submitting** +- **You're accountable** for the contribution +- **You can maintain it** if issues arise + +Use whatever tools help you work effectively, but you must be able to answer "why did you make this choice?" with human reasoning, not "the AI suggested it." + +### When Contributing + +You don't need to disclose every time you use autocomplete or ask an LLM a question. We trust you to: +- Use tools responsibly +- Understand your contributions +- Take ownership of your work + +If you're doing something novel or pushing boundaries with AI assistance, mentioning it in your PR is welcome - it helps us all learn and navigate this space together. + +## What We Use + +For transparency, here's where Marathon currently uses machine learning: + +- **Development assistance** - IDE tools, code completion, pair programming with LLMs +- **Translation tooling** - Machine translation for internationalization (human-reviewed) +- **Performance analysis** - Automated profiling and optimization suggestions +- **Code review assistance** - Static analysis and potential bug detection +- **Documentation help** - Grammar checking, clarity improvements, translation + +In all cases, humans review, approve, and take responsibility for the output. + +## The Bottom Line + +**Machines can't be held accountable, so humans must make all decisions.** + +Use AI tools to help you work faster and smarter, but you must understand and be accountable for what you contribute. When in doubt, ask yourself: + +**"Can a machine be blamed if this breaks?"** + +If yes, you've crossed the line. + +## Questions or Concerns? + +This policy will evolve as we learn more about working effectively with AI tools. If you have questions, concerns, or suggestions, please open a discussion. We're figuring this out together. + +--- + +*This policy reflects our values as of February 2026. 
As technology and our understanding evolve, so will this document.* diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..5a85634 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,359 @@ +# Marathon Architecture + +This document provides a high-level overview of Marathon's architecture to help contributors understand the system's design and organization. + +## Table of Contents + +- [Overview](#overview) +- [Core Principles](#core-principles) +- [System Architecture](#system-architecture) +- [Crate Organization](#crate-organization) +- [Key Components](#key-components) +- [Data Flow](#data-flow) +- [Technology Decisions](#technology-decisions) +- [Design Constraints](#design-constraints) + +## Overview + +Marathon is a **peer-to-peer game engine development kit** built on conflict-free replicated data types (CRDTs). It enables developers to build multiplayer games where players can interact with shared game state in real-time, even across network partitions, with automatic reconciliation. + +**Key Characteristics:** +- **Decentralized** - No central game server required, all players are equal peers +- **Offline-first** - Gameplay continues during network partitions +- **Eventually consistent** - All players converge to the same game state +- **Real-time** - Player actions propagate with minimal latency +- **Persistent** - Game state survives application restarts + +## Core Principles + +1. **CRDTs for Consistency** - Use mathematically proven data structures that guarantee eventual consistency for multiplayer game state +2. **Bevy ECS First** - Build on Bevy's Entity Component System for game development flexibility +3. **Zero Trust Networking** - Assume peers may be malicious (future work for competitive games) +4. **Separation of Concerns** - Clear boundaries between networking, persistence, and game logic +5. 
**Performance Matters** - Optimize for low latency and high throughput suitable for real-time games + +## System Architecture + +```mermaid +graph TB + subgraph App["Game Layer"] + Demo[Demo Game / Your Game] + Actions[Game Actions] + Selection[Entity Selection] + Input[Input Handling] + Render[Rendering] + end + + subgraph Core["libmarathon Core"] + Net[Networking
• CRDT Sync
• Gossip
• Sessions
• Op Apply] + Engine[Engine Core
• Event Loop
• Commands
• Discovery
• Bridge] + Persist[Persistence
• SQLite
• Type Registry
• Migrations
• Metrics] + end + + subgraph Foundation["Foundation Layer"] + Bevy[Bevy ECS
• Entities
• Components
• Systems] + Iroh[iroh P2P
• QUIC
• Gossip
• Discovery] + end + + Demo --> Actions + Demo --> Selection + Demo --> Input + Demo --> Render + + Actions --> Engine + Selection --> Engine + Input --> Engine + Render --> Engine + + Engine --> Net + Engine --> Persist + Net --> Persist + + Net --> Iroh + Engine --> Bevy + Persist --> Bevy +``` + +## Crate Organization + +Marathon is organized as a Rust workspace with four crates: + +### `libmarathon` (Core Library) + +**Purpose**: The heart of Marathon, providing networking, persistence, and CRDT synchronization. + +**Key Modules:** +``` +libmarathon/ +├── networking/ # P2P networking and CRDT sync +│ ├── crdt/ # CRDT implementations (OR-Set, RGA, LWW) +│ ├── operations/ # Network operations and vector clocks +│ ├── gossip/ # Gossip protocol bridge to iroh +│ ├── session/ # Session management +│ └── entity_map/ # UUID ↔ Entity mapping +│ +├── persistence/ # SQLite-backed state persistence +│ ├── database/ # SQLite connection and WAL +│ ├── registry/ # Type registry for reflection +│ └── health/ # Health checks and metrics +│ +├── engine/ # Core engine logic +│ ├── networking_manager/ # Network event loop +│ ├── commands/ # Bevy commands +│ └── game_actions/ # User action handling +│ +├── debug_ui/ # egui debug interface +├── render/ # Vendored Bevy render pipeline +├── transform/ # Vendored transform with rkyv +└── platform/ # Platform-specific code (iOS/desktop) +``` + +### `app` (Demo Game) + +**Purpose**: Demonstrates Marathon capabilities with a simple multiplayer cube game. + +**Key Files:** +- `main.rs` - Entry point with CLI argument handling +- `engine_bridge.rs` - Connects Bevy game to Marathon engine +- `cube.rs` - Demo game entity implementation +- `session.rs` - Multiplayer session lifecycle management +- `input/` - Input handling (keyboard, touch, Apple Pencil) +- `rendering/` - Rendering setup and camera + +### `macros` (Procedural Macros) + +**Purpose**: Code generation for serialization and deserialization. 
+ +Built on Bevy's macro infrastructure for consistency with the ecosystem. + +### `xtask` (Build Automation) + +**Purpose**: Automate iOS build and deployment using the cargo-xtask pattern. + +**Commands:** +- `ios-build` - Build for iOS simulator/device +- `ios-deploy` - Deploy to connected device +- `ios-run` - Build and run on simulator + +## Key Components + +### 1. CRDT Synchronization Layer + +**Location**: `libmarathon/src/networking/` + +**Purpose**: Implements the CRDT-based synchronization protocol. + +**Key Concepts:** +- **Operations** - Immutable change events (Create, Update, Delete) +- **Vector Clocks** - Track causality across peers +- **OR-Sets** - Observed-Remove Sets for entity membership +- **RGA** - Replicated Growable Array for ordered sequences +- **LWW** - Last-Write-Wins for simple values + +**Protocol Flow:** + +```mermaid +sequenceDiagram + participant A as Peer A + participant G as Gossip Network + participant B as Peer B + + A->>A: Generate Op
(with vector clock) + A->>G: Broadcast Op + G->>B: Deliver Op + B->>B: Apply Op
(update vector clock) + B->>G: ACK + G->>A: ACK +``` + +See [RFC 0001](docs/rfcs/0001-crdt-gossip-sync.md) for detailed protocol specification. + +### 2. Persistence Layer + +**Location**: `libmarathon/src/persistence/` + +**Purpose**: Persist game state to SQLite with minimal overhead. + +**Architecture**: Three-tier system + +```mermaid +graph TD + A[In-Memory State
Bevy ECS - Dirty Tracking] -->|Batch writes
every N frames| B[Write Buffer
Async Batching] + B -->|Flush to disk| C[SQLite Database
WAL Mode] + + style A fill:#e1f5ff + style B fill:#fff4e1 + style C fill:#e8f5e9 +``` + +**Key Features:** +- **Automatic persistence** - Components marked with `Persisted` save automatically +- **Type registry** - Reflection-based serialization +- **WAL mode** - Write-Ahead Logging for crash safety +- **Migrations** - Schema versioning support + +See [RFC 0002](docs/rfcs/0002-persistence-strategy.md) for detailed design. + +### 3. Networking Manager + +**Location**: `libmarathon/src/engine/networking_manager.rs` + +**Purpose**: Bridge between Bevy and the iroh networking stack. + +**Responsibilities:** +- Manage peer connections and discovery +- Route operations to/from gossip network +- Maintain session state +- Handle join protocol for new peers + +### 4. Entity Mapping System + +**Location**: `libmarathon/src/networking/entity_map.rs` + +**Purpose**: Map between Bevy's local `Entity` IDs and global `UUID`s. + +**Why This Exists**: Bevy assigns local sequential entity IDs that differ across instances. We need stable UUIDs for networked entities that all peers agree on. + +```mermaid +graph LR + A[Bevy Entity
Local ID: 123] <-->|Bidirectional
Mapping| B[UUID
550e8400-....-446655440000] + + style A fill:#ffebee + style B fill:#e8f5e9 +``` + +### 5. Debug UI System + +**Location**: `libmarathon/src/debug_ui/` + +**Purpose**: Provide runtime inspection of internal state. + +Built with egui for immediate-mode GUI, integrated into Bevy's render pipeline. + +**Features:** +- View connected peers +- Inspect vector clocks +- Monitor operation log +- Check persistence metrics +- View entity mappings + +## Data Flow + +### Local Change Flow + +```mermaid +graph TD + A[User Input] --> B[Bevy System
e.g., move entity] + B --> C[Generate CRDT
Operation] + C --> D[Apply Operation
Locally] + D --> E[Broadcast via
Gossip] + D --> F[Mark Dirty for
Persistence] + + style A fill:#e3f2fd + style E fill:#fff3e0 + style F fill:#f3e5f5 +``` + +### Remote Change Flow + +```mermaid +graph TD + A[Receive Operation
from Gossip] --> B[Check Vector Clock
causality] + B --> C[Apply Operation
to ECS] + C --> D[Update Local
Vector Clock] + C --> E[Mark Dirty for
Persistence] + + style A fill:#fff3e0 + style C fill:#e8f5e9 + style E fill:#f3e5f5 +``` + +### Persistence Flow + +```mermaid +graph TD + A[Every N Frames] --> B[Identify Dirty
Entities] + B --> C[Serialize to
Write Buffer] + C --> D[Batch Write
to SQLite] + D --> E[Clear Dirty
Flags] + E --> A + + style A fill:#e8f5e9 + style D fill:#f3e5f5 +``` + +## Technology Decisions + +### Why Bevy? + +- **ECS architecture** maps perfectly to game development +- **Cross-platform** (desktop, mobile, web) +- **Active community** and ecosystem +- **Performance** through data-oriented design + +### Why iroh? + +- **QUIC-based** - Modern, efficient transport +- **NAT traversal** - Works behind firewalls +- **Gossip protocol** - Epidemic broadcast for multi-peer +- **Rust-native** - Zero-cost integration + +### Why SQLite? + +- **Embedded** - No server required +- **Battle-tested** - Reliable persistence +- **WAL mode** - Good write performance +- **Cross-platform** - Works everywhere + +### Why CRDTs? + +- **No central authority** - True P2P +- **Offline-first** - Work without connectivity +- **Provable consistency** - Mathematical guarantees +- **No conflict resolution UI** - Users don't see conflicts + +## Design Constraints + +### Current Limitations + +1. **No Authentication** - All peers are trusted (0.1.x) +2. **No Authorization** - All peers have full permissions +3. **No Encryption** - Beyond QUIC's transport security +4. **Limited Scalability** - Not tested beyond ~10 peers +5. 
**Desktop + iOS Only** - Web and other platforms planned + +### Performance Targets + +- **Operation latency**: < 50ms peer-to-peer +- **Persistence overhead**: < 5% frame time +- **Memory overhead**: < 10MB for typical session +- **Startup time**: < 2 seconds + +### Intentional Non-Goals + +- **Central server architecture** - Stay decentralized +- **Strong consistency** - Use eventual consistency +- **Traditional database** - Use CRDTs, not SQL queries +- **General-purpose engine** - Focus on collaboration + +## Related Documentation + +- [RFC 0001: CRDT Synchronization Protocol](docs/rfcs/0001-crdt-gossip-sync.md) +- [RFC 0002: Persistence Strategy](docs/rfcs/0002-persistence-strategy.md) +- [RFC 0003: Sync Abstraction](docs/rfcs/0003-sync-abstraction.md) +- [RFC 0004: Session Lifecycle](docs/rfcs/0004-session-lifecycle.md) +- [RFC 0005: Spatial Audio System](docs/rfcs/0005-spatial-audio-vendoring.md) +- [RFC 0006: Agent Simulation Architecture](docs/rfcs/0006-agent-simulation-architecture.md) + +## Questions? + +If you're working on Marathon and something isn't clear: + +1. Check the RFCs in `docs/rfcs/` +2. Search existing issues/discussions +3. Ask in GitHub Discussions +4. Reach out to maintainers + +--- + +*This architecture will evolve. When making significant architectural changes, consider updating this document or creating a new RFC.* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..bc11971 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,65 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +## [0.1.0] - 2026-02-06 + +### Added + +#### Core Features +- CRDT-based synchronization using OR-Sets, RGA, and Last-Write-Wins semantics +- Peer-to-peer networking built on iroh with QUIC transport +- Gossip-based message broadcasting for multi-peer coordination +- Offline-first architecture with automatic reconciliation +- SQLite-backed persistence with WAL mode +- Cross-platform support for macOS desktop and iOS + +#### Demo Application +- Replicated cube demo showcasing real-time collaboration +- Multiple instance support for local testing +- Apple Pencil input support on iPad +- Real-time cursor and selection synchronization +- Debug UI for inspecting internal state + +#### Infrastructure +- Bevy 0.17 ECS integration +- Zero-copy serialization with rkyv +- Automated iOS build tooling via xtask +- Comprehensive RFC documentation covering architecture decisions + +### Architecture + +- **Networking Layer**: CRDT sync protocol, entity mapping, vector clocks, session management +- **Persistence Layer**: Three-tier system (in-memory → write buffer → SQLite) +- **Engine Core**: Event loop, networking manager, peer discovery, game actions +- **Platform Support**: iOS and desktop with platform-specific input handling + +### Documentation + +- RFC 0001: CRDT Synchronization Protocol +- RFC 0002: Persistence Strategy +- RFC 0003: Sync Abstraction +- RFC 0004: Session Lifecycle +- RFC 0005: Spatial Audio System +- RFC 0006: Agent Simulation Architecture +- iOS deployment guide +- Estimation methodology documentation + +### Known Issues + +- API is unstable and subject to change +- Limited documentation for public APIs +- Performance optimizations still needed for large-scale collaboration +- iOS builds require manual Xcode configuration + +### Notes + +This is an early development release (version 0.x.y). The API is unstable and breaking changes are expected. Not recommended for production use. 
+ +[unreleased]: https://github.com/r3t-studios/marathon/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/r3t-studios/marathon/releases/tag/v0.1.0 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..8955d88 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,148 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Addressing and Repairing Harm + +If you are being harmed or notice that someone else is being harmed, or have any 
+other concerns, please contact the community leaders responsible for enforcement +at sienna@linux.com. All reports will be handled with discretion. + +We are committed to addressing harm in a manner that is respectful to victims +and survivors of violations of this Code of Conduct. When community leaders +receive a report of a possible violation, they will: + +1. **Acknowledge receipt** of the report +2. **Assess the situation** and gather necessary information +3. **Determine appropriate action** using the guidelines below +4. **Communicate with all parties** involved +5. **Take action** to address and repair harm +6. **Follow up** to ensure the situation is resolved + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+ +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Restorative Justice + +We believe in restorative justice and creating opportunities for those who have +violated the Code of Conduct to repair harm and reintegrate into the community +when appropriate. This may include: + +* Facilitated conversations between affected parties +* Public acknowledgment of harm and apology +* Education and learning opportunities +* Community service or contributions +* Gradual reintegration with monitoring + +The possibility of restoration depends on: +* The severity of the violation +* The willingness of the violator to acknowledge harm +* The consent and comfort of those harmed +* The assessment of community leaders + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 3.0, available at +[https://www.contributor-covenant.org/version/3/0/code_of_conduct.html][v3.0]. 
+ +The "Addressing and Repairing Harm" section is inspired by the restorative +justice approach outlined in Contributor Covenant 3.0. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v3.0]: https://www.contributor-covenant.org/version/3/0/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..685c875 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,343 @@ +# Contributing to Marathon + +Thank you for your interest in contributing to Marathon! We're excited to work with you. + +This document provides guidelines for contributing to the project. Following these guidelines helps maintain code quality and makes the review process smoother for everyone. + +## Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [Getting Started](#getting-started) +- [Development Environment Setup](#development-environment-setup) +- [How to Contribute](#how-to-contribute) +- [Coding Standards](#coding-standards) +- [Testing](#testing) +- [Pull Request Process](#pull-request-process) +- [Reporting Bugs](#reporting-bugs) +- [Suggesting Features](#suggesting-features) +- [AI Usage Policy](#ai-usage-policy) +- [Questions?](#questions) + +## Code of Conduct + +This project adheres to the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to the project maintainers. + +## Getting Started + +1. **Fork the repository** on GitHub +2. 
**Clone your fork** locally +3. **Set up your development environment** (see below) +4. **Create a branch** for your changes +5. **Make your changes** with clear commit messages +6. **Test your changes** thoroughly +7. **Submit a pull request** + +## Development Environment Setup + +### Prerequisites + +- **Rust** - a recent stable toolchain with 2024 edition support (install via [rustup](https://rustup.rs/)) +- **macOS** (for macOS desktop and iOS development) +- **Xcode** and iOS simulator (for iOS development) +- **Linux** (for Linux desktop development) +- **Windows** (for Windows desktop development) +- **Git** for version control + +### Initial Setup + +```bash +# Clone your fork (replace <your-username> with your GitHub username) +git clone https://github.com/<your-username>/marathon.git +cd marathon + +# Add upstream remote +git remote add upstream https://github.com/r3t-studios/marathon.git + +# Build the project +cargo build + +# Run tests +cargo test + +# Run the desktop demo +cargo run --package app +``` + +### iOS Development Setup + +For iOS development, see our detailed [iOS Deployment Guide](docs/ios-deployment.md). + +```bash +# Build for iOS simulator +cargo xtask ios-build + +# Run on simulator +cargo xtask ios-run +``` + +### Useful Commands + +```bash +# Check code without building +cargo check + +# Run clippy for linting +cargo clippy + +# Format code +cargo +nightly fmt + +# Run tests with output +cargo nextest run --no-capture + +# Build documentation +cargo doc --open +``` + +## How to Contribute + +### Types of Contributions + +We welcome many types of contributions: + +- **Bug fixes** - Fix issues and improve stability +- **Features** - Implement new functionality (discuss first in an issue) +- **Documentation** - Improve or add documentation +- **Examples** - Create new examples or demos +- **Tests** - Add test coverage +- **Performance** - Optimize existing code +- **Refactoring** - Improve code quality + +### Before You Start + +For **bug fixes and small improvements**, feel free to open a PR directly. 
+ +For **new features or significant changes**: +1. **Open an issue first** to discuss the proposal +2. Wait for maintainer feedback before investing significant time +3. Reference the issue in your PR + +This helps ensure your work aligns with the project's direction and avoids duplicate effort. + +## Coding Standards + +### Rust Style + +- Follow the [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/) +- Follow the [Pragmatic Rust Guidelines](https://microsoft.github.io/rust-guidelines/guidelines/index.html) +- Use `cargo +nightly fmt` to format code (run before committing) +- Address all `cargo clippy` warnings +- Use meaningful variable and function names +- Add doc comments (`///`) for public APIs + +### Code Organization + +- Keep modules focused and cohesive +- Prefer composition over inheritance +- Use Rust's type system to enforce invariants +- Avoid unnecessary `unsafe` code + +### Documentation + +- Add doc comments for all public types, traits, and functions +- Include examples in doc comments when helpful +- Update relevant documentation in `/docs` when making architectural changes +- Keep README.md in sync with current capabilities + +### Commit Messages + +Write clear, descriptive conventional commit messages: + +``` +Short summary (50 chars or less) + +More detailed explanation if needed. Wrap at 72 characters. 
+ +- Bullet points are fine +- Use present tense ("Add feature" not "Added feature") +- Reference issues and PRs with #123 +``` + +Good examples: +``` +Add cursor synchronization to networking layer + +Implement entity selection system for iOS + +Fix panic in SQLite persistence during shutdown (#42) +``` + +## Testing + +### Running Tests + +```bash +# Run all tests +cargo nextest run + +# Run tests for specific crate +cargo nextest run --package libmarathon + +# Run specific test +cargo nextest run test_vector_clock_merge + +# Run tests with output +cargo nextest run --no-capture +``` + +### Writing Tests + +- Add unit tests in the same file as the code (in a `mod tests` block) +- Add integration tests in the `tests/` directory +- Test edge cases and error conditions +- Keep tests focused and readable +- Use descriptive test names: `test_vector_clock_handles_concurrent_updates` + +### Test Coverage + +We aim for good test coverage, especially for: +- CRDT operations and synchronization logic +- Persistence layer operations +- Network protocol handling +- Error conditions and edge cases + +You don't need 100% coverage, but core logic should be well-tested. + +## Pull Request Process + +### Before Submitting + +1. **Update your branch** with the latest upstream changes + ```bash + git fetch upstream + git rebase upstream/mainline + ``` + +2. **Run the test suite** and ensure all tests pass + ```bash + cargo test + ``` + +3. **Run clippy** and fix any warnings + ```bash + cargo clippy + ``` + +4. **Format your code** + ```bash + cargo +nightly fmt + ``` + +5. **Update documentation** if you changed APIs or behavior + +### Submitting Your PR + +1. **Push to your fork** + ```bash + git push origin your-branch-name + ``` + +2. **Open a pull request** on GitHub + +3. **Fill out the PR template** with: + - Clear description of what changed and why + - Link to related issues + - Testing performed + - Screenshots/videos for UI changes + +4. 
**Request review** from maintainers + +### During Review + +- Be responsive to feedback +- Make requested changes promptly +- Push updates to the same branch (they'll appear in the PR) +- Use "fixup" commits or force-push after addressing review comments +- Be patient - maintainers are volunteers with limited time + +### After Approval + +- Maintainers will merge your PR +- You can delete your branch after merging +- Celebrate! 🎉 You're now a Marathon contributor! + +## Reporting Bugs + +### Before Reporting + +1. **Check existing issues** to avoid duplicates +2. **Verify it's a bug** and not expected behavior +3. **Test on the latest version** from mainline branch + +### Bug Report Template + +When opening a bug report, please include: + +- **Description** - What went wrong? +- **Expected behavior** - What should have happened? +- **Actual behavior** - What actually happened? +- **Steps to reproduce** - Minimal steps to reproduce the issue +- **Environment**: + - OS version (macOS version, iOS version) + - Rust version (`rustc --version`) + - Marathon version or commit hash +- **Logs/Stack traces** - Error messages or relevant log output +- **Screenshots/Videos** - If applicable + +### Security Issues + +**Do not report security vulnerabilities in public issues.** + +Please see our [Security Policy](SECURITY.md) for how to report security issues privately. + +## Suggesting Features + +We welcome feature suggestions! Here's how to propose them effectively: + +### Before Suggesting + +1. **Check existing issues and discussions** for similar ideas +2. **Consider if it aligns** with Marathon's goals (multiplayer game engine framework) +3. **Think about the scope** - is this a core feature or better as a plugin/extension? + +### Feature Request Template + +When suggesting a feature, please include: + +- **Problem statement** - What problem does this solve? +- **Proposed solution** - How would this feature work? 
+- **Alternatives considered** - What other approaches did you think about? +- **Use cases** - Real-world scenarios where this helps +- **Implementation ideas** - Technical approach (if you have thoughts) + +### Feature Discussion + +- Maintainers will label feature requests as `enhancement` +- We'll discuss feasibility, scope, and priority +- Features that align with the roadmap are more likely to be accepted +- You're welcome to implement features you propose (with approval) + +## AI Usage Policy + +Marathon has specific guidelines around AI and ML tool usage. Please read our [AI Usage Policy](AI_POLICY.md) before contributing. + +**Key points:** +- AI tools (Copilot, ChatGPT, etc.) are allowed for productivity +- You must understand and be accountable for all code you submit +- Humans make all architectural decisions, not AI +- When in doubt, ask yourself: "Can I maintain and debug this?" + +## Questions? + +- **General questions** - Open a [Discussion](https://github.com/yourusername/marathon/discussions) +- **Bug reports** - Open an [Issue](https://github.com/yourusername/marathon/issues) +- **Real-time chat** - [Discord/Slack link if you have one] +- **Email** - [maintainer email if appropriate] + +## Recognition + +All contributors will be recognized in our release notes and can be listed in AUTHORS file (coming soon). + +--- + +Thank you for contributing to Marathon! Your effort helps make collaborative software better for everyone. 
diff --git a/Cargo.lock b/Cargo.lock index fb3f4fb..975e336 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,8 +44,8 @@ dependencies = [ "accesskit_consumer", "hashbrown 0.15.5", "objc2 0.5.2", - "objc2-app-kit 0.2.2", - "objc2-foundation 0.2.2", + "objc2-app-kit", + "objc2-foundation", ] [[package]] @@ -301,26 +301,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arboard" -version = "3.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0348a1c054491f4bfe6ab86a7b6ab1e44e45d899005de92f58b3df180b36ddaf" -dependencies = [ - "clipboard-win", - "image", - "log", - "objc2 0.6.3", - "objc2-app-kit 0.3.2", - "objc2-core-foundation", - "objc2-core-graphics", - "objc2-foundation 0.3.2", - "parking_lot", - "percent-encoding", - "windows-sys 0.60.2", - "x11rb", -] - [[package]] name = "arrayref" version = "0.3.9" @@ -2116,15 +2096,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" -[[package]] -name = "clipboard-win" -version = "5.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" -dependencies = [ - "error-code", -] - [[package]] name = "cobs" version = "0.3.0" @@ -2990,12 +2961,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "error-code" -version = "3.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" - [[package]] name = "euclid" version = "0.22.11" @@ -3044,26 +3009,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fax" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab" -dependencies = [ - "fax_derive", -] - -[[package]] -name = "fax_derive" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "fdeflate" version = "0.3.7" @@ -4064,7 +4009,6 @@ dependencies = [ "moxcms", "num-traits", "png", - "tiff", ] [[package]] @@ -4594,7 +4538,6 @@ name = "libmarathon" version = "0.1.0" dependencies = [ "anyhow", - "arboard", "async-channel", "bevy", "bevy_app", @@ -4649,7 +4592,6 @@ dependencies = [ "proptest", "radsort", "rand 0.8.5", - "raw-window-handle", "rkyv", "rusqlite", "rustc-hash 2.1.1", @@ -5428,22 +5370,10 @@ dependencies = [ "objc2 0.5.2", "objc2-core-data", "objc2-core-image", - "objc2-foundation 0.2.2", + "objc2-foundation", "objc2-quartz-core", ] -[[package]] -name = "objc2-app-kit" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" -dependencies = [ - "bitflags 2.10.0", - "objc2 0.6.3", - "objc2-core-graphics", - "objc2-foundation 0.3.2", -] - [[package]] name = "objc2-cloud-kit" version = "0.2.2" @@ -5454,7 +5384,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5465,7 +5395,7 @@ checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5477,7 +5407,7 @@ dependencies = [ "bitflags 2.10.0", "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5487,21 +5417,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ "bitflags 2.10.0", - "dispatch2", - "objc2 0.6.3", -] - -[[package]] -name = "objc2-core-graphics" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807" -dependencies = [ - "bitflags 2.10.0", - "dispatch2", - "objc2 0.6.3", - "objc2-core-foundation", - "objc2-io-surface", ] [[package]] @@ -5512,7 +5427,7 @@ checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", "objc2-metal", ] @@ -5525,7 +5440,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-contacts", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5547,17 +5462,6 @@ dependencies = [ "objc2 0.5.2", ] -[[package]] -name = "objc2-foundation" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" -dependencies = [ - "bitflags 2.10.0", - "objc2 0.6.3", - "objc2-core-foundation", -] - [[package]] name = "objc2-io-kit" version = "0.3.2" @@ -5568,17 +5472,6 @@ dependencies = [ "objc2-core-foundation", ] -[[package]] -name = "objc2-io-surface" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d" -dependencies = [ - "bitflags 2.10.0", - "objc2 0.6.3", - "objc2-core-foundation", -] - [[package]] name = "objc2-link-presentation" version = "0.2.2" @@ -5587,8 +5480,8 @@ checksum = "a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-app-kit 0.2.2", - "objc2-foundation 0.2.2", + "objc2-app-kit", + "objc2-foundation", ] [[package]] @@ -5600,7 +5493,7 @@ dependencies = [ "bitflags 2.10.0", "block2 
0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5612,7 +5505,7 @@ dependencies = [ "bitflags 2.10.0", "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", "objc2-metal", ] @@ -5623,7 +5516,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc" dependencies = [ "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5639,7 +5532,7 @@ dependencies = [ "objc2-core-data", "objc2-core-image", "objc2-core-location", - "objc2-foundation 0.2.2", + "objc2-foundation", "objc2-link-presentation", "objc2-quartz-core", "objc2-symbols", @@ -5655,7 +5548,7 @@ checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -5668,7 +5561,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", - "objc2-foundation 0.2.2", + "objc2-foundation", ] [[package]] @@ -6190,12 +6083,6 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quick-error" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" - [[package]] name = "quick-xml" version = "0.37.5" @@ -6775,7 +6662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" dependencies = [ "fnv", - "quick-error 1.2.3", + "quick-error", "tempfile", "wait-timeout", ] @@ -7482,20 +7369,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "tiff" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f" -dependencies = [ - "fax", - "flate2", - "half", - "quick-error 2.0.1", - "weezl", - "zune-jpeg", -] - [[package]] name = "time" version = "0.3.44" @@ -8347,12 +8220,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "weezl" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" - [[package]] name = "wgpu" version = "26.0.1" @@ -9151,8 +9018,8 @@ dependencies = [ "memmap2", "ndk 0.9.0", "objc2 0.5.2", - "objc2-app-kit 0.2.2", - "objc2-foundation 0.2.2", + "objc2-app-kit", + "objc2-foundation", "objc2-ui-kit", "orbclient", "percent-encoding", @@ -9460,18 +9327,3 @@ dependencies = [ "quote", "syn", ] - -[[package]] -name = "zune-core" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" - -[[package]] -name = "zune-jpeg" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713" -dependencies = [ - "zune-core", -] diff --git a/Cargo.toml b/Cargo.toml index 201a382..39bf6a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,13 +10,14 @@ edition = "2024" tokio = { version = "1", features = ["full"] } tokio-stream = "0.1" tokio-util = "0.7" +futures-lite = "2.0" # Iroh - P2P networking and gossip iroh = { version = "0.95.0", features = ["discovery-pkarr-dht"] } iroh-gossip = "0.95.0" # Database -rusqlite = "0.37.0" +rusqlite = { version = "0.37.0", features = ["bundled"] } # Serialization serde = { version = "1.0", features = ["derive"] } @@ -34,6 +35,8 @@ chrono = { version = "0.4", features = ["serde"] } # Logging tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" +tracing-oslog = "0.3" # Random rand = "0.8" @@ 
-41,17 +44,24 @@ rand = "0.8" # Encoding hex = "0.4" -# ML/AI -candle-core = "0.8" -candle-nn = "0.8" -candle-transformers = "0.8" -tokenizers = "0.20" -hf-hub = "0.3" +# Data structures +bytes = "1.0" +crossbeam-channel = "0.5" +uuid = { version = "1.0", features = ["v4", "serde"] } -# Bevy -bevy = "0.17" +# Bevy and graphics +bevy = "0.17.2" +egui = { version = "0.33", default-features = false, features = ["bytemuck", "default_fonts"] } +glam = "0.29" +winit = "0.30" # Synchronization parking_lot = "0.12" crdts = "7.3" inventory = "0.3" + +# CLI +clap = { version = "4.5", features = ["derive"] } + +# Testing +tempfile = "3" diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d8ffb88 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Marathon Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..f304881 --- /dev/null +++ b/README.md @@ -0,0 +1,164 @@ +# Marathon + +**A peer-to-peer game engine development kit built with Rust and CRDTs** + +Marathon is a multiplayer game engine framework designed for building real-time collaborative games with offline-first capabilities. Built on [Bevy](https://bevyengine.org/) and [iroh](https://iroh.computer/), it provides CRDT-based state synchronization, peer-to-peer networking, and persistent state management out of the box - so you can focus on making great games instead of wrestling with networking code. + +## ⚠️ Early Development Notice + +**This project is in early development ( Result<()> { // Create app offline let mut app = create_test_app_maybe_offline(node_id, ctx.db_path(), None); - app.update(); + update_with_fixed(&mut app); // Create and insert GossipBridge let bridge = GossipBridge::new(node_id); @@ -128,7 +136,7 @@ async fn test_join_request_sent() -> Result<()> { // Update to trigger send_join_request_once_system // With the peer-wait logic, JoinRequest waits for peers or timeout - app.update(); // Start wait timer + update_with_fixed(&mut app); // Start wait timer // Simulate 1-second timeout (first node case - no peers) { @@ -139,7 +147,7 @@ async fn test_join_request_sent() -> Result<()> { } // Update again - should send JoinRequest due to timeout - app.update(); + update_with_fixed(&mut app); // Verify JoinRequest was sent by checking JoinRequestSent resource { @@ -393,7 +401,7 @@ async fn test_join_request_waits_for_peers() -> Result<()> { let bridge = GossipBridge::new(node_id); let mut app = create_test_app_maybe_offline(node_id, ctx.db_path(), Some(bridge.clone())); - app.update(); + update_with_fixed(&mut app); // Transition to Joining { @@ -412,7 +420,7 @@ async fn test_join_request_waits_for_peers() -> Result<()> { // Run for 10 frames (~166ms) - should NOT send JoinRequest yet (no peers) for i in 0..10 { - app.update(); + 
update_with_fixed(&mut app); tokio::time::sleep(Duration::from_millis(16)).await; let join_sent = app.world().resource::(); @@ -560,7 +568,7 @@ async fn test_join_request_sends_after_timeout() -> Result<()> { let bridge = GossipBridge::new(node_id); let mut app = create_test_app_maybe_offline(node_id, ctx.db_path(), Some(bridge.clone())); - app.update(); + update_with_fixed(&mut app); // Transition to Joining { @@ -571,7 +579,7 @@ async fn test_join_request_sends_after_timeout() -> Result<()> { println!("Initial state: Session=Joining, Peers=0"); // Run one frame to start the wait timer - app.update(); + update_with_fixed(&mut app); // Manually set wait_started to 1.1 seconds ago to simulate timeout { @@ -583,7 +591,7 @@ async fn test_join_request_sends_after_timeout() -> Result<()> { } // Run one frame - should send JoinRequest due to timeout - app.update(); + update_with_fixed(&mut app); { let join_sent = app.world().resource::(); @@ -627,7 +635,7 @@ async fn test_join_request_only_sent_once() -> Result<()> { let bridge = GossipBridge::new(node_id); let mut app = create_test_app_maybe_offline(node_id, ctx.db_path(), Some(bridge.clone())); - app.update(); + update_with_fixed(&mut app); // Transition to Joining and add a peer { @@ -644,7 +652,7 @@ async fn test_join_request_only_sent_once() -> Result<()> { println!("Initial state: Session=Joining, Peers=1"); // Run frame - should send JoinRequest - app.update(); + update_with_fixed(&mut app); { let join_sent = app.world().resource::(); @@ -678,7 +686,7 @@ async fn test_join_request_only_sent_once() -> Result<()> { // Run 20 more frames - should NOT send JoinRequest again for i in 0..20 { - app.update(); + update_with_fixed(&mut app); tokio::time::sleep(Duration::from_millis(16)).await; let app_bridge = app.world().resource::(); diff --git a/crates/libmarathon/tests/sync_integration_headless.rs b/crates/libmarathon/tests/sync_integration_headless.rs index 8215a17..bb52ecd 100644 --- 
a/crates/libmarathon/tests/sync_integration_headless.rs +++ b/crates/libmarathon/tests/sync_integration_headless.rs @@ -95,6 +95,17 @@ struct TestHealth { use rusqlite::Connection; +/// Helper to ensure FixedUpdate and FixedPostUpdate run (since they're on a fixed timestep) +fn update_with_fixed(app: &mut App) { + use bevy::prelude::{FixedUpdate, FixedPostUpdate}; + // Run Main schedule (which includes Update) + app.update(); + // Explicitly run FixedUpdate to ensure systems there execute + app.world_mut().run_schedule(FixedUpdate); + // Explicitly run FixedPostUpdate to ensure delta generation executes + app.world_mut().run_schedule(FixedPostUpdate); +} + /// Check if an entity exists in the database fn entity_exists_in_db(db_path: &PathBuf, entity_id: Uuid) -> Result { let conn = Connection::open(db_path)?; @@ -868,8 +879,8 @@ async fn test_lock_heartbeat_expiration() -> Result<()> { // Update to allow lock propagation for _ in 0..10 { - app1.update(); - app2.update(); + update_with_fixed(&mut app1); + update_with_fixed(&mut app2); tokio::time::sleep(Duration::from_millis(100)).await; } @@ -899,7 +910,7 @@ async fn test_lock_heartbeat_expiration() -> Result<()> { // Run cleanup system (which removes expired locks and broadcasts LockReleased) println!("Running cleanup to expire locks..."); for _ in 0..10 { - app2.update(); + update_with_fixed(&mut app2); tokio::time::sleep(Duration::from_millis(100)).await; } @@ -1119,7 +1130,7 @@ async fn test_offline_to_online_sync() -> Result<()> { } // Update to trigger delta generation (offline) - app1.update(); + update_with_fixed(&mut app1); tokio::time::sleep(Duration::from_millis(50)).await; // Verify clock incremented for spawn @@ -1156,7 +1167,7 @@ async fn test_offline_to_online_sync() -> Result<()> { } } - app1.update(); + update_with_fixed(&mut app1); tokio::time::sleep(Duration::from_millis(50)).await; let clock_after_second_spawn = { @@ -1179,7 +1190,7 @@ async fn test_offline_to_online_sync() -> Result<()> { } } - 
app1.update(); + update_with_fixed(&mut app1); tokio::time::sleep(Duration::from_millis(50)).await; let clock_after_modify = { @@ -1197,7 +1208,7 @@ async fn test_offline_to_online_sync() -> Result<()> { commands.entity(entity_b_bevy).insert(ToDelete); } - app1.update(); + update_with_fixed(&mut app1); tokio::time::sleep(Duration::from_millis(50)).await; let clock_after_delete = { @@ -1262,8 +1273,8 @@ async fn test_offline_to_online_sync() -> Result<()> { // Wait a bit more for tombstone to sync for _ in 0..20 { - app1.update(); - app2.update(); + update_with_fixed(&mut app1); + update_with_fixed(&mut app2); tokio::time::sleep(Duration::from_millis(100)).await; } diff --git a/crates/libmarathon/tests/test_utils/mod.rs b/crates/libmarathon/tests/test_utils/mod.rs index 230d324..375ac52 100644 --- a/crates/libmarathon/tests/test_utils/mod.rs +++ b/crates/libmarathon/tests/test_utils/mod.rs @@ -87,6 +87,17 @@ pub fn create_test_app_maybe_offline(node_id: Uuid, db_path: PathBuf, bridge: Op app } +/// Helper to ensure FixedUpdate and FixedPostUpdate run (since they're on a fixed timestep) +fn update_with_fixed(app: &mut App) { + use bevy::prelude::{FixedUpdate, FixedPostUpdate}; + // Run Main schedule (which includes Update) + app.update(); + // Explicitly run FixedUpdate to ensure systems there execute + app.world_mut().run_schedule(FixedUpdate); + // Explicitly run FixedPostUpdate to ensure delta generation executes + app.world_mut().run_schedule(FixedPostUpdate); +} + /// Wait for sync condition to be met, polling both apps pub async fn wait_for_sync( app1: &mut App, @@ -102,8 +113,8 @@ where while start.elapsed() < timeout { // Tick both apps - app1.update(); - app2.update(); + update_with_fixed(app1); + update_with_fixed(app2); tick_count += 1; if tick_count % 50 == 0 { diff --git a/crates/xtask/Cargo.toml b/crates/xtask/Cargo.toml index fed9608..a08a555 100644 --- a/crates/xtask/Cargo.toml +++ b/crates/xtask/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "xtask" 
version = "0.1.0" -edition = "2021" +edition.workspace = true publish = false [dependencies] -anyhow = "1.0" -clap = { version = "4.5", features = ["derive"] } -tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +anyhow.workspace = true +clap.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true