diff --git a/Cargo.lock b/Cargo.lock index 9103a65..05e1059 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -766,7 +766,7 @@ dependencies = [ "bevy_reflect", "bytemuck", "derive_more 2.0.1", - "encase 0.11.2", + "encase", "serde", "thiserror 2.0.17", "wgpu-types", @@ -877,7 +877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7449e5903594a00f007732ba232af0c527ad4e6e3d29bc3e195ec78dbd20c8b2" dependencies = [ "bevy_macro_utils", - "encase_derive_impl 0.11.2", + "encase_derive_impl", ] [[package]] @@ -1379,7 +1379,7 @@ dependencies = [ "bytemuck", "derive_more 2.0.1", "downcast-rs 2.0.2", - "encase 0.11.2", + "encase", "fixedbitset", "image", "indexmap", @@ -2866,18 +2866,6 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" -[[package]] -name = "encase" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0a05902cf601ed11d564128448097b98ebe3c6574bd7b6a653a3d56d54aa020" -dependencies = [ - "const_panic", - "encase_derive 0.10.0", - "glam 0.29.3", - "thiserror 1.0.69", -] - [[package]] name = "encase" version = "0.11.2" @@ -2885,38 +2873,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02ba239319a4f60905966390f5e52799d868103a533bb7e27822792332504ddd" dependencies = [ "const_panic", - "encase_derive 0.11.2", + "encase_derive", "glam 0.30.9", "thiserror 2.0.17", ] -[[package]] -name = "encase_derive" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "181d475b694e2dd56ae919ce7699d344d1fd259292d590c723a50d1189a2ea85" -dependencies = [ - "encase_derive_impl 0.10.0", -] - [[package]] name = "encase_derive" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5223d6c647f09870553224f6e37261fe5567bc5a4f4cf13ed337476e79990f2f" dependencies = [ - "encase_derive_impl 
0.11.2", -] - -[[package]] -name = "encase_derive_impl" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f97b51c5cc57ef7c5f7a0c57c250251c49ee4c28f819f87ac32f4aceabc36792" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "encase_derive_impl", ] [[package]] @@ -4597,7 +4565,29 @@ version = "0.1.0" dependencies = [ "anyhow", "arboard", + "async-channel", "bevy", + "bevy_app", + "bevy_asset", + "bevy_camera", + "bevy_color", + "bevy_derive", + "bevy_diagnostic", + "bevy_ecs", + "bevy_encase_derive", + "bevy_image", + "bevy_light", + "bevy_math", + "bevy_mesh", + "bevy_platform", + "bevy_reflect", + "bevy_shader", + "bevy_tasks", + "bevy_time", + "bevy_transform", + "bevy_utils", + "bevy_window", + "bitflags 2.10.0", "blake3", "blocking", "bytemuck", @@ -4606,16 +4596,26 @@ dependencies = [ "crdts", "criterion", "crossbeam-channel", + "derive_more 2.0.1", "dirs", + "downcast-rs 2.0.2", "egui", - "encase 0.10.0", + "encase", + "fixedbitset", "futures-lite", "glam 0.29.3", + "image", + "indexmap", "inventory", "iroh", "iroh-gossip", "itertools 0.14.0", + "macros", + "naga", + "nonmax", + "offset-allocator", "proptest", + "radsort", "rand 0.8.5", "raw-window-handle", "rkyv", @@ -4623,7 +4623,8 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", - "sync-macros", + "smallvec", + "static_assertions", "tempfile", "thiserror 2.0.17", "tokio", @@ -4631,6 +4632,8 @@ dependencies = [ "tracing", "tracing-oslog", "uuid", + "variadics_please", + "wgpu", "wgpu-types", "winit", ] @@ -4749,6 +4752,24 @@ dependencies = [ "libc", ] +[[package]] +name = "macros" +version = "0.1.0" +dependencies = [ + "anyhow", + "bevy", + "bevy_macro_utils", + "bytes", + "inventory", + "libmarathon", + "proc-macro2", + "quote", + "rkyv", + "serde", + "syn", + "tracing", +] + [[package]] name = "malloc_buf" version = "0.0.6" @@ -7241,23 +7262,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync-macros" -version = "0.1.0" 
-dependencies = [ - "anyhow", - "bevy", - "bytes", - "inventory", - "libmarathon", - "proc-macro2", - "quote", - "rkyv", - "serde", - "syn", - "tracing", -] - [[package]] name = "sync_wrapper" version = "1.0.2" diff --git a/Cargo.toml b/Cargo.toml index acb9c74..c406488 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["crates/libmarathon", "crates/sync-macros", "crates/app", "crates/xtask"] +members = ["crates/libmarathon", "crates/macros", "crates/app", "crates/xtask"] resolver = "2" [workspace.package] diff --git a/crates/app/Cargo.toml b/crates/app/Cargo.toml index 38e860b..e77d010 100644 --- a/crates/app/Cargo.toml +++ b/crates/app/Cargo.toml @@ -12,9 +12,7 @@ headless = [] [dependencies] libmarathon = { path = "../libmarathon" } bevy = { version = "0.17", default-features = false, features = [ - "bevy_render", - "bevy_core_pipeline", - "bevy_pbr", + # bevy_render, bevy_core_pipeline, bevy_pbr are now vendored in libmarathon "bevy_ui", "bevy_text", "png", diff --git a/crates/libmarathon/Cargo.toml b/crates/libmarathon/Cargo.toml index bd3359b..6de0ab4 100644 --- a/crates/libmarathon/Cargo.toml +++ b/crates/libmarathon/Cargo.toml @@ -8,6 +8,47 @@ anyhow.workspace = true arboard = "3.4" bevy.workspace = true rkyv.workspace = true + +# Bevy subcrates required by vendored rendering (bevy_render, bevy_core_pipeline, bevy_pbr) +bevy_app = "0.17.2" +bevy_asset = "0.17.2" +bevy_camera = "0.17.2" +bevy_color = "0.17.2" +bevy_derive = "0.17.2" +bevy_diagnostic = "0.17.2" +bevy_ecs = "0.17.2" +bevy_encase_derive = "0.17.2" +bevy_image = "0.17.2" +bevy_light = "0.17.2" +bevy_math = "0.17.2" +bevy_mesh = "0.17.2" +bevy_platform = { version = "0.17.2", default-features = false } +bevy_reflect = "0.17.2" +macros = { path = "../macros" } +bevy_shader = "0.17.2" +bevy_tasks = "0.17.2" +bevy_time = "0.17.2" +bevy_transform = "0.17.2" +bevy_utils = "0.17.2" +bevy_window = "0.17.2" + +# Additional dependencies required by vendored rendering crates 
+wgpu = { version = "26", default-features = false, features = ["dx12", "metal"] } +naga = { version = "26", features = ["wgsl-in"] } +downcast-rs = { version = "2", default-features = false, features = ["std"] } +derive_more = { version = "2", default-features = false, features = ["from"] } +image = { version = "0.25.2", default-features = false } +bitflags = { version = "2.3", features = ["bytemuck"] } +fixedbitset = "0.5" +radsort = "0.1" +nonmax = "0.5" +smallvec = { version = "1", default-features = false } +indexmap = "2.0" +async-channel = "2.3" +offset-allocator = "0.2" +variadics_please = "1.1" +static_assertions = "1.1" + blake3 = "1.5" blocking = "1.6" bytemuck = { version = "1.14", features = ["derive"] } @@ -17,7 +58,7 @@ crdts.workspace = true crossbeam-channel = "0.5" dirs = "5.0" egui = { version = "0.33", default-features = false, features = ["bytemuck", "default_fonts"] } -encase = { version = "0.10", features = ["glam"] } +encase = { version = "0.11", features = ["glam"] } futures-lite = "2.0" glam = "0.29" inventory.workspace = true @@ -30,7 +71,6 @@ rusqlite = { version = "0.37.0", features = ["bundled"] } serde = { version = "1.0", features = ["derive"] } serde_json.workspace = true sha2 = "0.10" -sync-macros = { path = "../sync-macros" } thiserror = "2.0" tokio.workspace = true toml.workspace = true diff --git a/crates/libmarathon/src/lib.rs b/crates/libmarathon/src/lib.rs index 31891a1..2cdbde7 100644 --- a/crates/libmarathon/src/lib.rs +++ b/crates/libmarathon/src/lib.rs @@ -28,6 +28,7 @@ pub mod engine; pub mod networking; pub mod persistence; pub mod platform; +pub mod render; // Vendored Bevy rendering (bevy_render + bevy_core_pipeline + bevy_pbr) pub mod utils; pub mod sync; diff --git a/crates/libmarathon/src/render/alpha.rs b/crates/libmarathon/src/render/alpha.rs new file mode 100644 index 0000000..dd74881 --- /dev/null +++ b/crates/libmarathon/src/render/alpha.rs @@ -0,0 +1,62 @@ +use bevy_reflect::{std_traits::ReflectDefault, 
Reflect}; + +// TODO: add discussion about performance. +/// Sets how a material's base color alpha channel is used for transparency. +#[derive(Debug, Default, Reflect, Copy, Clone, PartialEq)] +#[reflect(Default, Debug, Clone)] +pub enum AlphaMode { + /// Base color alpha values are overridden to be fully opaque (1.0). + #[default] + Opaque, + /// Reduce transparency to fully opaque or fully transparent + /// based on a threshold. + /// + /// Compares the base color alpha value to the specified threshold. + /// If the value is below the threshold, + /// considers the color to be fully transparent (alpha is set to 0.0). + /// If it is equal to or above the threshold, + /// considers the color to be fully opaque (alpha is set to 1.0). + Mask(f32), + /// The base color alpha value defines the opacity of the color. + /// Standard alpha-blending is used to blend the fragment's color + /// with the color behind it. + Blend, + /// Similar to [`AlphaMode::Blend`], however assumes RGB channel values are + /// [premultiplied](https://en.wikipedia.org/wiki/Alpha_compositing#Straight_versus_premultiplied). + /// + /// For otherwise constant RGB values, behaves more like [`AlphaMode::Blend`] for + /// alpha values closer to 1.0, and more like [`AlphaMode::Add`] for + /// alpha values closer to 0.0. + /// + /// Can be used to avoid “border” or “outline” artifacts that can occur + /// when using plain alpha-blended textures. + Premultiplied, + /// Spreads the fragment out over a hardware-dependent number of sample + /// locations proportional to the alpha value. This requires multisample + /// antialiasing; if MSAA isn't on, this is identical to + /// [`AlphaMode::Mask`] with a value of 0.5. + /// + /// Alpha to coverage provides improved performance and better visual + /// fidelity over [`AlphaMode::Blend`], as Bevy doesn't have to sort objects + /// when it's in use. It's especially useful for complex transparent objects + /// like foliage. 
+ /// + /// [alpha to coverage]: https://en.wikipedia.org/wiki/Alpha_to_coverage + AlphaToCoverage, + /// Combines the color of the fragments with the colors behind them in an + /// additive process, (i.e. like light) producing lighter results. + /// + /// Black produces no effect. Alpha values can be used to modulate the result. + /// + /// Useful for effects like holograms, ghosts, lasers and other energy beams. + Add, + /// Combines the color of the fragments with the colors behind them in a + /// multiplicative process, (i.e. like pigments) producing darker results. + /// + /// White produces no effect. Alpha values can be used to modulate the result. + /// + /// Useful for effects like stained glass, window tint film and some colored liquids. + Multiply, +} + +impl Eq for AlphaMode {} diff --git a/crates/libmarathon/src/render/batching/gpu_preprocessing.rs b/crates/libmarathon/src/render/batching/gpu_preprocessing.rs new file mode 100644 index 0000000..a5d0b68 --- /dev/null +++ b/crates/libmarathon/src/render/batching/gpu_preprocessing.rs @@ -0,0 +1,2142 @@ +//! Batching functionality when GPU preprocessing is in use. 
+ +use core::{any::TypeId, marker::PhantomData, mem}; + +use bevy_app::{App, Plugin}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + prelude::Entity, + query::{Has, With}, + resource::Resource, + schedule::IntoScheduleConfigs as _, + system::{Query, Res, ResMut, StaticSystemParam}, + world::{FromWorld, World}, +}; +use bevy_encase_derive::ShaderType; +use bevy_math::UVec4; +use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet}; +use bevy_utils::{default, TypeIdMap}; +use bytemuck::{Pod, Zeroable}; +use encase::{internal::WriteInto, ShaderSize}; +use indexmap::IndexMap; +use nonmax::NonMaxU32; +use tracing::{error, info}; +use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features}; + +use crate::render::{ + experimental::occlusion_culling::OcclusionCulling, + render_phase::{ + BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet, + BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem, + PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem, + SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, + ViewSortedRenderPhases, + }, + render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec}, + renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper}, + sync_world::MainEntity, + view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity}, + Render, RenderApp, RenderDebugFlags, RenderSystems, +}; + +use super::{BatchMeta, GetBatchData, GetFullBatchData}; + +#[derive(Default)] +pub struct BatchingPlugin { + /// Debugging flags that can optionally be set when constructing the renderer. 
+ pub debug_flags: RenderDebugFlags, +} + +impl Plugin for BatchingPlugin { + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .insert_resource(IndirectParametersBuffers::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) + .add_systems( + Render, + write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush), + ) + .add_systems( + Render, + clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app.init_resource::(); + } +} + +/// Records whether GPU preprocessing and/or GPU culling are supported on the +/// device. +/// +/// No GPU preprocessing is supported on WebGL because of the lack of compute +/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a +/// `wgpu` limitation] GPU culling is not. +/// +/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471 +#[derive(Clone, Copy, PartialEq, Resource)] +pub struct GpuPreprocessingSupport { + /// The maximum amount of GPU preprocessing available on this platform. + pub max_supported_mode: GpuPreprocessingMode, +} + +impl GpuPreprocessingSupport { + /// Returns true if this GPU preprocessing support level isn't `None`. + #[inline] + pub fn is_available(&self) -> bool { + self.max_supported_mode != GpuPreprocessingMode::None + } + + /// Returns the given GPU preprocessing mode, capped to the current + /// preprocessing mode. 
+ pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode { + match (self.max_supported_mode, mode) { + (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => { + GpuPreprocessingMode::None + } + (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode, + (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => { + GpuPreprocessingMode::PreprocessingOnly + } + } + } + + /// Returns true if GPU culling is supported on this platform. + pub fn is_culling_supported(&self) -> bool { + self.max_supported_mode == GpuPreprocessingMode::Culling + } +} + +/// The amount of GPU preprocessing (compute and indirect draw) that we do. +#[derive(Clone, Copy, PartialEq)] +pub enum GpuPreprocessingMode { + /// No GPU preprocessing is in use at all. + /// + /// This is used when GPU compute isn't available. + None, + + /// GPU preprocessing is in use, but GPU culling isn't. + /// + /// This is used when the [`NoIndirectDrawing`] component is present on the + /// camera. + PreprocessingOnly, + + /// Both GPU preprocessing and GPU culling are in use. + /// + /// This is used by default. + Culling, +} + +/// The GPU buffers holding the data needed to render batches. +/// +/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the +/// `BD` type parameter in that mode. +/// +/// We have a separate *buffer data input* type (`BDI`) here, which a compute +/// shader is expected to expand to the full buffer data (`BD`) type. GPU +/// uniform building is generally faster and uses less system RAM to VRAM bus +/// bandwidth, but only implemented for some pipelines (for example, not in the +/// 2D pipeline at present) and only when compute shader is available. +#[derive(Resource)] +pub struct BatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, + BDI: Pod + Default, +{ + /// The uniform data inputs for the current frame. 
+ /// + /// These are uploaded during the extraction phase. + pub current_input_buffer: InstanceInputUniformBuffer, + + /// The uniform data inputs for the previous frame. + /// + /// The indices don't generally line up between `current_input_buffer` + /// and `previous_input_buffer`, because, among other reasons, entities + /// can spawn or despawn between frames. Instead, each current buffer + /// data input uniform is expected to contain the index of the + /// corresponding buffer data input uniform in this list. + pub previous_input_buffer: InstanceInputUniformBuffer, + + /// The data needed to render buffers for each phase. + /// + /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`, + /// `AlphaMask3d`, etc. + pub phase_instance_buffers: TypeIdMap>, +} + +impl Default for BatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, + BDI: Pod + Sync + Send + Default + 'static, +{ + fn default() -> Self { + BatchedInstanceBuffers { + current_input_buffer: InstanceInputUniformBuffer::new(), + previous_input_buffer: InstanceInputUniformBuffer::new(), + phase_instance_buffers: HashMap::default(), + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase. +/// +/// These are split out per phase so that we can run the phases in parallel. +/// This is the version of the structure that has a type parameter, which +/// enables Bevy's scheduler to run the batching operations for the different +/// phases in parallel. +/// +/// See the documentation for [`BatchedInstanceBuffers`] for more information. +#[derive(Resource)] +pub struct PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// The buffers for this phase. 
+ pub buffers: UntypedPhaseBatchedInstanceBuffers, + phantom: PhantomData, +} + +impl Default for PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + fn default() -> Self { + PhaseBatchedInstanceBuffers { + buffers: UntypedPhaseBatchedInstanceBuffers::default(), + phantom: PhantomData, + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase, without a type parameter for that phase. +/// +/// Since this structure doesn't have a type parameter, it can be placed in +/// [`BatchedInstanceBuffers::phase_instance_buffers`]. +pub struct UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// A storage area for the buffer data that the GPU compute shader is + /// expected to write to. + /// + /// There will be one entry for each index. + pub data_buffer: UninitBufferVec, + + /// The index of the buffer data in the current input buffer that + /// corresponds to each instance. + /// + /// This is keyed off each view. Each view has a separate buffer. + pub work_item_buffers: HashMap, + + /// A buffer that holds the number of indexed meshes that weren't visible in + /// the previous frame, when GPU occlusion culling is in use. + /// + /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per + /// view. Bevy uses this value to determine how many threads to dispatch to + /// check meshes that weren't visible next frame to see if they became newly + /// visible this frame. + pub late_indexed_indirect_parameters_buffer: + RawBufferVec, + + /// A buffer that holds the number of non-indexed meshes that weren't + /// visible in the previous frame, when GPU occlusion culling is in use. + /// + /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per + /// view. 
Bevy uses this value to determine how many threads to dispatch to + /// check meshes that weren't visible next frame to see if they became newly + /// visible this frame. + pub late_non_indexed_indirect_parameters_buffer: + RawBufferVec, +} + +/// Holds the GPU buffer of instance input data, which is the data about each +/// mesh instance that the CPU provides. +/// +/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing +/// shader is expected to expand to the full *buffer data* type. +pub struct InstanceInputUniformBuffer +where + BDI: Pod + Default, +{ + /// The buffer containing the data that will be uploaded to the GPU. + buffer: RawBufferVec, + + /// Indices of slots that are free within the buffer. + /// + /// When adding data, we preferentially overwrite these slots first before + /// growing the buffer itself. + free_uniform_indices: Vec, +} + +impl InstanceInputUniformBuffer +where + BDI: Pod + Default, +{ + /// Creates a new, empty buffer. + pub fn new() -> InstanceInputUniformBuffer { + InstanceInputUniformBuffer { + buffer: RawBufferVec::new(BufferUsages::STORAGE), + free_uniform_indices: vec![], + } + } + + /// Clears the buffer and entity list out. + pub fn clear(&mut self) { + self.buffer.clear(); + self.free_uniform_indices.clear(); + } + + /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer. + #[inline] + pub fn buffer(&self) -> &RawBufferVec { + &self.buffer + } + + /// Adds a new piece of buffered data to the uniform buffer and returns its + /// index. + pub fn add(&mut self, element: BDI) -> u32 { + match self.free_uniform_indices.pop() { + Some(uniform_index) => { + self.buffer.values_mut()[uniform_index as usize] = element; + uniform_index + } + None => self.buffer.push(element) as u32, + } + } + + /// Removes a piece of buffered data from the uniform buffer. + /// + /// This simply marks the data as free. 
+ pub fn remove(&mut self, uniform_index: u32) { + self.free_uniform_indices.push(uniform_index); + } + + /// Returns the piece of buffered data at the given index. + /// + /// Returns [`None`] if the index is out of bounds or the data is removed. + pub fn get(&self, uniform_index: u32) -> Option { + if (uniform_index as usize) >= self.buffer.len() + || self.free_uniform_indices.contains(&uniform_index) + { + None + } else { + Some(self.get_unchecked(uniform_index)) + } + } + + /// Returns the piece of buffered data at the given index. + /// Can return data that has previously been removed. + /// + /// # Panics + /// if `uniform_index` is not in bounds of [`Self::buffer`]. + pub fn get_unchecked(&self, uniform_index: u32) -> BDI { + self.buffer.values()[uniform_index as usize] + } + + /// Stores a piece of buffered data at the given index. + /// + /// # Panics + /// if `uniform_index` is not in bounds of [`Self::buffer`]. + pub fn set(&mut self, uniform_index: u32, element: BDI) { + self.buffer.values_mut()[uniform_index as usize] = element; + } + + // Ensures that the buffers are nonempty, which the GPU requires before an + // upload can take place. + pub fn ensure_nonempty(&mut self) { + if self.buffer.is_empty() { + self.buffer.push(default()); + } + } + + /// Returns the number of instances in this buffer. + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Returns true if this buffer has no instances or false if it contains any + /// instances. + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer + /// ready to be uploaded to the GPU. + pub fn into_buffer(self) -> RawBufferVec { + self.buffer + } +} + +impl Default for InstanceInputUniformBuffer +where + BDI: Pod + Default, +{ + fn default() -> Self { + Self::new() + } +} + +/// The buffer of GPU preprocessing work items for a single view. 
+#[cfg_attr( + not(target_arch = "wasm32"), + expect( + clippy::large_enum_variant, + reason = "See https://github.com/bevyengine/bevy/issues/19220" + ) +)] +pub enum PreprocessWorkItemBuffers { + /// The work items we use if we aren't using indirect drawing. + /// + /// Because we don't have to separate indexed from non-indexed meshes in + /// direct mode, we only have a single buffer here. + Direct(RawBufferVec), + + /// The buffer of work items we use if we are using indirect drawing. + /// + /// We need to separate out indexed meshes from non-indexed meshes in this + /// case because the indirect parameters for these two types of meshes have + /// different sizes. + Indirect { + /// The buffer of work items corresponding to indexed meshes. + indexed: RawBufferVec, + /// The buffer of work items corresponding to non-indexed meshes. + non_indexed: RawBufferVec, + /// The work item buffers we use when GPU occlusion culling is in use. + gpu_occlusion_culling: Option, + }, +} + +/// The work item buffers we use when GPU occlusion culling is in use. +pub struct GpuOcclusionCullingWorkItemBuffers { + /// The buffer of work items corresponding to indexed meshes. + pub late_indexed: UninitBufferVec, + /// The buffer of work items corresponding to non-indexed meshes. + pub late_non_indexed: UninitBufferVec, + /// The offset into the + /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] + /// where this view's indirect dispatch counts for indexed meshes live. + pub late_indirect_parameters_indexed_offset: u32, + /// The offset into the + /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] + /// where this view's indirect dispatch counts for non-indexed meshes live. + pub late_indirect_parameters_non_indexed_offset: u32, +} + +/// A GPU-side data structure that stores the number of workgroups to dispatch +/// for the second phase of GPU occlusion culling. 
+/// +/// The late mesh preprocessing phase checks meshes that weren't visible frame +/// to determine if they're potentially visible this frame. +#[derive(Clone, Copy, ShaderType, Pod, Zeroable)] +#[repr(C)] +pub struct LatePreprocessWorkItemIndirectParameters { + /// The number of workgroups to dispatch. + /// + /// This will be equal to `work_item_count / 64`, rounded *up*. + dispatch_x: u32, + /// The number of workgroups along the abstract Y axis to dispatch: always + /// 1. + dispatch_y: u32, + /// The number of workgroups along the abstract Z axis to dispatch: always + /// 1. + dispatch_z: u32, + /// The actual number of work items. + /// + /// The GPU indirect dispatch doesn't read this, but it's used internally to + /// determine the actual number of work items that exist in the late + /// preprocessing work item buffer. + work_item_count: u32, + /// Padding to 64-byte boundaries for some hardware. + pad: UVec4, +} + +impl Default for LatePreprocessWorkItemIndirectParameters { + fn default() -> LatePreprocessWorkItemIndirectParameters { + LatePreprocessWorkItemIndirectParameters { + dispatch_x: 0, + dispatch_y: 1, + dispatch_z: 1, + work_item_count: 0, + pad: default(), + } + } +} + +/// Returns the set of work item buffers for the given view, first creating it +/// if necessary. +/// +/// Bevy uses work item buffers to tell the mesh preprocessing compute shader +/// which meshes are to be drawn. +/// +/// You may need to call this function if you're implementing your own custom +/// render phases. See the `specialized_mesh_pipeline` example. 
+pub fn get_or_create_work_item_buffer<'a, I>( + work_item_buffers: &'a mut HashMap, + view: RetainedViewEntity, + no_indirect_drawing: bool, + enable_gpu_occlusion_culling: bool, +) -> &'a mut PreprocessWorkItemBuffers +where + I: 'static, +{ + let preprocess_work_item_buffers = match work_item_buffers.entry(view) { + Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + Entry::Vacant(vacant_entry) => { + if no_indirect_drawing { + vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new( + BufferUsages::STORAGE, + ))) + } else { + vacant_entry.insert(PreprocessWorkItemBuffers::Indirect { + indexed: RawBufferVec::new(BufferUsages::STORAGE), + non_indexed: RawBufferVec::new(BufferUsages::STORAGE), + // We fill this in below if `enable_gpu_occlusion_culling` + // is set. + gpu_occlusion_culling: None, + }) + } + } + }; + + // Initialize the GPU occlusion culling buffers if necessary. + if let PreprocessWorkItemBuffers::Indirect { + ref mut gpu_occlusion_culling, + .. + } = *preprocess_work_item_buffers + { + match ( + enable_gpu_occlusion_culling, + gpu_occlusion_culling.is_some(), + ) { + (false, false) | (true, true) => {} + (false, true) => { + *gpu_occlusion_culling = None; + } + (true, false) => { + *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers { + late_indexed: UninitBufferVec::new(BufferUsages::STORAGE), + late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE), + late_indirect_parameters_indexed_offset: 0, + late_indirect_parameters_non_indexed_offset: 0, + }); + } + } + } + + preprocess_work_item_buffers +} + +/// Initializes work item buffers for a phase in preparation for a new frame. 
+pub fn init_work_item_buffers( + work_item_buffers: &mut PreprocessWorkItemBuffers, + late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec< + LatePreprocessWorkItemIndirectParameters, + >, + late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec< + LatePreprocessWorkItemIndirectParameters, + >, +) { + // Add the offsets for indirect parameters that the late phase of mesh + // preprocessing writes to. + if let PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + ref mut late_indirect_parameters_indexed_offset, + ref mut late_indirect_parameters_non_indexed_offset, + .. + }), + .. + } = *work_item_buffers + { + *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer + .push(LatePreprocessWorkItemIndirectParameters::default()) + as u32; + *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer + .push(LatePreprocessWorkItemIndirectParameters::default()) + as u32; + } +} + +impl PreprocessWorkItemBuffers { + /// Adds a new work item to the appropriate buffer. + /// + /// `indexed` specifies whether the work item corresponds to an indexed + /// mesh. + pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) { + match *self { + PreprocessWorkItemBuffers::Direct(ref mut buffer) => { + buffer.push(preprocess_work_item); + } + PreprocessWorkItemBuffers::Indirect { + indexed: ref mut indexed_buffer, + non_indexed: ref mut non_indexed_buffer, + ref mut gpu_occlusion_culling, + } => { + if indexed { + indexed_buffer.push(preprocess_work_item); + } else { + non_indexed_buffer.push(preprocess_work_item); + } + + if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling { + if indexed { + gpu_occlusion_culling.late_indexed.add(); + } else { + gpu_occlusion_culling.late_non_indexed.add(); + } + } + } + } + } + + /// Clears out the GPU work item buffers in preparation for a new frame. 
+ pub fn clear(&mut self) { + match *self { + PreprocessWorkItemBuffers::Direct(ref mut buffer) => { + buffer.clear(); + } + PreprocessWorkItemBuffers::Indirect { + indexed: ref mut indexed_buffer, + non_indexed: ref mut non_indexed_buffer, + ref mut gpu_occlusion_culling, + } => { + indexed_buffer.clear(); + non_indexed_buffer.clear(); + + if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling { + gpu_occlusion_culling.late_indexed.clear(); + gpu_occlusion_culling.late_non_indexed.clear(); + gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0; + gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0; + } + } + } + } +} + +/// One invocation of the preprocessing shader: i.e. one mesh instance in a +/// view. +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct PreprocessWorkItem { + /// The index of the batch input data in the input buffer that the shader + /// reads from. + pub input_index: u32, + + /// In direct mode, the index of the mesh uniform; in indirect mode, the + /// index of the [`IndirectParametersGpuMetadata`]. + /// + /// In indirect mode, this is the index of the + /// [`IndirectParametersGpuMetadata`] in the + /// `IndirectParametersBuffers::indexed_metadata` or + /// `IndirectParametersBuffers::non_indexed_metadata`. + pub output_or_indirect_parameters_index: u32, +} + +/// The `wgpu` indirect parameters structure that specifies a GPU draw command. +/// +/// This is the variant for indexed meshes. We generate the instances of this +/// structure in the `build_indirect_params.wgsl` compute shader. +#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersIndexed { + /// The number of indices that this mesh has. + pub index_count: u32, + /// The number of instances we are to draw. + pub instance_count: u32, + /// The offset of the first index for this mesh in the index buffer slab. 
+ pub first_index: u32, + /// The offset of the first vertex for this mesh in the vertex buffer slab. + pub base_vertex: u32, + /// The index of the first mesh instance in the `MeshUniform` buffer. + pub first_instance: u32, +} + +/// The `wgpu` indirect parameters structure that specifies a GPU draw command. +/// +/// This is the variant for non-indexed meshes. We generate the instances of +/// this structure in the `build_indirect_params.wgsl` compute shader. +#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersNonIndexed { + /// The number of vertices that this mesh has. + pub vertex_count: u32, + /// The number of instances we are to draw. + pub instance_count: u32, + /// The offset of the first vertex for this mesh in the vertex buffer slab. + pub base_vertex: u32, + /// The index of the first mesh instance in the `Mesh` buffer. + pub first_instance: u32, +} + +/// A structure, initialized on CPU and read on GPU, that contains metadata +/// about each batch. +/// +/// Each batch will have one instance of this structure. +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersCpuMetadata { + /// The index of the first instance of this mesh in the array of + /// `MeshUniform`s. + /// + /// Note that this is the *first* output index in this batch. Since each + /// instance of this structure refers to arbitrarily many instances, the + /// `MeshUniform`s corresponding to this batch span the indices + /// `base_output_index..(base_output_index + instance_count)`. + pub base_output_index: u32, + + /// The index of the batch set that this batch belongs to in the + /// [`IndirectBatchSet`] buffer. + /// + /// A *batch set* is a set of meshes that may be multi-drawn together. + /// Multiple batches (and therefore multiple instances of + /// [`IndirectParametersGpuMetadata`] structures) can be part of the same + /// batch set. 
+ pub batch_set_index: u32, +} + +/// A structure, written and read GPU, that records how many instances of each +/// mesh are actually to be drawn. +/// +/// The GPU mesh preprocessing shader increments the +/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it +/// determines that meshes are visible. The indirect parameter building shader +/// reads this metadata in order to construct the indirect draw parameters. +/// +/// Each batch will have one instance of this structure. +#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParametersGpuMetadata { + /// The index of the first mesh in this batch in the array of + /// `MeshInputUniform`s. + pub mesh_index: u32, + + /// The number of instances that were judged visible last frame. + /// + /// The CPU sets this value to 0, and the GPU mesh preprocessing shader + /// increments it as it culls mesh instances. + pub early_instance_count: u32, + + /// The number of instances that have been judged potentially visible this + /// frame that weren't in the last frame's potentially visible set. + /// + /// The CPU sets this value to 0, and the GPU mesh preprocessing shader + /// increments it as it culls mesh instances. + pub late_instance_count: u32, +} + +/// A structure, shared between CPU and GPU, that holds the number of on-GPU +/// indirect draw commands for each *batch set*. +/// +/// A *batch set* is a set of meshes that may be multi-drawn together. +/// +/// If the current hardware and driver support `multi_draw_indirect_count`, the +/// indirect parameters building shader increments +/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The +/// `multi_draw_indirect_count` command reads +/// [`Self::indirect_parameters_count`] in order to determine how many commands +/// belong to each batch set. 
+#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectBatchSet { + /// The number of indirect parameter commands (i.e. batches) in this batch + /// set. + /// + /// The CPU sets this value to 0 before uploading this structure to GPU. The + /// indirect parameters building shader increments this value as it creates + /// indirect parameters. Then the `multi_draw_indirect_count` command reads + /// this value in order to determine how many indirect draw commands to + /// process. + pub indirect_parameters_count: u32, + + /// The offset within the `IndirectParametersBuffers::indexed_data` or + /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw + /// command for this batch set. + /// + /// The CPU fills out this value. + pub indirect_parameters_base: u32, +} + +/// The buffers containing all the information that indirect draw commands +/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene. +/// +/// In addition to the indirect draw buffers themselves, this structure contains +/// the buffers that store [`IndirectParametersGpuMetadata`], which are the +/// structures that culling writes to so that the indirect parameter building +/// pass can determine how many meshes are actually to be drawn. +/// +/// These buffers will remain empty if indirect drawing isn't in use. +#[derive(Resource, Deref, DerefMut)] +pub struct IndirectParametersBuffers { + /// A mapping from a phase type ID to the indirect parameters buffers for + /// that phase. + /// + /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`. + #[deref] + pub buffers: TypeIdMap, + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so + /// that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It primarily + /// exists for the `occlusion_culling` example. 
+ pub allow_copies_from_indirect_parameter_buffers: bool, +} + +impl IndirectParametersBuffers { + /// Initializes a new [`IndirectParametersBuffers`] resource. + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + IndirectParametersBuffers { + buffers: TypeIdMap::default(), + allow_copies_from_indirect_parameter_buffers, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. +/// +/// This is the version of the structure that has a type parameter, so that the +/// batching for different phases can run in parallel. +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +#[derive(Resource)] +pub struct PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + /// The indirect draw buffers for the phase. + pub buffers: UntypedPhaseIndirectParametersBuffers, + phantom: PhantomData, +} + +impl PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self { + PhaseIndirectParametersBuffers { + buffers: UntypedPhaseIndirectParametersBuffers::new( + allow_copies_from_indirect_parameter_buffers, + ), + phantom: PhantomData, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. +/// +/// This is the version of the structure that doesn't have a type parameter, so +/// that it can be inserted into [`IndirectParametersBuffers::buffers`] +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +pub struct UntypedPhaseIndirectParametersBuffers { + /// Information that indirect draw commands use to draw indexed meshes in + /// the scene. + pub indexed: MeshClassIndirectParametersBuffers, + /// Information that indirect draw commands use to draw non-indexed meshes + /// in the scene. 
+ pub non_indexed: MeshClassIndirectParametersBuffers, +} + +impl UntypedPhaseIndirectParametersBuffers { + /// Creates the indirect parameters buffers. + pub fn new( + allow_copies_from_indirect_parameter_buffers: bool, + ) -> UntypedPhaseIndirectParametersBuffers { + let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT; + if allow_copies_from_indirect_parameter_buffers { + indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC; + } + + UntypedPhaseIndirectParametersBuffers { + non_indexed: MeshClassIndirectParametersBuffers::new( + allow_copies_from_indirect_parameter_buffers, + ), + indexed: MeshClassIndirectParametersBuffers::new( + allow_copies_from_indirect_parameter_buffers, + ), + } + } + + /// Reserves space for `count` new batches. + /// + /// The `indexed` parameter specifies whether the meshes that these batches + /// correspond to are indexed or not. + pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 { + if indexed { + self.indexed.allocate(count) + } else { + self.non_indexed.allocate(count) + } + } + + /// Returns the number of batches currently allocated. + /// + /// The `indexed` parameter specifies whether the meshes that these batches + /// correspond to are indexed or not. + fn batch_count(&self, indexed: bool) -> usize { + if indexed { + self.indexed.batch_count() + } else { + self.non_indexed.batch_count() + } + } + + /// Returns the number of batch sets currently allocated. + /// + /// The `indexed` parameter specifies whether the meshes that these batch + /// sets correspond to are indexed or not. + pub fn batch_set_count(&self, indexed: bool) -> usize { + if indexed { + self.indexed.batch_sets.len() + } else { + self.non_indexed.batch_sets.len() + } + } + + /// Adds a new batch set to `Self::indexed_batch_sets` or + /// `Self::non_indexed_batch_sets` as appropriate. + /// + /// `indexed` specifies whether the meshes that these batch sets correspond + /// to are indexed or not. 
`indirect_parameters_base` specifies the offset + /// within `Self::indexed_data` or `Self::non_indexed_data` of the first + /// batch in this batch set. + #[inline] + pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) { + if indexed { + self.indexed.batch_sets.push(IndirectBatchSet { + indirect_parameters_base, + indirect_parameters_count: 0, + }); + } else { + self.non_indexed.batch_sets.push(IndirectBatchSet { + indirect_parameters_base, + indirect_parameters_count: 0, + }); + } + } + + /// Returns the index that a newly-added batch set will have. + /// + /// The `indexed` parameter specifies whether the meshes in such a batch set + /// are indexed or not. + pub fn get_next_batch_set_index(&self, indexed: bool) -> Option { + NonMaxU32::new(self.batch_set_count(indexed) as u32) + } + + /// Clears out the buffers in preparation for a new frame. + pub fn clear(&mut self) { + self.indexed.clear(); + self.non_indexed.clear(); + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single mesh class (indexed or non-indexed), for a +/// single phase. +pub struct MeshClassIndirectParametersBuffers +where + IP: Clone + ShaderSize + WriteInto, +{ + /// The GPU buffer that stores the indirect draw parameters for the meshes. + /// + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + data: UninitBufferVec, + + /// The GPU buffer that holds the data used to construct indirect draw + /// parameters for meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + cpu_metadata: RawBufferVec, + + /// The GPU buffer that holds data built by the GPU used to construct + /// indirect draw parameters for meshes. 
+ /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + gpu_metadata: UninitBufferVec, + + /// The GPU buffer that holds the number of indirect draw commands for each + /// phase of each view, for meshes. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. + batch_sets: RawBufferVec, +} + +impl MeshClassIndirectParametersBuffers +where + IP: Clone + ShaderSize + WriteInto, +{ + fn new( + allow_copies_from_indirect_parameter_buffers: bool, + ) -> MeshClassIndirectParametersBuffers { + let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT; + if allow_copies_from_indirect_parameter_buffers { + indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC; + } + + MeshClassIndirectParametersBuffers { + data: UninitBufferVec::new(indirect_parameter_buffer_usages), + cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE), + gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE), + batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages), + } + } + + /// Returns the GPU buffer that stores the indirect draw parameters for + /// indexed meshes. + /// + /// The indirect parameters building shader writes to this buffer, while the + /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from + /// it to perform the draws. + #[inline] + pub fn data_buffer(&self) -> Option<&Buffer> { + self.data.buffer() + } + + /// Returns the GPU buffer that holds the CPU-constructed data used to + /// construct indirect draw parameters for meshes. + /// + /// The CPU writes to this buffer, and the indirect parameters building + /// shader reads this buffer to construct the indirect draw parameters. 
+ #[inline] + pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> { + self.cpu_metadata.buffer() + } + + /// Returns the GPU buffer that holds the GPU-constructed data used to + /// construct indirect draw parameters for meshes. + /// + /// The GPU mesh preprocessing shader writes to this buffer, and the + /// indirect parameters building shader reads this buffer to construct the + /// indirect draw parameters. + #[inline] + pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> { + self.gpu_metadata.buffer() + } + + /// Returns the GPU buffer that holds the number of indirect draw commands + /// for each phase of each view. + /// + /// The indirect parameters building shader writes to this buffer, and the + /// `multi_draw_indirect_count` command reads from it in order to know how + /// many indirect draw commands to process. + #[inline] + pub fn batch_sets_buffer(&self) -> Option<&Buffer> { + self.batch_sets.buffer() + } + + /// Reserves space for `count` new batches. + /// + /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`], + /// and [`Self::data`] buffers. + fn allocate(&mut self, count: u32) -> u32 { + let length = self.data.len(); + self.cpu_metadata.reserve_internal(count as usize); + self.gpu_metadata.add_multiple(count as usize); + for _ in 0..count { + self.data.add(); + self.cpu_metadata + .push(IndirectParametersCpuMetadata::default()); + } + length as u32 + } + + /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given + /// index. + pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) { + self.cpu_metadata.set(index, value); + } + + /// Returns the number of batches corresponding to meshes that are currently + /// allocated. + #[inline] + pub fn batch_count(&self) -> usize { + self.data.len() + } + + /// Clears out all the buffers in preparation for a new frame. 
+ pub fn clear(&mut self) { + self.data.clear(); + self.cpu_metadata.clear(); + self.gpu_metadata.clear(); + self.batch_sets.clear(); + } +} + +impl Default for IndirectParametersBuffers { + fn default() -> Self { + // By default, we don't allow GPU indirect parameter mapping, since + // that's a debugging option. + Self::new(false) + } +} + +impl FromWorld for GpuPreprocessingSupport { + fn from_world(world: &mut World) -> Self { + let adapter = world.resource::(); + let device = world.resource::(); + + // Filter Android drivers that are incompatible with GPU preprocessing: + // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer + // than 730). + // - We filter out Mali GPUs with driver versions lower than 48. + fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool { + crate::render::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730) + || crate::render::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48) + } + + let culling_feature_support = device.features().contains( + Features::INDIRECT_FIRST_INSTANCE + | Features::MULTI_DRAW_INDIRECT + | Features::PUSH_CONSTANTS, + ); + // Depth downsampling for occlusion culling requires 12 textures + let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 && + // Even if the adapter supports compute, we might be simulating a lack of + // compute via device limits (see `WgpuSettingsPriority::WebGL2` and + // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the + // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary. 
+ device.limits().max_compute_workgroup_storage_size != 0; + + let downlevel_support = adapter + .get_downlevel_capabilities() + .flags + .contains(DownlevelFlags::COMPUTE_SHADERS); + + let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info())); + + let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0 + || is_non_supported_android_device(&adapter_info) + || adapter_info.backend == wgpu::Backend::Gl + { + info!( + "GPU preprocessing is not supported on this device. \ + Falling back to CPU preprocessing.", + ); + GpuPreprocessingMode::None + } else if !(culling_feature_support && limit_support && downlevel_support) { + info!("Some GPU preprocessing are limited on this device."); + GpuPreprocessingMode::PreprocessingOnly + } else { + info!("GPU preprocessing is fully supported on this device."); + GpuPreprocessingMode::Culling + }; + + GpuPreprocessingSupport { max_supported_mode } + } +} + +impl BatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, + BDI: Pod + Sync + Send + Default + 'static, +{ + /// Creates new buffers. + pub fn new() -> Self { + Self::default() + } + + /// Clears out the buffers in preparation for a new frame. + pub fn clear(&mut self) { + for phase_instance_buffer in self.phase_instance_buffers.values_mut() { + phase_instance_buffer.clear(); + } + } +} + +impl UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + pub fn new() -> Self { + UntypedPhaseBatchedInstanceBuffers { + data_buffer: UninitBufferVec::new(BufferUsages::STORAGE), + work_item_buffers: HashMap::default(), + late_indexed_indirect_parameters_buffer: RawBufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + ), + late_non_indexed_indirect_parameters_buffer: RawBufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + ), + } + } + + /// Returns the binding of the buffer that contains the per-instance data. 
+ /// + /// This buffer needs to be filled in via a compute shader. + pub fn instance_data_binding(&self) -> Option> { + self.data_buffer + .buffer() + .map(|buffer| buffer.as_entire_binding()) + } + + /// Clears out the buffers in preparation for a new frame. + pub fn clear(&mut self) { + self.data_buffer.clear(); + self.late_indexed_indirect_parameters_buffer.clear(); + self.late_non_indexed_indirect_parameters_buffer.clear(); + + // Clear each individual set of buffers, but don't depopulate the hash + // table. We want to avoid reallocating these vectors every frame. + for view_work_item_buffers in self.work_item_buffers.values_mut() { + view_work_item_buffers.clear(); + } + } +} + +impl Default for UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + fn default() -> Self { + Self::new() + } +} + +/// Information about a render batch that we're building up during a sorted +/// render phase. +struct SortedRenderBatch +where + F: GetBatchData, +{ + /// The index of the first phase item in this batch in the list of phase + /// items. + phase_item_start_index: u32, + + /// The index of the first instance in this batch in the instance buffer. + instance_start_index: u32, + + /// True if the mesh in question has an index buffer; false otherwise. + indexed: bool, + + /// The index of the indirect parameters for this batch in the + /// [`IndirectParametersBuffers`]. + /// + /// If CPU culling is being used, then this will be `None`. + indirect_parameters_index: Option, + + /// Metadata that can be used to determine whether an instance can be placed + /// into this batch. + /// + /// If `None`, the item inside is unbatchable. + meta: Option>, +} + +impl SortedRenderBatch +where + F: GetBatchData, +{ + /// Finalizes this batch and updates the [`SortedRenderPhase`] with the + /// appropriate indices. + /// + /// `instance_end_index` is the index of the last instance in this batch + /// plus one. 
+ fn flush( + self, + instance_end_index: u32, + phase: &mut SortedRenderPhase, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, + ) where + I: CachedRenderPipelinePhaseItem + SortedPhaseItem, + { + let (batch_range, batch_extra_index) = + phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut(); + *batch_range = self.instance_start_index..instance_end_index; + *batch_extra_index = match self.indirect_parameters_index { + Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex { + range: u32::from(indirect_parameters_index) + ..(u32::from(indirect_parameters_index) + 1), + batch_set_index: None, + }, + None => PhaseItemExtraIndex::None, + }; + if let Some(indirect_parameters_index) = self.indirect_parameters_index { + phase_indirect_parameters_buffers + .add_batch_set(self.indexed, indirect_parameters_index.into()); + } + } +} + +/// A system that runs early in extraction and clears out all the +/// [`BatchedInstanceBuffers`] for the frame. +/// +/// We have to run this during extraction because, if GPU preprocessing is in +/// use, the extraction phase will write to the mesh input uniform buffers +/// directly, so the buffers need to be cleared before then. +pub fn clear_batched_gpu_instance_buffers( + gpu_batched_instance_buffers: Option< + ResMut>, + >, +) where + GFBD: GetFullBatchData, +{ + // Don't clear the entire table, because that would delete the buffers, and + // we want to reuse those allocations. + if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers { + gpu_batched_instance_buffers.clear(); + } +} + +/// A system that removes GPU preprocessing work item buffers that correspond to +/// deleted [`ExtractedView`]s. +/// +/// This is a separate system from [`clear_batched_gpu_instance_buffers`] +/// because [`ExtractedView`]s aren't created until after the extraction phase +/// is completed. 
+pub fn delete_old_work_item_buffers( + mut gpu_batched_instance_buffers: ResMut< + BatchedInstanceBuffers, + >, + extracted_views: Query<&ExtractedView>, +) where + GFBD: GetFullBatchData, +{ + let retained_view_entities: HashSet<_> = extracted_views + .iter() + .map(|extracted_view| extracted_view.retained_view_entity) + .collect(); + for phase_instance_buffers in gpu_batched_instance_buffers + .phase_instance_buffers + .values_mut() + { + phase_instance_buffers + .work_item_buffers + .retain(|retained_view_entity, _| { + retained_view_entities.contains(retained_view_entity) + }); + } +} + +/// Batch the items in a sorted render phase, when GPU instance buffer building +/// is in use. This means comparing metadata needed to draw each phase item and +/// trying to combine the draws into a batch. +pub fn batch_and_prepare_sorted_render_phase( + mut phase_batched_instance_buffers: ResMut>, + mut phase_indirect_parameters_buffers: ResMut>, + mut sorted_render_phases: ResMut>, + mut views: Query<( + &ExtractedView, + Has, + Has, + )>, + system_param_item: StaticSystemParam, +) where + I: CachedRenderPipelinePhaseItem + SortedPhaseItem, + GFBD: GetFullBatchData, +{ + // We only process GPU-built batch data in this function. + let UntypedPhaseBatchedInstanceBuffers { + ref mut data_buffer, + ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, + } = phase_batched_instance_buffers.buffers; + + for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { + let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else { + continue; + }; + + // Create the work item buffer if necessary. + let work_item_buffer = get_or_create_work_item_buffer::( + work_item_buffers, + extracted_view.retained_view_entity, + no_indirect_drawing, + gpu_occlusion_culling, + ); + + // Initialize those work item buffers in preparation for this new frame. 
+ init_work_item_buffers( + work_item_buffer, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + ); + + // Walk through the list of phase items, building up batches as we go. + let mut batch: Option> = None; + + for current_index in 0..phase.items.len() { + // Get the index of the input data, and comparison metadata, for + // this entity. + let item = &phase.items[current_index]; + let entity = item.main_entity(); + let item_is_indexed = item.indexed(); + let current_batch_input_index = + GFBD::get_index_and_compare_data(&system_param_item, entity); + + // Unpack that index and metadata. Note that it's possible for index + // and/or metadata to not be present, which signifies that this + // entity is unbatchable. In that case, we break the batch here. + // If the index isn't present the item is not part of this pipeline and so will be skipped. + let Some((current_input_index, current_meta)) = current_batch_input_index else { + // Break a batch if we need to. + if let Some(batch) = batch.take() { + batch.flush( + data_buffer.len() as u32, + phase, + &mut phase_indirect_parameters_buffers.buffers, + ); + } + + continue; + }; + let current_meta = + current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta)); + + // Determine if this entity can be included in the batch we're + // building up. + let can_batch = batch.as_ref().is_some_and(|batch| { + // `None` for metadata indicates that the items are unbatchable. + match (¤t_meta, &batch.meta) { + (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta, + (_, _) => false, + } + }); + + // Make space in the data buffer for this instance. + let output_index = data_buffer.add() as u32; + + // If we can't batch, break the existing batch and make a new one. + if !can_batch { + // Break a batch if we need to. 
+ if let Some(batch) = batch.take() { + batch.flush( + output_index, + phase, + &mut phase_indirect_parameters_buffers.buffers, + ); + } + + let indirect_parameters_index = if no_indirect_drawing { + None + } else if item_is_indexed { + Some( + phase_indirect_parameters_buffers + .buffers + .indexed + .allocate(1), + ) + } else { + Some( + phase_indirect_parameters_buffers + .buffers + .non_indexed + .allocate(1), + ) + }; + + // Start a new batch. + if let Some(indirect_parameters_index) = indirect_parameters_index { + GFBD::write_batch_indirect_parameters_metadata( + item_is_indexed, + output_index, + None, + &mut phase_indirect_parameters_buffers.buffers, + indirect_parameters_index, + ); + }; + + batch = Some(SortedRenderBatch { + phase_item_start_index: current_index as u32, + instance_start_index: output_index, + indexed: item_is_indexed, + indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new), + meta: current_meta, + }); + } + + // Add a new preprocessing work item so that the preprocessing + // shader will copy the per-instance data over. + if let Some(batch) = batch.as_ref() { + work_item_buffer.push( + item_is_indexed, + PreprocessWorkItem { + input_index: current_input_index.into(), + output_or_indirect_parameters_index: match ( + no_indirect_drawing, + batch.indirect_parameters_index, + ) { + (true, _) => output_index, + (false, Some(indirect_parameters_index)) => { + indirect_parameters_index.into() + } + (false, None) => 0, + }, + }, + ); + } + } + + // Flush the final batch if necessary. + if let Some(batch) = batch.take() { + batch.flush( + data_buffer.len() as u32, + phase, + &mut phase_indirect_parameters_buffers.buffers, + ); + } + } +} + +/// Creates batches for a render phase that uses bins. 
+pub fn batch_and_prepare_binned_render_phase( + mut phase_batched_instance_buffers: ResMut>, + phase_indirect_parameters_buffers: ResMut>, + mut binned_render_phases: ResMut>, + mut views: Query< + ( + &ExtractedView, + Has, + Has, + ), + With, + >, + param: StaticSystemParam, +) where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + let system_param_item = param.into_inner(); + + let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner(); + + let UntypedPhaseBatchedInstanceBuffers { + ref mut data_buffer, + ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, + } = phase_batched_instance_buffers.buffers; + + for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { + let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else { + continue; + }; + + // Create the work item buffer if necessary; otherwise, just mark it as + // used this frame. + let work_item_buffer = get_or_create_work_item_buffer::( + work_item_buffers, + extracted_view.retained_view_entity, + no_indirect_drawing, + gpu_occlusion_culling, + ); + + // Initialize those work item buffers in preparation for this new frame. + init_work_item_buffers( + work_item_buffer, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + ); + + // Prepare multidrawables. + + if let ( + &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets), + &mut PreprocessWorkItemBuffers::Indirect { + indexed: ref mut indexed_work_item_buffer, + non_indexed: ref mut non_indexed_work_item_buffer, + gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers, + }, + ) = (&mut phase.batch_sets, &mut *work_item_buffer) + { + let mut output_index = data_buffer.len() as u32; + + // Initialize the state for both indexed and non-indexed meshes. 
+ let mut indexed_preparer: MultidrawableBatchSetPreparer = + MultidrawableBatchSetPreparer::new( + phase_indirect_parameters_buffers.buffers.batch_count(true) as u32, + phase_indirect_parameters_buffers + .buffers + .indexed + .batch_sets + .len() as u32, + ); + let mut non_indexed_preparer: MultidrawableBatchSetPreparer = + MultidrawableBatchSetPreparer::new( + phase_indirect_parameters_buffers.buffers.batch_count(false) as u32, + phase_indirect_parameters_buffers + .buffers + .non_indexed + .batch_sets + .len() as u32, + ); + + // Prepare each batch set. + for (batch_set_key, bins) in &phase.multidrawable_meshes { + if batch_set_key.indexed() { + indexed_preparer.prepare_multidrawable_binned_batch_set( + bins, + &mut output_index, + data_buffer, + indexed_work_item_buffer, + &mut phase_indirect_parameters_buffers.buffers.indexed, + batch_sets, + ); + } else { + non_indexed_preparer.prepare_multidrawable_binned_batch_set( + bins, + &mut output_index, + data_buffer, + non_indexed_work_item_buffer, + &mut phase_indirect_parameters_buffers.buffers.non_indexed, + batch_sets, + ); + } + } + + // Reserve space in the occlusion culling buffers, if necessary. + if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers { + gpu_occlusion_culling_buffers + .late_indexed + .add_multiple(indexed_preparer.work_item_count); + gpu_occlusion_culling_buffers + .late_non_indexed + .add_multiple(non_indexed_preparer.work_item_count); + } + } + + // Prepare batchables. + + for (key, bin) in &phase.batchable_meshes { + let mut batch: Option = None; + for (&main_entity, &input_index) in bin.entities() { + let output_index = data_buffer.add() as u32; + + match batch { + Some(ref mut batch) => { + batch.instance_range.end = output_index + 1; + + // Append to the current batch. + // + // If we're in indirect mode, then we write the first + // output index of this batch, so that we have a + // tightly-packed buffer if GPU culling discards some of + // the instances. 
Otherwise, we can just write the + // output index directly. + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: *input_index, + output_or_indirect_parameters_index: match ( + no_indirect_drawing, + &batch.extra_index, + ) { + (true, _) => output_index, + ( + false, + PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_range, + .. + }, + ) => indirect_parameters_range.start, + (false, &PhaseItemExtraIndex::DynamicOffset(_)) + | (false, &PhaseItemExtraIndex::None) => 0, + }, + }, + ); + } + + None if !no_indirect_drawing => { + // Start a new batch, in indirect mode. + let indirect_parameters_index = phase_indirect_parameters_buffers + .buffers + .allocate(key.0.indexed(), 1); + let batch_set_index = phase_indirect_parameters_buffers + .buffers + .get_next_batch_set_index(key.0.indexed()); + + GFBD::write_batch_indirect_parameters_metadata( + key.0.indexed(), + output_index, + batch_set_index, + &mut phase_indirect_parameters_buffers.buffers, + indirect_parameters_index, + ); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: *input_index, + output_or_indirect_parameters_index: indirect_parameters_index, + }, + ); + batch = Some(BinnedRenderPhaseBatch { + representative_entity: (Entity::PLACEHOLDER, main_entity), + instance_range: output_index..output_index + 1, + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_index..(indirect_parameters_index + 1), + batch_set_index: None, + }, + }); + } + + None => { + // Start a new batch, in direct mode. 
+ work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: *input_index, + output_or_indirect_parameters_index: output_index, + }, + ); + batch = Some(BinnedRenderPhaseBatch { + representative_entity: (Entity::PLACEHOLDER, main_entity), + instance_range: output_index..output_index + 1, + extra_index: PhaseItemExtraIndex::None, + }); + } + } + } + + if let Some(batch) = batch { + match phase.batch_sets { + BinnedRenderPhaseBatchSets::DynamicUniforms(_) => { + error!("Dynamic uniform batch sets shouldn't be used here"); + } + BinnedRenderPhaseBatchSets::Direct(ref mut vec) => { + vec.push(batch); + } + BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => { + // The Bevy renderer will never mark a mesh as batchable + // but not multidrawable if multidraw is in use. + // However, custom render pipelines might do so, such as + // the `specialized_mesh_pipeline` example. + vec.push(BinnedRenderPhaseBatchSet { + first_batch: batch, + batch_count: 1, + bin_key: key.1.clone(), + index: phase_indirect_parameters_buffers + .buffers + .batch_set_count(key.0.indexed()) + as u32, + }); + } + } + } + } + + // Prepare unbatchables. + for (key, unbatchables) in &mut phase.unbatchable_meshes { + // Allocate the indirect parameters if necessary. 
+ let mut indirect_parameters_offset = if no_indirect_drawing { + None + } else if key.0.indexed() { + Some( + phase_indirect_parameters_buffers + .buffers + .indexed + .allocate(unbatchables.entities.len() as u32), + ) + } else { + Some( + phase_indirect_parameters_buffers + .buffers + .non_indexed + .allocate(unbatchables.entities.len() as u32), + ) + }; + + for main_entity in unbatchables.entities.keys() { + let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity) + else { + continue; + }; + let output_index = data_buffer.add() as u32; + + if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset { + // We're in indirect mode, so add an indirect parameters + // index. + GFBD::write_batch_indirect_parameters_metadata( + key.0.indexed(), + output_index, + None, + &mut phase_indirect_parameters_buffers.buffers, + *indirect_parameters_index, + ); + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_or_indirect_parameters_index: *indirect_parameters_index, + }, + ); + unbatchables + .buffer_indices + .add(UnbatchableBinnedEntityIndices { + instance_index: *indirect_parameters_index, + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: *indirect_parameters_index..(*indirect_parameters_index + 1), + batch_set_index: None, + }, + }); + phase_indirect_parameters_buffers + .buffers + .add_batch_set(key.0.indexed(), *indirect_parameters_index); + *indirect_parameters_index += 1; + } else { + work_item_buffer.push( + key.0.indexed(), + PreprocessWorkItem { + input_index: input_index.into(), + output_or_indirect_parameters_index: output_index, + }, + ); + unbatchables + .buffer_indices + .add(UnbatchableBinnedEntityIndices { + instance_index: output_index, + extra_index: PhaseItemExtraIndex::None, + }); + } + } + } + } +} + +/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct +/// multidrawable batch sets. 
+/// +/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these: +/// one for indexed meshes and one for non-indexed meshes. +struct MultidrawableBatchSetPreparer +where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + /// The offset in the indirect parameters buffer at which the next indirect + /// parameters will be written. + indirect_parameters_index: u32, + /// The number of batch sets we've built so far for this mesh class. + batch_set_index: u32, + /// The number of work items we've emitted so far for this mesh class. + work_item_count: usize, + phantom: PhantomData<(BPI, GFBD)>, +} + +impl MultidrawableBatchSetPreparer +where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing + /// indirect parameters and batch sets at the given indices. + #[inline] + fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self { + MultidrawableBatchSetPreparer { + indirect_parameters_index: initial_indirect_parameters_index, + batch_set_index: initial_batch_set_index, + work_item_count: 0, + phantom: PhantomData, + } + } + + /// Creates batch sets and writes the GPU data needed to draw all visible + /// entities of one mesh class in the given batch set. + /// + /// The *mesh class* represents whether the mesh has indices or not. + #[inline] + fn prepare_multidrawable_binned_batch_set( + &mut self, + bins: &IndexMap, + output_index: &mut u32, + data_buffer: &mut UninitBufferVec, + indexed_work_item_buffer: &mut RawBufferVec, + mesh_class_buffers: &mut MeshClassIndirectParametersBuffers, + batch_sets: &mut Vec>, + ) where + IP: Clone + ShaderSize + WriteInto, + { + let current_indexed_batch_set_index = self.batch_set_index; + let current_output_index = *output_index; + + let indirect_parameters_base = self.indirect_parameters_index; + + // We're going to write the first entity into the batch set. 
Do this + // here so that we can preload the bin into cache as a side effect. + let Some((first_bin_key, first_bin)) = bins.iter().next() else { + return; + }; + let first_bin_len = first_bin.entities().len(); + let first_bin_entity = first_bin + .entities() + .keys() + .next() + .copied() + .unwrap_or(MainEntity::from(Entity::PLACEHOLDER)); + + // Traverse the batch set, processing each bin. + for bin in bins.values() { + // Record the first output index for this batch, as well as its own + // index. + mesh_class_buffers + .cpu_metadata + .push(IndirectParametersCpuMetadata { + base_output_index: *output_index, + batch_set_index: self.batch_set_index, + }); + + // Traverse the bin, pushing `PreprocessWorkItem`s for each entity + // within it. This is a hot loop, so make it as fast as possible. + for &input_index in bin.entities().values() { + indexed_work_item_buffer.push(PreprocessWorkItem { + input_index: *input_index, + output_or_indirect_parameters_index: self.indirect_parameters_index, + }); + } + + // Reserve space for the appropriate number of entities in the data + // buffer. Also, advance the output index and work item count. + let bin_entity_count = bin.entities().len(); + data_buffer.add_multiple(bin_entity_count); + *output_index += bin_entity_count as u32; + self.work_item_count += bin_entity_count; + + self.indirect_parameters_index += 1; + } + + // Reserve space for the bins in this batch set in the GPU buffers. + let bin_count = bins.len(); + mesh_class_buffers.gpu_metadata.add_multiple(bin_count); + mesh_class_buffers.data.add_multiple(bin_count); + + // Write the information the GPU will need about this batch set. + mesh_class_buffers.batch_sets.push(IndirectBatchSet { + indirect_parameters_base, + indirect_parameters_count: 0, + }); + + self.batch_set_index += 1; + + // Record the batch set. The render node later processes this record to + // render the batches. 
+ batch_sets.push(BinnedRenderPhaseBatchSet { + first_batch: BinnedRenderPhaseBatch { + representative_entity: (Entity::PLACEHOLDER, first_bin_entity), + instance_range: current_output_index..(current_output_index + first_bin_len as u32), + extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new( + indirect_parameters_base, + )), + }, + bin_key: (*first_bin_key).clone(), + batch_count: self.indirect_parameters_index - indirect_parameters_base, + index: current_indexed_batch_set_index, + }); + } +} + +/// A system that gathers up the per-phase GPU buffers and inserts them into the +/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables. +/// +/// This runs after the [`batch_and_prepare_binned_render_phase`] or +/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase +/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`] +/// resources and inserts them into the global [`BatchedInstanceBuffers`] and +/// [`IndirectParametersBuffers`] tables. +/// +/// This system exists so that the [`batch_and_prepare_binned_render_phase`] and +/// [`batch_and_prepare_sorted_render_phase`] can run in parallel with one +/// another. If those two systems manipulated [`BatchedInstanceBuffers`] and +/// [`IndirectParametersBuffers`] directly, then they wouldn't be able to run in +/// parallel. +pub fn collect_buffers_for_phase( + mut phase_batched_instance_buffers: ResMut>, + mut phase_indirect_parameters_buffers: ResMut>, + mut batched_instance_buffers: ResMut< + BatchedInstanceBuffers, + >, + mut indirect_parameters_buffers: ResMut, +) where + PI: PhaseItem, + GFBD: GetFullBatchData + Send + Sync + 'static, +{ + // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace + // the contents of the per-phase resource with the old batched instance + // buffers in order to reuse allocations. 
+ let untyped_phase_batched_instance_buffers = + mem::take(&mut phase_batched_instance_buffers.buffers); + if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers + .phase_instance_buffers + .insert(TypeId::of::(), untyped_phase_batched_instance_buffers) + { + old_untyped_phase_batched_instance_buffers.clear(); + phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers; + } + + // Insert the `PhaseIndirectParametersBuffers` into the global table. + // Replace the contents of the per-phase resource with the old indirect + // parameters buffers in order to reuse allocations. + let untyped_phase_indirect_parameters_buffers = mem::replace( + &mut phase_indirect_parameters_buffers.buffers, + UntypedPhaseIndirectParametersBuffers::new( + indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers, + ), + ); + if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers + .insert( + TypeId::of::(), + untyped_phase_indirect_parameters_buffers, + ) + { + old_untyped_phase_indirect_parameters_buffers.clear(); + phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers; + } +} + +/// A system that writes all instance buffers to the GPU. 
+pub fn write_batched_instance_buffers( + render_device: Res, + render_queue: Res, + gpu_array_buffer: ResMut>, +) where + GFBD: GetFullBatchData, +{ + let BatchedInstanceBuffers { + current_input_buffer, + previous_input_buffer, + phase_instance_buffers, + } = gpu_array_buffer.into_inner(); + + current_input_buffer + .buffer + .write_buffer(&render_device, &render_queue); + previous_input_buffer + .buffer + .write_buffer(&render_device, &render_queue); + + for phase_instance_buffers in phase_instance_buffers.values_mut() { + let UntypedPhaseBatchedInstanceBuffers { + ref mut data_buffer, + ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, + } = *phase_instance_buffers; + + data_buffer.write_buffer(&render_device); + late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); + late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); + + for phase_work_item_buffers in work_item_buffers.values_mut() { + match *phase_work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => { + buffer_vec.write_buffer(&render_device, &render_queue); + } + PreprocessWorkItemBuffers::Indirect { + ref mut indexed, + ref mut non_indexed, + ref mut gpu_occlusion_culling, + } => { + indexed.write_buffer(&render_device, &render_queue); + non_indexed.write_buffer(&render_device, &render_queue); + + if let Some(GpuOcclusionCullingWorkItemBuffers { + ref mut late_indexed, + ref mut late_non_indexed, + late_indirect_parameters_indexed_offset: _, + late_indirect_parameters_non_indexed_offset: _, + }) = *gpu_occlusion_culling + { + if !late_indexed.is_empty() { + late_indexed.write_buffer(&render_device); + } + if !late_non_indexed.is_empty() { + late_non_indexed.write_buffer(&render_device); + } + } + } + } + } + } +} + +pub fn clear_indirect_parameters_buffers( + mut indirect_parameters_buffers: ResMut, +) { + for 
phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() { + phase_indirect_parameters_buffers.clear(); + } +} + +pub fn write_indirect_parameters_buffers( + render_device: Res, + render_queue: Res, + mut indirect_parameters_buffers: ResMut, +) { + for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() { + phase_indirect_parameters_buffers + .indexed + .data + .write_buffer(&render_device); + phase_indirect_parameters_buffers + .non_indexed + .data + .write_buffer(&render_device); + + phase_indirect_parameters_buffers + .indexed + .cpu_metadata + .write_buffer(&render_device, &render_queue); + phase_indirect_parameters_buffers + .non_indexed + .cpu_metadata + .write_buffer(&render_device, &render_queue); + + phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata + .write_buffer(&render_device); + phase_indirect_parameters_buffers + .indexed + .gpu_metadata + .write_buffer(&render_device); + + phase_indirect_parameters_buffers + .indexed + .batch_sets + .write_buffer(&render_device, &render_queue); + phase_indirect_parameters_buffers + .non_indexed + .batch_sets + .write_buffer(&render_device, &render_queue); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn instance_buffer_correct_behavior() { + let mut instance_buffer = InstanceInputUniformBuffer::new(); + + let index = instance_buffer.add(2); + instance_buffer.remove(index); + assert_eq!(instance_buffer.get_unchecked(index), 2); + assert_eq!(instance_buffer.get(index), None); + + instance_buffer.add(5); + assert_eq!(instance_buffer.buffer().len(), 1); + } +} diff --git a/crates/libmarathon/src/render/batching/mod.rs b/crates/libmarathon/src/render/batching/mod.rs new file mode 100644 index 0000000..0600ee7 --- /dev/null +++ b/crates/libmarathon/src/render/batching/mod.rs @@ -0,0 +1,225 @@ +use bevy_ecs::{ + component::Component, + entity::Entity, + system::{ResMut, SystemParam, SystemParamItem}, +}; +use bytemuck::Pod; +use 
gpu_preprocessing::UntypedPhaseIndirectParametersBuffers; +use nonmax::NonMaxU32; + +use crate::render::{ + render_phase::{ + BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItemExtraIndex, + SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases, + }, + render_resource::{CachedRenderPipelineId, GpuArrayBufferable}, + sync_world::MainEntity, +}; + +pub mod gpu_preprocessing; +pub mod no_gpu_preprocessing; + +/// Add this component to mesh entities to disable automatic batching +#[derive(Component, Default)] +pub struct NoAutomaticBatching; + +/// Data necessary to be equal for two draw commands to be mergeable +/// +/// This is based on the following assumptions: +/// - Only entities with prepared assets (pipelines, materials, meshes) are +/// queued to phases +/// - View bindings are constant across a phase for a given draw function as +/// phases are per-view +/// - `batch_and_prepare_render_phase` is the only system that performs this +/// batching and has sole responsibility for preparing the per-object data. +/// As such the mesh binding and dynamic offsets are assumed to only be +/// variable as a result of the `batch_and_prepare_render_phase` system, e.g. +/// due to having to split data across separate uniform bindings within the +/// same buffer due to the maximum uniform buffer binding size. +#[derive(PartialEq)] +struct BatchMeta { + /// The pipeline id encompasses all pipeline configuration including vertex + /// buffers and layouts, shaders and their specializations, bind group + /// layouts, etc. 
+ pipeline_id: CachedRenderPipelineId, + /// The draw function id defines the `RenderCommands` that are called to + /// set the pipeline and bindings, and make the draw command + draw_function_id: DrawFunctionId, + dynamic_offset: Option, + user_data: T, +} + +impl BatchMeta { + fn new(item: &impl CachedRenderPipelinePhaseItem, user_data: T) -> Self { + BatchMeta { + pipeline_id: item.cached_pipeline(), + draw_function_id: item.draw_function(), + dynamic_offset: match item.extra_index() { + PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => { + NonMaxU32::new(dynamic_offset) + } + PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => { + None + } + }, + user_data, + } + } +} + +/// A trait to support getting data used for batching draw commands via phase +/// items. +/// +/// This is a simple version that only allows for sorting, not binning, as well +/// as only CPU processing, not GPU preprocessing. For these fancier features, +/// see [`GetFullBatchData`]. +pub trait GetBatchData { + /// The system parameters [`GetBatchData::get_batch_data`] needs in + /// order to compute the batch data. + type Param: SystemParam + 'static; + /// Data used for comparison between phase items. If the pipeline id, draw + /// function id, per-instance data buffer dynamic offset and this data + /// matches, the draws can be batched. + type CompareData: PartialEq; + /// The per-instance data to be inserted into the + /// [`crate::render_resource::GpuArrayBuffer`] containing these data for all + /// instances. + type BufferData: GpuArrayBufferable + Sync + Send + 'static; + /// Get the per-instance data to be inserted into the + /// [`crate::render_resource::GpuArrayBuffer`]. If the instance can be + /// batched, also return the data used for comparison when deciding whether + /// draws can be batched, else return None for the `CompareData`. + /// + /// This is only called when building instance data on CPU. 
In the GPU + /// instance data building path, we use + /// [`GetFullBatchData::get_index_and_compare_data`] instead. + fn get_batch_data( + param: &SystemParamItem, + query_item: (Entity, MainEntity), + ) -> Option<(Self::BufferData, Option)>; +} + +/// A trait to support getting data used for batching draw commands via phase +/// items. +/// +/// This version allows for binning and GPU preprocessing. +pub trait GetFullBatchData: GetBatchData { + /// The per-instance data that was inserted into the + /// [`crate::render_resource::BufferVec`] during extraction. + type BufferInputData: Pod + Default + Sync + Send; + + /// Get the per-instance data to be inserted into the + /// [`crate::render_resource::GpuArrayBuffer`]. + /// + /// This is only called when building uniforms on CPU. In the GPU instance + /// buffer building path, we use + /// [`GetFullBatchData::get_index_and_compare_data`] instead. + fn get_binned_batch_data( + param: &SystemParamItem, + query_item: MainEntity, + ) -> Option; + + /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the + /// GPU preprocessing phase will use. + /// + /// We already inserted the [`GetFullBatchData::BufferInputData`] during the + /// extraction phase before we got here, so this function shouldn't need to + /// look up any render data. If CPU instance buffer building is in use, this + /// function will never be called. + fn get_index_and_compare_data( + param: &SystemParamItem, + query_item: MainEntity, + ) -> Option<(NonMaxU32, Option)>; + + /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the + /// GPU preprocessing phase will use. + /// + /// We already inserted the [`GetFullBatchData::BufferInputData`] during the + /// extraction phase before we got here, so this function shouldn't need to + /// look up any render data. + /// + /// This function is currently only called for unbatchable entities when GPU + /// instance buffer building is in use. 
For batchable entities, the uniform + /// index is written during queuing (e.g. in `queue_material_meshes`). In + /// the case of CPU instance buffer building, the CPU writes the uniforms, + /// so there's no index to return. + fn get_binned_index( + param: &SystemParamItem, + query_item: MainEntity, + ) -> Option; + + /// Writes the [`gpu_preprocessing::IndirectParametersGpuMetadata`] + /// necessary to draw this batch into the given metadata buffer at the given + /// index. + /// + /// This is only used if GPU culling is enabled (which requires GPU + /// preprocessing). + /// + /// * `indexed` is true if the mesh is indexed or false if it's non-indexed. + /// + /// * `base_output_index` is the index of the first mesh instance in this + /// batch in the `MeshUniform` output buffer. + /// + /// * `batch_set_index` is the index of the batch set in the + /// [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to + /// a batch set. + /// + /// * `indirect_parameters_buffers` is the buffer in which to write the + /// metadata. + /// + /// * `indirect_parameters_offset` is the index in that buffer at which to + /// write the metadata. + fn write_batch_indirect_parameters_metadata( + indexed: bool, + base_output_index: u32, + batch_set_index: Option, + indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, + indirect_parameters_offset: u32, + ); +} + +/// Sorts a render phase that uses bins. +pub fn sort_binned_render_phase(mut phases: ResMut>) +where + BPI: BinnedPhaseItem, +{ + for phase in phases.values_mut() { + phase.multidrawable_meshes.sort_unstable_keys(); + phase.batchable_meshes.sort_unstable_keys(); + phase.unbatchable_meshes.sort_unstable_keys(); + phase.non_mesh_items.sort_unstable_keys(); + } +} + +/// Batches the items in a sorted render phase. +/// +/// This means comparing metadata needed to draw each phase item and trying to +/// combine the draws into a batch. 
+/// +/// This is common code factored out from +/// [`gpu_preprocessing::batch_and_prepare_sorted_render_phase`] and +/// [`no_gpu_preprocessing::batch_and_prepare_sorted_render_phase`]. +fn batch_and_prepare_sorted_render_phase( + phase: &mut SortedRenderPhase, + mut process_item: impl FnMut(&mut I) -> Option, +) where + I: CachedRenderPipelinePhaseItem + SortedPhaseItem, + GBD: GetBatchData, +{ + let items = phase.items.iter_mut().map(|item| { + let batch_data = match process_item(item) { + Some(compare_data) if I::AUTOMATIC_BATCHING => Some(BatchMeta::new(item, compare_data)), + _ => None, + }; + (item.batch_range_mut(), batch_data) + }); + + items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| { + if batch_meta.is_some() && prev_batch_meta == batch_meta { + start_range.end = range.end; + (start_range, prev_batch_meta) + } else { + (range, batch_meta) + } + }); +} diff --git a/crates/libmarathon/src/render/batching/no_gpu_preprocessing.rs b/crates/libmarathon/src/render/batching/no_gpu_preprocessing.rs new file mode 100644 index 0000000..6accfdf --- /dev/null +++ b/crates/libmarathon/src/render/batching/no_gpu_preprocessing.rs @@ -0,0 +1,182 @@ +//! Batching functionality when GPU preprocessing isn't in use. + +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::entity::Entity; +use bevy_ecs::resource::Resource; +use bevy_ecs::system::{Res, ResMut, StaticSystemParam}; +use smallvec::{smallvec, SmallVec}; +use tracing::error; +use wgpu::BindingResource; + +use crate::render::{ + render_phase::{ + BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSets, + CachedRenderPipelinePhaseItem, PhaseItemExtraIndex, SortedPhaseItem, + ViewBinnedRenderPhases, ViewSortedRenderPhases, + }, + render_resource::{GpuArrayBuffer, GpuArrayBufferable}, + renderer::{RenderDevice, RenderQueue}, +}; + +use super::{GetBatchData, GetFullBatchData}; + +/// The GPU buffers holding the data needed to render batches. 
+/// +/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the +/// `BD` type parameter in that mode. +#[derive(Resource, Deref, DerefMut)] +pub struct BatchedInstanceBuffer(pub GpuArrayBuffer) +where + BD: GpuArrayBufferable + Sync + Send + 'static; + +impl BatchedInstanceBuffer +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// Creates a new buffer. + pub fn new(render_device: &RenderDevice) -> Self { + BatchedInstanceBuffer(GpuArrayBuffer::new(render_device)) + } + + /// Returns the binding of the buffer that contains the per-instance data. + /// + /// If we're in the GPU instance buffer building mode, this buffer needs to + /// be filled in via a compute shader. + pub fn instance_data_binding(&self) -> Option> { + self.binding() + } +} + +/// A system that clears out the [`BatchedInstanceBuffer`] for the frame. +/// +/// This needs to run before the CPU batched instance buffers are used. +pub fn clear_batched_cpu_instance_buffers( + cpu_batched_instance_buffer: Option>>, +) where + GBD: GetBatchData, +{ + if let Some(mut cpu_batched_instance_buffer) = cpu_batched_instance_buffer { + cpu_batched_instance_buffer.clear(); + } +} + +/// Batch the items in a sorted render phase, when GPU instance buffer building +/// isn't in use. This means comparing metadata needed to draw each phase item +/// and trying to combine the draws into a batch. +pub fn batch_and_prepare_sorted_render_phase( + batched_instance_buffer: ResMut>, + mut phases: ResMut>, + param: StaticSystemParam, +) where + I: CachedRenderPipelinePhaseItem + SortedPhaseItem, + GBD: GetBatchData, +{ + let system_param_item = param.into_inner(); + + // We only process CPU-built batch data in this function. 
+ let batched_instance_buffer = batched_instance_buffer.into_inner(); + + for phase in phases.values_mut() { + super::batch_and_prepare_sorted_render_phase::(phase, |item| { + let (buffer_data, compare_data) = + GBD::get_batch_data(&system_param_item, (item.entity(), item.main_entity()))?; + let buffer_index = batched_instance_buffer.push(buffer_data); + + let index = buffer_index.index; + let (batch_range, extra_index) = item.batch_range_and_extra_index_mut(); + *batch_range = index..index + 1; + *extra_index = PhaseItemExtraIndex::maybe_dynamic_offset(buffer_index.dynamic_offset); + + compare_data + }); + } +} + +/// Creates batches for a render phase that uses bins, when GPU batch data +/// building isn't in use. +pub fn batch_and_prepare_binned_render_phase( + gpu_array_buffer: ResMut>, + mut phases: ResMut>, + param: StaticSystemParam, +) where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + let gpu_array_buffer = gpu_array_buffer.into_inner(); + let system_param_item = param.into_inner(); + + for phase in phases.values_mut() { + // Prepare batchables. + + for bin in phase.batchable_meshes.values_mut() { + let mut batch_set: SmallVec<[BinnedRenderPhaseBatch; 1]> = smallvec![]; + for main_entity in bin.entities().keys() { + let Some(buffer_data) = + GFBD::get_binned_batch_data(&system_param_item, *main_entity) + else { + continue; + }; + let instance = gpu_array_buffer.push(buffer_data); + + // If the dynamic offset has changed, flush the batch. + // + // This is the only time we ever have more than one batch per + // bin. Note that dynamic offsets are only used on platforms + // with no storage buffers. 
+ if !batch_set.last().is_some_and(|batch| { + batch.instance_range.end == instance.index + && batch.extra_index + == PhaseItemExtraIndex::maybe_dynamic_offset(instance.dynamic_offset) + }) { + batch_set.push(BinnedRenderPhaseBatch { + representative_entity: (Entity::PLACEHOLDER, *main_entity), + instance_range: instance.index..instance.index, + extra_index: PhaseItemExtraIndex::maybe_dynamic_offset( + instance.dynamic_offset, + ), + }); + } + + if let Some(batch) = batch_set.last_mut() { + batch.instance_range.end = instance.index + 1; + } + } + + match phase.batch_sets { + BinnedRenderPhaseBatchSets::DynamicUniforms(ref mut batch_sets) => { + batch_sets.push(batch_set); + } + BinnedRenderPhaseBatchSets::Direct(_) + | BinnedRenderPhaseBatchSets::MultidrawIndirect { .. } => { + error!( + "Dynamic uniform batch sets should be used when GPU preprocessing is off" + ); + } + } + } + + // Prepare unbatchables. + for unbatchables in phase.unbatchable_meshes.values_mut() { + for main_entity in unbatchables.entities.keys() { + let Some(buffer_data) = + GFBD::get_binned_batch_data(&system_param_item, *main_entity) + else { + continue; + }; + let instance = gpu_array_buffer.push(buffer_data); + unbatchables.buffer_indices.add(instance.into()); + } + } + } +} + +/// Writes the instance buffer data to the GPU. +pub fn write_batched_instance_buffer( + render_device: Res, + render_queue: Res, + mut cpu_batched_instance_buffer: ResMut>, +) where + GBD: GetBatchData, +{ + cpu_batched_instance_buffer.write_buffer(&render_device, &render_queue); +} diff --git a/crates/libmarathon/src/render/bindless.wgsl b/crates/libmarathon/src/render/bindless.wgsl new file mode 100644 index 0000000..6c8eff1 --- /dev/null +++ b/crates/libmarathon/src/render/bindless.wgsl @@ -0,0 +1,37 @@ +// Defines the common arrays used to access bindless resources. +// +// This need to be kept up to date with the `BINDING_NUMBERS` table in +// `bindless.rs`. 
+// +// You access these by indexing into the bindless index table, and from there +// indexing into the appropriate binding array. For example, to access the base +// color texture of a `StandardMaterial` in bindless mode, write +// `bindless_textures_2d[materials[slot].base_color_texture]`, where +// `materials` is the bindless index table and `slot` is the index into that +// table (which can be found in the `Mesh`). + +#define_import_path bevy_render::bindless + +#ifdef BINDLESS + +// Binding 0 is the bindless index table. +// Filtering samplers. +@group(#{MATERIAL_BIND_GROUP}) @binding(1) var bindless_samplers_filtering: binding_array; +// Non-filtering samplers (nearest neighbor). +@group(#{MATERIAL_BIND_GROUP}) @binding(2) var bindless_samplers_non_filtering: binding_array; +// Comparison samplers (typically for shadow mapping). +@group(#{MATERIAL_BIND_GROUP}) @binding(3) var bindless_samplers_comparison: binding_array; +// 1D textures. +@group(#{MATERIAL_BIND_GROUP}) @binding(4) var bindless_textures_1d: binding_array>; +// 2D textures. +@group(#{MATERIAL_BIND_GROUP}) @binding(5) var bindless_textures_2d: binding_array>; +// 2D array textures. +@group(#{MATERIAL_BIND_GROUP}) @binding(6) var bindless_textures_2d_array: binding_array>; +// 3D textures. +@group(#{MATERIAL_BIND_GROUP}) @binding(7) var bindless_textures_3d: binding_array>; +// Cubemap textures. +@group(#{MATERIAL_BIND_GROUP}) @binding(8) var bindless_textures_cube: binding_array>; +// Cubemap array textures. 
+@group(#{MATERIAL_BIND_GROUP}) @binding(9) var bindless_textures_cube_array: binding_array>; + +#endif // BINDLESS diff --git a/crates/libmarathon/src/render/blit/blit.wgsl b/crates/libmarathon/src/render/blit/blit.wgsl new file mode 100644 index 0000000..82521bf --- /dev/null +++ b/crates/libmarathon/src/render/blit/blit.wgsl @@ -0,0 +1,9 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) var in_texture: texture_2d; +@group(0) @binding(1) var in_sampler: sampler; + +@fragment +fn fs_main(in: FullscreenVertexOutput) -> @location(0) vec4 { + return textureSample(in_texture, in_sampler, in.uv); +} diff --git a/crates/libmarathon/src/render/blit/mod.rs b/crates/libmarathon/src/render/blit/mod.rs new file mode 100644 index 0000000..7a205af --- /dev/null +++ b/crates/libmarathon/src/render/blit/mod.rs @@ -0,0 +1,114 @@ +use crate::render::FullscreenShader; +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle}; +use bevy_ecs::prelude::*; +use crate::render::{ + render_resource::{ + binding_types::{sampler, texture_2d}, + *, + }, + renderer::RenderDevice, + RenderApp, RenderStartup, +}; +use bevy_shader::Shader; +use bevy_utils::default; + +/// Adds support for specialized "blit pipelines", which can be used to write one texture to another. 
+pub struct BlitPlugin; + +impl Plugin for BlitPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "blit.wgsl"); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .allow_ambiguous_resource::>() + .init_resource::>() + .add_systems(RenderStartup, init_blit_pipeline); + } +} + +#[derive(Resource)] +pub struct BlitPipeline { + pub layout: BindGroupLayout, + pub sampler: Sampler, + pub fullscreen_shader: FullscreenShader, + pub fragment_shader: Handle, +} + +pub fn init_blit_pipeline( + mut commands: Commands, + render_device: Res, + fullscreen_shader: Res, + asset_server: Res, +) { + let layout = render_device.create_bind_group_layout( + "blit_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + sampler(SamplerBindingType::NonFiltering), + ), + ), + ); + + let sampler = render_device.create_sampler(&SamplerDescriptor::default()); + + commands.insert_resource(BlitPipeline { + layout, + sampler, + fullscreen_shader: fullscreen_shader.clone(), + fragment_shader: load_embedded_asset!(asset_server.as_ref(), "blit.wgsl"), + }); +} + +impl BlitPipeline { + pub fn create_bind_group( + &self, + render_device: &RenderDevice, + src_texture: &TextureView, + ) -> BindGroup { + render_device.create_bind_group( + None, + &self.layout, + &BindGroupEntries::sequential((src_texture, &self.sampler)), + ) + } +} + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +pub struct BlitPipelineKey { + pub texture_format: TextureFormat, + pub blend_state: Option, + pub samples: u32, +} + +impl SpecializedRenderPipeline for BlitPipeline { + type Key = BlitPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + RenderPipelineDescriptor { + label: Some("blit pipeline".into()), + layout: vec![self.layout.clone()], + vertex: self.fullscreen_shader.to_vertex_state(), + fragment: Some(FragmentState { + shader: 
self.fragment_shader.clone(), + targets: vec![Some(ColorTargetState { + format: key.texture_format, + blend: key.blend_state, + write_mask: ColorWrites::ALL, + })], + ..default() + }), + multisample: MultisampleState { + count: key.samples, + ..default() + }, + ..default() + } + } +} diff --git a/crates/libmarathon/src/render/camera.rs b/crates/libmarathon/src/render/camera.rs new file mode 100644 index 0000000..b2e701e --- /dev/null +++ b/crates/libmarathon/src/render/camera.rs @@ -0,0 +1,695 @@ +use crate::render::{ + batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, + extract_component::{ExtractComponent, ExtractComponentPlugin}, + extract_resource::{ExtractResource, ExtractResourcePlugin}, + render_asset::RenderAssets, + render_graph::{CameraDriverNode, InternedRenderSubGraph, RenderGraph, RenderSubGraph}, + render_resource::TextureView, + sync_world::{RenderEntity, SyncToRenderWorld}, + texture::{GpuImage, ManualTextureViews}, + view::{ + ColorGrading, ExtractedView, ExtractedWindows, Hdr, Msaa, NoIndirectDrawing, + RenderVisibleEntities, RetainedViewEntity, ViewUniformOffset, + }, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; + +use bevy_app::{App, Plugin, PostStartup, PostUpdate}; +use bevy_asset::{AssetEvent, AssetEventSystems, AssetId, Assets}; +use bevy_camera::{ + primitives::Frustum, + visibility::{self, RenderLayers, VisibleEntities}, + Camera, Camera2d, Camera3d, CameraMainTextureUsages, CameraOutputMode, CameraUpdateSystems, + ClearColor, ClearColorConfig, Exposure, ManualTextureViewHandle, NormalizedRenderTarget, + Projection, RenderTargetInfo, Viewport, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + change_detection::DetectChanges, + component::Component, + entity::{ContainsEntity, Entity}, + error::BevyError, + lifecycle::HookContext, + message::MessageReader, + prelude::With, + query::{Has, QueryItem}, + reflect::ReflectComponent, + resource::Resource, + schedule::IntoScheduleConfigs, + 
system::{Commands, Query, Res, ResMut}, + world::DeferredWorld, +}; +use bevy_image::Image; +use bevy_math::{uvec2, vec2, Mat4, URect, UVec2, UVec4, Vec2}; +use bevy_platform::collections::{HashMap, HashSet}; +use bevy_reflect::prelude::*; +use bevy_transform::components::GlobalTransform; +use bevy_window::{PrimaryWindow, Window, WindowCreated, WindowResized, WindowScaleFactorChanged}; +use tracing::warn; +use wgpu::TextureFormat; + +#[derive(Default)] +pub struct CameraPlugin; + +impl Plugin for CameraPlugin { + fn build(&self, app: &mut App) { + app.register_required_components::() + .register_required_components::() + .register_required_components::() + .register_required_components::() + .add_plugins(( + ExtractResourcePlugin::::default(), + ExtractComponentPlugin::::default(), + )) + .add_systems(PostStartup, camera_system.in_set(CameraUpdateSystems)) + .add_systems( + PostUpdate, + camera_system + .in_set(CameraUpdateSystems) + .before(AssetEventSystems) + .before(visibility::update_frusta), + ); + app.world_mut() + .register_component_hooks::() + .on_add(warn_on_no_render_graph); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .add_systems(ExtractSchedule, extract_cameras) + .add_systems(Render, sort_cameras.in_set(RenderSystems::ManageViews)); + let camera_driver_node = CameraDriverNode::new(render_app.world_mut()); + let mut render_graph = render_app.world_mut().resource_mut::(); + render_graph.add_node(crate::render::graph::CameraDriverLabel, camera_driver_node); + } + } +} + +fn warn_on_no_render_graph(world: DeferredWorld, HookContext { entity, caller, .. }: HookContext) { + if !world.entity(entity).contains::() { + warn!("{}Entity {entity} has a `Camera` component, but it doesn't have a render graph configured. Usually, adding a `Camera2d` or `Camera3d` component will work. 
+ However, you may instead need to enable `bevy_core_pipeline`, or may want to manually add a `CameraRenderGraph` component to create a custom render graph.", caller.map(|location|format!("{location}: ")).unwrap_or_default()); + } +} + +impl ExtractResource for ClearColor { + type Source = Self; + + fn extract_resource(source: &Self::Source) -> Self { + source.clone() + } +} +impl ExtractComponent for CameraMainTextureUsages { + type QueryData = &'static Self; + type QueryFilter = (); + type Out = Self; + + fn extract_component(item: QueryItem) -> Option { + Some(*item) + } +} +impl ExtractComponent for Camera2d { + type QueryData = &'static Self; + type QueryFilter = With; + type Out = Self; + + fn extract_component(item: QueryItem) -> Option { + Some(item.clone()) + } +} +impl ExtractComponent for Camera3d { + type QueryData = &'static Self; + type QueryFilter = With; + type Out = Self; + + fn extract_component(item: QueryItem) -> Option { + Some(item.clone()) + } +} + +/// Configures the [`RenderGraph`] name assigned to be run for a given [`Camera`] entity. +#[derive(Component, Debug, Deref, DerefMut, Reflect, Clone)] +#[reflect(opaque)] +#[reflect(Component, Debug, Clone)] +pub struct CameraRenderGraph(InternedRenderSubGraph); + +impl CameraRenderGraph { + /// Creates a new [`CameraRenderGraph`] from any string-like type. + #[inline] + pub fn new(name: T) -> Self { + Self(name.intern()) + } + + /// Sets the graph name. + #[inline] + pub fn set(&mut self, name: T) { + self.0 = name.intern(); + } +} + +pub trait NormalizedRenderTargetExt { + fn get_texture_view<'a>( + &self, + windows: &'a ExtractedWindows, + images: &'a RenderAssets, + manual_texture_views: &'a ManualTextureViews, + ) -> Option<&'a TextureView>; + + /// Retrieves the [`TextureFormat`] of this render target, if it exists. 
+ fn get_texture_format<'a>( + &self, + windows: &'a ExtractedWindows, + images: &'a RenderAssets, + manual_texture_views: &'a ManualTextureViews, + ) -> Option; + + fn get_render_target_info<'a>( + &self, + resolutions: impl IntoIterator, + images: &Assets, + manual_texture_views: &ManualTextureViews, + ) -> Result; + + // Check if this render target is contained in the given changed windows or images. + fn is_changed( + &self, + changed_window_ids: &HashSet, + changed_image_handles: &HashSet<&AssetId>, + ) -> bool; +} + +impl NormalizedRenderTargetExt for NormalizedRenderTarget { + fn get_texture_view<'a>( + &self, + windows: &'a ExtractedWindows, + images: &'a RenderAssets, + manual_texture_views: &'a ManualTextureViews, + ) -> Option<&'a TextureView> { + match self { + NormalizedRenderTarget::Window(window_ref) => windows + .get(&window_ref.entity()) + .and_then(|window| window.swap_chain_texture_view.as_ref()), + NormalizedRenderTarget::Image(image_target) => images + .get(&image_target.handle) + .map(|image| &image.texture_view), + NormalizedRenderTarget::TextureView(id) => { + manual_texture_views.get(id).map(|tex| &tex.texture_view) + } + NormalizedRenderTarget::None { .. } => None, + } + } + + /// Retrieves the [`TextureFormat`] of this render target, if it exists. + fn get_texture_format<'a>( + &self, + windows: &'a ExtractedWindows, + images: &'a RenderAssets, + manual_texture_views: &'a ManualTextureViews, + ) -> Option { + match self { + NormalizedRenderTarget::Window(window_ref) => windows + .get(&window_ref.entity()) + .and_then(|window| window.swap_chain_texture_format), + NormalizedRenderTarget::Image(image_target) => images + .get(&image_target.handle) + .map(|image| image.texture_format), + NormalizedRenderTarget::TextureView(id) => { + manual_texture_views.get(id).map(|tex| tex.format) + } + NormalizedRenderTarget::None { .. 
} => None, + } + } + + fn get_render_target_info<'a>( + &self, + resolutions: impl IntoIterator, + images: &Assets, + manual_texture_views: &ManualTextureViews, + ) -> Result { + match self { + NormalizedRenderTarget::Window(window_ref) => resolutions + .into_iter() + .find(|(entity, _)| *entity == window_ref.entity()) + .map(|(_, window)| RenderTargetInfo { + physical_size: window.physical_size(), + scale_factor: window.resolution.scale_factor(), + }) + .ok_or(MissingRenderTargetInfoError::Window { + window: window_ref.entity(), + }), + NormalizedRenderTarget::Image(image_target) => images + .get(&image_target.handle) + .map(|image| RenderTargetInfo { + physical_size: image.size(), + scale_factor: image_target.scale_factor.0, + }) + .ok_or(MissingRenderTargetInfoError::Image { + image: image_target.handle.id(), + }), + NormalizedRenderTarget::TextureView(id) => manual_texture_views + .get(id) + .map(|tex| RenderTargetInfo { + physical_size: tex.size, + scale_factor: 1.0, + }) + .ok_or(MissingRenderTargetInfoError::TextureView { texture_view: *id }), + NormalizedRenderTarget::None { width, height } => Ok(RenderTargetInfo { + physical_size: uvec2(*width, *height), + scale_factor: 1.0, + }), + } + } + + // Check if this render target is contained in the given changed windows or images. + fn is_changed( + &self, + changed_window_ids: &HashSet, + changed_image_handles: &HashSet<&AssetId>, + ) -> bool { + match self { + NormalizedRenderTarget::Window(window_ref) => { + changed_window_ids.contains(&window_ref.entity()) + } + NormalizedRenderTarget::Image(image_target) => { + changed_image_handles.contains(&image_target.handle.id()) + } + NormalizedRenderTarget::TextureView(_) => true, + NormalizedRenderTarget::None { .. 
} => false, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum MissingRenderTargetInfoError { + #[error("RenderTarget::Window missing ({window:?}): Make sure the provided entity has a Window component.")] + Window { window: Entity }, + #[error("RenderTarget::Image missing ({image:?}): Make sure the Image's usages include RenderAssetUsages::MAIN_WORLD.")] + Image { image: AssetId }, + #[error("RenderTarget::TextureView missing ({texture_view:?}): make sure the texture view handle was not removed.")] + TextureView { + texture_view: ManualTextureViewHandle, + }, +} + +/// System in charge of updating a [`Camera`] when its window or projection changes. +/// +/// The system detects window creation, resize, and scale factor change events to update the camera +/// [`Projection`] if needed. +/// +/// ## World Resources +/// +/// [`Res>`](Assets) -- For cameras that render to an image, this resource is used to +/// inspect information about the render target. This system will not access any other image assets. 
+/// +/// [`OrthographicProjection`]: bevy_camera::OrthographicProjection +/// [`PerspectiveProjection`]: bevy_camera::PerspectiveProjection +pub fn camera_system( + mut window_resized_reader: MessageReader, + mut window_created_reader: MessageReader, + mut window_scale_factor_changed_reader: MessageReader, + mut image_asset_event_reader: MessageReader>, + primary_window: Query>, + windows: Query<(Entity, &Window)>, + images: Res>, + manual_texture_views: Res, + mut cameras: Query<(&mut Camera, &mut Projection)>, +) -> Result<(), BevyError> { + let primary_window = primary_window.iter().next(); + + let mut changed_window_ids = >::default(); + changed_window_ids.extend(window_created_reader.read().map(|event| event.window)); + changed_window_ids.extend(window_resized_reader.read().map(|event| event.window)); + let scale_factor_changed_window_ids: HashSet<_> = window_scale_factor_changed_reader + .read() + .map(|event| event.window) + .collect(); + changed_window_ids.extend(scale_factor_changed_window_ids.clone()); + + let changed_image_handles: HashSet<&AssetId> = image_asset_event_reader + .read() + .filter_map(|event| match event { + AssetEvent::Modified { id } | AssetEvent::Added { id } => Some(id), + _ => None, + }) + .collect(); + + for (mut camera, mut camera_projection) in &mut cameras { + let mut viewport_size = camera + .viewport + .as_ref() + .map(|viewport| viewport.physical_size); + + if let Some(normalized_target) = &camera.target.normalize(primary_window) + && (normalized_target.is_changed(&changed_window_ids, &changed_image_handles) + || camera.is_added() + || camera_projection.is_changed() + || camera.computed.old_viewport_size != viewport_size + || camera.computed.old_sub_camera_view != camera.sub_camera_view) + { + let new_computed_target_info = normalized_target.get_render_target_info( + windows, + &images, + &manual_texture_views, + )?; + // Check for the scale factor changing, and resize the viewport if needed. 
+ // This can happen when the window is moved between monitors with different DPIs. + // Without this, the viewport will take a smaller portion of the window moved to + // a higher DPI monitor. + if normalized_target.is_changed(&scale_factor_changed_window_ids, &HashSet::default()) + && let Some(old_scale_factor) = camera + .computed + .target_info + .as_ref() + .map(|info| info.scale_factor) + { + let resize_factor = new_computed_target_info.scale_factor / old_scale_factor; + if let Some(ref mut viewport) = camera.viewport { + let resize = |vec: UVec2| (vec.as_vec2() * resize_factor).as_uvec2(); + viewport.physical_position = resize(viewport.physical_position); + viewport.physical_size = resize(viewport.physical_size); + viewport_size = Some(viewport.physical_size); + } + } + // This check is needed because when changing WindowMode to Fullscreen, the viewport may have invalid + // arguments due to a sudden change on the window size to a lower value. + // If the size of the window is lower, the viewport will match that lower value. 
+ if let Some(viewport) = &mut camera.viewport { + viewport.clamp_to_size(new_computed_target_info.physical_size); + } + camera.computed.target_info = Some(new_computed_target_info); + if let Some(size) = camera.logical_viewport_size() + && size.x != 0.0 + && size.y != 0.0 + { + camera_projection.update(size.x, size.y); + camera.computed.clip_from_view = match &camera.sub_camera_view { + Some(sub_view) => camera_projection.get_clip_from_view_for_sub(sub_view), + None => camera_projection.get_clip_from_view(), + } + } + } + + if camera.computed.old_viewport_size != viewport_size { + camera.computed.old_viewport_size = viewport_size; + } + + if camera.computed.old_sub_camera_view != camera.sub_camera_view { + camera.computed.old_sub_camera_view = camera.sub_camera_view; + } + } + Ok(()) +} + +#[derive(Component, Debug)] +pub struct ExtractedCamera { + pub target: Option, + pub physical_viewport_size: Option, + pub physical_target_size: Option, + pub viewport: Option, + pub render_graph: InternedRenderSubGraph, + pub order: isize, + pub output_mode: CameraOutputMode, + pub msaa_writeback: bool, + pub clear_color: ClearColorConfig, + pub sorted_camera_index_for_target: usize, + pub exposure: f32, + pub hdr: bool, +} + +pub fn extract_cameras( + mut commands: Commands, + query: Extract< + Query<( + Entity, + RenderEntity, + &Camera, + &CameraRenderGraph, + &GlobalTransform, + &VisibleEntities, + &Frustum, + Has, + Option<&ColorGrading>, + Option<&Exposure>, + Option<&TemporalJitter>, + Option<&MipBias>, + Option<&RenderLayers>, + Option<&Projection>, + Has, + )>, + >, + primary_window: Extract>>, + gpu_preprocessing_support: Res, + mapper: Extract>, +) { + let primary_window = primary_window.iter().next(); + type ExtractedCameraComponents = ( + ExtractedCamera, + ExtractedView, + RenderVisibleEntities, + TemporalJitter, + MipBias, + RenderLayers, + Projection, + NoIndirectDrawing, + ViewUniformOffset, + ); + for ( + main_entity, + render_entity, + camera, + 
camera_render_graph, + transform, + visible_entities, + frustum, + hdr, + color_grading, + exposure, + temporal_jitter, + mip_bias, + render_layers, + projection, + no_indirect_drawing, + ) in query.iter() + { + if !camera.is_active { + commands + .entity(render_entity) + .remove::(); + continue; + } + + let color_grading = color_grading.unwrap_or(&ColorGrading::default()).clone(); + + if let ( + Some(URect { + min: viewport_origin, + .. + }), + Some(viewport_size), + Some(target_size), + ) = ( + camera.physical_viewport_rect(), + camera.physical_viewport_size(), + camera.physical_target_size(), + ) { + if target_size.x == 0 || target_size.y == 0 { + commands + .entity(render_entity) + .remove::(); + continue; + } + + let render_visible_entities = RenderVisibleEntities { + entities: visible_entities + .entities + .iter() + .map(|(type_id, entities)| { + let entities = entities + .iter() + .map(|entity| { + let render_entity = mapper + .get(*entity) + .cloned() + .map(|entity| entity.id()) + .unwrap_or(Entity::PLACEHOLDER); + (render_entity, (*entity).into()) + }) + .collect(); + (*type_id, entities) + }) + .collect(), + }; + + let mut commands = commands.entity(render_entity); + commands.insert(( + ExtractedCamera { + target: camera.target.normalize(primary_window), + viewport: camera.viewport.clone(), + physical_viewport_size: Some(viewport_size), + physical_target_size: Some(target_size), + render_graph: camera_render_graph.0, + order: camera.order, + output_mode: camera.output_mode, + msaa_writeback: camera.msaa_writeback, + clear_color: camera.clear_color, + // this will be set in sort_cameras + sorted_camera_index_for_target: 0, + exposure: exposure + .map(Exposure::exposure) + .unwrap_or_else(|| Exposure::default().exposure()), + hdr, + }, + ExtractedView { + retained_view_entity: RetainedViewEntity::new(main_entity.into(), None, 0), + clip_from_view: camera.clip_from_view(), + world_from_view: *transform, + clip_from_world: None, + hdr, + viewport: 
UVec4::new( + viewport_origin.x, + viewport_origin.y, + viewport_size.x, + viewport_size.y, + ), + color_grading, + }, + render_visible_entities, + *frustum, + )); + + if let Some(temporal_jitter) = temporal_jitter { + commands.insert(temporal_jitter.clone()); + } else { + commands.remove::(); + } + + if let Some(mip_bias) = mip_bias { + commands.insert(mip_bias.clone()); + } else { + commands.remove::(); + } + + if let Some(render_layers) = render_layers { + commands.insert(render_layers.clone()); + } else { + commands.remove::(); + } + + if let Some(projection) = projection { + commands.insert(projection.clone()); + } else { + commands.remove::(); + } + + if no_indirect_drawing + || !matches!( + gpu_preprocessing_support.max_supported_mode, + GpuPreprocessingMode::Culling + ) + { + commands.insert(NoIndirectDrawing); + } else { + commands.remove::(); + } + }; + } +} + +/// Cameras sorted by their order field. This is updated in the [`sort_cameras`] system. +#[derive(Resource, Default)] +pub struct SortedCameras(pub Vec); + +pub struct SortedCamera { + pub entity: Entity, + pub order: isize, + pub target: Option, + pub hdr: bool, +} + +pub fn sort_cameras( + mut sorted_cameras: ResMut, + mut cameras: Query<(Entity, &mut ExtractedCamera)>, +) { + sorted_cameras.0.clear(); + for (entity, camera) in cameras.iter() { + sorted_cameras.0.push(SortedCamera { + entity, + order: camera.order, + target: camera.target.clone(), + hdr: camera.hdr, + }); + } + // sort by order and ensure within an order, RenderTargets of the same type are packed together + sorted_cameras + .0 + .sort_by(|c1, c2| (c1.order, &c1.target).cmp(&(c2.order, &c2.target))); + let mut previous_order_target = None; + let mut ambiguities = >::default(); + let mut target_counts = >::default(); + for sorted_camera in &mut sorted_cameras.0 { + let new_order_target = (sorted_camera.order, sorted_camera.target.clone()); + if let Some(previous_order_target) = previous_order_target + && previous_order_target == 
new_order_target + { + ambiguities.insert(new_order_target.clone()); + } + if let Some(target) = &sorted_camera.target { + let count = target_counts + .entry((target.clone(), sorted_camera.hdr)) + .or_insert(0usize); + let (_, mut camera) = cameras.get_mut(sorted_camera.entity).unwrap(); + camera.sorted_camera_index_for_target = *count; + *count += 1; + } + previous_order_target = Some(new_order_target); + } + + if !ambiguities.is_empty() { + warn!( + "Camera order ambiguities detected for active cameras with the following priorities: {:?}. \ + To fix this, ensure there is exactly one Camera entity spawned with a given order for a given RenderTarget. \ + Ambiguities should be resolved because either (1) multiple active cameras were spawned accidentally, which will \ + result in rendering multiple instances of the scene or (2) for cases where multiple active cameras is intentional, \ + ambiguities could result in unpredictable render results.", + ambiguities + ); + } +} + +/// A subpixel offset to jitter a perspective camera's frustum by. +/// +/// Useful for temporal rendering techniques. +#[derive(Component, Clone, Default, Reflect)] +#[reflect(Default, Component, Clone)] +pub struct TemporalJitter { + /// Offset is in range [-0.5, 0.5]. + pub offset: Vec2, +} + +impl TemporalJitter { + pub fn jitter_projection(&self, clip_from_view: &mut Mat4, view_size: Vec2) { + // https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/docs/techniques/media/super-resolution-temporal/jitter-space.svg + let mut jitter = (self.offset * vec2(2.0, -2.0)) / view_size; + + // orthographic + if clip_from_view.w_axis.w == 1.0 { + jitter *= vec2(clip_from_view.x_axis.x, clip_from_view.y_axis.y) * 0.5; + } + + clip_from_view.z_axis.x += jitter.x; + clip_from_view.z_axis.y += jitter.y; + } +} + +/// Camera component specifying a mip bias to apply when sampling from material textures. 
+/// +/// Often used in conjunction with antialiasing post-process effects to reduce textures blurriness. +#[derive(Component, Reflect, Clone)] +#[reflect(Default, Component)] +pub struct MipBias(pub f32); + +impl Default for MipBias { + fn default() -> Self { + Self(-1.0) + } +} diff --git a/crates/libmarathon/src/render/color_operations.wgsl b/crates/libmarathon/src/render/color_operations.wgsl new file mode 100644 index 0000000..b68ad2a --- /dev/null +++ b/crates/libmarathon/src/render/color_operations.wgsl @@ -0,0 +1,47 @@ +#define_import_path bevy_render::color_operations + +#import bevy_render::maths::FRAC_PI_3 + +// Converts HSV to RGB. +// +// Input: H ∈ [0, 2π), S ∈ [0, 1], V ∈ [0, 1]. +// Output: R ∈ [0, 1], G ∈ [0, 1], B ∈ [0, 1]. +// +// +fn hsv_to_rgb(hsv: vec3) -> vec3 { + let n = vec3(5.0, 3.0, 1.0); + let k = (n + hsv.x / FRAC_PI_3) % 6.0; + return hsv.z - hsv.z * hsv.y * max(vec3(0.0), min(k, min(4.0 - k, vec3(1.0)))); +} + +// Converts RGB to HSV. +// +// Input: R ∈ [0, 1], G ∈ [0, 1], B ∈ [0, 1]. +// Output: H ∈ [0, 2π), S ∈ [0, 1], V ∈ [0, 1]. +// +// +fn rgb_to_hsv(rgb: vec3) -> vec3 { + let x_max = max(rgb.r, max(rgb.g, rgb.b)); // i.e. V + let x_min = min(rgb.r, min(rgb.g, rgb.b)); + let c = x_max - x_min; // chroma + + var swizzle = vec3(0.0); + if (x_max == rgb.r) { + swizzle = vec3(rgb.gb, 0.0); + } else if (x_max == rgb.g) { + swizzle = vec3(rgb.br, 2.0); + } else { + swizzle = vec3(rgb.rg, 4.0); + } + + let h = FRAC_PI_3 * (((swizzle.x - swizzle.y) / c + swizzle.z) % 6.0); + + // Avoid division by zero. 
+ var s = 0.0; + if (x_max > 0.0) { + s = c / x_max; + } + + return vec3(h, s, x_max); +} + diff --git a/crates/libmarathon/src/render/core_2d/main_opaque_pass_2d_node.rs b/crates/libmarathon/src/render/core_2d/main_opaque_pass_2d_node.rs new file mode 100644 index 0000000..5e8f9f3 --- /dev/null +++ b/crates/libmarathon/src/render/core_2d/main_opaque_pass_2d_node.rs @@ -0,0 +1,106 @@ +use crate::render::core_2d::Opaque2d; +use bevy_ecs::{prelude::World, query::QueryItem}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, + render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, ViewDepthTexture, ViewTarget}, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +use super::AlphaMask2d; + +/// A [`bevy_render::render_graph::Node`] that runs the +/// [`Opaque2d`] [`ViewBinnedRenderPhases`] and [`AlphaMask2d`] [`ViewBinnedRenderPhases`] +#[derive(Default)] +pub struct MainOpaquePass2dNode; +impl ViewNode for MainOpaquePass2dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewTarget, + &'static ViewDepthTexture, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (camera, view, target, depth): QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let (Some(opaque_phases), Some(alpha_mask_phases)) = ( + world.get_resource::>(), + world.get_resource::>(), + ) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let color_attachments = [Some(target.get_color_attachment())]; + let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store)); + + let view_entity = graph.view_entity(); + let (Some(opaque_phase), Some(alpha_mask_phase)) = ( + 
opaque_phases.get(&view.retained_view_entity), + alpha_mask_phases.get(&view.retained_view_entity), + ) else { + return Ok(()); + }; + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _main_opaque_pass_2d_span = info_span!("main_opaque_pass_2d").entered(); + + // Command encoder setup + let mut command_encoder = + render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("main_opaque_pass_2d_command_encoder"), + }); + + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some("main_opaque_pass_2d"), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, "main_opaque_pass_2d"); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + // Opaque draws + if !opaque_phase.is_empty() { + #[cfg(feature = "trace")] + let _opaque_main_pass_2d_span = info_span!("opaque_main_pass_2d").entered(); + if let Err(err) = opaque_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the 2d opaque phase {err:?}"); + } + } + + // Alpha mask draws + if !alpha_mask_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_main_pass_2d_span = info_span!("alpha_mask_main_pass_2d").entered(); + if let Err(err) = alpha_mask_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the 2d alpha mask phase {err:?}"); + } + } + + pass_span.end(&mut render_pass); + drop(render_pass); + command_encoder.finish() + }); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/core_2d/main_transparent_pass_2d_node.rs b/crates/libmarathon/src/render/core_2d/main_transparent_pass_2d_node.rs new file mode 100644 index 
0000000..7e890a7 --- /dev/null +++ b/crates/libmarathon/src/render/core_2d/main_transparent_pass_2d_node.rs @@ -0,0 +1,120 @@ +use crate::render::core_2d::Transparent2d; +use bevy_ecs::prelude::*; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::{TrackedRenderPass, ViewSortedRenderPhases}, + render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, ViewDepthTexture, ViewTarget}, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +#[derive(Default)] +pub struct MainTransparentPass2dNode {} + +impl ViewNode for MainTransparentPass2dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewTarget, + &'static ViewDepthTexture, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (camera, view, target, depth): bevy_ecs::query::QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let Some(transparent_phases) = + world.get_resource::>() + else { + return Ok(()); + }; + + let view_entity = graph.view_entity(); + let Some(transparent_phase) = transparent_phases.get(&view.retained_view_entity) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let color_attachments = [Some(target.get_color_attachment())]; + // NOTE: For the transparent pass we load the depth buffer. There should be no + // need to write to it, but store is set to `true` as a workaround for issue #3776, + // https://github.com/bevyengine/bevy/issues/3776 + // so that wgpu does not clear the depth buffer. + // As the opaque and alpha mask passes run first, opaque meshes can occlude + // transparent ones. 
+ let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store)); + + render_context.add_command_buffer_generation_task(move |render_device| { + // Command encoder setup + let mut command_encoder = + render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("main_transparent_pass_2d_command_encoder"), + }); + + // This needs to run at least once to clear the background color, even if there are no items to render + { + #[cfg(feature = "trace")] + let _main_pass_2d = info_span!("main_transparent_pass_2d").entered(); + + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some("main_transparent_pass_2d"), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + + let pass_span = diagnostics.pass_span(&mut render_pass, "main_transparent_pass_2d"); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + if !transparent_phase.items.is_empty() { + #[cfg(feature = "trace")] + let _transparent_main_pass_2d_span = + info_span!("transparent_main_pass_2d").entered(); + if let Err(err) = transparent_phase.render(&mut render_pass, world, view_entity) + { + error!( + "Error encountered while rendering the transparent 2D phase {err:?}" + ); + } + } + + pass_span.end(&mut render_pass); + } + + // WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't + // reset for the next render pass so add an empty render pass without a custom viewport + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + if camera.viewport.is_some() { + #[cfg(feature = "trace")] + let _reset_viewport_pass_2d = info_span!("reset_viewport_pass_2d").entered(); + let pass_descriptor = RenderPassDescriptor { + label: Some("reset_viewport_pass_2d"), + color_attachments: 
&[Some(target.get_color_attachment())], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + command_encoder.begin_render_pass(&pass_descriptor); + } + + command_encoder.finish() + }); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/core_2d/mod.rs b/crates/libmarathon/src/render/core_2d/mod.rs new file mode 100644 index 0000000..b521f87 --- /dev/null +++ b/crates/libmarathon/src/render/core_2d/mod.rs @@ -0,0 +1,508 @@ +mod main_opaque_pass_2d_node; +mod main_transparent_pass_2d_node; + +pub mod graph { + use crate::render::render_graph::{RenderLabel, RenderSubGraph}; + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderSubGraph)] + pub struct Core2d; + + pub mod input { + pub const VIEW_ENTITY: &str = "view_entity"; + } + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + pub enum Node2d { + MsaaWriteback, + StartMainPass, + MainOpaquePass, + MainTransparentPass, + EndMainPass, + Wireframe, + StartMainPassPostProcessing, + Bloom, + PostProcessing, + Tonemapping, + Fxaa, + Smaa, + Upscaling, + ContrastAdaptiveSharpening, + EndMainPassPostProcessing, + } +} + +use core::ops::Range; + +use bevy_asset::UntypedAssetId; +use bevy_camera::{Camera, Camera2d}; +use bevy_image::ToExtents; +use bevy_platform::collections::{HashMap, HashSet}; +use crate::render::{ + batching::gpu_preprocessing::GpuPreprocessingMode, + camera::CameraRenderGraph, + render_phase::PhaseItemBatchSetKey, + view::{ExtractedView, RetainedViewEntity}, +}; +pub use main_opaque_pass_2d_node::*; +pub use main_transparent_pass_2d_node::*; + +use crate::render::{ + tonemapping::{DebandDither, Tonemapping, TonemappingNode}, + upscaling::UpscalingNode, +}; +use bevy_app::{App, Plugin}; +use bevy_ecs::prelude::*; +use bevy_math::FloatOrd; +use crate::render::{ + camera::ExtractedCamera, + extract_component::ExtractComponentPlugin, + render_graph::{EmptyNode, RenderGraphExt, ViewNodeRunner}, + render_phase::{ + sort_phase_system, 
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, + DrawFunctions, PhaseItem, PhaseItemExtraIndex, SortedPhaseItem, ViewBinnedRenderPhases, + ViewSortedRenderPhases, + }, + render_resource::{ + BindGroupId, CachedRenderPipelineId, TextureDescriptor, TextureDimension, TextureFormat, + TextureUsages, + }, + renderer::RenderDevice, + sync_world::MainEntity, + texture::TextureCache, + view::{Msaa, ViewDepthTexture}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; + +use self::graph::{Core2d, Node2d}; + +pub const CORE_2D_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth32Float; + +pub struct Core2dPlugin; + +impl Plugin for Core2dPlugin { + fn build(&self, app: &mut App) { + app.register_required_components::() + .register_required_components_with::(|| { + CameraRenderGraph::new(Core2d) + }) + .register_required_components_with::(|| Tonemapping::None) + .add_plugins(ExtractComponentPlugin::::default()); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .add_systems(ExtractSchedule, extract_core_2d_camera_phases) + .add_systems( + Render, + ( + sort_phase_system::.in_set(RenderSystems::PhaseSort), + prepare_core_2d_depth_textures.in_set(RenderSystems::PrepareResources), + ), + ); + + render_app + .add_render_sub_graph(Core2d) + .add_render_graph_node::(Core2d, Node2d::StartMainPass) + .add_render_graph_node::>( + Core2d, + Node2d::MainOpaquePass, + ) + .add_render_graph_node::>( + Core2d, + Node2d::MainTransparentPass, + ) + .add_render_graph_node::(Core2d, Node2d::EndMainPass) + .add_render_graph_node::(Core2d, Node2d::StartMainPassPostProcessing) + .add_render_graph_node::>(Core2d, Node2d::Tonemapping) + .add_render_graph_node::(Core2d, Node2d::EndMainPassPostProcessing) + .add_render_graph_node::>(Core2d, Node2d::Upscaling) + .add_render_graph_edges( + Core2d, 
+ ( + Node2d::StartMainPass, + Node2d::MainOpaquePass, + Node2d::MainTransparentPass, + Node2d::EndMainPass, + Node2d::StartMainPassPostProcessing, + Node2d::Tonemapping, + Node2d::EndMainPassPostProcessing, + Node2d::Upscaling, + ), + ); + } +} + +/// Opaque 2D [`BinnedPhaseItem`]s. +pub struct Opaque2d { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: BatchSetKey2d, + /// The key, which determines which can be batched. + pub bin_key: Opaque2dBinKey, + /// An entity from which data will be fetched, including the mesh if + /// applicable. + pub representative_entity: (Entity, MainEntity), + /// The ranges of instances. + pub batch_range: Range, + /// An extra index, which is either a dynamic offset or an index in the + /// indirect parameters list. + pub extra_index: PhaseItemExtraIndex, +} + +/// Data that must be identical in order to batch phase items together. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Opaque2dBinKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + /// The function used to draw. + pub draw_function: DrawFunctionId, + /// The asset that this phase item is associated with. + /// + /// Normally, this is the ID of the mesh, but for non-mesh items it might be + /// the ID of another type of asset. + pub asset_id: UntypedAssetId, + /// The ID of a bind group specific to the material. 
+ pub material_bind_group_id: Option, +} + +impl PhaseItem for Opaque2d { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.bin_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for Opaque2d { + // Since 2D meshes presently can't be multidrawn, the batch set key is + // irrelevant. + type BatchSetKey = BatchSetKey2d; + + type BinKey = Opaque2dBinKey; + + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Opaque2d { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +/// 2D meshes aren't currently multi-drawn together, so this batch set key only +/// stores whether the mesh is indexed. +#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub struct BatchSetKey2d { + /// True if the mesh is indexed. + pub indexed: bool, +} + +impl PhaseItemBatchSetKey for BatchSetKey2d { + fn indexed(&self) -> bool { + self.indexed + } +} + +impl CachedRenderPipelinePhaseItem for Opaque2d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.bin_key.pipeline + } +} + +/// Alpha mask 2D [`BinnedPhaseItem`]s. +pub struct AlphaMask2d { + /// Determines which objects can be placed into a *batch set*. 
+ /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: BatchSetKey2d, + /// The key, which determines which can be batched. + pub bin_key: AlphaMask2dBinKey, + /// An entity from which data will be fetched, including the mesh if + /// applicable. + pub representative_entity: (Entity, MainEntity), + /// The ranges of instances. + pub batch_range: Range, + /// An extra index, which is either a dynamic offset or an index in the + /// indirect parameters list. + pub extra_index: PhaseItemExtraIndex, +} + +/// Data that must be identical in order to batch phase items together. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct AlphaMask2dBinKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + /// The function used to draw. + pub draw_function: DrawFunctionId, + /// The asset that this phase item is associated with. + /// + /// Normally, this is the ID of the mesh, but for non-mesh items it might be + /// the ID of another type of asset. + pub asset_id: UntypedAssetId, + /// The ID of a bind group specific to the material. 
+ pub material_bind_group_id: Option, +} + +impl PhaseItem for AlphaMask2d { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + #[inline] + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.bin_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for AlphaMask2d { + // Since 2D meshes presently can't be multidrawn, the batch set key is + // irrelevant. + type BatchSetKey = BatchSetKey2d; + + type BinKey = AlphaMask2dBinKey; + + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + AlphaMask2d { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for AlphaMask2d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.bin_key.pipeline + } +} + +/// Transparent 2D [`SortedPhaseItem`]s. +pub struct Transparent2d { + pub sort_key: FloatOrd, + pub entity: (Entity, MainEntity), + pub pipeline: CachedRenderPipelineId, + pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub extracted_index: usize, + pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). 
+ pub indexed: bool, +} + +impl PhaseItem for Transparent2d { + #[inline] + fn entity(&self) -> Entity { + self.entity.0 + } + + #[inline] + fn main_entity(&self) -> MainEntity { + self.entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl SortedPhaseItem for Transparent2d { + type SortKey = FloatOrd; + + #[inline] + fn sort_key(&self) -> Self::SortKey { + self.sort_key + } + + #[inline] + fn sort(items: &mut [Self]) { + // radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`. + radsort::sort_by_key(items, |item| item.sort_key().0); + } + + fn indexed(&self) -> bool { + self.indexed + } +} + +impl CachedRenderPipelinePhaseItem for Transparent2d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.pipeline + } +} + +pub fn extract_core_2d_camera_phases( + mut transparent_2d_phases: ResMut>, + mut opaque_2d_phases: ResMut>, + mut alpha_mask_2d_phases: ResMut>, + cameras_2d: Extract>>, + mut live_entities: Local>, +) { + live_entities.clear(); + + for (main_entity, camera) in &cameras_2d { + if !camera.is_active { + continue; + } + + // This is the main 2D camera, so we use the first subview index (0). 
+ let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); + + transparent_2d_phases.insert_or_clear(retained_view_entity); + opaque_2d_phases.prepare_for_new_frame(retained_view_entity, GpuPreprocessingMode::None); + alpha_mask_2d_phases + .prepare_for_new_frame(retained_view_entity, GpuPreprocessingMode::None); + + live_entities.insert(retained_view_entity); + } + + // Clear out all dead views. + transparent_2d_phases.retain(|camera_entity, _| live_entities.contains(camera_entity)); + opaque_2d_phases.retain(|camera_entity, _| live_entities.contains(camera_entity)); + alpha_mask_2d_phases.retain(|camera_entity, _| live_entities.contains(camera_entity)); +} + +pub fn prepare_core_2d_depth_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, + transparent_2d_phases: Res>, + opaque_2d_phases: Res>, + views_2d: Query<(Entity, &ExtractedCamera, &ExtractedView, &Msaa), (With,)>, +) { + let mut textures = >::default(); + for (view, camera, extracted_view, msaa) in &views_2d { + if !opaque_2d_phases.contains_key(&extracted_view.retained_view_entity) + || !transparent_2d_phases.contains_key(&extracted_view.retained_view_entity) + { + continue; + }; + + let Some(physical_target_size) = camera.physical_target_size else { + continue; + }; + + let cached_texture = textures + .entry(camera.target.clone()) + .or_insert_with(|| { + let descriptor = TextureDescriptor { + label: Some("view_depth_texture"), + // The size of the depth texture + size: physical_target_size.to_extents(), + mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: CORE_2D_DEPTH_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }; + + texture_cache.get(&render_device, descriptor) + }) + .clone(); + + commands + .entity(view) + .insert(ViewDepthTexture::new(cached_texture, Some(0.0))); + } +} diff --git a/crates/libmarathon/src/render/core_3d/main_opaque_pass_3d_node.rs 
b/crates/libmarathon/src/render/core_3d/main_opaque_pass_3d_node.rs new file mode 100644 index 0000000..f910606 --- /dev/null +++ b/crates/libmarathon/src/render/core_3d/main_opaque_pass_3d_node.rs @@ -0,0 +1,142 @@ +use crate::render::{ + core_3d::Opaque3d, + skybox::{SkyboxBindGroup, SkyboxPipelineId}, +}; +use bevy_camera::{MainPassResolutionOverride, Viewport}; +use bevy_ecs::{prelude::World, query::QueryItem}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, + render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, ViewDepthTexture, ViewTarget, ViewUniformOffset}, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +use super::AlphaMask3d; + +/// A [`bevy_render::render_graph::Node`] that runs the [`Opaque3d`] and [`AlphaMask3d`] +/// [`ViewBinnedRenderPhases`]s. 
+#[derive(Default)] +pub struct MainOpaquePass3dNode; +impl ViewNode for MainOpaquePass3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewTarget, + &'static ViewDepthTexture, + Option<&'static SkyboxPipelineId>, + Option<&'static SkyboxBindGroup>, + &'static ViewUniformOffset, + Option<&'static MainPassResolutionOverride>, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + camera, + extracted_view, + target, + depth, + skybox_pipeline, + skybox_bind_group, + view_uniform_offset, + resolution_override, + ): QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let (Some(opaque_phases), Some(alpha_mask_phases)) = ( + world.get_resource::>(), + world.get_resource::>(), + ) else { + return Ok(()); + }; + + let (Some(opaque_phase), Some(alpha_mask_phase)) = ( + opaque_phases.get(&extracted_view.retained_view_entity), + alpha_mask_phases.get(&extracted_view.retained_view_entity), + ) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let color_attachments = [Some(target.get_color_attachment())]; + let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store)); + + let view_entity = graph.view_entity(); + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered(); + + // Command encoder setup + let mut command_encoder = + render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("main_opaque_pass_3d_command_encoder"), + }); + + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some("main_opaque_pass_3d"), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + let mut render_pass = 
TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, "main_opaque_pass_3d"); + + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + // Opaque draws + if !opaque_phase.is_empty() { + #[cfg(feature = "trace")] + let _opaque_main_pass_3d_span = info_span!("opaque_main_pass_3d").entered(); + if let Err(err) = opaque_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the opaque phase {err:?}"); + } + } + + // Alpha draws + if !alpha_mask_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_main_pass_3d_span = info_span!("alpha_mask_main_pass_3d").entered(); + if let Err(err) = alpha_mask_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the alpha mask phase {err:?}"); + } + } + + // Skybox draw using a fullscreen triangle + if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) = + (skybox_pipeline, skybox_bind_group) + { + let pipeline_cache = world.resource::(); + if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) { + render_pass.set_render_pipeline(pipeline); + render_pass.set_bind_group( + 0, + &skybox_bind_group.0, + &[view_uniform_offset.offset, skybox_bind_group.1], + ); + render_pass.draw(0..3, 0..1); + } + } + + pass_span.end(&mut render_pass); + drop(render_pass); + command_encoder.finish() + }); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/core_3d/main_transmissive_pass_3d_node.rs b/crates/libmarathon/src/render/core_3d/main_transmissive_pass_3d_node.rs new file mode 100644 index 0000000..d67c748 --- /dev/null +++ b/crates/libmarathon/src/render/core_3d/main_transmissive_pass_3d_node.rs @@ -0,0 +1,167 @@ +use super::ViewTransmissionTexture; +use crate::render::core_3d::Transmissive3d; +use bevy_camera::{Camera3d, 
MainPassResolutionOverride, Viewport}; +use bevy_ecs::{prelude::*, query::QueryItem}; +use bevy_image::ToExtents; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::ViewSortedRenderPhases, + render_resource::{RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, ViewDepthTexture, ViewTarget}, +}; +use core::ops::Range; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +/// A [`bevy_render::render_graph::Node`] that runs the [`Transmissive3d`] +/// [`ViewSortedRenderPhases`]. +#[derive(Default)] +pub struct MainTransmissivePass3dNode; + +impl ViewNode for MainTransmissivePass3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static Camera3d, + &'static ViewTarget, + Option<&'static ViewTransmissionTexture>, + &'static ViewDepthTexture, + Option<&'static MainPassResolutionOverride>, + ); + + fn run( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (camera, view, camera_3d, target, transmission, depth, resolution_override): QueryItem< + Self::ViewQuery, + >, + world: &World, + ) -> Result<(), NodeRunError> { + let view_entity = graph.view_entity(); + + let Some(transmissive_phases) = + world.get_resource::>() + else { + return Ok(()); + }; + + let Some(transmissive_phase) = transmissive_phases.get(&view.retained_view_entity) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let physical_target_size = camera.physical_target_size.unwrap(); + + let render_pass_descriptor = RenderPassDescriptor { + label: Some("main_transmissive_pass_3d"), + color_attachments: &[Some(target.get_color_attachment())], + depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)), + timestamp_writes: None, + occlusion_query_set: None, + }; + + // Run the transmissive pass, sorted back-to-front + // NOTE: Scoped 
to drop the mutable borrow of render_context + #[cfg(feature = "trace")] + let _main_transmissive_pass_3d_span = info_span!("main_transmissive_pass_3d").entered(); + + if !transmissive_phase.items.is_empty() { + let screen_space_specular_transmission_steps = + camera_3d.screen_space_specular_transmission_steps; + if screen_space_specular_transmission_steps > 0 { + let transmission = + transmission.expect("`ViewTransmissionTexture` should exist at this point"); + + // `transmissive_phase.items` are depth sorted, so we split them into N = `screen_space_specular_transmission_steps` + // ranges, rendering them back-to-front in multiple steps, allowing multiple levels of transparency. + // + // Note: For the sake of simplicity, we currently split items evenly among steps. In the future, we + // might want to use a more sophisticated heuristic (e.g. based on view bounds, or with an exponential + // falloff so that nearby objects have more levels of transparency available to them) + for range in split_range( + 0..transmissive_phase.items.len(), + screen_space_specular_transmission_steps, + ) { + // Copy the main texture to the transmission texture, allowing to use the color output of the + // previous step (or of the `Opaque3d` phase, for the first step) as a transmissive color input + render_context.command_encoder().copy_texture_to_texture( + target.main_texture().as_image_copy(), + transmission.texture.as_image_copy(), + physical_target_size.to_extents(), + ); + + let mut render_pass = + render_context.begin_tracked_render_pass(render_pass_descriptor.clone()); + let pass_span = + diagnostics.pass_span(&mut render_pass, "main_transmissive_pass_3d"); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + // render items in range + if let Err(err) = + transmissive_phase.render_range(&mut render_pass, world, view_entity, range) + { + error!("Error encountered while rendering the transmissive phase {err:?}"); + } + + 
pass_span.end(&mut render_pass); + } + } else { + let mut render_pass = + render_context.begin_tracked_render_pass(render_pass_descriptor); + let pass_span = + diagnostics.pass_span(&mut render_pass, "main_transmissive_pass_3d"); + + if let Some(viewport) = Viewport::from_viewport_and_override( + camera.viewport.as_ref(), + resolution_override, + ) { + render_pass.set_camera_viewport(&viewport); + } + + if let Err(err) = transmissive_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the transmissive phase {err:?}"); + } + + pass_span.end(&mut render_pass); + } + } + + Ok(()) + } +} + +/// Splits a [`Range`] into at most `max_num_splits` sub-ranges without overlaps +/// +/// Properly takes into account remainders of inexact divisions (by adding extra +/// elements to the initial sub-ranges as needed) +fn split_range(range: Range, max_num_splits: usize) -> impl Iterator> { + let len = range.end - range.start; + assert!(len > 0, "to be split, a range must not be empty"); + assert!(max_num_splits > 0, "max_num_splits must be at least 1"); + let num_splits = max_num_splits.min(len); + let step = len / num_splits; + let mut rem = len % num_splits; + let mut start = range.start; + + (0..num_splits).map(move |_| { + let extra = if rem > 0 { + rem -= 1; + 1 + } else { + 0 + }; + let end = (start + step + extra).min(range.end); + let result = start..end; + start = end; + result + }) +} diff --git a/crates/libmarathon/src/render/core_3d/main_transparent_pass_3d_node.rs b/crates/libmarathon/src/render/core_3d/main_transparent_pass_3d_node.rs new file mode 100644 index 0000000..58dbd89 --- /dev/null +++ b/crates/libmarathon/src/render/core_3d/main_transparent_pass_3d_node.rs @@ -0,0 +1,107 @@ +use crate::render::core_3d::Transparent3d; +use bevy_camera::{MainPassResolutionOverride, Viewport}; +use bevy_ecs::{prelude::*, query::QueryItem}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + 
render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::ViewSortedRenderPhases, + render_resource::{RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, ViewDepthTexture, ViewTarget}, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +/// A [`bevy_render::render_graph::Node`] that runs the [`Transparent3d`] +/// [`ViewSortedRenderPhases`]. +#[derive(Default)] +pub struct MainTransparentPass3dNode; + +impl ViewNode for MainTransparentPass3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewTarget, + &'static ViewDepthTexture, + Option<&'static MainPassResolutionOverride>, + ); + fn run( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (camera, view, target, depth, resolution_override): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + let view_entity = graph.view_entity(); + + let Some(transparent_phases) = + world.get_resource::>() + else { + return Ok(()); + }; + + let Some(transparent_phase) = transparent_phases.get(&view.retained_view_entity) else { + return Ok(()); + }; + + if !transparent_phase.items.is_empty() { + // Run the transparent pass, sorted back-to-front + // NOTE: Scoped to drop the mutable borrow of render_context + #[cfg(feature = "trace")] + let _main_transparent_pass_3d_span = info_span!("main_transparent_pass_3d").entered(); + + let diagnostics = render_context.diagnostic_recorder(); + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("main_transparent_pass_3d"), + color_attachments: &[Some(target.get_color_attachment())], + // NOTE: For the transparent pass we load the depth buffer. There should be no + // need to write to it, but store is set to `true` as a workaround for issue #3776, + // https://github.com/bevyengine/bevy/issues/3776 + // so that wgpu does not clear the depth buffer. 
+ // As the opaque and alpha mask passes run first, opaque meshes can occlude + // transparent ones. + depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)), + timestamp_writes: None, + occlusion_query_set: None, + }); + + let pass_span = diagnostics.pass_span(&mut render_pass, "main_transparent_pass_3d"); + + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + if let Err(err) = transparent_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the transparent phase {err:?}"); + } + + pass_span.end(&mut render_pass); + } + + // WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't + // reset for the next render pass so add an empty render pass without a custom viewport + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + if camera.viewport.is_some() { + #[cfg(feature = "trace")] + let _reset_viewport_pass_3d = info_span!("reset_viewport_pass_3d").entered(); + let pass_descriptor = RenderPassDescriptor { + label: Some("reset_viewport_pass_3d"), + color_attachments: &[Some(target.get_color_attachment())], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + render_context + .command_encoder() + .begin_render_pass(&pass_descriptor); + } + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/core_3d/mod.rs b/crates/libmarathon/src/render/core_3d/mod.rs new file mode 100644 index 0000000..04c61fb --- /dev/null +++ b/crates/libmarathon/src/render/core_3d/mod.rs @@ -0,0 +1,1150 @@ +mod main_opaque_pass_3d_node; +mod main_transmissive_pass_3d_node; +mod main_transparent_pass_3d_node; + +pub mod graph { + use crate::render::render_graph::{RenderLabel, RenderSubGraph}; + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderSubGraph)] + pub struct Core3d; + + pub mod input { + pub const 
VIEW_ENTITY: &str = "view_entity"; + } + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + pub enum Node3d { + MsaaWriteback, + EarlyPrepass, + EarlyDownsampleDepth, + LatePrepass, + EarlyDeferredPrepass, + LateDeferredPrepass, + CopyDeferredLightingId, + EndPrepasses, + StartMainPass, + MainOpaquePass, + MainTransmissivePass, + MainTransparentPass, + EndMainPass, + Wireframe, + StartMainPassPostProcessing, + LateDownsampleDepth, + MotionBlur, + Taa, + DlssSuperResolution, + DlssRayReconstruction, + Bloom, + AutoExposure, + DepthOfField, + PostProcessing, + Tonemapping, + Fxaa, + Smaa, + Upscaling, + ContrastAdaptiveSharpening, + EndMainPassPostProcessing, + } +} + +// PERF: vulkan docs recommend using 24 bit depth for better performance +pub const CORE_3D_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth32Float; + +/// True if multisampled depth textures are supported on this platform. +/// +/// In theory, Naga supports depth textures on WebGL 2. In practice, it doesn't, +/// because of a silly bug whereby Naga assumes that all depth textures are +/// `sampler2DShadow` and will cheerfully generate invalid GLSL that tries to +/// perform non-percentage-closer-filtering with such a sampler. Therefore we +/// disable depth of field and screen space reflections entirely on WebGL 2. +#[cfg(not(any(feature = "webgpu", not(target_arch = "wasm32"))))] +pub const DEPTH_TEXTURE_SAMPLING_SUPPORTED: bool = false; + +/// True if multisampled depth textures are supported on this platform. +/// +/// In theory, Naga supports depth textures on WebGL 2. In practice, it doesn't, +/// because of a silly bug whereby Naga assumes that all depth textures are +/// `sampler2DShadow` and will cheerfully generate invalid GLSL that tries to +/// perform non-percentage-closer-filtering with such a sampler. Therefore we +/// disable depth of field and screen space reflections entirely on WebGL 2. 
+#[cfg(any(feature = "webgpu", not(target_arch = "wasm32")))] +pub const DEPTH_TEXTURE_SAMPLING_SUPPORTED: bool = true; + +use core::ops::Range; + +use bevy_camera::{Camera, Camera3d, Camera3dDepthLoadOp}; +use crate::render::{ + batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, + camera::CameraRenderGraph, + experimental::occlusion_culling::OcclusionCulling, + mesh::allocator::SlabId, + render_phase::PhaseItemBatchSetKey, + view::{prepare_view_targets, NoIndirectDrawing, RetainedViewEntity}, +}; +pub use main_opaque_pass_3d_node::*; +pub use main_transparent_pass_3d_node::*; + +use bevy_app::{App, Plugin, PostUpdate}; +use bevy_asset::UntypedAssetId; +use bevy_color::LinearRgba; +use bevy_ecs::prelude::*; +use bevy_image::{BevyDefault, ToExtents}; +use bevy_math::FloatOrd; +use bevy_platform::collections::{HashMap, HashSet}; +use crate::render::{ + camera::ExtractedCamera, + extract_component::ExtractComponentPlugin, + prelude::Msaa, + render_graph::{EmptyNode, RenderGraphExt, ViewNodeRunner}, + render_phase::{ + sort_phase_system, BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, + DrawFunctions, PhaseItem, PhaseItemExtraIndex, SortedPhaseItem, ViewBinnedRenderPhases, + ViewSortedRenderPhases, + }, + render_resource::{ + CachedRenderPipelineId, FilterMode, Sampler, SamplerDescriptor, Texture, TextureDescriptor, + TextureDimension, TextureFormat, TextureUsages, TextureView, + }, + renderer::RenderDevice, + sync_world::{MainEntity, RenderEntity}, + texture::{ColorAttachment, TextureCache}, + view::{ExtractedView, ViewDepthTexture, ViewTarget}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; +use nonmax::NonMaxU32; +use tracing::warn; + +use crate::render::{ + core_3d::main_transmissive_pass_3d_node::MainTransmissivePass3dNode, + deferred::{ + copy_lighting_id::CopyDeferredLightingIdNode, + node::{EarlyDeferredGBufferPrepassNode, LateDeferredGBufferPrepassNode}, + AlphaMask3dDeferred, Opaque3dDeferred, 
DEFERRED_LIGHTING_PASS_ID_FORMAT, + DEFERRED_PREPASS_FORMAT, + }, + prepass::{ + node::{EarlyPrepassNode, LatePrepassNode}, + AlphaMask3dPrepass, DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass, + Opaque3dPrepass, OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey, + ViewPrepassTextures, MOTION_VECTOR_PREPASS_FORMAT, NORMAL_PREPASS_FORMAT, + }, + skybox::SkyboxPlugin, + tonemapping::{DebandDither, Tonemapping, TonemappingNode}, + upscaling::UpscalingNode, +}; + +use self::graph::{Core3d, Node3d}; + +pub struct Core3dPlugin; + +impl Plugin for Core3dPlugin { + fn build(&self, app: &mut App) { + app.register_required_components_with::(|| DebandDither::Enabled) + .register_required_components_with::(|| { + CameraRenderGraph::new(Core3d) + }) + .register_required_components::() + .add_plugins((SkyboxPlugin, ExtractComponentPlugin::::default())) + .add_systems(PostUpdate, check_msaa); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .add_systems(ExtractSchedule, extract_core_3d_camera_phases) + .add_systems(ExtractSchedule, extract_camera_prepass_phase) + .add_systems( + Render, + ( + sort_phase_system::.in_set(RenderSystems::PhaseSort), + sort_phase_system::.in_set(RenderSystems::PhaseSort), + configure_occlusion_culling_view_targets + .after(prepare_view_targets) + .in_set(RenderSystems::ManageViews), + prepare_core_3d_depth_textures.in_set(RenderSystems::PrepareResources), + prepare_core_3d_transmission_textures.in_set(RenderSystems::PrepareResources), + prepare_prepass_textures.in_set(RenderSystems::PrepareResources), + ), + ); + + render_app + 
.add_render_sub_graph(Core3d) + .add_render_graph_node::>(Core3d, Node3d::EarlyPrepass) + .add_render_graph_node::>(Core3d, Node3d::LatePrepass) + .add_render_graph_node::>( + Core3d, + Node3d::EarlyDeferredPrepass, + ) + .add_render_graph_node::>( + Core3d, + Node3d::LateDeferredPrepass, + ) + .add_render_graph_node::>( + Core3d, + Node3d::CopyDeferredLightingId, + ) + .add_render_graph_node::(Core3d, Node3d::EndPrepasses) + .add_render_graph_node::(Core3d, Node3d::StartMainPass) + .add_render_graph_node::>( + Core3d, + Node3d::MainOpaquePass, + ) + .add_render_graph_node::>( + Core3d, + Node3d::MainTransmissivePass, + ) + .add_render_graph_node::>( + Core3d, + Node3d::MainTransparentPass, + ) + .add_render_graph_node::(Core3d, Node3d::EndMainPass) + .add_render_graph_node::(Core3d, Node3d::StartMainPassPostProcessing) + .add_render_graph_node::>(Core3d, Node3d::Tonemapping) + .add_render_graph_node::(Core3d, Node3d::EndMainPassPostProcessing) + .add_render_graph_node::>(Core3d, Node3d::Upscaling) + .add_render_graph_edges( + Core3d, + ( + Node3d::EarlyPrepass, + Node3d::EarlyDeferredPrepass, + Node3d::LatePrepass, + Node3d::LateDeferredPrepass, + Node3d::CopyDeferredLightingId, + Node3d::EndPrepasses, + Node3d::StartMainPass, + Node3d::MainOpaquePass, + Node3d::MainTransmissivePass, + Node3d::MainTransparentPass, + Node3d::EndMainPass, + Node3d::StartMainPassPostProcessing, + Node3d::Tonemapping, + Node3d::EndMainPassPostProcessing, + Node3d::Upscaling, + ), + ); + } +} + +/// Opaque 3D [`BinnedPhaseItem`]s. +pub struct Opaque3d { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: Opaque3dBatchSetKey, + /// The key, which determines which can be batched. + pub bin_key: Opaque3dBinKey, + /// An entity from which data will be fetched, including the mesh if + /// applicable. 
+ pub representative_entity: (Entity, MainEntity), + /// The ranges of instances. + pub batch_range: Range, + /// An extra index, which is either a dynamic offset or an index in the + /// indirect parameters list. + pub extra_index: PhaseItemExtraIndex, +} + +/// Information that must be identical in order to place opaque meshes in the +/// same *batch set*. +/// +/// A batch set is a set of batches that can be multi-drawn together, if +/// multi-draw is in use. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Opaque3dBatchSetKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + + /// The function used to draw. + pub draw_function: DrawFunctionId, + + /// The ID of a bind group specific to the material instance. + /// + /// In the case of PBR, this is the `MaterialBindGroupIndex`. + pub material_bind_group_index: Option, + + /// The ID of the slab of GPU memory that contains vertex data. + /// + /// For non-mesh items, you can fill this with 0 if your items can be + /// multi-drawn, or with a unique value if they can't. + pub vertex_slab: SlabId, + + /// The ID of the slab of GPU memory that contains index data, if present. + /// + /// For non-mesh items, you can safely fill this with `None`. + pub index_slab: Option, + + /// Index of the slab that the lightmap resides in, if a lightmap is + /// present. + pub lightmap_slab: Option, +} + +impl PhaseItemBatchSetKey for Opaque3dBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + +/// Data that must be identical in order to *batch* phase items together. +/// +/// Note that a *batch set* (if multi-draw is in use) contains multiple batches. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Opaque3dBinKey { + /// The asset that this phase item is associated with. + /// + /// Normally, this is the ID of the mesh, but for non-mesh items it might be + /// the ID of another type of asset. 
+ pub asset_id: UntypedAssetId, +} + +impl PhaseItem for Opaque3d { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + #[inline] + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for Opaque3d { + type BatchSetKey = Opaque3dBatchSetKey; + type BinKey = Opaque3dBinKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Opaque3d { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for Opaque3d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +pub struct AlphaMask3d { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: OpaqueNoLightmap3dBatchSetKey, + /// The key, which determines which can be batched. 
+ pub bin_key: OpaqueNoLightmap3dBinKey, + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +impl PhaseItem for AlphaMask3d { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for AlphaMask3d { + type BinKey = OpaqueNoLightmap3dBinKey; + type BatchSetKey = OpaqueNoLightmap3dBatchSetKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Self { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for AlphaMask3d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +pub struct Transmissive3d { + pub distance: f32, + pub pipeline: CachedRenderPipelineId, + pub entity: (Entity, MainEntity), + pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). 
+ pub indexed: bool, +} + +impl PhaseItem for Transmissive3d { + /// For now, automatic batching is disabled for transmissive items because their rendering is + /// split into multiple steps depending on [`Camera3d::screen_space_specular_transmission_steps`], + /// which the batching system doesn't currently know about. + /// + /// Having batching enabled would cause the same item to be drawn multiple times across different + /// steps, whenever the batching range crossed a step boundary. + /// + /// Eventually, we could add support for this by having the batching system break up the batch ranges + /// using the same logic as the transmissive pass, but for now it's simpler to just disable batching. + const AUTOMATIC_BATCHING: bool = false; + + #[inline] + fn entity(&self) -> Entity { + self.entity.0 + } + + #[inline] + fn main_entity(&self) -> MainEntity { + self.entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl SortedPhaseItem for Transmissive3d { + // NOTE: Values increase towards the camera. Back-to-front ordering for transmissive means we need an ascending sort. 
+ type SortKey = FloatOrd; + + #[inline] + fn sort_key(&self) -> Self::SortKey { + FloatOrd(self.distance) + } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } + + #[inline] + fn indexed(&self) -> bool { + self.indexed + } +} + +impl CachedRenderPipelinePhaseItem for Transmissive3d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.pipeline + } +} + +pub struct Transparent3d { + pub distance: f32, + pub pipeline: CachedRenderPipelineId, + pub entity: (Entity, MainEntity), + pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, + /// Whether the mesh in question is indexed (uses an index buffer in + /// addition to its vertex buffer). + pub indexed: bool, +} + +impl PhaseItem for Transparent3d { + #[inline] + fn entity(&self) -> Entity { + self.entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl SortedPhaseItem for Transparent3d { + // NOTE: Values increase towards the camera. Back-to-front ordering for transparent means we need an ascending sort. 
+ type SortKey = FloatOrd; + + #[inline] + fn sort_key(&self) -> Self::SortKey { + FloatOrd(self.distance) + } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } + + #[inline] + fn indexed(&self) -> bool { + self.indexed + } +} + +impl CachedRenderPipelinePhaseItem for Transparent3d { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.pipeline + } +} + +pub fn extract_core_3d_camera_phases( + mut opaque_3d_phases: ResMut>, + mut alpha_mask_3d_phases: ResMut>, + mut transmissive_3d_phases: ResMut>, + mut transparent_3d_phases: ResMut>, + cameras_3d: Extract), With>>, + mut live_entities: Local>, + gpu_preprocessing_support: Res, +) { + live_entities.clear(); + + for (main_entity, camera, no_indirect_drawing) in &cameras_3d { + if !camera.is_active { + continue; + } + + // If GPU culling is in use, use it (and indirect mode); otherwise, just + // preprocess the meshes. + let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing { + GpuPreprocessingMode::Culling + } else { + GpuPreprocessingMode::PreprocessingOnly + }); + + // This is the main 3D camera, so use the first subview index (0). 
+ let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); + + opaque_3d_phases.prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + alpha_mask_3d_phases.prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + transmissive_3d_phases.insert_or_clear(retained_view_entity); + transparent_3d_phases.insert_or_clear(retained_view_entity); + + live_entities.insert(retained_view_entity); + } + + opaque_3d_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + alpha_mask_3d_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + transmissive_3d_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + transparent_3d_phases.retain(|view_entity, _| live_entities.contains(view_entity)); +} + +// Extract the render phases for the prepass + +pub fn extract_camera_prepass_phase( + mut commands: Commands, + mut opaque_3d_prepass_phases: ResMut>, + mut alpha_mask_3d_prepass_phases: ResMut>, + mut opaque_3d_deferred_phases: ResMut>, + mut alpha_mask_3d_deferred_phases: ResMut>, + cameras_3d: Extract< + Query< + ( + Entity, + RenderEntity, + &Camera, + Has, + Has, + Has, + Has, + Has, + ), + With, + >, + >, + mut live_entities: Local>, + gpu_preprocessing_support: Res, +) { + live_entities.clear(); + + for ( + main_entity, + entity, + camera, + no_indirect_drawing, + depth_prepass, + normal_prepass, + motion_vector_prepass, + deferred_prepass, + ) in cameras_3d.iter() + { + if !camera.is_active { + continue; + } + + // If GPU culling is in use, use it (and indirect mode); otherwise, just + // preprocess the meshes. + let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing { + GpuPreprocessingMode::Culling + } else { + GpuPreprocessingMode::PreprocessingOnly + }); + + // This is the main 3D camera, so we use the first subview index (0). 
+ let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); + + if depth_prepass || normal_prepass || motion_vector_prepass { + opaque_3d_prepass_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + alpha_mask_3d_prepass_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + } else { + opaque_3d_prepass_phases.remove(&retained_view_entity); + alpha_mask_3d_prepass_phases.remove(&retained_view_entity); + } + + if deferred_prepass { + opaque_3d_deferred_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + alpha_mask_3d_deferred_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + } else { + opaque_3d_deferred_phases.remove(&retained_view_entity); + alpha_mask_3d_deferred_phases.remove(&retained_view_entity); + } + live_entities.insert(retained_view_entity); + + // Add or remove prepasses as appropriate. + + let mut camera_commands = commands + .get_entity(entity) + .expect("Camera entity wasn't synced."); + + if depth_prepass { + camera_commands.insert(DepthPrepass); + } else { + camera_commands.remove::(); + } + + if normal_prepass { + camera_commands.insert(NormalPrepass); + } else { + camera_commands.remove::(); + } + + if motion_vector_prepass { + camera_commands.insert(MotionVectorPrepass); + } else { + camera_commands.remove::(); + } + + if deferred_prepass { + camera_commands.insert(DeferredPrepass); + } else { + camera_commands.remove::(); + } + } + + opaque_3d_prepass_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + alpha_mask_3d_prepass_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + opaque_3d_deferred_phases.retain(|view_entity, _| live_entities.contains(view_entity)); + alpha_mask_3d_deferred_phases.retain(|view_entity, _| live_entities.contains(view_entity)); +} + +pub fn prepare_core_3d_depth_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, 
+ opaque_3d_phases: Res>, + alpha_mask_3d_phases: Res>, + transmissive_3d_phases: Res>, + transparent_3d_phases: Res>, + views_3d: Query<( + Entity, + &ExtractedCamera, + &ExtractedView, + Option<&DepthPrepass>, + &Camera3d, + &Msaa, + )>, +) { + let mut render_target_usage = >::default(); + for (_, camera, extracted_view, depth_prepass, camera_3d, _msaa) in &views_3d { + if !opaque_3d_phases.contains_key(&extracted_view.retained_view_entity) + || !alpha_mask_3d_phases.contains_key(&extracted_view.retained_view_entity) + || !transmissive_3d_phases.contains_key(&extracted_view.retained_view_entity) + || !transparent_3d_phases.contains_key(&extracted_view.retained_view_entity) + { + continue; + }; + + // Default usage required to write to the depth texture + let mut usage: TextureUsages = camera_3d.depth_texture_usages.into(); + if depth_prepass.is_some() { + // Required to read the output of the prepass + usage |= TextureUsages::COPY_SRC; + } + render_target_usage + .entry(camera.target.clone()) + .and_modify(|u| *u |= usage) + .or_insert_with(|| usage); + } + + let mut textures = >::default(); + for (entity, camera, _, _, camera_3d, msaa) in &views_3d { + let Some(physical_target_size) = camera.physical_target_size else { + continue; + }; + + let cached_texture = textures + .entry((camera.target.clone(), msaa)) + .or_insert_with(|| { + let usage = *render_target_usage + .get(&camera.target.clone()) + .expect("The depth texture usage should already exist for this target"); + + let descriptor = TextureDescriptor { + label: Some("view_depth_texture"), + // The size of the depth texture + size: physical_target_size.to_extents(), + mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: CORE_3D_DEPTH_FORMAT, + usage, + view_formats: &[], + }; + + texture_cache.get(&render_device, descriptor) + }) + .clone(); + + commands.entity(entity).insert(ViewDepthTexture::new( + cached_texture, + match camera_3d.depth_load_op { + 
Camera3dDepthLoadOp::Clear(v) => Some(v), + Camera3dDepthLoadOp::Load => None, + }, + )); + } +} + +#[derive(Component)] +pub struct ViewTransmissionTexture { + pub texture: Texture, + pub view: TextureView, + pub sampler: Sampler, +} + +pub fn prepare_core_3d_transmission_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, + opaque_3d_phases: Res>, + alpha_mask_3d_phases: Res>, + transmissive_3d_phases: Res>, + transparent_3d_phases: Res>, + views_3d: Query<(Entity, &ExtractedCamera, &Camera3d, &ExtractedView)>, +) { + let mut textures = >::default(); + for (entity, camera, camera_3d, view) in &views_3d { + if !opaque_3d_phases.contains_key(&view.retained_view_entity) + || !alpha_mask_3d_phases.contains_key(&view.retained_view_entity) + || !transparent_3d_phases.contains_key(&view.retained_view_entity) + { + continue; + }; + + let Some(transmissive_3d_phase) = transmissive_3d_phases.get(&view.retained_view_entity) + else { + continue; + }; + + let Some(physical_target_size) = camera.physical_target_size else { + continue; + }; + + // Don't prepare a transmission texture if the number of steps is set to 0 + if camera_3d.screen_space_specular_transmission_steps == 0 { + continue; + } + + // Don't prepare a transmission texture if there are no transmissive items to render + if transmissive_3d_phase.items.is_empty() { + continue; + } + + let cached_texture = textures + .entry(camera.target.clone()) + .or_insert_with(|| { + let usage = TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST; + + let format = if view.hdr { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }; + + let descriptor = TextureDescriptor { + label: Some("view_transmission_texture"), + // The size of the transmission texture + size: physical_target_size.to_extents(), + mip_level_count: 1, + sample_count: 1, // No need for MSAA, as we'll only copy the main texture here + dimension: TextureDimension::D2, + format, + usage, + 
view_formats: &[], + }; + + texture_cache.get(&render_device, descriptor) + }) + .clone(); + + let sampler = render_device.create_sampler(&SamplerDescriptor { + label: Some("view_transmission_sampler"), + mag_filter: FilterMode::Linear, + min_filter: FilterMode::Linear, + ..Default::default() + }); + + commands.entity(entity).insert(ViewTransmissionTexture { + texture: cached_texture.texture, + view: cached_texture.default_view, + sampler, + }); + } +} + +/// Sets the `TEXTURE_BINDING` flag on the depth texture if necessary for +/// occlusion culling. +/// +/// We need that flag to be set in order to read from the texture. +fn configure_occlusion_culling_view_targets( + mut view_targets: Query< + &mut Camera3d, + ( + With, + Without, + With, + ), + >, +) { + for mut camera_3d in &mut view_targets { + let mut depth_texture_usages = TextureUsages::from(camera_3d.depth_texture_usages); + depth_texture_usages |= TextureUsages::TEXTURE_BINDING; + camera_3d.depth_texture_usages = depth_texture_usages.into(); + } +} + +// Disable MSAA and warn if using deferred rendering +pub fn check_msaa(mut deferred_views: Query<&mut Msaa, (With, With)>) { + for mut msaa in deferred_views.iter_mut() { + match *msaa { + Msaa::Off => (), + _ => { + warn!("MSAA is incompatible with deferred rendering and has been disabled."); + *msaa = Msaa::Off; + } + }; + } +} + +// Prepares the textures used by the prepass +pub fn prepare_prepass_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, + opaque_3d_prepass_phases: Res>, + alpha_mask_3d_prepass_phases: Res>, + opaque_3d_deferred_phases: Res>, + alpha_mask_3d_deferred_phases: Res>, + views_3d: Query<( + Entity, + &ExtractedCamera, + &ExtractedView, + &Msaa, + Has, + Has, + Has, + Has, + )>, +) { + let mut depth_textures = >::default(); + let mut normal_textures = >::default(); + let mut deferred_textures = >::default(); + let mut deferred_lighting_id_textures = >::default(); + let mut motion_vectors_textures 
= >::default(); + for ( + entity, + camera, + view, + msaa, + depth_prepass, + normal_prepass, + motion_vector_prepass, + deferred_prepass, + ) in &views_3d + { + if !opaque_3d_prepass_phases.contains_key(&view.retained_view_entity) + && !alpha_mask_3d_prepass_phases.contains_key(&view.retained_view_entity) + && !opaque_3d_deferred_phases.contains_key(&view.retained_view_entity) + && !alpha_mask_3d_deferred_phases.contains_key(&view.retained_view_entity) + { + commands.entity(entity).remove::(); + continue; + }; + + let Some(physical_target_size) = camera.physical_target_size else { + continue; + }; + + let size = physical_target_size.to_extents(); + + let cached_depth_texture = depth_prepass.then(|| { + depth_textures + .entry(camera.target.clone()) + .or_insert_with(|| { + let descriptor = TextureDescriptor { + label: Some("prepass_depth_texture"), + size, + mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: CORE_3D_DEPTH_FORMAT, + usage: TextureUsages::COPY_DST + | TextureUsages::RENDER_ATTACHMENT + | TextureUsages::TEXTURE_BINDING + | TextureUsages::COPY_SRC, // TODO: Remove COPY_SRC, double buffer instead (for bevy_solari) + view_formats: &[], + }; + texture_cache.get(&render_device, descriptor) + }) + .clone() + }); + + let cached_normals_texture = normal_prepass.then(|| { + normal_textures + .entry(camera.target.clone()) + .or_insert_with(|| { + texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("prepass_normal_texture"), + size, + mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: NORMAL_PREPASS_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ) + }) + .clone() + }); + + let cached_motion_vectors_texture = motion_vector_prepass.then(|| { + motion_vectors_textures + .entry(camera.target.clone()) + .or_insert_with(|| { + texture_cache.get( + &render_device, + TextureDescriptor { + 
label: Some("prepass_motion_vectors_textures"), + size, + mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: MOTION_VECTOR_PREPASS_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ) + }) + .clone() + }); + + let cached_deferred_texture = deferred_prepass.then(|| { + deferred_textures + .entry(camera.target.clone()) + .or_insert_with(|| { + texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("prepass_deferred_texture"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: DEFERRED_PREPASS_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT + | TextureUsages::TEXTURE_BINDING + | TextureUsages::COPY_SRC, // TODO: Remove COPY_SRC, double buffer instead (for bevy_solari) + view_formats: &[], + }, + ) + }) + .clone() + }); + + let cached_deferred_lighting_pass_id_texture = deferred_prepass.then(|| { + deferred_lighting_id_textures + .entry(camera.target.clone()) + .or_insert_with(|| { + texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("deferred_lighting_pass_id_texture"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: DEFERRED_LIGHTING_PASS_ID_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ) + }) + .clone() + }); + + commands.entity(entity).insert(ViewPrepassTextures { + depth: cached_depth_texture + .map(|t| ColorAttachment::new(t, None, Some(LinearRgba::BLACK))), + normal: cached_normals_texture + .map(|t| ColorAttachment::new(t, None, Some(LinearRgba::BLACK))), + // Red and Green channels are X and Y components of the motion vectors + // Blue channel doesn't matter, but set to 0.0 for possible faster clear + // https://gpuopen.com/performance/#clears + motion_vectors: cached_motion_vectors_texture + .map(|t| ColorAttachment::new(t, None, Some(LinearRgba::BLACK))), 
+ deferred: cached_deferred_texture + .map(|t| ColorAttachment::new(t, None, Some(LinearRgba::BLACK))), + deferred_lighting_pass_id: cached_deferred_lighting_pass_id_texture + .map(|t| ColorAttachment::new(t, None, Some(LinearRgba::BLACK))), + size, + }); + } +} diff --git a/crates/libmarathon/src/render/deferred/copy_deferred_lighting_id.wgsl b/crates/libmarathon/src/render/deferred/copy_deferred_lighting_id.wgsl new file mode 100644 index 0000000..25acf47 --- /dev/null +++ b/crates/libmarathon/src/render/deferred/copy_deferred_lighting_id.wgsl @@ -0,0 +1,18 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) +var material_id_texture: texture_2d; + +struct FragmentOutput { + @builtin(frag_depth) frag_depth: f32, + +} + +@fragment +fn fragment(in: FullscreenVertexOutput) -> FragmentOutput { + var out: FragmentOutput; + // Depth is stored as unorm, so we are dividing the u8 by 255.0 here. + out.frag_depth = f32(textureLoad(material_id_texture, vec2(in.position.xy), 0).x) / 255.0; + return out; +} + diff --git a/crates/libmarathon/src/render/deferred/copy_lighting_id.rs b/crates/libmarathon/src/render/deferred/copy_lighting_id.rs new file mode 100644 index 0000000..39f0de5 --- /dev/null +++ b/crates/libmarathon/src/render/deferred/copy_lighting_id.rs @@ -0,0 +1,193 @@ +use crate::render::{ + prepass::{DeferredPrepass, ViewPrepassTextures}, + FullscreenShader, +}; +use bevy_app::prelude::*; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer}; +use bevy_ecs::prelude::*; +use bevy_image::ToExtents; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_resource::{binding_types::texture_2d, *}, + renderer::RenderDevice, + texture::{CachedTexture, TextureCache}, + view::ViewTarget, + Render, RenderApp, RenderStartup, RenderSystems, +}; + +use super::DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT; +use bevy_ecs::query::QueryItem; +use crate::render::{ + 
render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + renderer::RenderContext, +}; +use bevy_utils::default; + +pub struct CopyDeferredLightingIdPlugin; + +impl Plugin for CopyDeferredLightingIdPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "copy_deferred_lighting_id.wgsl"); + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .add_systems(RenderStartup, init_copy_deferred_lighting_id_pipeline) + .add_systems( + Render, + (prepare_deferred_lighting_id_textures.in_set(RenderSystems::PrepareResources),), + ); + } +} + +#[derive(Default)] +pub struct CopyDeferredLightingIdNode; +impl CopyDeferredLightingIdNode { + pub const NAME: &'static str = "copy_deferred_lighting_id"; +} + +impl ViewNode for CopyDeferredLightingIdNode { + type ViewQuery = ( + &'static ViewTarget, + &'static ViewPrepassTextures, + &'static DeferredLightingIdDepthTexture, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (_view_target, view_prepass_textures, deferred_lighting_id_depth_texture): QueryItem< + Self::ViewQuery, + >, + world: &World, + ) -> Result<(), NodeRunError> { + let copy_deferred_lighting_id_pipeline = world.resource::(); + + let pipeline_cache = world.resource::(); + + let Some(pipeline) = + pipeline_cache.get_render_pipeline(copy_deferred_lighting_id_pipeline.pipeline_id) + else { + return Ok(()); + }; + let Some(deferred_lighting_pass_id_texture) = + &view_prepass_textures.deferred_lighting_pass_id + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let bind_group = render_context.render_device().create_bind_group( + "copy_deferred_lighting_id_bind_group", + ©_deferred_lighting_id_pipeline.layout, + &BindGroupEntries::single(&deferred_lighting_pass_id_texture.texture.default_view), + ); + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("copy_deferred_lighting_id"), 
+ color_attachments: &[], + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &deferred_lighting_id_depth_texture.texture.default_view, + depth_ops: Some(Operations { + load: LoadOp::Clear(0.0), + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + + let pass_span = diagnostics.pass_span(&mut render_pass, "copy_deferred_lighting_id"); + + render_pass.set_render_pipeline(pipeline); + render_pass.set_bind_group(0, &bind_group, &[]); + render_pass.draw(0..3, 0..1); + + pass_span.end(&mut render_pass); + + Ok(()) + } +} + +#[derive(Resource)] +struct CopyDeferredLightingIdPipeline { + layout: BindGroupLayout, + pipeline_id: CachedRenderPipelineId, +} + +pub fn init_copy_deferred_lighting_id_pipeline( + mut commands: Commands, + render_device: Res, + fullscreen_shader: Res, + asset_server: Res, + pipeline_cache: Res, +) { + let layout = render_device.create_bind_group_layout( + "copy_deferred_lighting_id_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + texture_2d(TextureSampleType::Uint), + ), + ); + + let vertex_state = fullscreen_shader.to_vertex_state(); + let shader = load_embedded_asset!(asset_server.as_ref(), "copy_deferred_lighting_id.wgsl"); + + let pipeline_id = pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("copy_deferred_lighting_id_pipeline".into()), + layout: vec![layout.clone()], + vertex: vertex_state, + fragment: Some(FragmentState { + shader, + ..default() + }), + depth_stencil: Some(DepthStencilState { + format: DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + ..default() + }); + + commands.insert_resource(CopyDeferredLightingIdPipeline { + layout, + pipeline_id, + }); +} + +#[derive(Component)] +pub struct DeferredLightingIdDepthTexture { + pub texture: 
CachedTexture, +} + +fn prepare_deferred_lighting_id_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, + views: Query<(Entity, &ExtractedCamera), With>, +) { + for (entity, camera) in &views { + if let Some(physical_target_size) = camera.physical_target_size { + let texture_descriptor = TextureDescriptor { + label: Some("deferred_lighting_id_depth_texture_a"), + size: physical_target_size.to_extents(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::COPY_SRC, + view_formats: &[], + }; + let texture = texture_cache.get(&render_device, texture_descriptor); + commands + .entity(entity) + .insert(DeferredLightingIdDepthTexture { texture }); + } + } +} diff --git a/crates/libmarathon/src/render/deferred/mod.rs b/crates/libmarathon/src/render/deferred/mod.rs new file mode 100644 index 0000000..65b76b3 --- /dev/null +++ b/crates/libmarathon/src/render/deferred/mod.rs @@ -0,0 +1,186 @@ +pub mod copy_lighting_id; +pub mod node; + +use core::ops::Range; + +use crate::render::prepass::{OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey}; +use bevy_ecs::prelude::*; +use crate::render::sync_world::MainEntity; +use crate::render::{ + render_phase::{ + BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, + PhaseItemExtraIndex, + }, + render_resource::{CachedRenderPipelineId, TextureFormat}, +}; + +pub const DEFERRED_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgba32Uint; +pub const DEFERRED_LIGHTING_PASS_ID_FORMAT: TextureFormat = TextureFormat::R8Uint; +pub const DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth16Unorm; + +/// Opaque phase of the 3D Deferred pass. +/// +/// Sorted by pipeline, then by mesh to improve batching. +/// +/// Used to render all 3D meshes with materials that have no transparency. 
+#[derive(PartialEq, Eq, Hash)] +pub struct Opaque3dDeferred { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: OpaqueNoLightmap3dBatchSetKey, + /// Information that separates items into bins. + pub bin_key: OpaqueNoLightmap3dBinKey, + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +impl PhaseItem for Opaque3dDeferred { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for Opaque3dDeferred { + type BatchSetKey = OpaqueNoLightmap3dBatchSetKey; + type BinKey = OpaqueNoLightmap3dBinKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Self { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for Opaque3dDeferred { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +/// Alpha mask phase of the 3D Deferred pass. +/// +/// Sorted by pipeline, then by mesh to improve batching. 
+/// +/// Used to render all meshes with a material with an alpha mask. +pub struct AlphaMask3dDeferred { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: OpaqueNoLightmap3dBatchSetKey, + /// Information that separates items into bins. + pub bin_key: OpaqueNoLightmap3dBinKey, + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +impl PhaseItem for AlphaMask3dDeferred { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + #[inline] + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for AlphaMask3dDeferred { + type BatchSetKey = OpaqueNoLightmap3dBatchSetKey; + type BinKey = OpaqueNoLightmap3dBinKey; + + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Self { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for AlphaMask3dDeferred { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} diff --git a/crates/libmarathon/src/render/deferred/node.rs 
b/crates/libmarathon/src/render/deferred/node.rs new file mode 100644 index 0000000..79d1584 --- /dev/null +++ b/crates/libmarathon/src/render/deferred/node.rs @@ -0,0 +1,273 @@ +use bevy_camera::{MainPassResolutionOverride, Viewport}; +use bevy_ecs::{prelude::*, query::QueryItem}; +use crate::render::experimental::occlusion_culling::OcclusionCulling; +use crate::render::render_graph::ViewNode; + +use crate::render::view::{ExtractedView, NoIndirectDrawing}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext}, + render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, + render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::ViewDepthTexture, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +use crate::render::prepass::ViewPrepassTextures; + +use super::{AlphaMask3dDeferred, Opaque3dDeferred}; + +/// The phase of the deferred prepass that draws meshes that were visible last +/// frame. +/// +/// If occlusion culling isn't in use, this prepass simply draws all meshes. +/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. +#[derive(Default)] +pub struct EarlyDeferredGBufferPrepassNode; + +impl ViewNode for EarlyDeferredGBufferPrepassNode { + type ViewQuery = ::ViewQuery; + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + view_query: QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + run_deferred_prepass( + graph, + render_context, + view_query, + false, + world, + "early deferred prepass", + ) + } +} + +/// The phase of the prepass that runs after occlusion culling against the +/// meshes that were visible last frame. +/// +/// If occlusion culling isn't in use, this is a no-op. 
+/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. +#[derive(Default)] +pub struct LateDeferredGBufferPrepassNode; + +impl ViewNode for LateDeferredGBufferPrepassNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewDepthTexture, + &'static ViewPrepassTextures, + Option<&'static MainPassResolutionOverride>, + Has, + Has, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + view_query: QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let (.., occlusion_culling, no_indirect_drawing) = view_query; + if !occlusion_culling || no_indirect_drawing { + return Ok(()); + } + + run_deferred_prepass( + graph, + render_context, + view_query, + true, + world, + "late deferred prepass", + ) + } +} + +/// Runs the deferred prepass that draws all meshes to the depth buffer and +/// G-buffers. +/// +/// If occlusion culling isn't in use, and a prepass is enabled, then there's +/// only one prepass. If occlusion culling is in use, then any prepass is split +/// into two: an *early* prepass and a *late* prepass. The early prepass draws +/// what was visible last frame, and the last prepass performs occlusion culling +/// against a conservative hierarchical Z buffer before drawing unoccluded +/// meshes. 
+fn run_deferred_prepass<'w>( + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (camera, extracted_view, view_depth_texture, view_prepass_textures, resolution_override, _, _): QueryItem< + 'w, + '_, + ::ViewQuery, + >, + is_late: bool, + world: &'w World, + label: &'static str, +) -> Result<(), NodeRunError> { + let (Some(opaque_deferred_phases), Some(alpha_mask_deferred_phases)) = ( + world.get_resource::>(), + world.get_resource::>(), + ) else { + return Ok(()); + }; + + let (Some(opaque_deferred_phase), Some(alpha_mask_deferred_phase)) = ( + opaque_deferred_phases.get(&extracted_view.retained_view_entity), + alpha_mask_deferred_phases.get(&extracted_view.retained_view_entity), + ) else { + return Ok(()); + }; + + let diagnostic = render_context.diagnostic_recorder(); + + let mut color_attachments = vec![]; + color_attachments.push( + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + ); + color_attachments.push( + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + ); + + // If we clear the deferred texture with LoadOp::Clear(Default::default()) we get these errors: + // Chrome: GL_INVALID_OPERATION: No defined conversion between clear value and attachment format. + // Firefox: WebGL warning: clearBufferu?[fi]v: This attachment is of type FLOAT, but this function is of type UINT. 
+ // Appears to be unsupported: https://registry.khronos.org/webgl/specs/latest/2.0/#3.7.9 + // For webgl2 we fallback to manually clearing + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + if !is_late { + if let Some(deferred_texture) = &view_prepass_textures.deferred { + render_context.command_encoder().clear_texture( + &deferred_texture.texture.texture, + &bevy_render::render_resource::ImageSubresourceRange::default(), + ); + } + } + + color_attachments.push( + view_prepass_textures + .deferred + .as_ref() + .map(|deferred_texture| { + if is_late { + deferred_texture.get_attachment() + } else { + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + { + bevy_render::render_resource::RenderPassColorAttachment { + view: &deferred_texture.texture.default_view, + resolve_target: None, + ops: bevy_render::render_resource::Operations { + load: bevy_render::render_resource::LoadOp::Load, + store: StoreOp::Store, + }, + depth_slice: None, + } + } + #[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ))] + deferred_texture.get_attachment() + } + }), + ); + + color_attachments.push( + view_prepass_textures + .deferred_lighting_pass_id + .as_ref() + .map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()), + ); + + // If all color attachments are none: clear the color attachment list so that no fragment shader is required + if color_attachments.iter().all(Option::is_none) { + color_attachments.clear(); + } + + let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + + let view_entity = graph.view_entity(); + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _deferred_span = info_span!("deferred_prepass").entered(); + + // Command encoder setup + let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: 
Some("deferred_prepass_command_encoder"), + }); + + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some(label), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostic.pass_span(&mut render_pass, label); + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + // Opaque draws + if !opaque_deferred_phase.multidrawable_meshes.is_empty() + || !opaque_deferred_phase.batchable_meshes.is_empty() + || !opaque_deferred_phase.unbatchable_meshes.is_empty() + { + #[cfg(feature = "trace")] + let _opaque_prepass_span = info_span!("opaque_deferred_prepass").entered(); + if let Err(err) = opaque_deferred_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the opaque deferred phase {err:?}"); + } + } + + // Alpha masked draws + if !alpha_mask_deferred_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_deferred_span = info_span!("alpha_mask_deferred_prepass").entered(); + if let Err(err) = alpha_mask_deferred_phase.render(&mut render_pass, world, view_entity) + { + error!("Error encountered while rendering the alpha mask deferred phase {err:?}"); + } + } + + pass_span.end(&mut render_pass); + drop(render_pass); + + // After rendering to the view depth texture, copy it to the prepass depth texture + if let Some(prepass_depth_texture) = &view_prepass_textures.depth { + command_encoder.copy_texture_to_texture( + view_depth_texture.texture.as_image_copy(), + prepass_depth_texture.texture.texture.as_image_copy(), + view_prepass_textures.size, + ); + } + + command_encoder.finish() + }); + + Ok(()) +} diff --git a/crates/libmarathon/src/render/diagnostic/internal.rs 
b/crates/libmarathon/src/render/diagnostic/internal.rs new file mode 100644 index 0000000..876d066 --- /dev/null +++ b/crates/libmarathon/src/render/diagnostic/internal.rs @@ -0,0 +1,709 @@ +use std::{borrow::Cow, sync::Arc}; +use core::{ + ops::{DerefMut, Range}, + sync::atomic::{AtomicBool, Ordering}, +}; +use std::thread::{self, ThreadId}; + +use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore}; +use bevy_ecs::resource::Resource; +use bevy_ecs::system::{Res, ResMut}; +use bevy_platform::time::Instant; +use std::sync::Mutex; +use wgpu::{ + Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode, + PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, RenderPass, +}; + +use crate::render::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper}; + +use super::RecordDiagnostics; + +// buffer offset must be divisible by 256, so this constant must be divisible by 32 (=256/8) +const MAX_TIMESTAMP_QUERIES: u32 = 256; +const MAX_PIPELINE_STATISTICS: u32 = 128; + +const TIMESTAMP_SIZE: u64 = 8; +const PIPELINE_STATISTICS_SIZE: u64 = 40; + +struct DiagnosticsRecorderInternal { + timestamp_period_ns: f32, + features: Features, + current_frame: Mutex, + submitted_frames: Vec, + finished_frames: Vec, + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context: tracy_client::GpuContext, +} + +/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between +/// spans and indices to the corresponding entries in the [`QuerySet`]. +#[derive(Resource)] +pub struct DiagnosticsRecorder(WgpuWrapper); + +impl DiagnosticsRecorder { + /// Creates the new `DiagnosticsRecorder`. 
+ pub fn new( + adapter_info: &RenderAdapterInfo, + device: &RenderDevice, + queue: &RenderQueue, + ) -> DiagnosticsRecorder { + let features = device.features(); + + #[cfg(feature = "tracing-tracy")] + let tracy_gpu_context = + super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue); + let _ = adapter_info; // Prevent unused variable warnings when tracing-tracy is not enabled + + DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal { + timestamp_period_ns: queue.get_timestamp_period(), + features, + current_frame: Mutex::new(FrameData::new( + device, + features, + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context.clone(), + )), + submitted_frames: Vec::new(), + finished_frames: Vec::new(), + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context, + })) + } + + fn current_frame_mut(&mut self) -> &mut FrameData { + self.0.current_frame.get_mut().expect("lock poisoned") + } + + fn current_frame_lock(&self) -> impl DerefMut + '_ { + self.0.current_frame.lock().expect("lock poisoned") + } + + /// Begins recording diagnostics for a new frame. + pub fn begin_frame(&mut self) { + let internal = &mut self.0; + let mut idx = 0; + while idx < internal.submitted_frames.len() { + let timestamp = internal.timestamp_period_ns; + if internal.submitted_frames[idx].run_mapped_callback(timestamp) { + let removed = internal.submitted_frames.swap_remove(idx); + internal.finished_frames.push(removed); + } else { + idx += 1; + } + } + + self.current_frame_mut().begin(); + } + + /// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU. + /// + /// Should be called before [`DiagnosticsRecorder::finish_frame`]. + pub fn resolve(&mut self, encoder: &mut CommandEncoder) { + self.current_frame_mut().resolve(encoder); + } + + /// Finishes recording diagnostics for the current frame. + /// + /// The specified `callback` will be invoked when diagnostics become available. 
+ /// + /// Should be called after [`DiagnosticsRecorder::resolve`], + /// and **after** all commands buffers have been queued. + pub fn finish_frame( + &mut self, + device: &RenderDevice, + callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static, + ) { + #[cfg(feature = "tracing-tracy")] + let tracy_gpu_context = self.0.tracy_gpu_context.clone(); + + let internal = &mut self.0; + internal + .current_frame + .get_mut() + .expect("lock poisoned") + .finish(callback); + + // reuse one of the finished frames, if we can + let new_frame = match internal.finished_frames.pop() { + Some(frame) => frame, + None => FrameData::new( + device, + internal.features, + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context, + ), + }; + + let old_frame = core::mem::replace( + internal.current_frame.get_mut().expect("lock poisoned"), + new_frame, + ); + internal.submitted_frames.push(old_frame); + } +} + +impl RecordDiagnostics for DiagnosticsRecorder { + fn begin_time_span(&self, encoder: &mut E, span_name: Cow<'static, str>) { + self.current_frame_lock() + .begin_time_span(encoder, span_name); + } + + fn end_time_span(&self, encoder: &mut E) { + self.current_frame_lock().end_time_span(encoder); + } + + fn begin_pass_span(&self, pass: &mut P, span_name: Cow<'static, str>) { + self.current_frame_lock().begin_pass(pass, span_name); + } + + fn end_pass_span(&self, pass: &mut P) { + self.current_frame_lock().end_pass(pass); + } +} + +struct SpanRecord { + thread_id: ThreadId, + path_range: Range, + pass_kind: Option, + begin_timestamp_index: Option, + end_timestamp_index: Option, + begin_instant: Option, + end_instant: Option, + pipeline_statistics_index: Option, +} + +struct FrameData { + timestamps_query_set: Option, + num_timestamps: u32, + supports_timestamps_inside_passes: bool, + supports_timestamps_inside_encoders: bool, + pipeline_statistics_query_set: Option, + num_pipeline_statistics: u32, + buffer_size: u64, + pipeline_statistics_buffer_offset: u64, + resolve_buffer: 
Option, + read_buffer: Option, + path_components: Vec>, + open_spans: Vec, + closed_spans: Vec, + is_mapped: Arc, + callback: Option>, + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context: tracy_client::GpuContext, +} + +impl FrameData { + fn new( + device: &RenderDevice, + features: Features, + #[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext, + ) -> FrameData { + let wgpu_device = device.wgpu_device(); + let mut buffer_size = 0; + + let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) { + buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE; + Some(wgpu_device.create_query_set(&QuerySetDescriptor { + label: Some("timestamps_query_set"), + ty: QueryType::Timestamp, + count: MAX_TIMESTAMP_QUERIES, + })) + } else { + None + }; + + let pipeline_statistics_buffer_offset = buffer_size; + + let pipeline_statistics_query_set = + if features.contains(Features::PIPELINE_STATISTICS_QUERY) { + buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE; + Some(wgpu_device.create_query_set(&QuerySetDescriptor { + label: Some("pipeline_statistics_query_set"), + ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()), + count: MAX_PIPELINE_STATISTICS, + })) + } else { + None + }; + + let (resolve_buffer, read_buffer) = if buffer_size > 0 { + let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor { + label: Some("render_statistics_resolve_buffer"), + size: buffer_size, + usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + let read_buffer = wgpu_device.create_buffer(&BufferDescriptor { + label: Some("render_statistics_read_buffer"), + size: buffer_size, + usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + (Some(resolve_buffer), Some(read_buffer)) + } else { + (None, None) + }; + + FrameData { + timestamps_query_set, + num_timestamps: 0, + supports_timestamps_inside_passes: features + 
.contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES), + supports_timestamps_inside_encoders: features + .contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS), + pipeline_statistics_query_set, + num_pipeline_statistics: 0, + buffer_size, + pipeline_statistics_buffer_offset, + resolve_buffer, + read_buffer, + path_components: Vec::new(), + open_spans: Vec::new(), + closed_spans: Vec::new(), + is_mapped: Arc::new(AtomicBool::new(false)), + callback: None, + #[cfg(feature = "tracing-tracy")] + tracy_gpu_context, + } + } + + fn begin(&mut self) { + self.num_timestamps = 0; + self.num_pipeline_statistics = 0; + self.path_components.clear(); + self.open_spans.clear(); + self.closed_spans.clear(); + } + + fn write_timestamp( + &mut self, + encoder: &mut impl WriteTimestamp, + is_inside_pass: bool, + ) -> Option { + // `encoder.write_timestamp` is unsupported on WebGPU. + if !self.supports_timestamps_inside_encoders { + return None; + } + + if is_inside_pass && !self.supports_timestamps_inside_passes { + return None; + } + + if self.num_timestamps >= MAX_TIMESTAMP_QUERIES { + return None; + } + + let set = self.timestamps_query_set.as_ref()?; + let index = self.num_timestamps; + encoder.write_timestamp(set, index); + self.num_timestamps += 1; + Some(index) + } + + fn write_pipeline_statistics( + &mut self, + encoder: &mut impl WritePipelineStatistics, + ) -> Option { + if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS { + return None; + } + + let set = self.pipeline_statistics_query_set.as_ref()?; + let index = self.num_pipeline_statistics; + encoder.begin_pipeline_statistics_query(set, index); + self.num_pipeline_statistics += 1; + Some(index) + } + + fn open_span( + &mut self, + pass_kind: Option, + name: Cow<'static, str>, + ) -> &mut SpanRecord { + let thread_id = thread::current().id(); + + let parent = self + .open_spans + .iter() + .filter(|v| v.thread_id == thread_id) + .next_back(); + + let path_range = match &parent { + Some(parent) if parent.path_range.end 
== self.path_components.len() => { + parent.path_range.start..parent.path_range.end + 1 + } + Some(parent) => { + self.path_components + .extend_from_within(parent.path_range.clone()); + self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1 + } + None => self.path_components.len()..self.path_components.len() + 1, + }; + + self.path_components.push(name); + + self.open_spans.push(SpanRecord { + thread_id, + path_range, + pass_kind, + begin_timestamp_index: None, + end_timestamp_index: None, + begin_instant: None, + end_instant: None, + pipeline_statistics_index: None, + }); + + self.open_spans.last_mut().unwrap() + } + + fn close_span(&mut self) -> &mut SpanRecord { + let thread_id = thread::current().id(); + + let iter = self.open_spans.iter(); + let (index, _) = iter + .enumerate() + .filter(|(_, v)| v.thread_id == thread_id) + .next_back() + .unwrap(); + + let span = self.open_spans.swap_remove(index); + self.closed_spans.push(span); + self.closed_spans.last_mut().unwrap() + } + + fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) { + let begin_instant = Instant::now(); + let begin_timestamp_index = self.write_timestamp(encoder, false); + + let span = self.open_span(None, name); + span.begin_instant = Some(begin_instant); + span.begin_timestamp_index = begin_timestamp_index; + } + + fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) { + let end_timestamp_index = self.write_timestamp(encoder, false); + + let span = self.close_span(); + span.end_timestamp_index = end_timestamp_index; + span.end_instant = Some(Instant::now()); + } + + fn begin_pass(&mut self, pass: &mut P, name: Cow<'static, str>) { + let begin_instant = Instant::now(); + + let begin_timestamp_index = self.write_timestamp(pass, true); + let pipeline_statistics_index = self.write_pipeline_statistics(pass); + + let span = self.open_span(Some(P::KIND), name); + span.begin_instant = Some(begin_instant); + 
span.begin_timestamp_index = begin_timestamp_index; + span.pipeline_statistics_index = pipeline_statistics_index; + } + + fn end_pass(&mut self, pass: &mut impl Pass) { + let end_timestamp_index = self.write_timestamp(pass, true); + + let span = self.close_span(); + span.end_timestamp_index = end_timestamp_index; + + if span.pipeline_statistics_index.is_some() { + pass.end_pipeline_statistics_query(); + } + + span.end_instant = Some(Instant::now()); + } + + fn resolve(&mut self, encoder: &mut CommandEncoder) { + let Some(resolve_buffer) = &self.resolve_buffer else { + return; + }; + + match &self.timestamps_query_set { + Some(set) if self.num_timestamps > 0 => { + encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0); + } + _ => {} + } + + match &self.pipeline_statistics_query_set { + Some(set) if self.num_pipeline_statistics > 0 => { + encoder.resolve_query_set( + set, + 0..self.num_pipeline_statistics, + resolve_buffer, + self.pipeline_statistics_buffer_offset, + ); + } + _ => {} + } + + let Some(read_buffer) = &self.read_buffer else { + return; + }; + + encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size); + } + + fn diagnostic_path(&self, range: &Range, field: &str) -> DiagnosticPath { + DiagnosticPath::from_components( + core::iter::once("render") + .chain(self.path_components[range.clone()].iter().map(|v| &**v)) + .chain(core::iter::once(field)), + ) + } + + fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) { + let Some(read_buffer) = &self.read_buffer else { + // we still have cpu timings, so let's use them + + let mut diagnostics = Vec::new(); + + for span in &self.closed_spans { + if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_cpu"), + suffix: "ms", + value: (end - begin).as_secs_f64() * 1000.0, + }); + } + } + + 
callback(RenderDiagnostics(diagnostics)); + return; + }; + + self.callback = Some(Box::new(callback)); + + let is_mapped = self.is_mapped.clone(); + read_buffer.slice(..).map_async(MapMode::Read, move |res| { + if let Err(e) = res { + tracing::warn!("Failed to download render statistics buffer: {e}"); + return; + } + + is_mapped.store(true, Ordering::Release); + }); + } + + // returns true if the frame is considered finished, false otherwise + fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool { + let Some(read_buffer) = &self.read_buffer else { + return true; + }; + if !self.is_mapped.load(Ordering::Acquire) { + // need to wait more + return false; + } + let Some(callback) = self.callback.take() else { + return true; + }; + + let data = read_buffer.slice(..).get_mapped_range(); + + let timestamps = data[..(self.num_timestamps * 8) as usize] + .chunks(8) + .map(|v| u64::from_le_bytes(v.try_into().unwrap())) + .collect::>(); + + let start = self.pipeline_statistics_buffer_offset as usize; + let len = (self.num_pipeline_statistics as usize) * 40; + let pipeline_statistics = data[start..start + len] + .chunks(8) + .map(|v| u64::from_le_bytes(v.try_into().unwrap())) + .collect::>(); + + let mut diagnostics = Vec::new(); + + for span in &self.closed_spans { + if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_cpu"), + suffix: "ms", + value: (end - begin).as_secs_f64() * 1000.0, + }); + } + + if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index) + { + let begin = timestamps[begin as usize] as f64; + let end = timestamps[end as usize] as f64; + let value = (end - begin) * (timestamp_period_ns as f64) / 1e6; + + #[cfg(feature = "tracing-tracy")] + { + // Calling span_alloc() and end_zone() here instead of in open_span() and close_span() means that tracy does not know where each GPU command was recorded on 
the CPU timeline. + // Unfortunately we must do it this way, because tracy does not play nicely with multithreaded command recording. The start/end pairs would get all mixed up. + // The GPU spans themselves are still accurate though, and it's probably safe to assume that each GPU span in frame N belongs to the corresponding CPU render node span from frame N-1. + let name = &self.path_components[span.path_range.clone()].join("/"); + let mut tracy_gpu_span = + self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap(); + tracy_gpu_span.end_zone(); + tracy_gpu_span.upload_timestamp_start(begin as i64); + tracy_gpu_span.upload_timestamp_end(end as i64); + } + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_gpu"), + suffix: "ms", + value, + }); + } + + if let Some(index) = span.pipeline_statistics_index { + let index = (index as usize) * 5; + + if span.pass_kind == Some(PassKind::Render) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"), + suffix: "", + value: pipeline_statistics[index] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "clipper_invocations"), + suffix: "", + value: pipeline_statistics[index + 1] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"), + suffix: "", + value: pipeline_statistics[index + 2] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"), + suffix: "", + value: pipeline_statistics[index + 3] as f64, + }); + } + + if span.pass_kind == Some(PassKind::Compute) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"), + suffix: "", + value: pipeline_statistics[index + 4] as f64, + }); + } + } + } + + callback(RenderDiagnostics(diagnostics)); + + drop(data); + 
read_buffer.unmap(); + self.is_mapped.store(false, Ordering::Release); + + true + } +} + +/// Resource which stores render diagnostics of the most recent frame. +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderDiagnostics(Vec); + +/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`]. +#[derive(Debug, Clone, Resource)] +pub struct RenderDiagnostic { + pub path: DiagnosticPath, + pub suffix: &'static str, + pub value: f64, +} + +/// Stores render diagnostics before they can be synced with the main app. +/// +/// This mutex is locked twice per frame: +/// 1. in `PreUpdate`, during [`sync_diagnostics`], +/// 2. after rendering has finished and statistics have been downloaded from GPU. +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderDiagnosticsMutex(pub(crate) Arc>>); + +/// Updates render diagnostics measurements. +pub fn sync_diagnostics(mutex: Res, mut store: ResMut) { + let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else { + return; + }; + + let time = Instant::now(); + + for diagnostic in &diagnostics.0 { + if store.get(&diagnostic.path).is_none() { + store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix)); + } + + store + .get_mut(&diagnostic.path) + .unwrap() + .add_measurement(DiagnosticMeasurement { + time, + value: diagnostic.value, + }); + } +} + +pub trait WriteTimestamp { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32); +} + +impl WriteTimestamp for CommandEncoder { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + CommandEncoder::write_timestamp(self, query_set, index); + } +} + +impl WriteTimestamp for RenderPass<'_> { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + RenderPass::write_timestamp(self, query_set, index); + } +} + +impl WriteTimestamp for ComputePass<'_> { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + ComputePass::write_timestamp(self, 
query_set, index); + } +} + +pub trait WritePipelineStatistics { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32); + + fn end_pipeline_statistics_query(&mut self); +} + +impl WritePipelineStatistics for RenderPass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + RenderPass::begin_pipeline_statistics_query(self, query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + RenderPass::end_pipeline_statistics_query(self); + } +} + +impl WritePipelineStatistics for ComputePass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + ComputePass::begin_pipeline_statistics_query(self, query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + ComputePass::end_pipeline_statistics_query(self); + } +} + +pub trait Pass: WritePipelineStatistics + WriteTimestamp { + const KIND: PassKind; +} + +impl Pass for RenderPass<'_> { + const KIND: PassKind = PassKind::Render; +} + +impl Pass for ComputePass<'_> { + const KIND: PassKind = PassKind::Compute; +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub enum PassKind { + Render, + Compute, +} diff --git a/crates/libmarathon/src/render/diagnostic/mod.rs b/crates/libmarathon/src/render/diagnostic/mod.rs new file mode 100644 index 0000000..6e03946 --- /dev/null +++ b/crates/libmarathon/src/render/diagnostic/mod.rs @@ -0,0 +1,188 @@ +//! Infrastructure for recording render diagnostics. +//! +//! For more info, see [`RenderDiagnosticsPlugin`]. 
+ +pub(crate) mod internal; +#[cfg(feature = "tracing-tracy")] +mod tracy_gpu; + +use std::{borrow::Cow, sync::Arc}; +use core::marker::PhantomData; + +use bevy_app::{App, Plugin, PreUpdate}; + +use crate::render::{renderer::RenderAdapterInfo, RenderApp}; + +use self::internal::{ + sync_diagnostics, DiagnosticsRecorder, Pass, RenderDiagnosticsMutex, WriteTimestamp, +}; + +use crate::render::renderer::{RenderDevice, RenderQueue}; + +/// Enables collecting render diagnostics, such as CPU/GPU elapsed time per render pass, +/// as well as pipeline statistics (number of primitives, number of shader invocations, etc). +/// +/// To access the diagnostics, you can use the [`DiagnosticsStore`](bevy_diagnostic::DiagnosticsStore) resource, +/// add [`LogDiagnosticsPlugin`](bevy_diagnostic::LogDiagnosticsPlugin), or use [Tracy](https://github.com/bevyengine/bevy/blob/main/docs/profiling.md#tracy-renderqueue). +/// +/// To record diagnostics in your own passes: +/// 1. First, obtain the diagnostic recorder using [`RenderContext::diagnostic_recorder`](crate::renderer::RenderContext::diagnostic_recorder). +/// +/// It won't do anything unless [`RenderDiagnosticsPlugin`] is present, +/// so you're free to omit `#[cfg]` clauses. +/// ```ignore +/// let diagnostics = render_context.diagnostic_recorder(); +/// ``` +/// 2. Begin the span inside a command encoder, or a render/compute pass encoder. +/// ```ignore +/// let time_span = diagnostics.time_span(render_context.command_encoder(), "shadows"); +/// ``` +/// 3. End the span, providing the same encoder. +/// ```ignore +/// time_span.end(render_context.command_encoder()); +/// ``` +/// +/// # Supported platforms +/// Timestamp queries and pipeline statistics are currently supported only on Vulkan and DX12. +/// On other platforms (Metal, WebGPU, WebGL2) only CPU time will be recorded. 
+#[derive(Default)] +pub struct RenderDiagnosticsPlugin; + +impl Plugin for RenderDiagnosticsPlugin { + fn build(&self, app: &mut App) { + let render_diagnostics_mutex = RenderDiagnosticsMutex::default(); + app.insert_resource(render_diagnostics_mutex.clone()) + .add_systems(PreUpdate, sync_diagnostics); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.insert_resource(render_diagnostics_mutex); + } + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + let adapter_info = render_app.world().resource::(); + let device = render_app.world().resource::(); + let queue = render_app.world().resource::(); + render_app.insert_resource(DiagnosticsRecorder::new(adapter_info, device, queue)); + } +} + +/// Allows recording diagnostic spans. +pub trait RecordDiagnostics: Send + Sync { + /// Begin a time span, which will record elapsed CPU and GPU time. + /// + /// Returns a guard, which will panic on drop unless you end the span. + fn time_span(&self, encoder: &mut E, name: N) -> TimeSpanGuard<'_, Self, E> + where + E: WriteTimestamp, + N: Into>, + { + self.begin_time_span(encoder, name.into()); + TimeSpanGuard { + recorder: self, + marker: PhantomData, + } + } + + /// Begin a pass span, which will record elapsed CPU and GPU time, + /// as well as pipeline statistics on supported platforms. + /// + /// Returns a guard, which will panic on drop unless you end the span. 
+ fn pass_span(&self, pass: &mut P, name: N) -> PassSpanGuard<'_, Self, P> + where + P: Pass, + N: Into>, + { + self.begin_pass_span(pass, name.into()); + PassSpanGuard { + recorder: self, + marker: PhantomData, + } + } + + #[doc(hidden)] + fn begin_time_span(&self, encoder: &mut E, name: Cow<'static, str>); + + #[doc(hidden)] + fn end_time_span(&self, encoder: &mut E); + + #[doc(hidden)] + fn begin_pass_span(&self, pass: &mut P, name: Cow<'static, str>); + + #[doc(hidden)] + fn end_pass_span(&self, pass: &mut P); +} + +/// Guard returned by [`RecordDiagnostics::time_span`]. +/// +/// Will panic on drop unless [`TimeSpanGuard::end`] is called. +pub struct TimeSpanGuard<'a, R: ?Sized, E> { + recorder: &'a R, + marker: PhantomData, +} + +impl TimeSpanGuard<'_, R, E> { + /// End the span. You have to provide the same encoder which was used to begin the span. + pub fn end(self, encoder: &mut E) { + self.recorder.end_time_span(encoder); + core::mem::forget(self); + } +} + +impl Drop for TimeSpanGuard<'_, R, E> { + fn drop(&mut self) { + panic!("TimeSpanScope::end was never called") + } +} + +/// Guard returned by [`RecordDiagnostics::pass_span`]. +/// +/// Will panic on drop unless [`PassSpanGuard::end`] is called. +pub struct PassSpanGuard<'a, R: ?Sized, P> { + recorder: &'a R, + marker: PhantomData

, +} + +impl PassSpanGuard<'_, R, P> { + /// End the span. You have to provide the same pass which was used to begin the span. + pub fn end(self, pass: &mut P) { + self.recorder.end_pass_span(pass); + core::mem::forget(self); + } +} + +impl Drop for PassSpanGuard<'_, R, P> { + fn drop(&mut self) { + panic!("PassSpanScope::end was never called") + } +} + +impl RecordDiagnostics for Option> { + fn begin_time_span(&self, encoder: &mut E, name: Cow<'static, str>) { + if let Some(recorder) = &self { + recorder.begin_time_span(encoder, name); + } + } + + fn end_time_span(&self, encoder: &mut E) { + if let Some(recorder) = &self { + recorder.end_time_span(encoder); + } + } + + fn begin_pass_span(&self, pass: &mut P, name: Cow<'static, str>) { + if let Some(recorder) = &self { + recorder.begin_pass_span(pass, name); + } + } + + fn end_pass_span(&self, pass: &mut P) { + if let Some(recorder) = &self { + recorder.end_pass_span(pass); + } + } +} diff --git a/crates/libmarathon/src/render/diagnostic/tracy_gpu.rs b/crates/libmarathon/src/render/diagnostic/tracy_gpu.rs new file mode 100644 index 0000000..2a86a15 --- /dev/null +++ b/crates/libmarathon/src/render/diagnostic/tracy_gpu.rs @@ -0,0 +1,69 @@ +use crate::render::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue}; +use tracy_client::{Client, GpuContext, GpuContextType}; +use wgpu::{ + Backend, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, MapMode, PollType, + QuerySetDescriptor, QueryType, QUERY_SIZE, +}; + +pub fn new_tracy_gpu_context( + adapter_info: &RenderAdapterInfo, + device: &RenderDevice, + queue: &RenderQueue, +) -> GpuContext { + let tracy_gpu_backend = match adapter_info.backend { + Backend::Vulkan => GpuContextType::Vulkan, + Backend::Dx12 => GpuContextType::Direct3D12, + Backend::Gl => GpuContextType::OpenGL, + Backend::Metal | Backend::BrowserWebGpu | Backend::Noop => GpuContextType::Invalid, + }; + + let tracy_client = Client::running().unwrap(); + tracy_client + .new_gpu_context( + 
Some("RenderQueue"), + tracy_gpu_backend, + initial_timestamp(device, queue), + queue.get_timestamp_period(), + ) + .unwrap() +} + +// Code copied from https://github.com/Wumpf/wgpu-profiler/blob/f9de342a62cb75f50904a98d11dd2bbeb40ceab8/src/tracy.rs +fn initial_timestamp(device: &RenderDevice, queue: &RenderQueue) -> i64 { + let query_set = device.wgpu_device().create_query_set(&QuerySetDescriptor { + label: None, + ty: QueryType::Timestamp, + count: 1, + }); + + let resolve_buffer = device.create_buffer(&BufferDescriptor { + label: None, + size: QUERY_SIZE as _, + usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let map_buffer = device.create_buffer(&BufferDescriptor { + label: None, + size: QUERY_SIZE as _, + usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let mut timestamp_encoder = device.create_command_encoder(&CommandEncoderDescriptor::default()); + timestamp_encoder.write_timestamp(&query_set, 0); + timestamp_encoder.resolve_query_set(&query_set, 0..1, &resolve_buffer, 0); + // Workaround for https://github.com/gfx-rs/wgpu/issues/6406 + // TODO when that bug is fixed, merge these encoders together again + let mut copy_encoder = device.create_command_encoder(&CommandEncoderDescriptor::default()); + copy_encoder.copy_buffer_to_buffer(&resolve_buffer, 0, &map_buffer, 0, Some(QUERY_SIZE as _)); + queue.submit([timestamp_encoder.finish(), copy_encoder.finish()]); + + map_buffer.slice(..).map_async(MapMode::Read, |_| ()); + device + .poll(PollType::Wait) + .expect("Failed to poll device for map async"); + + let view = map_buffer.slice(..).get_mapped_range(); + i64::from_le_bytes((*view).try_into().unwrap()) +} diff --git a/crates/libmarathon/src/render/erased_render_asset.rs b/crates/libmarathon/src/render/erased_render_asset.rs new file mode 100644 index 0000000..dc30150 --- /dev/null +++ b/crates/libmarathon/src/render/erased_render_asset.rs @@ -0,0 +1,431 @@ +use 
crate::render::{ + render_resource::AsBindGroupError, ExtractSchedule, MainWorld, Render, RenderApp, + RenderSystems, +}; +use bevy_app::{App, Plugin, SubApp}; +use bevy_asset::RenderAssetUsages; +use bevy_asset::{Asset, AssetEvent, AssetId, Assets, UntypedAssetId}; +use bevy_ecs::{ + prelude::{Commands, IntoScheduleConfigs, MessageReader, Res, ResMut, Resource}, + schedule::{ScheduleConfigs, SystemSet}, + system::{ScheduleSystem, StaticSystemParam, SystemParam, SystemParamItem, SystemState}, + world::{FromWorld, Mut}, +}; +use bevy_platform::collections::{HashMap, HashSet}; +use crate::render::render_asset::RenderAssetBytesPerFrameLimiter; +use core::marker::PhantomData; +use thiserror::Error; +use tracing::{debug, error}; + +#[derive(Debug, Error)] +pub enum PrepareAssetError { + #[error("Failed to prepare asset")] + RetryNextUpdate(E), + #[error("Failed to build bind group: {0}")] + AsBindGroupError(AsBindGroupError), +} + +/// The system set during which we extract modified assets to the render world. +#[derive(SystemSet, Clone, PartialEq, Eq, Debug, Hash)] +pub struct AssetExtractionSystems; + +/// Deprecated alias for [`AssetExtractionSystems`]. +#[deprecated(since = "0.17.0", note = "Renamed to `AssetExtractionSystems`.")] +pub type ExtractAssetsSet = AssetExtractionSystems; + +/// Describes how an asset gets extracted and prepared for rendering. +/// +/// In the [`ExtractSchedule`] step the [`ErasedRenderAsset::SourceAsset`] is transferred +/// from the "main world" into the "render world". +/// +/// After that in the [`RenderSystems::PrepareAssets`] step the extracted asset +/// is transformed into its GPU-representation of type [`ErasedRenderAsset`]. +pub trait ErasedRenderAsset: Send + Sync + 'static { + /// The representation of the asset in the "main world". + type SourceAsset: Asset + Clone; + /// The target representation of the asset in the "render world". 
+ type ErasedAsset: Send + Sync + 'static + Sized; + + /// Specifies all ECS data required by [`ErasedRenderAsset::prepare_asset`]. + /// + /// For convenience use the [`lifetimeless`](bevy_ecs::system::lifetimeless) [`SystemParam`]. + type Param: SystemParam; + + /// Whether or not to unload the asset after extracting it to the render world. + #[inline] + fn asset_usage(_source_asset: &Self::SourceAsset) -> RenderAssetUsages { + RenderAssetUsages::default() + } + + /// Size of the data the asset will upload to the gpu. Specifying a return value + /// will allow the asset to be throttled via [`RenderAssetBytesPerFrameLimiter`]. + #[inline] + #[expect( + unused_variables, + reason = "The parameters here are intentionally unused by the default implementation; however, putting underscores here will result in the underscores being copied by rust-analyzer's tab completion." + )] + fn byte_len(erased_asset: &Self::SourceAsset) -> Option { + None + } + + /// Prepares the [`ErasedRenderAsset::SourceAsset`] for the GPU by transforming it into a [`ErasedRenderAsset`]. + /// + /// ECS data may be accessed via `param`. + fn prepare_asset( + source_asset: Self::SourceAsset, + asset_id: AssetId, + param: &mut SystemParamItem, + ) -> Result>; + + /// Called whenever the [`ErasedRenderAsset::SourceAsset`] has been removed. + /// + /// You can implement this method if you need to access ECS data (via + /// `_param`) in order to perform cleanup tasks when the asset is removed. + /// + /// The default implementation does nothing. + fn unload_asset( + _source_asset: AssetId, + _param: &mut SystemParamItem, + ) { + } +} + +/// This plugin extracts the changed assets from the "app world" into the "render world" +/// and prepares them for the GPU. They can then be accessed from the [`ErasedRenderAssets`] resource. +/// +/// Therefore it sets up the [`ExtractSchedule`] and +/// [`RenderSystems::PrepareAssets`] steps for the specified [`ErasedRenderAsset`]. 
+/// +/// The `AFTER` generic parameter can be used to specify that `A::prepare_asset` should not be run until +/// `prepare_assets::` has completed. This allows the `prepare_asset` function to depend on another +/// prepared [`ErasedRenderAsset`], for example `Mesh::prepare_asset` relies on `ErasedRenderAssets::` for morph +/// targets, so the plugin is created as `ErasedRenderAssetPlugin::::default()`. +pub struct ErasedRenderAssetPlugin< + A: ErasedRenderAsset, + AFTER: ErasedRenderAssetDependency + 'static = (), +> { + phantom: PhantomData (A, AFTER)>, +} + +impl Default + for ErasedRenderAssetPlugin +{ + fn default() -> Self { + Self { + phantom: Default::default(), + } + } +} + +impl Plugin + for ErasedRenderAssetPlugin +{ + fn build(&self, app: &mut App) { + app.init_resource::>(); + } + + fn finish(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::>() + .init_resource::>() + .init_resource::>() + .add_systems( + ExtractSchedule, + extract_erased_render_asset::.in_set(AssetExtractionSystems), + ); + AFTER::register_system( + render_app, + prepare_erased_assets::.in_set(RenderSystems::PrepareAssets), + ); + } + } +} + +// helper to allow specifying dependencies between render assets +pub trait ErasedRenderAssetDependency { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs); +} + +impl ErasedRenderAssetDependency for () { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs) { + render_app.add_systems(Render, system); + } +} + +impl ErasedRenderAssetDependency for A { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs) { + render_app.add_systems(Render, system.after(prepare_erased_assets::)); + } +} + +/// Temporarily stores the extracted and removed assets of the current frame. +#[derive(Resource)] +pub struct ExtractedAssets { + /// The assets extracted this frame. 
+ /// + /// These are assets that were either added or modified this frame. + pub extracted: Vec<(AssetId, A::SourceAsset)>, + + /// IDs of the assets that were removed this frame. + /// + /// These assets will not be present in [`ExtractedAssets::extracted`]. + pub removed: HashSet>, + + /// IDs of the assets that were modified this frame. + pub modified: HashSet>, + + /// IDs of the assets that were added this frame. + pub added: HashSet>, +} + +impl Default for ExtractedAssets { + fn default() -> Self { + Self { + extracted: Default::default(), + removed: Default::default(), + modified: Default::default(), + added: Default::default(), + } + } +} + +/// Stores all GPU representations ([`ErasedRenderAsset`]) +/// of [`ErasedRenderAsset::SourceAsset`] as long as they exist. +#[derive(Resource)] +pub struct ErasedRenderAssets(HashMap); + +impl Default for ErasedRenderAssets { + fn default() -> Self { + Self(Default::default()) + } +} + +impl ErasedRenderAssets { + pub fn get(&self, id: impl Into) -> Option<&ERA> { + self.0.get(&id.into()) + } + + pub fn get_mut(&mut self, id: impl Into) -> Option<&mut ERA> { + self.0.get_mut(&id.into()) + } + + pub fn insert(&mut self, id: impl Into, value: ERA) -> Option { + self.0.insert(id.into(), value) + } + + pub fn remove(&mut self, id: impl Into) -> Option { + self.0.remove(&id.into()) + } + + pub fn iter(&self) -> impl Iterator { + self.0.iter().map(|(k, v)| (*k, v)) + } + + pub fn iter_mut(&mut self) -> impl Iterator { + self.0.iter_mut().map(|(k, v)| (*k, v)) + } +} + +#[derive(Resource)] +struct CachedExtractErasedRenderAssetSystemState { + state: SystemState<( + MessageReader<'static, 'static, AssetEvent>, + ResMut<'static, Assets>, + )>, +} + +impl FromWorld for CachedExtractErasedRenderAssetSystemState { + fn from_world(world: &mut bevy_ecs::world::World) -> Self { + Self { + state: SystemState::new(world), + } + } +} + +/// This system extracts all created or modified assets of the corresponding 
[`ErasedRenderAsset::SourceAsset`] type +/// into the "render world". +pub(crate) fn extract_erased_render_asset( + mut commands: Commands, + mut main_world: ResMut, +) { + main_world.resource_scope( + |world, mut cached_state: Mut>| { + let (mut events, mut assets) = cached_state.state.get_mut(world); + + let mut needs_extracting = >::default(); + let mut removed = >::default(); + let mut modified = >::default(); + + for event in events.read() { + #[expect( + clippy::match_same_arms, + reason = "LoadedWithDependencies is marked as a TODO, so it's likely this will no longer lint soon." + )] + match event { + AssetEvent::Added { id } => { + needs_extracting.insert(*id); + } + AssetEvent::Modified { id } => { + needs_extracting.insert(*id); + modified.insert(*id); + } + AssetEvent::Removed { .. } => { + // We don't care that the asset was removed from Assets in the main world. + // An asset is only removed from ErasedRenderAssets when its last handle is dropped (AssetEvent::Unused). + } + AssetEvent::Unused { id } => { + needs_extracting.remove(id); + modified.remove(id); + removed.insert(*id); + } + AssetEvent::LoadedWithDependencies { .. } => { + // TODO: handle this + } + } + } + + let mut extracted_assets = Vec::new(); + let mut added = >::default(); + for id in needs_extracting.drain() { + if let Some(asset) = assets.get(id) { + let asset_usage = A::asset_usage(asset); + if asset_usage.contains(RenderAssetUsages::RENDER_WORLD) { + if asset_usage == RenderAssetUsages::RENDER_WORLD { + if let Some(asset) = assets.remove(id) { + extracted_assets.push((id, asset)); + added.insert(id); + } + } else { + extracted_assets.push((id, asset.clone())); + added.insert(id); + } + } + } + } + + commands.insert_resource(ExtractedAssets:: { + extracted: extracted_assets, + removed, + modified, + added, + }); + cached_state.state.apply(world); + }, + ); +} + +// TODO: consider storing inside system? +/// All assets that should be prepared next frame. 
+#[derive(Resource)] +pub struct PrepareNextFrameAssets { + assets: Vec<(AssetId, A::SourceAsset)>, +} + +impl Default for PrepareNextFrameAssets { + fn default() -> Self { + Self { + assets: Default::default(), + } + } +} + +/// This system prepares all assets of the corresponding [`ErasedRenderAsset::SourceAsset`] type +/// which where extracted this frame for the GPU. +pub fn prepare_erased_assets( + mut extracted_assets: ResMut>, + mut render_assets: ResMut>, + mut prepare_next_frame: ResMut>, + param: StaticSystemParam<::Param>, + bpf: Res, +) { + let mut wrote_asset_count = 0; + + let mut param = param.into_inner(); + let queued_assets = core::mem::take(&mut prepare_next_frame.assets); + for (id, extracted_asset) in queued_assets { + if extracted_assets.removed.contains(&id) || extracted_assets.added.contains(&id) { + // skip previous frame's assets that have been removed or updated + continue; + } + + let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) { + // we could check if available bytes > byte_len here, but we want to make some + // forward progress even if the asset is larger than the max bytes per frame. + // this way we always write at least one (sized) asset per frame. + // in future we could also consider partial asset uploads. 
+ if bpf.exhausted() { + prepare_next_frame.assets.push((id, extracted_asset)); + continue; + } + size + } else { + 0 + }; + + match A::prepare_asset(extracted_asset, id, &mut param) { + Ok(prepared_asset) => { + render_assets.insert(id, prepared_asset); + bpf.write_bytes(write_bytes); + wrote_asset_count += 1; + } + Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { + prepare_next_frame.assets.push((id, extracted_asset)); + } + Err(PrepareAssetError::AsBindGroupError(e)) => { + error!( + "{} Bind group construction failed: {e}", + core::any::type_name::() + ); + } + } + } + + for removed in extracted_assets.removed.drain() { + render_assets.remove(removed); + A::unload_asset(removed, &mut param); + } + + for (id, extracted_asset) in extracted_assets.extracted.drain(..) { + // we remove previous here to ensure that if we are updating the asset then + // any users will not see the old asset after a new asset is extracted, + // even if the new asset is not yet ready or we are out of bytes to write. 
+ render_assets.remove(id); + + let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) { + if bpf.exhausted() { + prepare_next_frame.assets.push((id, extracted_asset)); + continue; + } + size + } else { + 0 + }; + + match A::prepare_asset(extracted_asset, id, &mut param) { + Ok(prepared_asset) => { + render_assets.insert(id, prepared_asset); + bpf.write_bytes(write_bytes); + wrote_asset_count += 1; + } + Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { + prepare_next_frame.assets.push((id, extracted_asset)); + } + Err(PrepareAssetError::AsBindGroupError(e)) => { + error!( + "{} Bind group construction failed: {e}", + core::any::type_name::() + ); + } + } + } + + if bpf.exhausted() && !prepare_next_frame.assets.is_empty() { + debug!( + "{} write budget exhausted with {} assets remaining (wrote {})", + core::any::type_name::(), + prepare_next_frame.assets.len(), + wrote_asset_count + ); + } +} diff --git a/crates/libmarathon/src/render/experimental/mip_generation/downsample_depth.wgsl b/crates/libmarathon/src/render/experimental/mip_generation/downsample_depth.wgsl new file mode 100644 index 0000000..12a4d2b --- /dev/null +++ b/crates/libmarathon/src/render/experimental/mip_generation/downsample_depth.wgsl @@ -0,0 +1,338 @@ +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var mip_0: texture_storage_2d; +#else +#ifdef MESHLET +@group(0) @binding(0) var mip_0: texture_storage_2d; +#else // MESHLET +#ifdef MULTISAMPLE +@group(0) @binding(0) var mip_0: texture_depth_multisampled_2d; +#else // MULTISAMPLE +@group(0) @binding(0) var mip_0: texture_depth_2d; +#endif // MULTISAMPLE +#endif // MESHLET +#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(1) var mip_1: texture_storage_2d; +@group(0) @binding(2) var mip_2: texture_storage_2d; +@group(0) @binding(3) var mip_3: texture_storage_2d; +@group(0) @binding(4) var mip_4: texture_storage_2d; +@group(0) @binding(5) var mip_5: texture_storage_2d; 
+@group(0) @binding(6) var mip_6: texture_storage_2d; +@group(0) @binding(7) var mip_7: texture_storage_2d; +@group(0) @binding(8) var mip_8: texture_storage_2d; +@group(0) @binding(9) var mip_9: texture_storage_2d; +@group(0) @binding(10) var mip_10: texture_storage_2d; +@group(0) @binding(11) var mip_11: texture_storage_2d; +@group(0) @binding(12) var mip_12: texture_storage_2d; +@group(0) @binding(13) var samplr: sampler; +struct Constants { max_mip_level: u32 } +var constants: Constants; + +/// Generates a hierarchical depth buffer. +/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528 + +// TODO: +// * Subgroup support +// * True single pass downsampling + +var intermediate_memory: array, 16>; + +@compute +@workgroup_size(256, 1, 1) +fn downsample_depth_first( + @builtin(workgroup_id) workgroup_id: vec3u, + @builtin(local_invocation_index) local_invocation_index: u32, +) { + let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); + let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); + let y = sub_xy.y + 8u * (local_invocation_index >> 7u); + + downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index); + + downsample_mips_2_to_5(x, y, workgroup_id.xy, local_invocation_index); +} + +@compute +@workgroup_size(256, 1, 1) +fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) { + let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); + let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); + let y = sub_xy.y + 8u * (local_invocation_index >> 7u); + + downsample_mips_6_and_7(x, y); + + downsample_mips_8_to_11(x, y, local_invocation_index); +} + +fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + var v: vec4f; + + var tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u); + var pix = vec2(workgroup_id 
* 32u) + vec2(x, y); + v[0] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[0])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u); + pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y); + v[1] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[1])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u); + pix = vec2(workgroup_id * 32u) + vec2(x, y + 16u); + v[2] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[2])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u); + pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y + 16u); + v[3] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[3])); + + if constants.max_mip_level <= 1u { return; } + + for (var i = 0u; i < 4u; i++) { + intermediate_memory[x][y] = v[i]; + workgroupBarrier(); + if local_invocation_index < 64u { + v[i] = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + pix = (workgroup_id * 16u) + vec2( + x + (i % 2u) * 8u, + y + (i / 2u) * 8u, + ); + textureStore(mip_2, pix, vec4(v[i])); + } + workgroupBarrier(); + } + + if local_invocation_index < 64u { + intermediate_memory[x + 0u][y + 0u] = v[0]; + intermediate_memory[x + 8u][y + 0u] = v[1]; + intermediate_memory[x + 0u][y + 8u] = v[2]; + intermediate_memory[x + 8u][y + 8u] = v[3]; + } +} + +fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if constants.max_mip_level <= 2u { return; } + workgroupBarrier(); + downsample_mip_2(x, y, workgroup_id, local_invocation_index); + + if constants.max_mip_level <= 3u { return; } + workgroupBarrier(); + downsample_mip_3(x, y, workgroup_id, local_invocation_index); + + if constants.max_mip_level <= 4u { return; } + workgroupBarrier(); + downsample_mip_4(x, y, workgroup_id, local_invocation_index); + + if constants.max_mip_level 
<= 5u { return; } + workgroupBarrier(); + downsample_mip_5(workgroup_id, local_invocation_index); +} + +fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 64u { + let v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +fn downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 16u { + let v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u], + )); + textureStore(mip_4, (workgroup_id * 4u) + vec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +fn downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 4u { + let v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u], + )); + textureStore(mip_5, (workgroup_id * 2u) + vec2(x, y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +fn downsample_mip_5(workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 1u { + let v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u], + )); + textureStore(mip_6, workgroup_id, vec4(v)); + } +} + +fn downsample_mips_6_and_7(x: u32, y: u32) { + var v: vec4f; + + var tex = 
vec2(x * 4u + 0u, y * 4u + 0u); + var pix = vec2(x * 2u + 0u, y * 2u + 0u); + v[0] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[0])); + + tex = vec2(x * 4u + 2u, y * 4u + 0u); + pix = vec2(x * 2u + 1u, y * 2u + 0u); + v[1] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[1])); + + tex = vec2(x * 4u + 0u, y * 4u + 2u); + pix = vec2(x * 2u + 0u, y * 2u + 1u); + v[2] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[2])); + + tex = vec2(x * 4u + 2u, y * 4u + 2u); + pix = vec2(x * 2u + 1u, y * 2u + 1u); + v[3] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[3])); + + if constants.max_mip_level <= 7u { return; } + + let vr = reduce_4(v); + textureStore(mip_8, vec2(x, y), vec4(vr)); + intermediate_memory[x][y] = vr; +} + +fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) { + if constants.max_mip_level <= 8u { return; } + workgroupBarrier(); + downsample_mip_8(x, y, local_invocation_index); + + if constants.max_mip_level <= 9u { return; } + workgroupBarrier(); + downsample_mip_9(x, y, local_invocation_index); + + if constants.max_mip_level <= 10u { return; } + workgroupBarrier(); + downsample_mip_10(x, y, local_invocation_index); + + if constants.max_mip_level <= 11u { return; } + workgroupBarrier(); + downsample_mip_11(local_invocation_index); +} + +fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 64u { + let v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + textureStore(mip_9, vec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +fn downsample_mip_9(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 16u { + let v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u 
+ 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u], + )); + textureStore(mip_10, vec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +fn downsample_mip_10(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 4u { + let v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u], + )); + textureStore(mip_11, vec2(x, y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +fn downsample_mip_11(local_invocation_index: u32) { + if local_invocation_index < 1u { + let v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u], + )); + textureStore(mip_12, vec2(0u, 0u), vec4(v)); + } +} + +fn remap_for_wave_reduction(a: u32) -> vec2u { + return vec2( + insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), + insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u), + ); +} + +fn reduce_load_mip_0(tex: vec2u) -> f32 { + let a = load_mip_0(tex.x, tex.y); + let b = load_mip_0(tex.x + 1u, tex.y); + let c = load_mip_0(tex.x, tex.y + 1u); + let d = load_mip_0(tex.x + 1u, tex.y + 1u); + return reduce_4(vec4(a, b, c, d)); +} + +fn reduce_load_mip_6(tex: vec2u) -> f32 { + return reduce_4(vec4( + textureLoad(mip_6, tex + vec2(0u, 0u)).r, + textureLoad(mip_6, tex + vec2(0u, 1u)).r, + textureLoad(mip_6, tex + vec2(1u, 0u)).r, + textureLoad(mip_6, tex + vec2(1u, 1u)).r, + )); +} + +fn load_mip_0(x: u32, y: u32) -> f32 { +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let visibility = textureLoad(mip_0, vec2(x, y)).r; + return bitcast(u32(visibility >> 32u)); +#else // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +#ifdef MESHLET + let visibility = textureLoad(mip_0, 
vec2(x, y)).r; + return bitcast(visibility); +#else // MESHLET + // Downsample the top level. +#ifdef MULTISAMPLE + // The top level is multisampled, so we need to loop over all the samples + // and reduce them to 1. + var result = textureLoad(mip_0, vec2(x, y), 0); + let sample_count = i32(textureNumSamples(mip_0)); + for (var sample = 1; sample < sample_count; sample += 1) { + result = min(result, textureLoad(mip_0, vec2(x, y), sample)); + } + return result; +#else // MULTISAMPLE + return textureLoad(mip_0, vec2(x, y), 0); +#endif // MULTISAMPLE +#endif // MESHLET +#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +} + +fn reduce_4(v: vec4f) -> f32 { + return min(min(v.x, v.y), min(v.z, v.w)); +} diff --git a/crates/libmarathon/src/render/experimental/mip_generation/mod.rs b/crates/libmarathon/src/render/experimental/mip_generation/mod.rs new file mode 100644 index 0000000..f773e75 --- /dev/null +++ b/crates/libmarathon/src/render/experimental/mip_generation/mod.rs @@ -0,0 +1,783 @@ +//! Downsampling of textures to produce mipmap levels. +//! +//! Currently, this module only supports generation of hierarchical Z buffers +//! for occlusion culling. It's marked experimental because the shader is +//! designed only for power-of-two texture sizes and is slightly incorrect for +//! non-power-of-two depth buffer sizes. 
+ +use core::array; + +use crate::render::core_3d::{ + graph::{Core3d, Node3d}, + prepare_core_3d_depth_textures, +}; +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, load_embedded_asset, Handle}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + prelude::{resource_exists, Without}, + query::{Or, QueryState, With}, + resource::Resource, + schedule::IntoScheduleConfigs as _, + system::{lifetimeless::Read, Commands, Local, Query, Res, ResMut}, + world::{FromWorld, World}, +}; +use bevy_math::{uvec2, UVec2, Vec4Swizzles as _}; +use crate::render::{batching::gpu_preprocessing::GpuPreprocessingSupport, RenderStartup}; +use crate::render::{ + experimental::occlusion_culling::{ + OcclusionCulling, OcclusionCullingSubview, OcclusionCullingSubviewEntities, + }, + render_graph::{Node, NodeRunError, RenderGraphContext, RenderGraphExt}, + render_resource::{ + binding_types::{sampler, texture_2d, texture_2d_multisampled, texture_storage_2d}, + BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, + CachedComputePipelineId, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor, + Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler, SamplerBindingType, + SamplerDescriptor, ShaderStages, SpecializedComputePipeline, SpecializedComputePipelines, + StorageTextureAccess, TextureAspect, TextureDescriptor, TextureDimension, TextureFormat, + TextureSampleType, TextureUsages, TextureView, TextureViewDescriptor, TextureViewDimension, + }, + renderer::{RenderContext, RenderDevice}, + texture::TextureCache, + view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture}, + Render, RenderApp, RenderSystems, +}; +use bevy_shader::Shader; +use bevy_utils::default; +use bitflags::bitflags; +use tracing::debug; + +/// Identifies the `downsample_depth.wgsl` shader. +#[derive(Resource, Deref)] +pub struct DownsampleDepthShader(Handle); + +/// The maximum number of mip levels that we can produce. 
+/// +/// 2^12 is 4096, so that's the maximum size of the depth buffer that we +/// support. +pub const DEPTH_PYRAMID_MIP_COUNT: usize = 12; + +/// A plugin that allows Bevy to repeatedly downsample textures to create +/// mipmaps. +/// +/// Currently, this is only used for hierarchical Z buffer generation for the +/// purposes of occlusion culling. +pub struct MipGenerationPlugin; + +impl Plugin for MipGenerationPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "downsample_depth.wgsl"); + + let downsample_depth_shader = load_embedded_asset!(app, "downsample_depth.wgsl"); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .insert_resource(DownsampleDepthShader(downsample_depth_shader)) + .init_resource::>() + .add_render_graph_node::(Core3d, Node3d::EarlyDownsampleDepth) + .add_render_graph_node::(Core3d, Node3d::LateDownsampleDepth) + .add_render_graph_edges( + Core3d, + ( + Node3d::EarlyPrepass, + Node3d::EarlyDeferredPrepass, + Node3d::EarlyDownsampleDepth, + Node3d::LatePrepass, + Node3d::LateDeferredPrepass, + ), + ) + .add_render_graph_edges( + Core3d, + ( + Node3d::StartMainPassPostProcessing, + Node3d::LateDownsampleDepth, + Node3d::EndMainPassPostProcessing, + ), + ) + .add_systems(RenderStartup, init_depth_pyramid_dummy_texture) + .add_systems( + Render, + create_downsample_depth_pipelines.in_set(RenderSystems::Prepare), + ) + .add_systems( + Render, + ( + prepare_view_depth_pyramids, + prepare_downsample_depth_view_bind_groups, + ) + .chain() + .in_set(RenderSystems::PrepareResources) + .run_if(resource_exists::) + .after(prepare_core_3d_depth_textures), + ); + } +} + +/// The nodes that produce a hierarchical Z-buffer, also known as a depth +/// pyramid. +/// +/// This runs the single-pass downsampling (SPD) shader with the *min* filter in +/// order to generate a series of mipmaps for the Z buffer. The resulting +/// hierarchical Z-buffer can be used for occlusion culling. 
+/// +/// There are two instances of this node. The *early* downsample depth pass is +/// the first hierarchical Z-buffer stage, which runs after the early prepass +/// and before the late prepass. It prepares the Z-buffer for the bounding box +/// tests that the late mesh preprocessing stage will perform. The *late* +/// downsample depth pass runs at the end of the main phase. It prepares the +/// Z-buffer for the occlusion culling that the early mesh preprocessing phase +/// of the *next* frame will perform. +/// +/// This node won't do anything if occlusion culling isn't on. +pub struct DownsampleDepthNode { + /// The query that we use to find views that need occlusion culling for + /// their Z-buffer. + main_view_query: QueryState<( + Read, + Read, + Read, + Option>, + )>, + /// The query that we use to find shadow maps that need occlusion culling. + shadow_view_query: QueryState<( + Read, + Read, + Read, + )>, +} + +impl FromWorld for DownsampleDepthNode { + fn from_world(world: &mut World) -> Self { + Self { + main_view_query: QueryState::new(world), + shadow_view_query: QueryState::new(world), + } + } +} + +impl Node for DownsampleDepthNode { + fn update(&mut self, world: &mut World) { + self.main_view_query.update_archetypes(world); + self.shadow_view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + render_graph_context: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let Ok(( + view_depth_pyramid, + view_downsample_depth_bind_group, + view_depth_texture, + maybe_view_light_entities, + )) = self + .main_view_query + .get_manual(world, render_graph_context.view_entity()) + else { + return Ok(()); + }; + + // Downsample depth for the main Z-buffer. 
+ downsample_depth( + render_graph_context, + render_context, + world, + view_depth_pyramid, + view_downsample_depth_bind_group, + uvec2( + view_depth_texture.texture.width(), + view_depth_texture.texture.height(), + ), + view_depth_texture.texture.sample_count(), + )?; + + // Downsample depth for shadow maps that have occlusion culling enabled. + if let Some(view_light_entities) = maybe_view_light_entities { + for &view_light_entity in &view_light_entities.0 { + let Ok((view_depth_pyramid, view_downsample_depth_bind_group, occlusion_culling)) = + self.shadow_view_query.get_manual(world, view_light_entity) + else { + continue; + }; + downsample_depth( + render_graph_context, + render_context, + world, + view_depth_pyramid, + view_downsample_depth_bind_group, + UVec2::splat(occlusion_culling.depth_texture_size), + 1, + )?; + } + } + + Ok(()) + } +} + +/// Produces a depth pyramid from the current depth buffer for a single view. +/// The resulting depth pyramid can be used for occlusion testing. +fn downsample_depth<'w>( + render_graph_context: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + view_depth_pyramid: &ViewDepthPyramid, + view_downsample_depth_bind_group: &ViewDownsampleDepthBindGroup, + view_size: UVec2, + sample_count: u32, +) -> Result<(), NodeRunError> { + let downsample_depth_pipelines = world.resource::(); + let pipeline_cache = world.resource::(); + + // Despite the name "single-pass downsampling", we actually need two + // passes because of the lack of `coherent` buffers in WGPU/WGSL. + // Between each pass, there's an implicit synchronization barrier. + + // Fetch the appropriate pipeline ID, depending on whether the depth + // buffer is multisampled or not. 
+ let (Some(first_downsample_depth_pipeline_id), Some(second_downsample_depth_pipeline_id)) = + (if sample_count > 1 { + ( + downsample_depth_pipelines.first_multisample.pipeline_id, + downsample_depth_pipelines.second_multisample.pipeline_id, + ) + } else { + ( + downsample_depth_pipelines.first.pipeline_id, + downsample_depth_pipelines.second.pipeline_id, + ) + }) + else { + return Ok(()); + }; + + // Fetch the pipelines for the two passes. + let (Some(first_downsample_depth_pipeline), Some(second_downsample_depth_pipeline)) = ( + pipeline_cache.get_compute_pipeline(first_downsample_depth_pipeline_id), + pipeline_cache.get_compute_pipeline(second_downsample_depth_pipeline_id), + ) else { + return Ok(()); + }; + + // Run the depth downsampling. + view_depth_pyramid.downsample_depth( + &format!("{:?}", render_graph_context.label()), + render_context, + view_size, + view_downsample_depth_bind_group, + first_downsample_depth_pipeline, + second_downsample_depth_pipeline, + ); + Ok(()) +} + +/// A single depth downsample pipeline. +#[derive(Resource)] +pub struct DownsampleDepthPipeline { + /// The bind group layout for this pipeline. + bind_group_layout: BindGroupLayout, + /// A handle that identifies the compiled shader. + pipeline_id: Option, + /// The shader asset handle. + shader: Handle, +} + +impl DownsampleDepthPipeline { + /// Creates a new [`DownsampleDepthPipeline`] from a bind group layout and the downsample + /// shader. + /// + /// This doesn't actually specialize the pipeline; that must be done + /// afterward. + fn new(bind_group_layout: BindGroupLayout, shader: Handle) -> DownsampleDepthPipeline { + DownsampleDepthPipeline { + bind_group_layout, + pipeline_id: None, + shader, + } + } +} + +/// Stores all depth buffer downsampling pipelines. +#[derive(Resource)] +pub struct DownsampleDepthPipelines { + /// The first pass of the pipeline, when the depth buffer is *not* + /// multisampled. 
+ first: DownsampleDepthPipeline, + /// The second pass of the pipeline, when the depth buffer is *not* + /// multisampled. + second: DownsampleDepthPipeline, + /// The first pass of the pipeline, when the depth buffer is multisampled. + first_multisample: DownsampleDepthPipeline, + /// The second pass of the pipeline, when the depth buffer is multisampled. + second_multisample: DownsampleDepthPipeline, + /// The sampler that the depth downsampling shader uses to sample the depth + /// buffer. + sampler: Sampler, +} + +/// Creates the [`DownsampleDepthPipelines`] if downsampling is supported on the +/// current platform. +fn create_downsample_depth_pipelines( + mut commands: Commands, + render_device: Res, + pipeline_cache: Res, + mut specialized_compute_pipelines: ResMut>, + gpu_preprocessing_support: Res, + downsample_depth_shader: Res, + mut has_run: Local, +) { + // Only run once. + // We can't use a `resource_exists` or similar run condition here because + // this function might fail to create downsample depth pipelines if the + // current platform doesn't support compute shaders. + if *has_run { + return; + } + *has_run = true; + + if !gpu_preprocessing_support.is_culling_supported() { + debug!("Downsample depth is not supported on this platform."); + return; + } + + // Create the bind group layouts. The bind group layouts are identical + // between the first and second passes, so the only thing we need to + // treat specially is the type of the first mip level (non-multisampled + // or multisampled). + let standard_bind_group_layout = + create_downsample_depth_bind_group_layout(&render_device, false); + let multisampled_bind_group_layout = + create_downsample_depth_bind_group_layout(&render_device, true); + + // Create the depth pyramid sampler. This is shared among all shaders. + let sampler = render_device.create_sampler(&SamplerDescriptor { + label: Some("depth pyramid sampler"), + ..SamplerDescriptor::default() + }); + + // Initialize the pipelines. 
+ let mut downsample_depth_pipelines = DownsampleDepthPipelines { + first: DownsampleDepthPipeline::new( + standard_bind_group_layout.clone(), + downsample_depth_shader.0.clone(), + ), + second: DownsampleDepthPipeline::new( + standard_bind_group_layout.clone(), + downsample_depth_shader.0.clone(), + ), + first_multisample: DownsampleDepthPipeline::new( + multisampled_bind_group_layout.clone(), + downsample_depth_shader.0.clone(), + ), + second_multisample: DownsampleDepthPipeline::new( + multisampled_bind_group_layout.clone(), + downsample_depth_shader.0.clone(), + ), + sampler, + }; + + // Specialize each pipeline with the appropriate + // `DownsampleDepthPipelineKey`. + downsample_depth_pipelines.first.pipeline_id = Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.first, + DownsampleDepthPipelineKey::empty(), + )); + downsample_depth_pipelines.second.pipeline_id = Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.second, + DownsampleDepthPipelineKey::SECOND_PHASE, + )); + downsample_depth_pipelines.first_multisample.pipeline_id = + Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.first_multisample, + DownsampleDepthPipelineKey::MULTISAMPLE, + )); + downsample_depth_pipelines.second_multisample.pipeline_id = + Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.second_multisample, + DownsampleDepthPipelineKey::SECOND_PHASE | DownsampleDepthPipelineKey::MULTISAMPLE, + )); + + commands.insert_resource(downsample_depth_pipelines); +} + +/// Creates a single bind group layout for the downsample depth pass. 
+fn create_downsample_depth_bind_group_layout( + render_device: &RenderDevice, + is_multisampled: bool, +) -> BindGroupLayout { + render_device.create_bind_group_layout( + if is_multisampled { + "downsample multisample depth bind group layout" + } else { + "downsample depth bind group layout" + }, + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + // We only care about the multisample status of the depth buffer + // for the first mip level. After the first mip level is + // sampled, we drop to a single sample. + if is_multisampled { + texture_2d_multisampled(TextureSampleType::Depth) + } else { + texture_2d(TextureSampleType::Depth) + }, + // All the mip levels follow: + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::ReadWrite), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + sampler(SamplerBindingType::NonFiltering), + ), + ), + ) +} + +bitflags! { + /// Uniquely identifies a configuration of the downsample depth shader. 
+ /// + /// Note that meshlets maintain their downsample depth shaders on their own + /// and don't use this infrastructure; thus there's no flag for meshlets in + /// here, even though the shader has defines for it. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct DownsampleDepthPipelineKey: u8 { + /// True if the depth buffer is multisampled. + const MULTISAMPLE = 1; + /// True if this shader is the second phase of the downsample depth + /// process; false if this shader is the first phase. + const SECOND_PHASE = 2; + } +} + +impl SpecializedComputePipeline for DownsampleDepthPipeline { + type Key = DownsampleDepthPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let mut shader_defs = vec![]; + if key.contains(DownsampleDepthPipelineKey::MULTISAMPLE) { + shader_defs.push("MULTISAMPLE".into()); + } + + let label = format!( + "downsample depth{}{} pipeline", + if key.contains(DownsampleDepthPipelineKey::MULTISAMPLE) { + " multisample" + } else { + "" + }, + if key.contains(DownsampleDepthPipelineKey::SECOND_PHASE) { + " second phase" + } else { + " first phase" + } + ) + .into(); + + ComputePipelineDescriptor { + label: Some(label), + layout: vec![self.bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: self.shader.clone(), + shader_defs, + entry_point: Some(if key.contains(DownsampleDepthPipelineKey::SECOND_PHASE) { + "downsample_depth_second".into() + } else { + "downsample_depth_first".into() + }), + ..default() + } + } +} + +/// Stores a placeholder texture that can be bound to a depth pyramid binding if +/// no depth pyramid is needed. 
+#[derive(Resource, Deref, DerefMut)] +pub struct DepthPyramidDummyTexture(TextureView); + +pub fn init_depth_pyramid_dummy_texture(mut commands: Commands, render_device: Res) { + commands.insert_resource(DepthPyramidDummyTexture( + create_depth_pyramid_dummy_texture( + &render_device, + "depth pyramid dummy texture", + "depth pyramid dummy texture view", + ), + )); +} + +/// Creates a placeholder texture that can be bound to a depth pyramid binding +/// if no depth pyramid is needed. +pub fn create_depth_pyramid_dummy_texture( + render_device: &RenderDevice, + texture_label: &'static str, + texture_view_label: &'static str, +) -> TextureView { + render_device + .create_texture(&TextureDescriptor { + label: Some(texture_label), + size: Extent3d::default(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }) + .create_view(&TextureViewDescriptor { + label: Some(texture_view_label), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) +} + +/// Stores a hierarchical Z-buffer for a view, which is a series of mipmaps +/// useful for efficient occlusion culling. +/// +/// This will only be present on a view when occlusion culling is enabled. +#[derive(Component)] +pub struct ViewDepthPyramid { + /// A texture view containing the entire depth texture. + pub all_mips: TextureView, + /// A series of texture views containing one mip level each. + pub mips: [TextureView; DEPTH_PYRAMID_MIP_COUNT], + /// The total number of mipmap levels. + /// + /// This is the base-2 logarithm of the greatest dimension of the depth + /// buffer, rounded up. + pub mip_count: u32, +} + +impl ViewDepthPyramid { + /// Allocates a new depth pyramid for a depth buffer with the given size. 
+ pub fn new( + render_device: &RenderDevice, + texture_cache: &mut TextureCache, + depth_pyramid_dummy_texture: &TextureView, + size: UVec2, + texture_label: &'static str, + texture_view_label: &'static str, + ) -> ViewDepthPyramid { + // Calculate the size of the depth pyramid. + let depth_pyramid_size = Extent3d { + width: size.x.div_ceil(2), + height: size.y.div_ceil(2), + depth_or_array_layers: 1, + }; + + // Calculate the number of mip levels we need. + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); + + // Create the depth pyramid. + let depth_pyramid = texture_cache.get( + render_device, + TextureDescriptor { + label: Some(texture_label), + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + // Create individual views for each level of the depth pyramid. + let depth_pyramid_mips = array::from_fn(|i| { + if (i as u32) < depth_pyramid_mip_count { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some(texture_view_label), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::All, + base_mip_level: i as u32, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) + } else { + (*depth_pyramid_dummy_texture).clone() + } + }); + + // Create the view for the depth pyramid as a whole. + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + Self { + all_mips: depth_pyramid_all_mips, + mips: depth_pyramid_mips, + mip_count: depth_pyramid_mip_count, + } + } + + /// Creates a bind group that allows the depth buffer to be attached to the + /// `downsample_depth.wgsl` shader. 
+ pub fn create_bind_group<'a, R>( + &'a self, + render_device: &RenderDevice, + label: &'static str, + bind_group_layout: &BindGroupLayout, + source_image: R, + sampler: &'a Sampler, + ) -> BindGroup + where + R: IntoBinding<'a>, + { + render_device.create_bind_group( + label, + bind_group_layout, + &BindGroupEntries::sequential(( + source_image, + &self.mips[0], + &self.mips[1], + &self.mips[2], + &self.mips[3], + &self.mips[4], + &self.mips[5], + &self.mips[6], + &self.mips[7], + &self.mips[8], + &self.mips[9], + &self.mips[10], + &self.mips[11], + sampler, + )), + ) + } + + /// Invokes the shaders to generate the hierarchical Z-buffer. + /// + /// This is intended to be invoked as part of a render node. + pub fn downsample_depth( + &self, + label: &str, + render_context: &mut RenderContext, + view_size: UVec2, + downsample_depth_bind_group: &BindGroup, + downsample_depth_first_pipeline: &ComputePipeline, + downsample_depth_second_pipeline: &ComputePipeline, + ) { + let command_encoder = render_context.command_encoder(); + let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + downsample_pass.set_pipeline(downsample_depth_first_pipeline); + // Pass the mip count as a push constant, for simplicity. + downsample_pass.set_push_constants(0, &self.mip_count.to_le_bytes()); + downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]); + downsample_pass.dispatch_workgroups(view_size.x.div_ceil(64), view_size.y.div_ceil(64), 1); + + if self.mip_count >= 7 { + downsample_pass.set_pipeline(downsample_depth_second_pipeline); + downsample_pass.dispatch_workgroups(1, 1, 1); + } + } +} + +/// Creates depth pyramids for views that have occlusion culling enabled. 
+pub fn prepare_view_depth_pyramids( + mut commands: Commands, + render_device: Res, + mut texture_cache: ResMut, + depth_pyramid_dummy_texture: Res, + views: Query<(Entity, &ExtractedView), (With, Without)>, +) { + for (view_entity, view) in &views { + commands.entity(view_entity).insert(ViewDepthPyramid::new( + &render_device, + &mut texture_cache, + &depth_pyramid_dummy_texture, + view.viewport.zw(), + "view depth pyramid texture", + "view depth pyramid texture view", + )); + } +} + +/// The bind group that we use to attach the depth buffer and depth pyramid for +/// a view to the `downsample_depth.wgsl` shader. +/// +/// This will only be present for a view if occlusion culling is enabled. +#[derive(Component, Deref, DerefMut)] +pub struct ViewDownsampleDepthBindGroup(BindGroup); + +/// Creates the [`ViewDownsampleDepthBindGroup`]s for all views with occlusion +/// culling enabled. +fn prepare_downsample_depth_view_bind_groups( + mut commands: Commands, + render_device: Res, + downsample_depth_pipelines: Res, + view_depth_textures: Query< + ( + Entity, + &ViewDepthPyramid, + Option<&ViewDepthTexture>, + Option<&OcclusionCullingSubview>, + ), + Or<(With, With)>, + >, +) { + for (view_entity, view_depth_pyramid, view_depth_texture, shadow_occlusion_culling) in + &view_depth_textures + { + let is_multisampled = view_depth_texture + .is_some_and(|view_depth_texture| view_depth_texture.texture.sample_count() > 1); + commands + .entity(view_entity) + .insert(ViewDownsampleDepthBindGroup( + view_depth_pyramid.create_bind_group( + &render_device, + if is_multisampled { + "downsample multisample depth bind group" + } else { + "downsample depth bind group" + }, + if is_multisampled { + &downsample_depth_pipelines + .first_multisample + .bind_group_layout + } else { + &downsample_depth_pipelines.first.bind_group_layout + }, + match (view_depth_texture, shadow_occlusion_culling) { + (Some(view_depth_texture), _) => view_depth_texture.view(), + (None, 
Some(shadow_occlusion_culling)) => { + &shadow_occlusion_culling.depth_texture_view + } + (None, None) => panic!("Should never happen"), + }, + &downsample_depth_pipelines.sampler, + ), + )); + } +} diff --git a/crates/libmarathon/src/render/experimental/mod.rs b/crates/libmarathon/src/render/experimental/mod.rs new file mode 100644 index 0000000..47c42bd --- /dev/null +++ b/crates/libmarathon/src/render/experimental/mod.rs @@ -0,0 +1,8 @@ +//! Experimental rendering features. +//! +//! Experimental features are features with known problems, missing features, +//! compatibility issues, low performance, and/or future breaking changes, but +//! are included nonetheless for testing purposes. + +pub mod mip_generation; +pub mod occlusion_culling; diff --git a/crates/libmarathon/src/render/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/libmarathon/src/render/experimental/occlusion_culling/mesh_preprocess_types.wgsl new file mode 100644 index 0000000..a597fb0 --- /dev/null +++ b/crates/libmarathon/src/render/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -0,0 +1,69 @@ +// Types needed for GPU mesh uniform building. + +#define_import_path bevy_pbr::mesh_preprocess_types + +// Per-frame data that the CPU supplies to the GPU. +struct MeshInput { + // The model transform. + world_from_local: mat3x4, + // The lightmap UV rect, packed into 64 bits. + lightmap_uv_rect: vec2, + // Various flags. + flags: u32, + previous_input_index: u32, + first_vertex_index: u32, + first_index_index: u32, + index_count: u32, + current_skin_index: u32, + // Low 16 bits: index of the material inside the bind group data. + // High 16 bits: index of the lightmap in the binding array. + material_and_lightmap_bind_group_slot: u32, + timestamp: u32, + // User supplied index to identify the mesh instance + tag: u32, + pad: u32, +} + +// The `wgpu` indirect parameters structure. This is a union of two structures. 
+// For more information, see the corresponding comment in +// `gpu_preprocessing.rs`. +struct IndirectParametersIndexed { + // `vertex_count` or `index_count`. + index_count: u32, + // `instance_count` in both structures. + instance_count: u32, + // `first_vertex` or `first_index`. + first_index: u32, + // `base_vertex` or `first_instance`. + base_vertex: u32, + // A read-only copy of `instance_index`. + first_instance: u32, +} + +struct IndirectParametersNonIndexed { + vertex_count: u32, + instance_count: u32, + base_vertex: u32, + first_instance: u32, +} + +struct IndirectParametersCpuMetadata { + base_output_index: u32, + batch_set_index: u32, +} + +struct IndirectParametersGpuMetadata { + mesh_index: u32, +#ifdef WRITE_INDIRECT_PARAMETERS_METADATA + early_instance_count: atomic, + late_instance_count: atomic, +#else // WRITE_INDIRECT_PARAMETERS_METADATA + early_instance_count: u32, + late_instance_count: u32, +#endif // WRITE_INDIRECT_PARAMETERS_METADATA +} + +struct IndirectBatchSet { + indirect_parameters_count: atomic, + indirect_parameters_base: u32, +} diff --git a/crates/libmarathon/src/render/experimental/occlusion_culling/mod.rs b/crates/libmarathon/src/render/experimental/occlusion_culling/mod.rs new file mode 100644 index 0000000..0b280a9 --- /dev/null +++ b/crates/libmarathon/src/render/experimental/occlusion_culling/mod.rs @@ -0,0 +1,104 @@ +//! GPU occlusion culling. +//! +//! See [`OcclusionCulling`] for a detailed description of occlusion culling in +//! Bevy. + +use bevy_app::{App, Plugin}; +use bevy_ecs::{component::Component, entity::Entity, prelude::ReflectComponent}; +use bevy_reflect::{prelude::ReflectDefault, Reflect}; +use bevy_shader::load_shader_library; + +use crate::render::{extract_component::ExtractComponent, render_resource::TextureView}; + +/// Enables GPU occlusion culling. +/// +/// See [`OcclusionCulling`] for a detailed description of occlusion culling in +/// Bevy. 
+pub struct OcclusionCullingPlugin; + +impl Plugin for OcclusionCullingPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "mesh_preprocess_types.wgsl"); + } +} + +/// Add this component to a view in order to enable experimental GPU occlusion +/// culling. +/// +/// *Bevy's occlusion culling is currently marked as experimental.* There are +/// known issues whereby, in rare circumstances, occlusion culling can result in +/// meshes being culled that shouldn't be (i.e. meshes that turn invisible). +/// Please try it out and report issues. +/// +/// *Occlusion culling* allows Bevy to avoid rendering objects that are fully +/// behind other opaque or alpha tested objects. This is different from, and +/// complements, depth fragment rejection as the `DepthPrepass` enables. While +/// depth rejection allows Bevy to avoid rendering *pixels* that are behind +/// other objects, the GPU still has to examine those pixels to reject them, +/// which requires transforming the vertices of the objects and performing +/// skinning if the objects were skinned. Occlusion culling allows the GPU to go +/// a step further, avoiding even transforming the vertices of objects that it +/// can quickly prove to be behind other objects. +/// +/// Occlusion culling inherently has some overhead, because Bevy must examine +/// the objects' bounding boxes, and create an acceleration structure +/// (hierarchical Z-buffer) to perform the occlusion tests. Therefore, occlusion +/// culling is disabled by default. Only enable it if you measure it to be a +/// speedup on your scene. Note that, because Bevy's occlusion culling runs on +/// the GPU and is quite efficient, it's rare for occlusion culling to result in +/// a significant slowdown. +/// +/// Occlusion culling currently requires a `DepthPrepass`. If no depth prepass +/// is present on the view, the [`OcclusionCulling`] component will be ignored. 
+/// Additionally, occlusion culling is currently incompatible with deferred +/// shading; including both `DeferredPrepass` and [`OcclusionCulling`] results +/// in unspecified behavior. +/// +/// The algorithm that Bevy uses is known as [*two-phase occlusion culling*]. +/// When you enable occlusion culling, Bevy splits the depth prepass into two: +/// an *early* depth prepass and a *late* depth prepass. The early depth prepass +/// renders all the meshes that were visible last frame to produce a +/// conservative approximation of the depth buffer. Then, after producing an +/// acceleration structure known as a hierarchical Z-buffer or depth pyramid, +/// Bevy tests the bounding boxes of all meshes against that depth buffer. Those +/// that can be quickly proven to be behind the geometry rendered during the +/// early depth prepass are skipped entirely. The other potentially-visible +/// meshes are rendered during the late prepass, and finally all the visible +/// meshes are rendered as usual during the opaque, transparent, etc. passes. +/// +/// Unlike other occlusion culling systems you may be familiar with, Bevy's +/// occlusion culling is fully dynamic and requires no baking step. The CPU +/// overhead is minimal. Large skinned meshes and other dynamic objects can +/// occlude other objects. +/// +/// [*two-phase occlusion culling*]: +/// https://medium.com/@mil_kru/two-pass-occlusion-culling-4100edcad501 +#[derive(Component, ExtractComponent, Clone, Copy, Default, Reflect)] +#[reflect(Component, Default, Clone)] +pub struct OcclusionCulling; + +/// A render-world component that contains resources necessary to perform +/// occlusion culling on any view other than a camera. +/// +/// Bevy automatically places this component on views created for shadow +/// mapping. You don't ordinarily need to add this component yourself. +#[derive(Clone, Component)] +pub struct OcclusionCullingSubview { + /// A texture view of the Z-buffer. 
+ pub depth_texture_view: TextureView, + /// The size of the texture along both dimensions. + /// + /// Because [`OcclusionCullingSubview`] is only currently used for shadow + /// maps, they're guaranteed to have sizes equal to a power of two, so we + /// don't have to store the two dimensions individually here. + pub depth_texture_size: u32, +} + +/// A render-world component placed on each camera that stores references to all +/// entities other than cameras that need occlusion culling. +/// +/// Bevy automatically places this component on cameras that are drawing +/// shadows, when those shadows come from lights with occlusion culling enabled. +/// You don't ordinarily need to add this component yourself. +#[derive(Clone, Component)] +pub struct OcclusionCullingSubviewEntities(pub Vec); diff --git a/crates/libmarathon/src/render/extract_component.rs b/crates/libmarathon/src/render/extract_component.rs new file mode 100644 index 0000000..47f4bb1 --- /dev/null +++ b/crates/libmarathon/src/render/extract_component.rs @@ -0,0 +1,236 @@ +use crate::render::{ + render_resource::{encase::internal::WriteInto, DynamicUniformBuffer, ShaderType}, + renderer::{RenderDevice, RenderQueue}, + sync_component::SyncComponentPlugin, + sync_world::RenderEntity, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; +use bevy_app::{App, Plugin}; +use bevy_camera::visibility::ViewVisibility; +use bevy_ecs::{ + bundle::NoBundleEffect, + component::Component, + prelude::*, + query::{QueryFilter, QueryItem, ReadOnlyQueryData}, +}; +use core::{marker::PhantomData, ops::Deref}; + +pub use macros::ExtractComponent; + +/// Stores the index of a uniform inside of [`ComponentUniforms`]. +#[derive(Component)] +pub struct DynamicUniformIndex { + index: u32, + marker: PhantomData, +} + +impl DynamicUniformIndex { + #[inline] + pub fn index(&self) -> u32 { + self.index + } +} + +/// Describes how a component gets extracted for rendering. 
+/// +/// Therefore the component is transferred from the "app world" into the "render world" +/// in the [`ExtractSchedule`] step. +pub trait ExtractComponent: Component { + /// ECS [`ReadOnlyQueryData`] to fetch the components to extract. + type QueryData: ReadOnlyQueryData; + /// Filters the entities with additional constraints. + type QueryFilter: QueryFilter; + + /// The output from extraction. + /// + /// Returning `None` based on the queried item will remove the component from the entity in + /// the render world. This can be used, for example, to conditionally extract camera settings + /// in order to disable a rendering feature on the basis of those settings, without removing + /// the component from the entity in the main world. + /// + /// The output may be different from the queried component. + /// This can be useful for example if only a subset of the fields are useful + /// in the render world. + /// + /// `Out` has a [`Bundle`] trait bound instead of a [`Component`] trait bound in order to allow use cases + /// such as tuples of components as output. + type Out: Bundle; + + // TODO: https://github.com/rust-lang/rust/issues/29661 + // type Out: Component = Self; + + /// Defines how the component is transferred into the "render world". + fn extract_component(item: QueryItem<'_, '_, Self::QueryData>) -> Option; +} + +/// This plugin prepares the components of the corresponding type for the GPU +/// by transforming them into uniforms. +/// +/// They can then be accessed from the [`ComponentUniforms`] resource. +/// For referencing the newly created uniforms a [`DynamicUniformIndex`] is inserted +/// for every processed entity. +/// +/// Therefore it sets up the [`RenderSystems::Prepare`] step +/// for the specified [`ExtractComponent`]. 
+pub struct UniformComponentPlugin(PhantomData C>); + +impl Default for UniformComponentPlugin { + fn default() -> Self { + Self(PhantomData) + } +} + +impl Plugin for UniformComponentPlugin { + fn build(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .insert_resource(ComponentUniforms::::default()) + .add_systems( + Render, + prepare_uniform_components::.in_set(RenderSystems::PrepareResources), + ); + } + } +} + +/// Stores all uniforms of the component type. +#[derive(Resource)] +pub struct ComponentUniforms { + uniforms: DynamicUniformBuffer, +} + +impl Deref for ComponentUniforms { + type Target = DynamicUniformBuffer; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.uniforms + } +} + +impl ComponentUniforms { + #[inline] + pub fn uniforms(&self) -> &DynamicUniformBuffer { + &self.uniforms + } +} + +impl Default for ComponentUniforms { + fn default() -> Self { + Self { + uniforms: Default::default(), + } + } +} + +/// This system prepares all components of the corresponding component type. +/// They are transformed into uniforms and stored in the [`ComponentUniforms`] resource. +fn prepare_uniform_components( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mut component_uniforms: ResMut>, + components: Query<(Entity, &C)>, +) where + C: Component + ShaderType + WriteInto + Clone, +{ + let components_iter = components.iter(); + let count = components_iter.len(); + let Some(mut writer) = + component_uniforms + .uniforms + .get_writer(count, &render_device, &render_queue) + else { + return; + }; + let entities = components_iter + .map(|(entity, component)| { + ( + entity, + DynamicUniformIndex:: { + index: writer.write(component), + marker: PhantomData, + }, + ) + }) + .collect::>(); + commands.try_insert_batch(entities); +} + +/// This plugin extracts the components into the render world for synced entities. 
+/// +/// To do so, it sets up the [`ExtractSchedule`] step for the specified [`ExtractComponent`]. +pub struct ExtractComponentPlugin { + only_extract_visible: bool, + marker: PhantomData (C, F)>, +} + +impl Default for ExtractComponentPlugin { + fn default() -> Self { + Self { + only_extract_visible: false, + marker: PhantomData, + } + } +} + +impl ExtractComponentPlugin { + pub fn extract_visible() -> Self { + Self { + only_extract_visible: true, + marker: PhantomData, + } + } +} + +impl Plugin for ExtractComponentPlugin { + fn build(&self, app: &mut App) { + app.add_plugins(SyncComponentPlugin::::default()); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + if self.only_extract_visible { + render_app.add_systems(ExtractSchedule, extract_visible_components::); + } else { + render_app.add_systems(ExtractSchedule, extract_components::); + } + } + } +} + +/// This system extracts all components of the corresponding [`ExtractComponent`], for entities that are synced via [`crate::sync_world::SyncToRenderWorld`]. +fn extract_components( + mut commands: Commands, + mut previous_len: Local, + query: Extract>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, query_item) in &query { + if let Some(component) = C::extract_component(query_item) { + values.push((entity, component)); + } else { + commands.entity(entity).remove::(); + } + } + *previous_len = values.len(); + commands.try_insert_batch(values); +} + +/// This system extracts all components of the corresponding [`ExtractComponent`], for entities that are visible and synced via [`crate::sync_world::SyncToRenderWorld`]. 
+fn extract_visible_components( + mut commands: Commands, + mut previous_len: Local, + query: Extract>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, view_visibility, query_item) in &query { + if view_visibility.get() { + if let Some(component) = C::extract_component(query_item) { + values.push((entity, component)); + } else { + commands.entity(entity).remove::(); + } + } + } + *previous_len = values.len(); + commands.try_insert_batch(values); +} diff --git a/crates/libmarathon/src/render/extract_instances.rs b/crates/libmarathon/src/render/extract_instances.rs new file mode 100644 index 0000000..d3c0b2c --- /dev/null +++ b/crates/libmarathon/src/render/extract_instances.rs @@ -0,0 +1,137 @@ +//! Convenience logic for turning components from the main world into extracted +//! instances in the render world. +//! +//! This is essentially the same as the `extract_component` module, but +//! higher-performance because it avoids the ECS overhead. + +use core::marker::PhantomData; + +use bevy_app::{App, Plugin}; +use bevy_camera::visibility::ViewVisibility; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + prelude::Entity, + query::{QueryFilter, QueryItem, ReadOnlyQueryData}, + resource::Resource, + system::{Query, ResMut}, +}; + +use crate::render::sync_world::MainEntityHashMap; +use crate::render::{Extract, ExtractSchedule, RenderApp}; + +/// Describes how to extract data needed for rendering from a component or +/// components. +/// +/// Before rendering, any applicable components will be transferred from the +/// main world to the render world in the [`ExtractSchedule`] step. +/// +/// This is essentially the same as +/// [`ExtractComponent`](crate::extract_component::ExtractComponent), but +/// higher-performance because it avoids the ECS overhead. +pub trait ExtractInstance: Send + Sync + Sized + 'static { + /// ECS [`ReadOnlyQueryData`] to fetch the components to extract. 
+ type QueryData: ReadOnlyQueryData; + /// Filters the entities with additional constraints. + type QueryFilter: QueryFilter; + + /// Defines how the component is transferred into the "render world". + fn extract(item: QueryItem<'_, '_, Self::QueryData>) -> Option; +} + +/// This plugin extracts one or more components into the "render world" as +/// extracted instances. +/// +/// Therefore it sets up the [`ExtractSchedule`] step for the specified +/// [`ExtractedInstances`]. +#[derive(Default)] +pub struct ExtractInstancesPlugin +where + EI: ExtractInstance, +{ + only_extract_visible: bool, + marker: PhantomData EI>, +} + +/// Stores all extract instances of a type in the render world. +#[derive(Resource, Deref, DerefMut)] +pub struct ExtractedInstances(MainEntityHashMap) +where + EI: ExtractInstance; + +impl Default for ExtractedInstances +where + EI: ExtractInstance, +{ + fn default() -> Self { + Self(Default::default()) + } +} + +impl ExtractInstancesPlugin +where + EI: ExtractInstance, +{ + /// Creates a new [`ExtractInstancesPlugin`] that unconditionally extracts to + /// the render world, whether the entity is visible or not. + pub fn new() -> Self { + Self { + only_extract_visible: false, + marker: PhantomData, + } + } + + /// Creates a new [`ExtractInstancesPlugin`] that extracts to the render world + /// if and only if the entity it's attached to is visible. 
+ pub fn extract_visible() -> Self { + Self { + only_extract_visible: true, + marker: PhantomData, + } + } +} + +impl Plugin for ExtractInstancesPlugin +where + EI: ExtractInstance, +{ + fn build(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.init_resource::>(); + if self.only_extract_visible { + render_app.add_systems(ExtractSchedule, extract_visible::); + } else { + render_app.add_systems(ExtractSchedule, extract_all::); + } + } + } +} + +fn extract_all( + mut extracted_instances: ResMut>, + query: Extract>, +) where + EI: ExtractInstance, +{ + extracted_instances.clear(); + for (entity, other) in &query { + if let Some(extract_instance) = EI::extract(other) { + extracted_instances.insert(entity.into(), extract_instance); + } + } +} + +fn extract_visible( + mut extracted_instances: ResMut>, + query: Extract>, +) where + EI: ExtractInstance, +{ + extracted_instances.clear(); + for (entity, view_visibility, other) in &query { + if view_visibility.get() + && let Some(extract_instance) = EI::extract(other) + { + extracted_instances.insert(entity.into(), extract_instance); + } + } +} diff --git a/crates/libmarathon/src/render/extract_param.rs b/crates/libmarathon/src/render/extract_param.rs new file mode 100644 index 0000000..c578406 --- /dev/null +++ b/crates/libmarathon/src/render/extract_param.rs @@ -0,0 +1,177 @@ +use crate::render::MainWorld; +use bevy_ecs::{ + component::Tick, + prelude::*, + query::FilteredAccessSet, + system::{ + ReadOnlySystemParam, SystemMeta, SystemParam, SystemParamItem, SystemParamValidationError, + SystemState, + }, + world::unsafe_world_cell::UnsafeWorldCell, +}; +use core::ops::{Deref, DerefMut}; + +/// A helper for accessing [`MainWorld`] content using a system parameter. +/// +/// A [`SystemParam`] adapter which applies the contained `SystemParam` to the [`World`] +/// contained in [`MainWorld`]. 
This parameter only works for systems run +/// during the [`ExtractSchedule`](crate::ExtractSchedule). +/// +/// This requires that the contained [`SystemParam`] does not mutate the world, as it +/// uses a read-only reference to [`MainWorld`] internally. +/// +/// ## Context +/// +/// [`ExtractSchedule`] is used to extract (move) data from the simulation world ([`MainWorld`]) to the +/// render world. The render world drives rendering each frame (generally to a `Window`). +/// This design is used to allow performing calculations related to rendering a prior frame at the same +/// time as the next frame is simulated, which increases throughput (FPS). +/// +/// [`Extract`] is used to get data from the main world during [`ExtractSchedule`]. +/// +/// ## Examples +/// +/// ``` +/// use bevy_ecs::prelude::*; +/// use crate::render::Extract; +/// use crate::render::sync_world::RenderEntity; +/// # #[derive(Component)] +/// // Do make sure to sync the cloud entities before extracting them. +/// # struct Cloud; +/// fn extract_clouds(mut commands: Commands, clouds: Extract>>) { +/// for cloud in &clouds { +/// commands.entity(cloud).insert(Cloud); +/// } +/// } +/// ``` +/// +/// [`ExtractSchedule`]: crate::ExtractSchedule +/// [Window]: bevy_window::Window +pub struct Extract<'w, 's, P> +where + P: ReadOnlySystemParam + 'static, +{ + item: SystemParamItem<'w, 's, P>, +} + +#[doc(hidden)] +pub struct ExtractState { + state: SystemState

, + main_world_state: as SystemParam>::State, +} + +// SAFETY: The only `World` access (`Res`) is read-only. +unsafe impl

ReadOnlySystemParam for Extract<'_, '_, P> where P: ReadOnlySystemParam {} + +// SAFETY: The only `World` access is properly registered by `Res::init_state`. +// This call will also ensure that there are no conflicts with prior params. +unsafe impl

SystemParam for Extract<'_, '_, P> +where + P: ReadOnlySystemParam, +{ + type State = ExtractState

; + type Item<'w, 's> = Extract<'w, 's, P>; + + fn init_state(world: &mut World) -> Self::State { + let mut main_world = world.resource_mut::(); + ExtractState { + state: SystemState::new(&mut main_world), + main_world_state: Res::::init_state(world), + } + } + + fn init_access( + state: &Self::State, + system_meta: &mut SystemMeta, + component_access_set: &mut FilteredAccessSet, + world: &mut World, + ) { + Res::::init_access( + &state.main_world_state, + system_meta, + component_access_set, + world, + ); + } + + #[inline] + unsafe fn validate_param( + state: &mut Self::State, + _system_meta: &SystemMeta, + world: UnsafeWorldCell, + ) -> Result<(), SystemParamValidationError> { + // SAFETY: Read-only access to world data registered in `init_state`. + let result = unsafe { world.get_resource_by_id(state.main_world_state) }; + let Some(main_world) = result else { + return Err(SystemParamValidationError::invalid::( + "`MainWorld` resource does not exist", + )); + }; + // SAFETY: Type is guaranteed by `SystemState`. + let main_world: &World = unsafe { main_world.deref() }; + // SAFETY: We provide the main world on which this system state was initialized on. + unsafe { + SystemState::

::validate_param( + &mut state.state, + main_world.as_unsafe_world_cell_readonly(), + ) + } + } + + #[inline] + unsafe fn get_param<'w, 's>( + state: &'s mut Self::State, + system_meta: &SystemMeta, + world: UnsafeWorldCell<'w>, + change_tick: Tick, + ) -> Self::Item<'w, 's> { + // SAFETY: + // - The caller ensures that `world` is the same one that `init_state` was called with. + // - The caller ensures that no other `SystemParam`s will conflict with the accesses we have registered. + let main_world = unsafe { + Res::::get_param( + &mut state.main_world_state, + system_meta, + world, + change_tick, + ) + }; + let item = state.state.get(main_world.into_inner()); + Extract { item } + } +} + +impl<'w, 's, P> Deref for Extract<'w, 's, P> +where + P: ReadOnlySystemParam, +{ + type Target = SystemParamItem<'w, 's, P>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.item + } +} + +impl<'w, 's, P> DerefMut for Extract<'w, 's, P> +where + P: ReadOnlySystemParam, +{ + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.item + } +} + +impl<'a, 'w, 's, P> IntoIterator for &'a Extract<'w, 's, P> +where + P: ReadOnlySystemParam, + &'a SystemParamItem<'w, 's, P>: IntoIterator, +{ + type Item = <&'a SystemParamItem<'w, 's, P> as IntoIterator>::Item; + type IntoIter = <&'a SystemParamItem<'w, 's, P> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + (&self.item).into_iter() + } +} diff --git a/crates/libmarathon/src/render/extract_resource.rs b/crates/libmarathon/src/render/extract_resource.rs new file mode 100644 index 0000000..d1be4a9 --- /dev/null +++ b/crates/libmarathon/src/render/extract_resource.rs @@ -0,0 +1,70 @@ +use core::marker::PhantomData; + +use bevy_app::{App, Plugin}; +use bevy_ecs::prelude::*; +pub use macros::ExtractResource; +use bevy_utils::once; + +use crate::render::{Extract, ExtractSchedule, RenderApp}; + +/// Describes how a resource gets extracted for rendering. 
+/// +/// Therefore the resource is transferred from the "main world" into the "render world" +/// in the [`ExtractSchedule`] step. +pub trait ExtractResource: Resource { + type Source: Resource; + + /// Defines how the resource is transferred into the "render world". + fn extract_resource(source: &Self::Source) -> Self; +} + +/// This plugin extracts the resources into the "render world". +/// +/// Therefore it sets up the[`ExtractSchedule`] step +/// for the specified [`Resource`]. +pub struct ExtractResourcePlugin(PhantomData); + +impl Default for ExtractResourcePlugin { + fn default() -> Self { + Self(PhantomData) + } +} + +impl Plugin for ExtractResourcePlugin { + fn build(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.add_systems(ExtractSchedule, extract_resource::); + } else { + once!(tracing::error!( + "Render app did not exist when trying to add `extract_resource` for <{}>.", + core::any::type_name::() + )); + } + } +} + +/// This system extracts the resource of the corresponding [`Resource`] type +pub fn extract_resource( + mut commands: Commands, + main_resource: Extract>>, + target_resource: Option>, +) { + if let Some(main_resource) = main_resource.as_ref() { + if let Some(mut target_resource) = target_resource { + if main_resource.is_changed() { + *target_resource = R::extract_resource(main_resource); + } + } else { + #[cfg(debug_assertions)] + if !main_resource.is_added() { + once!(tracing::warn!( + "Removing resource {} from render world not expected, adding using `Commands`. 
+ This may decrease performance", + core::any::type_name::() + )); + } + + commands.insert_resource(R::extract_resource(main_resource)); + } + } +} diff --git a/crates/libmarathon/src/render/fullscreen_vertex_shader/fullscreen.wgsl b/crates/libmarathon/src/render/fullscreen_vertex_shader/fullscreen.wgsl new file mode 100644 index 0000000..04c3c49 --- /dev/null +++ b/crates/libmarathon/src/render/fullscreen_vertex_shader/fullscreen.wgsl @@ -0,0 +1,34 @@ +#define_import_path bevy_core_pipeline::fullscreen_vertex_shader + +struct FullscreenVertexOutput { + @builtin(position) + position: vec4, + @location(0) + uv: vec2, +}; + +// This vertex shader produces the following, when drawn using indices 0..3: +// +// 1 | 0-----x.....2 +// 0 | | s | . ´ +// -1 | x_____x´ +// -2 | : .´ +// -3 | 1´ +// +--------------- +// -1 0 1 2 3 +// +// The axes are clip-space x and y. The region marked s is the visible region. +// The digits in the corners of the right-angled triangle are the vertex +// indices. +// +// The top-left has UV 0,0, the bottom-left has 0,2, and the top-right has 2,0. +// This means that the UV gets interpolated to 1,1 at the bottom-right corner +// of the clip-space rectangle that is at 1,-1 in clip space. 
+@vertex +fn fullscreen_vertex_shader(@builtin(vertex_index) vertex_index: u32) -> FullscreenVertexOutput { + // See the explanation above for how this works + let uv = vec2(f32(vertex_index >> 1u), f32(vertex_index & 1u)) * 2.0; + let clip_position = vec4(uv * vec2(2.0, -2.0) + vec2(-1.0, 1.0), 0.0, 1.0); + + return FullscreenVertexOutput(clip_position, uv); +} diff --git a/crates/libmarathon/src/render/fullscreen_vertex_shader/mod.rs b/crates/libmarathon/src/render/fullscreen_vertex_shader/mod.rs new file mode 100644 index 0000000..d3f8435 --- /dev/null +++ b/crates/libmarathon/src/render/fullscreen_vertex_shader/mod.rs @@ -0,0 +1,41 @@ +use bevy_asset::{load_embedded_asset, Handle}; +use bevy_ecs::{resource::Resource, world::FromWorld}; +use crate::render::render_resource::VertexState; +use bevy_shader::Shader; + +/// A shader that renders to the whole screen. Useful for post-processing. +#[derive(Resource, Clone)] +pub struct FullscreenShader(Handle); + +impl FromWorld for FullscreenShader { + fn from_world(world: &mut bevy_ecs::world::World) -> Self { + Self(load_embedded_asset!(world, "fullscreen.wgsl")) + } +} + +impl FullscreenShader { + /// Gets the raw shader handle. + pub fn shader(&self) -> Handle { + self.0.clone() + } + + /// Creates a [`VertexState`] that uses the [`FullscreenShader`] to output a + /// ```wgsl + /// struct FullscreenVertexOutput { + /// @builtin(position) + /// position: vec4; + /// @location(0) + /// uv: vec2; + /// }; + /// ``` + /// from the vertex shader. 
+ /// The draw call should render one triangle: `render_pass.draw(0..3, 0..1);` + pub fn to_vertex_state(&self) -> VertexState { + VertexState { + shader: self.0.clone(), + shader_defs: Vec::new(), + entry_point: Some("fullscreen_vertex_shader".into()), + buffers: Vec::new(), + } + } +} diff --git a/crates/libmarathon/src/render/globals.rs b/crates/libmarathon/src/render/globals.rs new file mode 100644 index 0000000..1489f6f --- /dev/null +++ b/crates/libmarathon/src/render/globals.rs @@ -0,0 +1,79 @@ +use crate::render::{ + extract_resource::ExtractResource, + render_resource::{ShaderType, UniformBuffer}, + renderer::{RenderDevice, RenderQueue}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; +use bevy_app::{App, Plugin}; +use bevy_diagnostic::FrameCount; +use bevy_ecs::prelude::*; +use bevy_reflect::prelude::*; +use bevy_shader::load_shader_library; +use bevy_time::Time; + +pub struct GlobalsPlugin; + +impl Plugin for GlobalsPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "globals.wgsl"); + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::

for SetMaterialBindGroup { + type Param = ( + SRes>, + SRes, + SRes, + ); + type ViewQuery = (); + type ItemQuery = (); + + #[inline] + fn render<'w>( + item: &P, + _view: (), + _item_query: Option<()>, + (materials, material_instances, material_bind_group_allocator): SystemParamItem< + 'w, + '_, + Self::Param, + >, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let materials = materials.into_inner(); + let material_instances = material_instances.into_inner(); + let material_bind_group_allocators = material_bind_group_allocator.into_inner(); + + let Some(material_instance) = material_instances.instances.get(&item.main_entity()) else { + return RenderCommandResult::Skip; + }; + let Some(material_bind_group_allocator) = + material_bind_group_allocators.get(&material_instance.asset_id.type_id()) + else { + return RenderCommandResult::Skip; + }; + let Some(material) = materials.get(material_instance.asset_id) else { + return RenderCommandResult::Skip; + }; + let Some(material_bind_group) = material_bind_group_allocator.get(material.binding.group) + else { + return RenderCommandResult::Skip; + }; + let Some(bind_group) = material_bind_group.bind_group() else { + return RenderCommandResult::Skip; + }; + pass.set_bind_group(I, bind_group, &[]); + RenderCommandResult::Success + } +} + +/// Stores all extracted instances of all [`Material`]s in the render world. +#[derive(Resource, Default)] +pub struct RenderMaterialInstances { + /// Maps from each entity in the main world to the + /// [`RenderMaterialInstance`] associated with it. + pub instances: MainEntityHashMap, + /// A monotonically-increasing counter, which we use to sweep + /// [`RenderMaterialInstances::instances`] when the entities and/or required + /// components are removed. + pub current_change_tick: Tick, +} + +impl RenderMaterialInstances { + /// Returns the mesh material ID for the entity with the given mesh, or a + /// dummy mesh material ID if the mesh has no material ID. 
+ /// + /// Meshes almost always have materials, but in very specific circumstances + /// involving custom pipelines they won't. (See the + /// `specialized_mesh_pipelines` example.) + pub(crate) fn mesh_material(&self, entity: MainEntity) -> UntypedAssetId { + match self.instances.get(&entity) { + Some(render_instance) => render_instance.asset_id, + None => DUMMY_MESH_MATERIAL.into(), + } + } +} + +/// The material associated with a single mesh instance in the main world. +/// +/// Note that this uses an [`UntypedAssetId`] and isn't generic over the +/// material type, for simplicity. +pub struct RenderMaterialInstance { + /// The material asset. + pub asset_id: UntypedAssetId, + /// The [`RenderMaterialInstances::current_change_tick`] at which this + /// material instance was last modified. + pub last_change_tick: Tick, +} + +/// A [`SystemSet`] that contains all `extract_mesh_materials` systems. +#[derive(SystemSet, Clone, PartialEq, Eq, Debug, Hash)] +pub struct MaterialExtractionSystems; + +/// Deprecated alias for [`MaterialExtractionSystems`]. 
+#[deprecated(since = "0.17.0", note = "Renamed to `MaterialExtractionSystems`.")] +pub type ExtractMaterialsSet = MaterialExtractionSystems; + +pub const fn alpha_mode_pipeline_key(alpha_mode: AlphaMode, msaa: &Msaa) -> MeshPipelineKey { + match alpha_mode { + // Premultiplied and Add share the same pipeline key + // They're made distinct in the PBR shader, via `premultiply_alpha()` + AlphaMode::Premultiplied | AlphaMode::Add => MeshPipelineKey::BLEND_PREMULTIPLIED_ALPHA, + AlphaMode::Blend => MeshPipelineKey::BLEND_ALPHA, + AlphaMode::Multiply => MeshPipelineKey::BLEND_MULTIPLY, + AlphaMode::Mask(_) => MeshPipelineKey::MAY_DISCARD, + AlphaMode::AlphaToCoverage => match *msaa { + Msaa::Off => MeshPipelineKey::MAY_DISCARD, + _ => MeshPipelineKey::BLEND_ALPHA_TO_COVERAGE, + }, + _ => MeshPipelineKey::NONE, + } +} + +pub const fn tonemapping_pipeline_key(tonemapping: Tonemapping) -> MeshPipelineKey { + match tonemapping { + Tonemapping::None => MeshPipelineKey::TONEMAP_METHOD_NONE, + Tonemapping::Reinhard => MeshPipelineKey::TONEMAP_METHOD_REINHARD, + Tonemapping::ReinhardLuminance => MeshPipelineKey::TONEMAP_METHOD_REINHARD_LUMINANCE, + Tonemapping::AcesFitted => MeshPipelineKey::TONEMAP_METHOD_ACES_FITTED, + Tonemapping::AgX => MeshPipelineKey::TONEMAP_METHOD_AGX, + Tonemapping::SomewhatBoringDisplayTransform => { + MeshPipelineKey::TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM + } + Tonemapping::TonyMcMapface => MeshPipelineKey::TONEMAP_METHOD_TONY_MC_MAPFACE, + Tonemapping::BlenderFilmic => MeshPipelineKey::TONEMAP_METHOD_BLENDER_FILMIC, + } +} + +pub const fn screen_space_specular_transmission_pipeline_key( + screen_space_transmissive_blur_quality: ScreenSpaceTransmissionQuality, +) -> MeshPipelineKey { + match screen_space_transmissive_blur_quality { + ScreenSpaceTransmissionQuality::Low => { + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_LOW + } + ScreenSpaceTransmissionQuality::Medium => { + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_MEDIUM 
+ } + ScreenSpaceTransmissionQuality::High => { + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_HIGH + } + ScreenSpaceTransmissionQuality::Ultra => { + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_ULTRA + } + } +} + +/// A system that ensures that +/// [`crate::render::mesh::extract_meshes_for_gpu_building`] re-extracts meshes +/// whose materials changed. +/// +/// As [`crate::render::mesh::collect_meshes_for_gpu_building`] only considers +/// meshes that were newly extracted, and it writes information from the +/// [`RenderMaterialInstances`] into the +/// [`crate::render::mesh::MeshInputUniform`], we must tell +/// [`crate::render::mesh::extract_meshes_for_gpu_building`] to re-extract a +/// mesh if its material changed. Otherwise, the material binding information in +/// the [`crate::render::mesh::MeshInputUniform`] might not be updated properly. +/// The easiest way to ensure that +/// [`crate::render::mesh::extract_meshes_for_gpu_building`] re-extracts a mesh +/// is to mark its [`Mesh3d`] as changed, so that's what this system does. +fn mark_meshes_as_changed_if_their_materials_changed( + mut changed_meshes_query: Query< + &mut Mesh3d, + Or<(Changed>, AssetChanged>)>, + >, +) where + M: Material, +{ + for mut mesh in &mut changed_meshes_query { + mesh.set_changed(); + } +} + +/// Fills the [`RenderMaterialInstances`] resources from the meshes in the +/// scene. 
+fn extract_mesh_materials( + mut material_instances: ResMut, + changed_meshes_query: Extract< + Query< + (Entity, &ViewVisibility, &MeshMaterial3d), + Or<(Changed, Changed>)>, + >, + >, +) { + let last_change_tick = material_instances.current_change_tick; + + for (entity, view_visibility, material) in &changed_meshes_query { + if view_visibility.get() { + material_instances.instances.insert( + entity.into(), + RenderMaterialInstance { + asset_id: material.id().untyped(), + last_change_tick, + }, + ); + } else { + material_instances + .instances + .remove(&MainEntity::from(entity)); + } + } +} + +/// Removes mesh materials from [`RenderMaterialInstances`] when their +/// [`MeshMaterial3d`] components are removed. +/// +/// This is tricky because we have to deal with the case in which a material of +/// type A was removed and replaced with a material of type B in the same frame +/// (which is actually somewhat common of an operation). In this case, even +/// though an entry will be present in `RemovedComponents>`, +/// we must not remove the entry in `RenderMaterialInstances` which corresponds +/// to material B. To handle this case, we use change ticks to avoid removing +/// the entry if it was updated this frame. +/// +/// This is the first of two sweep phases. Because this phase runs once per +/// material type, we need a second phase in order to guarantee that we only +/// bump [`RenderMaterialInstances::current_change_tick`] once. +fn early_sweep_material_instances( + mut material_instances: ResMut, + mut removed_materials_query: Extract>>, +) where + M: Material, +{ + let last_change_tick = material_instances.current_change_tick; + + for entity in removed_materials_query.read() { + if let Entry::Occupied(occupied_entry) = material_instances.instances.entry(entity.into()) { + // Only sweep the entry if it wasn't updated this frame. 
+ if occupied_entry.get().last_change_tick != last_change_tick { + occupied_entry.remove(); + } + } + } +} + +/// Removes mesh materials from [`RenderMaterialInstances`] when their +/// [`ViewVisibility`] components are removed. +/// +/// This runs after all invocations of [`early_sweep_material_instances`] and is +/// responsible for bumping [`RenderMaterialInstances::current_change_tick`] in +/// preparation for a new frame. +pub(crate) fn late_sweep_material_instances( + mut material_instances: ResMut, + mut removed_meshes_query: Extract>, +) { + let last_change_tick = material_instances.current_change_tick; + + for entity in removed_meshes_query.read() { + if let Entry::Occupied(occupied_entry) = material_instances.instances.entry(entity.into()) { + // Only sweep the entry if it wasn't updated this frame. It's + // possible that a `ViewVisibility` component was removed and + // re-added in the same frame. + if occupied_entry.get().last_change_tick != last_change_tick { + occupied_entry.remove(); + } + } + } + + material_instances + .current_change_tick + .set(last_change_tick.get() + 1); +} + +pub fn extract_entities_needs_specialization( + entities_needing_specialization: Extract>>, + material_instances: Res, + mut entity_specialization_ticks: ResMut, + mut removed_mesh_material_components: Extract>>, + mut specialized_material_pipeline_cache: ResMut, + mut specialized_prepass_material_pipeline_cache: Option< + ResMut, + >, + mut specialized_shadow_material_pipeline_cache: Option< + ResMut, + >, + views: Query<&ExtractedView>, + ticks: SystemChangeTick, +) where + M: Material, +{ + // Clean up any despawned entities, we do this first in case the removed material was re-added + // the same frame, thus will appear both in the removed components list and have been added to + // the `EntitiesNeedingSpecialization` collection by triggering the `Changed` filter + // + // Additionally, we need to make sure that we are careful about materials that could have changed + 
// type, e.g. from a `StandardMaterial` to a `CustomMaterial`, as this will also appear in the + // removed components list. As such, we make sure that this system runs after `MaterialExtractionSystems` + // so that the `RenderMaterialInstances` bookkeeping has already been done, and we can check if the entity + // still has a valid material instance. + for entity in removed_mesh_material_components.read() { + if material_instances + .instances + .contains_key(&MainEntity::from(entity)) + { + continue; + } + + entity_specialization_ticks.remove(&MainEntity::from(entity)); + for view in views { + if let Some(cache) = + specialized_material_pipeline_cache.get_mut(&view.retained_view_entity) + { + cache.remove(&MainEntity::from(entity)); + } + if let Some(cache) = specialized_prepass_material_pipeline_cache + .as_mut() + .and_then(|c| c.get_mut(&view.retained_view_entity)) + { + cache.remove(&MainEntity::from(entity)); + } + if let Some(cache) = specialized_shadow_material_pipeline_cache + .as_mut() + .and_then(|c| c.get_mut(&view.retained_view_entity)) + { + cache.remove(&MainEntity::from(entity)); + } + } + } + + for entity in entities_needing_specialization.iter() { + // Update the entity's specialization tick with this run's tick + entity_specialization_ticks.insert((*entity).into(), ticks.this_run()); + } +} + +#[derive(Resource, Deref, DerefMut, Clone, Debug)] +pub struct EntitiesNeedingSpecialization { + #[deref] + pub entities: Vec, + _marker: PhantomData, +} + +impl Default for EntitiesNeedingSpecialization { + fn default() -> Self { + Self { + entities: Default::default(), + _marker: Default::default(), + } + } +} + +#[derive(Resource, Deref, DerefMut, Default, Clone, Debug)] +pub struct EntitySpecializationTicks { + #[deref] + pub entities: MainEntityHashMap, +} + +/// Stores the [`SpecializedMaterialViewPipelineCache`] for each view. 
+#[derive(Resource, Deref, DerefMut, Default)] +pub struct SpecializedMaterialPipelineCache { + // view entity -> view pipeline cache + #[deref] + map: HashMap, +} + +/// Stores the cached render pipeline ID for each entity in a single view, as +/// well as the last time it was changed. +#[derive(Deref, DerefMut, Default)] +pub struct SpecializedMaterialViewPipelineCache { + // material entity -> (tick, pipeline_id) + #[deref] + map: MainEntityHashMap<(Tick, CachedRenderPipelineId)>, +} + +pub fn check_entities_needing_specialization( + needs_specialization: Query< + Entity, + ( + Or<( + Changed, + AssetChanged, + Changed>, + AssetChanged>, + )>, + With>, + ), + >, + mut par_local: Local>>, + mut entities_needing_specialization: ResMut>, +) where + M: Material, +{ + entities_needing_specialization.clear(); + + needs_specialization + .par_iter() + .for_each(|entity| par_local.borrow_local_mut().push(entity)); + + par_local.drain_into(&mut entities_needing_specialization); +} + +pub fn specialize_material_meshes( + render_meshes: Res>, + render_materials: Res>, + render_mesh_instances: Res, + render_material_instances: Res, + render_lightmaps: Res, + render_visibility_ranges: Res, + ( + opaque_render_phases, + alpha_mask_render_phases, + transmissive_render_phases, + transparent_render_phases, + ): ( + Res>, + Res>, + Res>, + Res>, + ), + views: Query<(&ExtractedView, &RenderVisibleEntities)>, + view_key_cache: Res, + entity_specialization_ticks: Res, + view_specialization_ticks: Res, + mut specialized_material_pipeline_cache: ResMut, + mut pipelines: ResMut>, + pipeline: Res, + pipeline_cache: Res, + ticks: SystemChangeTick, +) { + // Record the retained IDs of all shadow views so that we can expire old + // pipeline IDs. 
+ let mut all_views: HashSet = HashSet::default(); + + for (view, visible_entities) in &views { + all_views.insert(view.retained_view_entity); + + if !transparent_render_phases.contains_key(&view.retained_view_entity) + && !opaque_render_phases.contains_key(&view.retained_view_entity) + && !alpha_mask_render_phases.contains_key(&view.retained_view_entity) + && !transmissive_render_phases.contains_key(&view.retained_view_entity) + { + continue; + } + + let Some(view_key) = view_key_cache.get(&view.retained_view_entity) else { + continue; + }; + + let view_tick = view_specialization_ticks + .get(&view.retained_view_entity) + .unwrap(); + let view_specialized_material_pipeline_cache = specialized_material_pipeline_cache + .entry(view.retained_view_entity) + .or_default(); + + for (_, visible_entity) in visible_entities.iter::() { + let Some(material_instance) = render_material_instances.instances.get(visible_entity) + else { + continue; + }; + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let entity_tick = entity_specialization_ticks.get(visible_entity).unwrap(); + let last_specialized_tick = view_specialized_material_pipeline_cache + .get(visible_entity) + .map(|(tick, _)| *tick); + let needs_specialization = last_specialized_tick.is_none_or(|tick| { + view_tick.is_newer_than(tick, ticks.this_run()) + || entity_tick.is_newer_than(tick, ticks.this_run()) + }); + if !needs_specialization { + continue; + } + let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else { + continue; + }; + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + + let mut mesh_pipeline_key_bits = material.properties.mesh_pipeline_key_bits; + mesh_pipeline_key_bits.insert(alpha_mode_pipeline_key( + material.properties.alpha_mode, + &Msaa::from_samples(view_key.msaa_samples()), + )); + let mut mesh_key = *view_key + | MeshPipelineKey::from_bits_retain(mesh.key_bits.bits()) + 
| mesh_pipeline_key_bits; + + if let Some(lightmap) = render_lightmaps.render_lightmaps.get(visible_entity) { + mesh_key |= MeshPipelineKey::LIGHTMAPPED; + + if lightmap.bicubic_sampling { + mesh_key |= MeshPipelineKey::LIGHTMAP_BICUBIC_SAMPLING; + } + } + + if render_visibility_ranges.entity_has_crossfading_visibility_ranges(*visible_entity) { + mesh_key |= MeshPipelineKey::VISIBILITY_RANGE_DITHER; + } + + if view_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + // If the previous frame have skins or morph targets, note that. + if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_SKIN; + } + if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_MORPH) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_MORPH; + } + } + + let erased_key = ErasedMaterialPipelineKey { + type_id: material_instance.asset_id.type_id(), + mesh_key, + material_key: material.properties.material_key.clone(), + }; + let material_pipeline_specializer = MaterialPipelineSpecializer { + pipeline: pipeline.clone(), + properties: material.properties.clone(), + }; + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &material_pipeline_specializer, + erased_key, + &mesh.layout, + ); + let pipeline_id = match pipeline_id { + Ok(id) => id, + Err(err) => { + error!("{}", err); + continue; + } + }; + + view_specialized_material_pipeline_cache + .insert(*visible_entity, (ticks.this_run(), pipeline_id)); + } + } + + // Delete specialized pipelines belonging to views that have expired. + specialized_material_pipeline_cache + .retain(|retained_view_entity, _| all_views.contains(retained_view_entity)); +} + +/// For each view, iterates over all the meshes visible from that view and adds +/// them to [`BinnedRenderPhase`]s or [`SortedRenderPhase`]s as appropriate. 
+pub fn queue_material_meshes( + render_materials: Res>, + render_mesh_instances: Res, + render_material_instances: Res, + mesh_allocator: Res, + gpu_preprocessing_support: Res, + mut opaque_render_phases: ResMut>, + mut alpha_mask_render_phases: ResMut>, + mut transmissive_render_phases: ResMut>, + mut transparent_render_phases: ResMut>, + views: Query<(&ExtractedView, &RenderVisibleEntities)>, + specialized_material_pipeline_cache: ResMut, +) { + for (view, visible_entities) in &views { + let ( + Some(opaque_phase), + Some(alpha_mask_phase), + Some(transmissive_phase), + Some(transparent_phase), + ) = ( + opaque_render_phases.get_mut(&view.retained_view_entity), + alpha_mask_render_phases.get_mut(&view.retained_view_entity), + transmissive_render_phases.get_mut(&view.retained_view_entity), + transparent_render_phases.get_mut(&view.retained_view_entity), + ) + else { + continue; + }; + + let Some(view_specialized_material_pipeline_cache) = + specialized_material_pipeline_cache.get(&view.retained_view_entity) + else { + continue; + }; + + let rangefinder = view.rangefinder3d(); + for (render_entity, visible_entity) in visible_entities.iter::() { + let Some((current_change_tick, pipeline_id)) = view_specialized_material_pipeline_cache + .get(visible_entity) + .map(|(current_change_tick, pipeline_id)| (*current_change_tick, *pipeline_id)) + else { + continue; + }; + + // Skip the entity if it's cached in a bin and up to date. 
+ if opaque_phase.validate_cached_entity(*visible_entity, current_change_tick) + || alpha_mask_phase.validate_cached_entity(*visible_entity, current_change_tick) + { + continue; + } + + let Some(material_instance) = render_material_instances.instances.get(visible_entity) + else { + continue; + }; + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + + // Fetch the slabs that this mesh resides in. + let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + let Some(draw_function) = material.properties.get_draw_function(MaterialDrawFunction) + else { + continue; + }; + + match material.properties.render_phase_type { + RenderPhaseType::Transmissive => { + let distance = rangefinder.distance_translation(&mesh_instance.translation) + + material.properties.depth_bias; + transmissive_phase.add(Transmissive3d { + entity: (*render_entity, *visible_entity), + draw_function, + pipeline: pipeline_id, + distance, + batch_range: 0..1, + extra_index: PhaseItemExtraIndex::None, + indexed: index_slab.is_some(), + }); + } + RenderPhaseType::Opaque => { + if material.properties.render_method == OpaqueRendererMethod::Deferred { + // Even though we aren't going to insert the entity into + // a bin, we still want to update its cache entry. That + // way, we know we don't need to re-examine it in future + // frames. 
+ opaque_phase.update_cache(*visible_entity, None, current_change_tick); + continue; + } + let batch_set_key = Opaque3dBatchSetKey { + pipeline: pipeline_id, + draw_function, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + lightmap_slab: mesh_instance.shared.lightmap_slab_index.map(|index| *index), + }; + let bin_key = Opaque3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }; + opaque_phase.add( + batch_set_key, + bin_key, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + current_change_tick, + ); + } + // Alpha mask + RenderPhaseType::AlphaMask => { + let batch_set_key = OpaqueNoLightmap3dBatchSetKey { + draw_function, + pipeline: pipeline_id, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }; + let bin_key = OpaqueNoLightmap3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }; + alpha_mask_phase.add( + batch_set_key, + bin_key, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + current_change_tick, + ); + } + RenderPhaseType::Transparent => { + let distance = rangefinder.distance_translation(&mesh_instance.translation) + + material.properties.depth_bias; + transparent_phase.add(Transparent3d { + entity: (*render_entity, *visible_entity), + draw_function, + pipeline: pipeline_id, + distance, + batch_range: 0..1, + extra_index: PhaseItemExtraIndex::None, + indexed: index_slab.is_some(), + }); + } + } + } + } +} + +/// Default render method used for opaque materials. 
+#[derive(Default, Resource, Clone, Debug, ExtractResource, Reflect)] +#[reflect(Resource, Default, Debug, Clone)] +pub struct DefaultOpaqueRendererMethod(OpaqueRendererMethod); + +impl DefaultOpaqueRendererMethod { + pub fn forward() -> Self { + DefaultOpaqueRendererMethod(OpaqueRendererMethod::Forward) + } + + pub fn deferred() -> Self { + DefaultOpaqueRendererMethod(OpaqueRendererMethod::Deferred) + } + + pub fn set_to_forward(&mut self) { + self.0 = OpaqueRendererMethod::Forward; + } + + pub fn set_to_deferred(&mut self) { + self.0 = OpaqueRendererMethod::Deferred; + } +} + +/// Render method used for opaque materials. +/// +/// The forward rendering main pass draws each mesh entity and shades it according to its +/// corresponding material and the lights that affect it. Some render features like Screen Space +/// Ambient Occlusion require running depth and normal prepasses, that are 'deferred'-like +/// prepasses over all mesh entities to populate depth and normal textures. This means that when +/// using render features that require running prepasses, multiple passes over all visible geometry +/// are required. This can be slow if there is a lot of geometry that cannot be batched into few +/// draws. +/// +/// Deferred rendering runs a prepass to gather not only geometric information like depth and +/// normals, but also all the material properties like base color, emissive color, reflectance, +/// metalness, etc, and writes them into a deferred 'g-buffer' texture. The deferred main pass is +/// then a fullscreen pass that reads data from these textures and executes shading. This allows +/// for one pass over geometry, but is at the cost of not being able to use MSAA, and has heavier +/// bandwidth usage which can be unsuitable for low end mobile or other bandwidth-constrained devices. +/// +/// If a material indicates `OpaqueRendererMethod::Auto`, `DefaultOpaqueRendererMethod` will be used. 
+#[derive(Default, Clone, Copy, Debug, PartialEq, Reflect)] +#[reflect(Default, Clone, PartialEq)] +pub enum OpaqueRendererMethod { + #[default] + Forward, + Deferred, + Auto, +} + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MaterialVertexShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MaterialFragmentShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct PrepassVertexShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct PrepassFragmentShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct DeferredVertexShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct DeferredFragmentShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MeshletFragmentShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MeshletPrepassFragmentShader; + +#[derive(ShaderLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MeshletDeferredFragmentShader; + +#[derive(DrawFunctionLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct MaterialDrawFunction; + +#[derive(DrawFunctionLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct PrepassDrawFunction; + +#[derive(DrawFunctionLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct DeferredDrawFunction; + +#[derive(DrawFunctionLabel, Debug, Hash, PartialEq, Eq, Clone, Default)] +pub struct ShadowsDrawFunction; + +#[derive(Debug)] +pub struct ErasedMaterialKey { + type_id: TypeId, + hash: u64, + value: Box, + vtable: Arc, +} + +#[derive(Debug)] +pub struct ErasedMaterialKeyVTable { + clone_fn: fn(&dyn Any) -> Box, + partial_eq_fn: fn(&dyn Any, &dyn Any) -> bool, +} + +impl ErasedMaterialKey { + pub fn new(material_key: T) -> Self + where + T: Clone + Hash + PartialEq + Send + Sync + 'static, + { + let type_id = TypeId::of::(); 
+ let hash = FixedHasher::hash_one(&FixedHasher, &material_key); + + fn clone(any: &dyn Any) -> Box { + Box::new(any.downcast_ref::().unwrap().clone()) + } + fn partial_eq(a: &dyn Any, b: &dyn Any) -> bool { + a.downcast_ref::().unwrap() == b.downcast_ref::().unwrap() + } + + Self { + type_id, + hash, + value: Box::new(material_key), + vtable: Arc::new(ErasedMaterialKeyVTable { + clone_fn: clone::, + partial_eq_fn: partial_eq::, + }), + } + } + + pub fn to_key(&self) -> T { + debug_assert_eq!(self.type_id, TypeId::of::()); + self.value.downcast_ref::().unwrap().clone() + } +} + +impl PartialEq for ErasedMaterialKey { + fn eq(&self, other: &Self) -> bool { + self.type_id == other.type_id + && (self.vtable.partial_eq_fn)(self.value.as_ref(), other.value.as_ref()) + } +} + +impl Eq for ErasedMaterialKey {} + +impl Clone for ErasedMaterialKey { + fn clone(&self) -> Self { + Self { + type_id: self.type_id, + hash: self.hash, + value: (self.vtable.clone_fn)(self.value.as_ref()), + vtable: self.vtable.clone(), + } + } +} + +impl Hash for ErasedMaterialKey { + fn hash(&self, state: &mut H) { + self.type_id.hash(state); + self.hash.hash(state); + } +} + +impl Default for ErasedMaterialKey { + fn default() -> Self { + Self::new(()) + } +} + +/// Common [`Material`] properties, calculated for a specific material instance. +#[derive(Default)] +pub struct MaterialProperties { + /// Is this material should be rendered by the deferred renderer when. + /// [`AlphaMode::Opaque`] or [`AlphaMode::Mask`] + pub render_method: OpaqueRendererMethod, + /// The [`AlphaMode`] of this material. + pub alpha_mode: AlphaMode, + /// The bits in the [`MeshPipelineKey`] for this material. + /// + /// These are precalculated so that we can just "or" them together in + /// [`queue_material_meshes`]. + pub mesh_pipeline_key_bits: MeshPipelineKey, + /// Add a bias to the view depth of the mesh which can be used to force a specific render order + /// for meshes with equal depth, to avoid z-fighting. 
+ /// The bias is in depth-texture units so large values may be needed to overcome small depth differences. + pub depth_bias: f32, + /// Whether the material would like to read from [`ViewTransmissionTexture`](bevy_core_pipeline::core_3d::ViewTransmissionTexture). + /// + /// This allows taking color output from the [`Opaque3d`] pass as an input, (for screen-space transmission) but requires + /// rendering to take place in a separate [`Transmissive3d`] pass. + pub reads_view_transmission_texture: bool, + pub render_phase_type: RenderPhaseType, + pub material_layout: Option, + /// Backing array is a size of 4 because the `StandardMaterial` needs 4 draw functions by default + pub draw_functions: SmallVec<[(InternedDrawFunctionLabel, DrawFunctionId); 4]>, + /// Backing array is a size of 3 because the `StandardMaterial` has 3 custom shaders (`frag`, `prepass_frag`, `deferred_frag`) which is the + /// most common use case + pub shaders: SmallVec<[(InternedShaderLabel, Handle); 3]>, + /// Whether this material *actually* uses bindless resources, taking the + /// platform support (or lack thereof) of bindless resources into account. + pub bindless: bool, + pub specialize: Option< + fn( + &MaterialPipeline, + &mut RenderPipelineDescriptor, + &MeshVertexBufferLayoutRef, + ErasedMaterialPipelineKey, + ) -> Result<(), SpecializedMeshPipelineError>, + >, + /// The key for this material, typically a bitfield of flags that are used to modify + /// the pipeline descriptor used for this material. 
+ pub material_key: ErasedMaterialKey, + /// Whether shadows are enabled for this material + pub shadows_enabled: bool, + /// Whether prepass is enabled for this material + pub prepass_enabled: bool, +} + +impl MaterialProperties { + pub fn get_shader(&self, label: impl ShaderLabel) -> Option> { + self.shaders + .iter() + .find(|(inner_label, _)| inner_label == &label.intern()) + .map(|(_, shader)| shader) + .cloned() + } + + pub fn add_shader(&mut self, label: impl ShaderLabel, shader: Handle) { + self.shaders.push((label.intern(), shader)); + } + + pub fn get_draw_function(&self, label: impl DrawFunctionLabel) -> Option { + self.draw_functions + .iter() + .find(|(inner_label, _)| inner_label == &label.intern()) + .map(|(_, shader)| shader) + .cloned() + } + + pub fn add_draw_function( + &mut self, + label: impl DrawFunctionLabel, + draw_function: DrawFunctionId, + ) { + self.draw_functions.push((label.intern(), draw_function)); + } +} + +#[derive(Clone, Copy, Default)] +pub enum RenderPhaseType { + #[default] + Opaque, + AlphaMask, + Transmissive, + Transparent, +} + +/// A resource that maps each untyped material ID to its binding. +/// +/// This duplicates information in `RenderAssets`, but it doesn't have the +/// `M` type parameter, so it can be used in untyped contexts like +/// [`crate::render::mesh::collect_meshes_for_gpu_building`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct RenderMaterialBindings(HashMap); + +/// Data prepared for a [`Material`] instance. 
+pub struct PreparedMaterial { + pub binding: MaterialBindingId, + pub properties: Arc, +} + +// orphan rules T_T +impl ErasedRenderAsset for MeshMaterial3d +where + M::Data: PartialEq + Eq + Hash + Clone, +{ + type SourceAsset = M; + type ErasedAsset = PreparedMaterial; + + type Param = ( + SRes, + SRes, + SResMut, + SResMut, + SRes>, + SRes>, + SRes>, + SRes>, + SRes>, + SRes>, + SRes>, + SRes>, + SRes>, + SRes, + ( + Option>>, + Option>>, + M::Param, + ), + ); + + fn prepare_asset( + material: Self::SourceAsset, + material_id: AssetId, + ( + render_device, + default_opaque_render_method, + bind_group_allocators, + render_material_bindings, + opaque_draw_functions, + alpha_mask_draw_functions, + transmissive_draw_functions, + transparent_draw_functions, + opaque_prepass_draw_functions, + alpha_mask_prepass_draw_functions, + opaque_deferred_draw_functions, + alpha_mask_deferred_draw_functions, + shadow_draw_functions, + asset_server, + (shadows_enabled, prepass_enabled, material_param), + ): &mut SystemParamItem, + ) -> Result> { + let material_layout = M::bind_group_layout(render_device); + + let shadows_enabled = shadows_enabled.is_some(); + let prepass_enabled = prepass_enabled.is_some(); + + let draw_opaque_pbr = opaque_draw_functions.read().id::(); + let draw_alpha_mask_pbr = alpha_mask_draw_functions.read().id::(); + let draw_transmissive_pbr = transmissive_draw_functions.read().id::(); + let draw_transparent_pbr = transparent_draw_functions.read().id::(); + let draw_opaque_prepass = opaque_prepass_draw_functions.read().get_id::(); + let draw_alpha_mask_prepass = alpha_mask_prepass_draw_functions + .read() + .get_id::(); + let draw_opaque_deferred = opaque_deferred_draw_functions + .read() + .get_id::(); + let draw_alpha_mask_deferred = alpha_mask_deferred_draw_functions + .read() + .get_id::(); + let shadow_draw_function_id = shadow_draw_functions.read().get_id::(); + + let render_method = match material.opaque_render_method() { + 
OpaqueRendererMethod::Forward => OpaqueRendererMethod::Forward, + OpaqueRendererMethod::Deferred => OpaqueRendererMethod::Deferred, + OpaqueRendererMethod::Auto => default_opaque_render_method.0, + }; + + let mut mesh_pipeline_key_bits = MeshPipelineKey::empty(); + mesh_pipeline_key_bits.set( + MeshPipelineKey::READS_VIEW_TRANSMISSION_TEXTURE, + material.reads_view_transmission_texture(), + ); + + let reads_view_transmission_texture = + mesh_pipeline_key_bits.contains(MeshPipelineKey::READS_VIEW_TRANSMISSION_TEXTURE); + + let render_phase_type = match material.alpha_mode() { + AlphaMode::Blend | AlphaMode::Premultiplied | AlphaMode::Add | AlphaMode::Multiply => { + RenderPhaseType::Transparent + } + _ if reads_view_transmission_texture => RenderPhaseType::Transmissive, + AlphaMode::Opaque | AlphaMode::AlphaToCoverage => RenderPhaseType::Opaque, + AlphaMode::Mask(_) => RenderPhaseType::AlphaMask, + }; + + let draw_function_id = match render_phase_type { + RenderPhaseType::Opaque => draw_opaque_pbr, + RenderPhaseType::AlphaMask => draw_alpha_mask_pbr, + RenderPhaseType::Transmissive => draw_transmissive_pbr, + RenderPhaseType::Transparent => draw_transparent_pbr, + }; + let prepass_draw_function_id = match render_phase_type { + RenderPhaseType::Opaque => draw_opaque_prepass, + RenderPhaseType::AlphaMask => draw_alpha_mask_prepass, + _ => None, + }; + let deferred_draw_function_id = match render_phase_type { + RenderPhaseType::Opaque => draw_opaque_deferred, + RenderPhaseType::AlphaMask => draw_alpha_mask_deferred, + _ => None, + }; + + let mut draw_functions = SmallVec::new(); + draw_functions.push((MaterialDrawFunction.intern(), draw_function_id)); + if let Some(prepass_draw_function_id) = prepass_draw_function_id { + draw_functions.push((PrepassDrawFunction.intern(), prepass_draw_function_id)); + } + if let Some(deferred_draw_function_id) = deferred_draw_function_id { + draw_functions.push((DeferredDrawFunction.intern(), deferred_draw_function_id)); + } + if let 
Some(shadow_draw_function_id) = shadow_draw_function_id { + draw_functions.push((ShadowsDrawFunction.intern(), shadow_draw_function_id)); + } + + let mut shaders = SmallVec::new(); + let mut add_shader = |label: InternedShaderLabel, shader_ref: ShaderRef| { + let mayber_shader = match shader_ref { + ShaderRef::Default => None, + ShaderRef::Handle(handle) => Some(handle), + ShaderRef::Path(path) => Some(asset_server.load(path)), + }; + if let Some(shader) = mayber_shader { + shaders.push((label, shader)); + } + }; + add_shader(MaterialVertexShader.intern(), M::vertex_shader()); + add_shader(MaterialFragmentShader.intern(), M::fragment_shader()); + add_shader(PrepassVertexShader.intern(), M::prepass_vertex_shader()); + add_shader(PrepassFragmentShader.intern(), M::prepass_fragment_shader()); + add_shader(DeferredVertexShader.intern(), M::deferred_vertex_shader()); + add_shader( + DeferredFragmentShader.intern(), + M::deferred_fragment_shader(), + ); + + #[cfg(feature = "meshlet")] + { + add_shader( + MeshletFragmentShader.intern(), + M::meshlet_mesh_fragment_shader(), + ); + add_shader( + MeshletPrepassFragmentShader.intern(), + M::meshlet_mesh_prepass_fragment_shader(), + ); + add_shader( + MeshletDeferredFragmentShader.intern(), + M::meshlet_mesh_deferred_fragment_shader(), + ); + } + + let bindless = material_uses_bindless_resources::(render_device); + let bind_group_data = material.bind_group_data(); + let material_key = ErasedMaterialKey::new(bind_group_data); + fn specialize( + pipeline: &MaterialPipeline, + descriptor: &mut RenderPipelineDescriptor, + mesh_layout: &MeshVertexBufferLayoutRef, + erased_key: ErasedMaterialPipelineKey, + ) -> Result<(), SpecializedMeshPipelineError> + where + M::Data: Hash + Clone, + { + let material_key = erased_key.material_key.to_key(); + M::specialize( + pipeline, + descriptor, + mesh_layout, + MaterialPipelineKey { + mesh_key: erased_key.mesh_key, + bind_group_data: material_key, + }, + ) + } + + match 
material.unprepared_bind_group(&material_layout, render_device, material_param, false) + { + Ok(unprepared) => { + let bind_group_allocator = + bind_group_allocators.get_mut(&TypeId::of::()).unwrap(); + // Allocate or update the material. + let binding = match render_material_bindings.entry(material_id.into()) { + Entry::Occupied(mut occupied_entry) => { + // TODO: Have a fast path that doesn't require + // recreating the bind group if only buffer contents + // change. For now, we just delete and recreate the bind + // group. + bind_group_allocator.free(*occupied_entry.get()); + let new_binding = + bind_group_allocator.allocate_unprepared(unprepared, &material_layout); + *occupied_entry.get_mut() = new_binding; + new_binding + } + Entry::Vacant(vacant_entry) => *vacant_entry.insert( + bind_group_allocator.allocate_unprepared(unprepared, &material_layout), + ), + }; + + Ok(PreparedMaterial { + binding, + properties: Arc::new(MaterialProperties { + alpha_mode: material.alpha_mode(), + depth_bias: material.depth_bias(), + reads_view_transmission_texture, + render_phase_type, + render_method, + mesh_pipeline_key_bits, + material_layout: Some(material_layout), + draw_functions, + shaders, + bindless, + specialize: Some(specialize::), + material_key, + shadows_enabled, + prepass_enabled, + }), + }) + } + + Err(AsBindGroupError::RetryNextUpdate) => { + Err(PrepareAssetError::RetryNextUpdate(material)) + } + + Err(AsBindGroupError::CreateBindGroupDirectly) => { + // This material has opted out of automatic bind group creation + // and is requesting a fully-custom bind group. Invoke + // `as_bind_group` as requested, and store the resulting bind + // group in the slot. + match material.as_bind_group(&material_layout, render_device, material_param) { + Ok(prepared_bind_group) => { + let bind_group_allocator = + bind_group_allocators.get_mut(&TypeId::of::()).unwrap(); + // Store the resulting bind group directly in the slot. 
+ let material_binding_id = + bind_group_allocator.allocate_prepared(prepared_bind_group); + render_material_bindings.insert(material_id.into(), material_binding_id); + + Ok(PreparedMaterial { + binding: material_binding_id, + properties: Arc::new(MaterialProperties { + alpha_mode: material.alpha_mode(), + depth_bias: material.depth_bias(), + reads_view_transmission_texture, + render_phase_type, + render_method, + mesh_pipeline_key_bits, + material_layout: Some(material_layout), + draw_functions, + shaders, + bindless, + specialize: Some(specialize::), + material_key, + shadows_enabled, + prepass_enabled, + }), + }) + } + + Err(AsBindGroupError::RetryNextUpdate) => { + Err(PrepareAssetError::RetryNextUpdate(material)) + } + + Err(other) => Err(PrepareAssetError::AsBindGroupError(other)), + } + } + + Err(other) => Err(PrepareAssetError::AsBindGroupError(other)), + } + } + + fn unload_asset( + source_asset: AssetId, + (_, _, bind_group_allocators, render_material_bindings, ..): &mut SystemParamItem< + Self::Param, + >, + ) { + let Some(material_binding_id) = render_material_bindings.remove(&source_asset.untyped()) + else { + return; + }; + let bind_group_allactor = bind_group_allocators.get_mut(&TypeId::of::()).unwrap(); + bind_group_allactor.free(material_binding_id); + } +} + +/// Creates and/or recreates any bind groups that contain materials that were +/// modified this frame. +pub fn prepare_material_bind_groups( + mut allocators: ResMut, + render_device: Res, + fallback_image: Res, + fallback_resources: Res, +) { + for (_, allocator) in allocators.iter_mut() { + allocator.prepare_bind_groups(&render_device, &fallback_resources, &fallback_image); + } +} + +/// Uploads the contents of all buffers that the [`MaterialBindGroupAllocator`] +/// manages to the GPU. +/// +/// Non-bindless allocators don't currently manage any buffers, so this method +/// only has an effect for bindless allocators. 
+pub fn write_material_bind_group_buffers( + mut allocators: ResMut, + render_device: Res, + render_queue: Res, +) { + for (_, allocator) in allocators.iter_mut() { + allocator.write_buffers(&render_device, &render_queue); + } +} + +/// Marker resource for whether shadows are enabled for this material type +#[derive(Resource, Debug)] +pub struct ShadowsEnabled(PhantomData); + +impl Default for ShadowsEnabled { + fn default() -> Self { + Self(PhantomData) + } +} diff --git a/crates/libmarathon/src/render/pbr/material_bind_groups.rs b/crates/libmarathon/src/render/pbr/material_bind_groups.rs new file mode 100644 index 0000000..be5fc9c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/material_bind_groups.rs @@ -0,0 +1,1996 @@ +//! Material bind group management for bindless resources. +//! +//! In bindless mode, Bevy's renderer groups materials into bind groups. This +//! allocator manages each bind group, assigning slots to materials as +//! appropriate. + +use crate::render::pbr::Material; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + resource::Resource, + system::{Commands, Res}, +}; +use bevy_platform::collections::{HashMap, HashSet}; +use bevy_reflect::{prelude::ReflectDefault, Reflect}; +use crate::render::render_resource::BindlessSlabResourceLimit; +use crate::render::{ + render_resource::{ + BindGroup, BindGroupEntry, BindGroupLayout, BindingNumber, BindingResource, + BindingResources, BindlessDescriptor, BindlessIndex, BindlessIndexTableDescriptor, + BindlessResourceType, Buffer, BufferBinding, BufferDescriptor, BufferId, + BufferInitDescriptor, BufferUsages, CompareFunction, FilterMode, OwnedBindingResource, + PreparedBindGroup, RawBufferVec, Sampler, SamplerDescriptor, SamplerId, TextureView, + TextureViewDimension, TextureViewId, UnpreparedBindGroup, WgpuSampler, WgpuTextureView, + }, + renderer::{RenderDevice, RenderQueue}, + settings::WgpuFeatures, + texture::FallbackImage, +}; +use bevy_utils::{default, TypeIdMap}; +use bytemuck::Pod; 
+use core::hash::Hash; +use core::{cmp::Ordering, iter, mem, ops::Range}; +use tracing::{error, trace}; + +#[derive(Resource, Deref, DerefMut, Default)] +pub struct MaterialBindGroupAllocators(TypeIdMap); + +/// A resource that places materials into bind groups and tracks their +/// resources. +/// +/// Internally, Bevy has separate allocators for bindless and non-bindless +/// materials. This resource provides a common interface to the specific +/// allocator in use. +pub enum MaterialBindGroupAllocator { + /// The allocator used when the material is bindless. + Bindless(Box), + /// The allocator used when the material is non-bindless. + NonBindless(Box), +} + +/// The allocator that places bindless materials into bind groups and tracks +/// their resources. +pub struct MaterialBindGroupBindlessAllocator { + /// The label of the bind group allocator to use for allocated buffers. + label: Option<&'static str>, + /// The slabs, each of which contains a bind group. + slabs: Vec, + /// The layout of the bind groups that we produce. + bind_group_layout: BindGroupLayout, + /// Information about the bindless resources in the material. + /// + /// We use this information to create and maintain bind groups. + bindless_descriptor: BindlessDescriptor, + + /// Dummy buffers that we use to fill empty slots in buffer binding arrays. + /// + /// There's one fallback buffer for each buffer in the bind group, each + /// appropriately sized. Each buffer contains one uninitialized element of + /// the applicable type. + fallback_buffers: HashMap, + + /// The maximum number of resources that can be stored in a slab. + /// + /// This corresponds to `SLAB_CAPACITY` in the `#[bindless(SLAB_CAPACITY)]` + /// attribute, when deriving `AsBindGroup`. + slab_capacity: u32, +} + +/// A single bind group and the bookkeeping necessary to allocate into it. +pub struct MaterialBindlessSlab { + /// The current bind group, if it's up to date. 
+ /// + /// If this is `None`, then the bind group is dirty and needs to be + /// regenerated. + bind_group: Option, + + /// The GPU-accessible buffers that hold the mapping from binding index to + /// bindless slot. + /// + /// This is conventionally assigned to bind group binding 0, but it can be + /// changed using the `#[bindless(index_table(binding(B)))]` attribute on + /// `AsBindGroup`. + /// + /// Because the slab binary searches this table, the entries within must be + /// sorted by bindless index. + bindless_index_tables: Vec, + + /// The binding arrays containing samplers. + samplers: HashMap>, + /// The binding arrays containing textures. + textures: HashMap>, + /// The binding arrays containing buffers. + buffers: HashMap>, + /// The buffers that contain plain old data (i.e. the structure-level + /// `#[data]` attribute of `AsBindGroup`). + data_buffers: HashMap, + + /// A list of free slot IDs. + free_slots: Vec, + /// The total number of materials currently allocated in this slab. + live_allocation_count: u32, + /// The total number of resources currently allocated in the binding arrays. + allocated_resource_count: u32, +} + +/// A GPU-accessible buffer that holds the mapping from binding index to +/// bindless slot. +/// +/// This is conventionally assigned to bind group binding 0, but it can be +/// changed by altering the [`Self::binding_number`], which corresponds to the +/// `#[bindless(index_table(binding(B)))]` attribute in `AsBindGroup`. +struct MaterialBindlessIndexTable { + /// The buffer containing the mappings. + buffer: RetainedRawBufferVec, + /// The range of bindless indices that this bindless index table covers. + /// + /// If this range is M..N, then the field at index $i$ maps to bindless + /// index $i$ + M. The size of this table is N - M. + /// + /// This corresponds to the `#[bindless(index_table(range(M..N)))]` + /// attribute in `AsBindGroup`. 
+ index_range: Range, + /// The binding number that this index table is assigned to in the shader. + binding_number: BindingNumber, +} + +/// A single binding array for storing bindless resources and the bookkeeping +/// necessary to allocate into it. +struct MaterialBindlessBindingArray +where + R: GetBindingResourceId, +{ + /// The number of the binding that we attach this binding array to. + binding_number: BindingNumber, + /// A mapping from bindless slot index to the resource stored in that slot, + /// if any. + bindings: Vec>>, + /// The type of resource stored in this binding array. + resource_type: BindlessResourceType, + /// Maps a resource ID to the slot in which it's stored. + /// + /// This is essentially the inverse mapping of [`Self::bindings`]. + resource_to_slot: HashMap, + /// A list of free slots in [`Self::bindings`] that contain no binding. + free_slots: Vec, + /// The number of allocated objects in this binding array. + len: u32, +} + +/// A single resource (sampler, texture, or buffer) in a binding array. +/// +/// Resources hold a reference count, which specifies the number of materials +/// currently allocated within the slab that refer to this resource. When the +/// reference count drops to zero, the resource is freed. +struct MaterialBindlessBinding +where + R: GetBindingResourceId, +{ + /// The sampler, texture, or buffer. + resource: R, + /// The number of materials currently allocated within the containing slab + /// that use this resource. + ref_count: u32, +} + +/// The allocator that stores bind groups for non-bindless materials. +pub struct MaterialBindGroupNonBindlessAllocator { + /// The label of the bind group allocator to use for allocated buffers. + label: Option<&'static str>, + /// A mapping from [`MaterialBindGroupIndex`] to the bind group allocated in + /// each slot. + bind_groups: Vec>, + /// The bind groups that are dirty and need to be prepared. 
+ /// + /// To prepare the bind groups, call + /// [`MaterialBindGroupAllocator::prepare_bind_groups`]. + to_prepare: HashSet, + /// A list of free bind group indices. + free_indices: Vec, +} + +/// A single bind group that a [`MaterialBindGroupNonBindlessAllocator`] is +/// currently managing. +enum MaterialNonBindlessAllocatedBindGroup { + /// An unprepared bind group. + /// + /// The allocator prepares all outstanding unprepared bind groups when + /// [`MaterialBindGroupNonBindlessAllocator::prepare_bind_groups`] is + /// called. + Unprepared { + /// The unprepared bind group, including extra data. + bind_group: UnpreparedBindGroup, + /// The layout of that bind group. + layout: BindGroupLayout, + }, + /// A bind group that's already been prepared. + Prepared { + bind_group: PreparedBindGroup, + #[expect(dead_code, reason = "These buffers are only referenced by bind groups")] + uniform_buffers: Vec, + }, +} + +/// Dummy instances of various resources that we fill unused slots in binding +/// arrays with. +#[derive(Resource)] +pub struct FallbackBindlessResources { + /// A dummy filtering sampler. + filtering_sampler: Sampler, + /// A dummy non-filtering sampler. + non_filtering_sampler: Sampler, + /// A dummy comparison sampler. + comparison_sampler: Sampler, +} + +/// The `wgpu` ID of a single bindless or non-bindless resource. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +enum BindingResourceId { + /// A buffer. + Buffer(BufferId), + /// A texture view, with the given dimension. + TextureView(TextureViewDimension, TextureViewId), + /// A sampler. + Sampler(SamplerId), + /// A buffer containing plain old data. + /// + /// This corresponds to the `#[data]` structure-level attribute on + /// `AsBindGroup`. + DataBuffer, +} + +/// A temporary list of references to `wgpu` bindless resources. +/// +/// We need this because the `wgpu` bindless API takes a slice of references. 
+/// Thus we need to create intermediate vectors of bindless resources in order +/// to satisfy `wgpu`'s lifetime requirements. +enum BindingResourceArray<'a> { + /// A list of bindings. + Buffers(Vec>), + /// A list of texture views. + TextureViews(Vec<&'a WgpuTextureView>), + /// A list of samplers. + Samplers(Vec<&'a WgpuSampler>), +} + +/// The location of a material (either bindless or non-bindless) within the +/// slabs. +#[derive(Clone, Copy, Debug, Default, Reflect)] +#[reflect(Clone, Default)] +pub struct MaterialBindingId { + /// The index of the bind group (slab) where the GPU data is located. + pub group: MaterialBindGroupIndex, + /// The slot within that bind group. + /// + /// Non-bindless materials will always have a slot of 0. + pub slot: MaterialBindGroupSlot, +} + +/// The index of each material bind group. +/// +/// In bindless mode, each bind group contains multiple materials. In +/// non-bindless mode, each bind group contains only one material. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Reflect, Deref, DerefMut)] +#[reflect(Default, Clone, PartialEq, Hash)] +pub struct MaterialBindGroupIndex(pub u32); + +impl From for MaterialBindGroupIndex { + fn from(value: u32) -> Self { + MaterialBindGroupIndex(value) + } +} + +/// The index of the slot containing material data within each material bind +/// group. +/// +/// In bindless mode, this slot is needed to locate the material data in each +/// bind group, since multiple materials are packed into a single slab. In +/// non-bindless mode, this slot is always 0. +#[derive(Clone, Copy, Debug, Default, PartialEq, Reflect, Deref, DerefMut)] +#[reflect(Default, Clone, PartialEq)] +pub struct MaterialBindGroupSlot(pub u32); + +/// The CPU/GPU synchronization state of a buffer that we maintain. +/// +/// Currently, the only buffer that we maintain is the +/// [`MaterialBindlessIndexTable`]. +enum BufferDirtyState { + /// The buffer is currently synchronized between the CPU and GPU. 
+ Clean, + /// The buffer hasn't been created yet. + NeedsReserve, + /// The buffer exists on both CPU and GPU, but the GPU data is out of date. + NeedsUpload, +} + +/// Information that describes a potential allocation of an +/// [`UnpreparedBindGroup`] into a slab. +struct BindlessAllocationCandidate { + /// A map that, for every resource in the [`UnpreparedBindGroup`] that + /// already existed in this slab, maps bindless index of that resource to + /// its slot in the appropriate binding array. + pre_existing_resources: HashMap, + /// Stores the number of free slots that are needed to satisfy this + /// allocation. + needed_free_slots: u32, +} + +/// A trait that allows fetching the [`BindingResourceId`] from a +/// [`BindlessResourceType`]. +/// +/// This is used when freeing bindless resources, in order to locate the IDs +/// assigned to each resource so that they can be removed from the appropriate +/// maps. +trait GetBindingResourceId { + /// Returns the [`BindingResourceId`] for this resource. + /// + /// `resource_type` specifies this resource's type. This is used for + /// textures, as a `wgpu` [`TextureView`] doesn't store enough information + /// itself to determine its dimension. + fn binding_resource_id(&self, resource_type: BindlessResourceType) -> BindingResourceId; +} + +/// The public interface to a slab, which represents a single bind group. +pub struct MaterialSlab<'a>(MaterialSlabImpl<'a>); + +/// The actual implementation of a material slab. +/// +/// This has bindless and non-bindless variants. +enum MaterialSlabImpl<'a> { + /// The implementation of the slab interface we use when the slab + /// is bindless. + Bindless(&'a MaterialBindlessSlab), + /// The implementation of the slab interface we use when the slab + /// is non-bindless. + NonBindless(MaterialNonBindlessSlab<'a>), +} + +/// A single bind group that the [`MaterialBindGroupNonBindlessAllocator`] +/// manages. 
+enum MaterialNonBindlessSlab<'a> { + /// A slab that has a bind group. + Prepared(&'a PreparedBindGroup), + /// A slab that doesn't yet have a bind group. + Unprepared, +} + +/// Manages an array of untyped plain old data on GPU and allocates individual +/// slots within that array. +/// +/// This supports the `#[data]` attribute of `AsBindGroup`. +struct MaterialDataBuffer { + /// The number of the binding that we attach this storage buffer to. + binding_number: BindingNumber, + /// The actual data. + /// + /// Note that this is untyped (`u8`); the actual aligned size of each + /// element is given by [`Self::aligned_element_size`]; + buffer: RetainedRawBufferVec, + /// The size of each element in the buffer, including padding and alignment + /// if any. + aligned_element_size: u32, + /// A list of free slots within the buffer. + free_slots: Vec, + /// The actual number of slots that have been allocated. + len: u32, +} + +/// A buffer containing plain old data, already packed into the appropriate GPU +/// format, and that can be updated incrementally. +/// +/// This structure exists in order to encapsulate the lazy update +/// ([`BufferDirtyState`]) logic in a single place. +#[derive(Deref, DerefMut)] +struct RetainedRawBufferVec +where + T: Pod, +{ + /// The contents of the buffer. + #[deref] + buffer: RawBufferVec, + /// Whether the contents of the buffer have been uploaded to the GPU. + dirty: BufferDirtyState, +} + +/// The size of the buffer that we assign to unused buffer slots, in bytes. +/// +/// This is essentially arbitrary, as it doesn't seem to matter to `wgpu` what +/// the size is. 
+const DEFAULT_BINDLESS_FALLBACK_BUFFER_SIZE: u64 = 16; + +impl From for MaterialBindGroupSlot { + fn from(value: u32) -> Self { + MaterialBindGroupSlot(value) + } +} + +impl From for u32 { + fn from(value: MaterialBindGroupSlot) -> Self { + value.0 + } +} + +impl<'a> From<&'a OwnedBindingResource> for BindingResourceId { + fn from(value: &'a OwnedBindingResource) -> Self { + match *value { + OwnedBindingResource::Buffer(ref buffer) => BindingResourceId::Buffer(buffer.id()), + OwnedBindingResource::Data(_) => BindingResourceId::DataBuffer, + OwnedBindingResource::TextureView(ref texture_view_dimension, ref texture_view) => { + BindingResourceId::TextureView(*texture_view_dimension, texture_view.id()) + } + OwnedBindingResource::Sampler(_, ref sampler) => { + BindingResourceId::Sampler(sampler.id()) + } + } + } +} + +impl GetBindingResourceId for Buffer { + fn binding_resource_id(&self, _: BindlessResourceType) -> BindingResourceId { + BindingResourceId::Buffer(self.id()) + } +} + +impl GetBindingResourceId for Sampler { + fn binding_resource_id(&self, _: BindlessResourceType) -> BindingResourceId { + BindingResourceId::Sampler(self.id()) + } +} + +impl GetBindingResourceId for TextureView { + fn binding_resource_id(&self, resource_type: BindlessResourceType) -> BindingResourceId { + let texture_view_dimension = match resource_type { + BindlessResourceType::Texture1d => TextureViewDimension::D1, + BindlessResourceType::Texture2d => TextureViewDimension::D2, + BindlessResourceType::Texture2dArray => TextureViewDimension::D2Array, + BindlessResourceType::Texture3d => TextureViewDimension::D3, + BindlessResourceType::TextureCube => TextureViewDimension::Cube, + BindlessResourceType::TextureCubeArray => TextureViewDimension::CubeArray, + _ => panic!("Resource type is not a texture"), + }; + BindingResourceId::TextureView(texture_view_dimension, self.id()) + } +} + +impl MaterialBindGroupAllocator { + /// Creates a new [`MaterialBindGroupAllocator`] managing the data for 
a + /// single material. + pub fn new( + render_device: &RenderDevice, + label: Option<&'static str>, + bindless_descriptor: Option, + bind_group_layout: BindGroupLayout, + slab_capacity: Option, + ) -> MaterialBindGroupAllocator { + if let Some(bindless_descriptor) = bindless_descriptor { + MaterialBindGroupAllocator::Bindless(Box::new(MaterialBindGroupBindlessAllocator::new( + render_device, + label, + bindless_descriptor, + bind_group_layout, + slab_capacity, + ))) + } else { + MaterialBindGroupAllocator::NonBindless(Box::new( + MaterialBindGroupNonBindlessAllocator::new(label), + )) + } + } + + /// Returns the slab with the given index, if one exists. + pub fn get(&self, group: MaterialBindGroupIndex) -> Option> { + match *self { + MaterialBindGroupAllocator::Bindless(ref bindless_allocator) => bindless_allocator + .get(group) + .map(|bindless_slab| MaterialSlab(MaterialSlabImpl::Bindless(bindless_slab))), + MaterialBindGroupAllocator::NonBindless(ref non_bindless_allocator) => { + non_bindless_allocator.get(group).map(|non_bindless_slab| { + MaterialSlab(MaterialSlabImpl::NonBindless(non_bindless_slab)) + }) + } + } + } + + /// Allocates an [`UnpreparedBindGroup`] and returns the resulting binding ID. + /// + /// This method should generally be preferred over + /// [`Self::allocate_prepared`], because this method supports both bindless + /// and non-bindless bind groups. Only use [`Self::allocate_prepared`] if + /// you need to prepare the bind group yourself. 
+ pub fn allocate_unprepared( + &mut self, + unprepared_bind_group: UnpreparedBindGroup, + bind_group_layout: &BindGroupLayout, + ) -> MaterialBindingId { + match *self { + MaterialBindGroupAllocator::Bindless( + ref mut material_bind_group_bindless_allocator, + ) => material_bind_group_bindless_allocator.allocate_unprepared(unprepared_bind_group), + MaterialBindGroupAllocator::NonBindless( + ref mut material_bind_group_non_bindless_allocator, + ) => material_bind_group_non_bindless_allocator + .allocate_unprepared(unprepared_bind_group, (*bind_group_layout).clone()), + } + } + + /// Places a pre-prepared bind group into a slab. + /// + /// For bindless materials, the allocator internally manages the bind + /// groups, so calling this method will panic if this is a bindless + /// allocator. Only non-bindless allocators support this method. + /// + /// It's generally preferred to use [`Self::allocate_unprepared`], because + /// that method supports both bindless and non-bindless allocators. Only use + /// this method if you need to prepare the bind group yourself. + pub fn allocate_prepared( + &mut self, + prepared_bind_group: PreparedBindGroup, + ) -> MaterialBindingId { + match *self { + MaterialBindGroupAllocator::Bindless(_) => { + panic!( + "Bindless resources are incompatible with implementing `as_bind_group` \ + directly; implement `unprepared_bind_group` instead or disable bindless" + ) + } + MaterialBindGroupAllocator::NonBindless(ref mut non_bindless_allocator) => { + non_bindless_allocator.allocate_prepared(prepared_bind_group) + } + } + } + + /// Deallocates the material with the given binding ID. + /// + /// Any resources that are no longer referenced are removed from the slab. 
+ pub fn free(&mut self, material_binding_id: MaterialBindingId) { + match *self { + MaterialBindGroupAllocator::Bindless( + ref mut material_bind_group_bindless_allocator, + ) => material_bind_group_bindless_allocator.free(material_binding_id), + MaterialBindGroupAllocator::NonBindless( + ref mut material_bind_group_non_bindless_allocator, + ) => material_bind_group_non_bindless_allocator.free(material_binding_id), + } + } + + /// Recreates any bind groups corresponding to slabs that have been modified + /// since last calling [`MaterialBindGroupAllocator::prepare_bind_groups`]. + pub fn prepare_bind_groups( + &mut self, + render_device: &RenderDevice, + fallback_bindless_resources: &FallbackBindlessResources, + fallback_image: &FallbackImage, + ) { + match *self { + MaterialBindGroupAllocator::Bindless( + ref mut material_bind_group_bindless_allocator, + ) => material_bind_group_bindless_allocator.prepare_bind_groups( + render_device, + fallback_bindless_resources, + fallback_image, + ), + MaterialBindGroupAllocator::NonBindless( + ref mut material_bind_group_non_bindless_allocator, + ) => material_bind_group_non_bindless_allocator.prepare_bind_groups(render_device), + } + } + + /// Uploads the contents of all buffers that this + /// [`MaterialBindGroupAllocator`] manages to the GPU. + /// + /// Non-bindless allocators don't currently manage any buffers, so this + /// method only has an effect for bindless allocators. + pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match *self { + MaterialBindGroupAllocator::Bindless( + ref mut material_bind_group_bindless_allocator, + ) => material_bind_group_bindless_allocator.write_buffers(render_device, render_queue), + MaterialBindGroupAllocator::NonBindless(_) => { + // Not applicable. + } + } + } +} + +impl MaterialBindlessIndexTable { + /// Creates a new [`MaterialBindlessIndexTable`] for a single slab. 
+ fn new( + bindless_index_table_descriptor: &BindlessIndexTableDescriptor, + ) -> MaterialBindlessIndexTable { + // Preallocate space for one bindings table, so that there will always be a buffer. + let mut buffer = RetainedRawBufferVec::new(BufferUsages::STORAGE); + for _ in *bindless_index_table_descriptor.indices.start + ..*bindless_index_table_descriptor.indices.end + { + buffer.push(0); + } + + MaterialBindlessIndexTable { + buffer, + index_range: bindless_index_table_descriptor.indices.clone(), + binding_number: bindless_index_table_descriptor.binding_number, + } + } + + /// Returns the bindings in the binding index table. + /// + /// If the current [`MaterialBindlessIndexTable::index_range`] is M..N, then + /// element *i* of the returned binding index table contains the slot of the + /// bindless resource with bindless index *i* + M. + fn get(&self, slot: MaterialBindGroupSlot) -> &[u32] { + let struct_size = *self.index_range.end as usize - *self.index_range.start as usize; + let start = struct_size * slot.0 as usize; + &self.buffer.values()[start..(start + struct_size)] + } + + /// Returns a single binding from the binding index table. + fn get_binding( + &self, + slot: MaterialBindGroupSlot, + bindless_index: BindlessIndex, + ) -> Option { + if bindless_index < self.index_range.start || bindless_index >= self.index_range.end { + return None; + } + self.get(slot) + .get((*bindless_index - *self.index_range.start) as usize) + .copied() + } + + fn table_length(&self) -> u32 { + self.index_range.end.0 - self.index_range.start.0 + } + + /// Updates the binding index table for a single material. + /// + /// The `allocated_resource_slots` map contains a mapping from the + /// [`BindlessIndex`] of each resource that the material references to the + /// slot that that resource occupies in the appropriate binding array. This + /// method serializes that map into a binding index table that the shader + /// can read. 
+ fn set( + &mut self, + slot: MaterialBindGroupSlot, + allocated_resource_slots: &HashMap, + ) { + let table_len = self.table_length() as usize; + let range = (slot.0 as usize * table_len)..((slot.0 as usize + 1) * table_len); + while self.buffer.len() < range.end { + self.buffer.push(0); + } + + for (&bindless_index, &resource_slot) in allocated_resource_slots { + if self.index_range.contains(&bindless_index) { + self.buffer.set( + *bindless_index + range.start as u32 - *self.index_range.start, + resource_slot, + ); + } + } + + // Mark the buffer as needing to be recreated, in case we grew it. + self.buffer.dirty = BufferDirtyState::NeedsReserve; + } + + /// Returns the [`BindGroupEntry`] for the index table itself. + fn bind_group_entry(&self) -> BindGroupEntry<'_> { + BindGroupEntry { + binding: *self.binding_number, + resource: self + .buffer + .buffer() + .expect("Bindings buffer must exist") + .as_entire_binding(), + } + } +} + +impl RetainedRawBufferVec +where + T: Pod, +{ + /// Creates a new empty [`RetainedRawBufferVec`] supporting the given + /// [`BufferUsages`]. + fn new(buffer_usages: BufferUsages) -> RetainedRawBufferVec { + RetainedRawBufferVec { + buffer: RawBufferVec::new(buffer_usages), + dirty: BufferDirtyState::NeedsUpload, + } + } + + /// Recreates the GPU backing buffer if needed. + fn prepare(&mut self, render_device: &RenderDevice) { + match self.dirty { + BufferDirtyState::Clean | BufferDirtyState::NeedsUpload => {} + BufferDirtyState::NeedsReserve => { + let capacity = self.buffer.len(); + self.buffer.reserve(capacity, render_device); + self.dirty = BufferDirtyState::NeedsUpload; + } + } + } + + /// Writes the current contents of the buffer to the GPU if necessary. 
+ fn write(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match self.dirty { + BufferDirtyState::Clean => {} + BufferDirtyState::NeedsReserve | BufferDirtyState::NeedsUpload => { + self.buffer.write_buffer(render_device, render_queue); + self.dirty = BufferDirtyState::Clean; + } + } + } +} + +impl MaterialBindGroupBindlessAllocator { + /// Creates a new [`MaterialBindGroupBindlessAllocator`] managing the data + /// for a single bindless material. + fn new( + render_device: &RenderDevice, + label: Option<&'static str>, + bindless_descriptor: BindlessDescriptor, + bind_group_layout: BindGroupLayout, + slab_capacity: Option, + ) -> MaterialBindGroupBindlessAllocator { + let fallback_buffers = bindless_descriptor + .buffers + .iter() + .map(|bindless_buffer_descriptor| { + ( + bindless_buffer_descriptor.bindless_index, + render_device.create_buffer(&BufferDescriptor { + label: Some("bindless fallback buffer"), + size: match bindless_buffer_descriptor.size { + Some(size) => size as u64, + None => DEFAULT_BINDLESS_FALLBACK_BUFFER_SIZE, + }, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + ) + }) + .collect(); + + MaterialBindGroupBindlessAllocator { + label, + slabs: vec![], + bind_group_layout, + bindless_descriptor, + fallback_buffers, + slab_capacity: slab_capacity + .expect("Non-bindless materials should use the non-bindless allocator") + .resolve(), + } + } + + /// Allocates the resources for a single material into a slab and returns + /// the resulting ID. + /// + /// The returned [`MaterialBindingId`] can later be used to fetch the slab + /// that was used. + /// + /// This function can't fail. If all slabs are full, then a new slab is + /// created, and the material is allocated into it. 
+ fn allocate_unprepared( + &mut self, + mut unprepared_bind_group: UnpreparedBindGroup, + ) -> MaterialBindingId { + for (slab_index, slab) in self.slabs.iter_mut().enumerate() { + trace!("Trying to allocate in slab {}", slab_index); + match slab.try_allocate(unprepared_bind_group, self.slab_capacity) { + Ok(slot) => { + return MaterialBindingId { + group: MaterialBindGroupIndex(slab_index as u32), + slot, + }; + } + Err(bind_group) => unprepared_bind_group = bind_group, + } + } + + let group = MaterialBindGroupIndex(self.slabs.len() as u32); + self.slabs + .push(MaterialBindlessSlab::new(&self.bindless_descriptor)); + + // Allocate into the newly-pushed slab. + let Ok(slot) = self + .slabs + .last_mut() + .expect("We just pushed a slab") + .try_allocate(unprepared_bind_group, self.slab_capacity) + else { + panic!("An allocation into an empty slab should always succeed") + }; + + MaterialBindingId { group, slot } + } + + /// Deallocates the material with the given binding ID. + /// + /// Any resources that are no longer referenced are removed from the slab. + fn free(&mut self, material_binding_id: MaterialBindingId) { + self.slabs + .get_mut(material_binding_id.group.0 as usize) + .expect("Slab should exist") + .free(material_binding_id.slot, &self.bindless_descriptor); + } + + /// Returns the slab with the given bind group index. + /// + /// A [`MaterialBindGroupIndex`] can be fetched from a + /// [`MaterialBindingId`]. + fn get(&self, group: MaterialBindGroupIndex) -> Option<&MaterialBindlessSlab> { + self.slabs.get(group.0 as usize) + } + + /// Recreates any bind groups corresponding to slabs that have been modified + /// since last calling + /// [`MaterialBindGroupBindlessAllocator::prepare_bind_groups`]. 
+ fn prepare_bind_groups( + &mut self, + render_device: &RenderDevice, + fallback_bindless_resources: &FallbackBindlessResources, + fallback_image: &FallbackImage, + ) { + for slab in &mut self.slabs { + slab.prepare( + render_device, + self.label, + &self.bind_group_layout, + fallback_bindless_resources, + &self.fallback_buffers, + fallback_image, + &self.bindless_descriptor, + self.slab_capacity, + ); + } + } + + /// Writes any buffers that we're managing to the GPU. + /// + /// Currently, this only consists of the bindless index tables. + fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + for slab in &mut self.slabs { + slab.write_buffer(render_device, render_queue); + } + } +} + +impl MaterialBindlessSlab { + /// Attempts to allocate the given unprepared bind group in this slab. + /// + /// If the allocation succeeds, this method returns the slot that the + /// allocation was placed in. If the allocation fails because the slab was + /// full, this method returns the unprepared bind group back to the caller + /// so that it can try to allocate again. + fn try_allocate( + &mut self, + unprepared_bind_group: UnpreparedBindGroup, + slot_capacity: u32, + ) -> Result { + // Locate pre-existing resources, and determine how many free slots we need. + let Some(allocation_candidate) = self.check_allocation(&unprepared_bind_group) else { + return Err(unprepared_bind_group); + }; + + // Check to see if we have enough free space. + // + // As a special case, note that if *nothing* is allocated in this slab, + // then we always allow a material to be placed in it, regardless of the + // number of bindings the material has. This is so that, if the + // platform's maximum bindless count is set too low to hold even a + // single material, we can still place each material into a separate + // slab instead of failing outright. 
+ if self.allocated_resource_count > 0 + && self.allocated_resource_count + allocation_candidate.needed_free_slots + > slot_capacity + { + trace!("Slab is full, can't allocate"); + return Err(unprepared_bind_group); + } + + // OK, we can allocate in this slab. Assign a slot ID. + let slot = self + .free_slots + .pop() + .unwrap_or(MaterialBindGroupSlot(self.live_allocation_count)); + + // Bump the live allocation count. + self.live_allocation_count += 1; + + // Insert the resources into the binding arrays. + let allocated_resource_slots = + self.insert_resources(unprepared_bind_group.bindings, allocation_candidate); + + // Serialize the allocated resource slots. + for bindless_index_table in &mut self.bindless_index_tables { + bindless_index_table.set(slot, &allocated_resource_slots); + } + + // Invalidate the cached bind group. + self.bind_group = None; + + Ok(slot) + } + + /// Gathers the information needed to determine whether the given unprepared + /// bind group can be allocated in this slab. + fn check_allocation( + &self, + unprepared_bind_group: &UnpreparedBindGroup, + ) -> Option { + let mut allocation_candidate = BindlessAllocationCandidate { + pre_existing_resources: HashMap::default(), + needed_free_slots: 0, + }; + + for &(bindless_index, ref owned_binding_resource) in unprepared_bind_group.bindings.iter() { + let bindless_index = BindlessIndex(bindless_index); + match *owned_binding_resource { + OwnedBindingResource::Buffer(ref buffer) => { + let Some(binding_array) = self.buffers.get(&bindless_index) else { + error!( + "Binding array wasn't present for buffer at index {:?}", + bindless_index + ); + return None; + }; + match binding_array.find(BindingResourceId::Buffer(buffer.id())) { + Some(slot) => { + allocation_candidate + .pre_existing_resources + .insert(bindless_index, slot); + } + None => allocation_candidate.needed_free_slots += 1, + } + } + + OwnedBindingResource::Data(_) => { + // The size of a data buffer is unlimited. 
+ } + + OwnedBindingResource::TextureView(texture_view_dimension, ref texture_view) => { + let bindless_resource_type = BindlessResourceType::from(texture_view_dimension); + match self + .textures + .get(&bindless_resource_type) + .expect("Missing binding array for texture") + .find(BindingResourceId::TextureView( + texture_view_dimension, + texture_view.id(), + )) { + Some(slot) => { + allocation_candidate + .pre_existing_resources + .insert(bindless_index, slot); + } + None => { + allocation_candidate.needed_free_slots += 1; + } + } + } + + OwnedBindingResource::Sampler(sampler_binding_type, ref sampler) => { + let bindless_resource_type = BindlessResourceType::from(sampler_binding_type); + match self + .samplers + .get(&bindless_resource_type) + .expect("Missing binding array for sampler") + .find(BindingResourceId::Sampler(sampler.id())) + { + Some(slot) => { + allocation_candidate + .pre_existing_resources + .insert(bindless_index, slot); + } + None => { + allocation_candidate.needed_free_slots += 1; + } + } + } + } + } + + Some(allocation_candidate) + } + + /// Inserts the given [`BindingResources`] into this slab. + /// + /// Returns a table that maps the bindless index of each resource to its + /// slot in its binding array. + fn insert_resources( + &mut self, + mut binding_resources: BindingResources, + allocation_candidate: BindlessAllocationCandidate, + ) -> HashMap { + let mut allocated_resource_slots = HashMap::default(); + + for (bindless_index, owned_binding_resource) in binding_resources.drain(..) { + let bindless_index = BindlessIndex(bindless_index); + + let pre_existing_slot = allocation_candidate + .pre_existing_resources + .get(&bindless_index); + + // Otherwise, we need to insert it anew. 
+ let binding_resource_id = BindingResourceId::from(&owned_binding_resource); + let increment_allocated_resource_count = match owned_binding_resource { + OwnedBindingResource::Buffer(buffer) => { + let slot = self + .buffers + .get_mut(&bindless_index) + .expect("Buffer binding array should exist") + .insert(binding_resource_id, buffer); + allocated_resource_slots.insert(bindless_index, slot); + + if let Some(pre_existing_slot) = pre_existing_slot { + assert_eq!(*pre_existing_slot, slot); + + false + } else { + true + } + } + OwnedBindingResource::Data(data) => { + if pre_existing_slot.is_some() { + panic!("Data buffers can't be deduplicated") + } + + let slot = self + .data_buffers + .get_mut(&bindless_index) + .expect("Data buffer binding array should exist") + .insert(&data); + allocated_resource_slots.insert(bindless_index, slot); + false + } + OwnedBindingResource::TextureView(texture_view_dimension, texture_view) => { + let bindless_resource_type = BindlessResourceType::from(texture_view_dimension); + let slot = self + .textures + .get_mut(&bindless_resource_type) + .expect("Texture array should exist") + .insert(binding_resource_id, texture_view); + allocated_resource_slots.insert(bindless_index, slot); + + if let Some(pre_existing_slot) = pre_existing_slot { + assert_eq!(*pre_existing_slot, slot); + + false + } else { + true + } + } + OwnedBindingResource::Sampler(sampler_binding_type, sampler) => { + let bindless_resource_type = BindlessResourceType::from(sampler_binding_type); + let slot = self + .samplers + .get_mut(&bindless_resource_type) + .expect("Sampler should exist") + .insert(binding_resource_id, sampler); + allocated_resource_slots.insert(bindless_index, slot); + + if let Some(pre_existing_slot) = pre_existing_slot { + assert_eq!(*pre_existing_slot, slot); + + false + } else { + true + } + } + }; + + // Bump the allocated resource count. 
+ if increment_allocated_resource_count { + self.allocated_resource_count += 1; + } + } + + allocated_resource_slots + } + + /// Removes the material allocated in the given slot, with the given + /// descriptor, from this slab. + fn free(&mut self, slot: MaterialBindGroupSlot, bindless_descriptor: &BindlessDescriptor) { + // Loop through each binding. + for (bindless_index, bindless_resource_type) in + bindless_descriptor.resources.iter().enumerate() + { + let bindless_index = BindlessIndex::from(bindless_index as u32); + let Some(bindless_index_table) = self.get_bindless_index_table(bindless_index) else { + continue; + }; + let Some(bindless_binding) = bindless_index_table.get_binding(slot, bindless_index) + else { + continue; + }; + + // Free the binding. If the resource in question was anything other + // than a data buffer, then it has a reference count and + // consequently we need to decrement it. + let decrement_allocated_resource_count = match *bindless_resource_type { + BindlessResourceType::None => false, + BindlessResourceType::Buffer => self + .buffers + .get_mut(&bindless_index) + .expect("Buffer should exist with that bindless index") + .remove(bindless_binding), + BindlessResourceType::DataBuffer => { + self.data_buffers + .get_mut(&bindless_index) + .expect("Data buffer should exist with that bindless index") + .remove(bindless_binding); + false + } + BindlessResourceType::SamplerFiltering + | BindlessResourceType::SamplerNonFiltering + | BindlessResourceType::SamplerComparison => self + .samplers + .get_mut(bindless_resource_type) + .expect("Sampler array should exist") + .remove(bindless_binding), + BindlessResourceType::Texture1d + | BindlessResourceType::Texture2d + | BindlessResourceType::Texture2dArray + | BindlessResourceType::Texture3d + | BindlessResourceType::TextureCube + | BindlessResourceType::TextureCubeArray => self + .textures + .get_mut(bindless_resource_type) + .expect("Texture array should exist") + .remove(bindless_binding), + }; 
+ + // If the slot is now free, decrement the allocated resource + // count. + if decrement_allocated_resource_count { + self.allocated_resource_count -= 1; + } + } + + // Invalidate the cached bind group. + self.bind_group = None; + + // Release the slot ID. + self.free_slots.push(slot); + self.live_allocation_count -= 1; + } + + /// Recreates the bind group and bindless index table buffer if necessary. + fn prepare( + &mut self, + render_device: &RenderDevice, + label: Option<&'static str>, + bind_group_layout: &BindGroupLayout, + fallback_bindless_resources: &FallbackBindlessResources, + fallback_buffers: &HashMap, + fallback_image: &FallbackImage, + bindless_descriptor: &BindlessDescriptor, + slab_capacity: u32, + ) { + // Create the bindless index table buffers if needed. + for bindless_index_table in &mut self.bindless_index_tables { + bindless_index_table.buffer.prepare(render_device); + } + + // Create any data buffers we were managing if necessary. + for data_buffer in self.data_buffers.values_mut() { + data_buffer.buffer.prepare(render_device); + } + + // Create the bind group if needed. + self.prepare_bind_group( + render_device, + label, + bind_group_layout, + fallback_bindless_resources, + fallback_buffers, + fallback_image, + bindless_descriptor, + slab_capacity, + ); + } + + /// Recreates the bind group if this slab has been changed since the last + /// time we created it. + fn prepare_bind_group( + &mut self, + render_device: &RenderDevice, + label: Option<&'static str>, + bind_group_layout: &BindGroupLayout, + fallback_bindless_resources: &FallbackBindlessResources, + fallback_buffers: &HashMap, + fallback_image: &FallbackImage, + bindless_descriptor: &BindlessDescriptor, + slab_capacity: u32, + ) { + // If the bind group is clean, then do nothing. + if self.bind_group.is_some() { + return; + } + + // Determine whether we need to pad out our binding arrays with dummy + // resources. 
+ let required_binding_array_size = if render_device + .features() + .contains(WgpuFeatures::PARTIALLY_BOUND_BINDING_ARRAY) + { + None + } else { + Some(slab_capacity) + }; + + let binding_resource_arrays = self.create_binding_resource_arrays( + fallback_bindless_resources, + fallback_buffers, + fallback_image, + bindless_descriptor, + required_binding_array_size, + ); + + let mut bind_group_entries: Vec<_> = self + .bindless_index_tables + .iter() + .map(|bindless_index_table| bindless_index_table.bind_group_entry()) + .collect(); + + for &(&binding, ref binding_resource_array) in binding_resource_arrays.iter() { + bind_group_entries.push(BindGroupEntry { + binding, + resource: match *binding_resource_array { + BindingResourceArray::Buffers(ref buffer_bindings) => { + BindingResource::BufferArray(&buffer_bindings[..]) + } + BindingResourceArray::TextureViews(ref texture_views) => { + BindingResource::TextureViewArray(&texture_views[..]) + } + BindingResourceArray::Samplers(ref samplers) => { + BindingResource::SamplerArray(&samplers[..]) + } + }, + }); + } + + // Create bind group entries for any data buffers we're managing. + for data_buffer in self.data_buffers.values() { + bind_group_entries.push(BindGroupEntry { + binding: *data_buffer.binding_number, + resource: data_buffer + .buffer + .buffer() + .expect("Backing data buffer must have been uploaded by now") + .as_entire_binding(), + }); + } + + self.bind_group = + Some(render_device.create_bind_group(label, bind_group_layout, &bind_group_entries)); + } + + /// Writes any buffers that we're managing to the GPU. + /// + /// Currently, this consists of the bindless index table plus any data + /// buffers we're managing. 
+ fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + for bindless_index_table in &mut self.bindless_index_tables { + bindless_index_table + .buffer + .write(render_device, render_queue); + } + + for data_buffer in self.data_buffers.values_mut() { + data_buffer.buffer.write(render_device, render_queue); + } + } + + /// Converts our binding arrays into binding resource arrays suitable for + /// passing to `wgpu`. + fn create_binding_resource_arrays<'a>( + &'a self, + fallback_bindless_resources: &'a FallbackBindlessResources, + fallback_buffers: &'a HashMap, + fallback_image: &'a FallbackImage, + bindless_descriptor: &'a BindlessDescriptor, + required_binding_array_size: Option, + ) -> Vec<(&'a u32, BindingResourceArray<'a>)> { + let mut binding_resource_arrays = vec![]; + + // Build sampler bindings. + self.create_sampler_binding_resource_arrays( + &mut binding_resource_arrays, + fallback_bindless_resources, + required_binding_array_size, + ); + + // Build texture bindings. + self.create_texture_binding_resource_arrays( + &mut binding_resource_arrays, + fallback_image, + required_binding_array_size, + ); + + // Build buffer bindings. + self.create_buffer_binding_resource_arrays( + &mut binding_resource_arrays, + fallback_buffers, + bindless_descriptor, + required_binding_array_size, + ); + + binding_resource_arrays + } + + /// Accumulates sampler binding arrays into binding resource arrays suitable + /// for passing to `wgpu`. + fn create_sampler_binding_resource_arrays<'a, 'b>( + &'a self, + binding_resource_arrays: &'b mut Vec<(&'a u32, BindingResourceArray<'a>)>, + fallback_bindless_resources: &'a FallbackBindlessResources, + required_binding_array_size: Option, + ) { + // We have one binding resource array per sampler type. 
+ for (bindless_resource_type, fallback_sampler) in [ + ( + BindlessResourceType::SamplerFiltering, + &fallback_bindless_resources.filtering_sampler, + ), + ( + BindlessResourceType::SamplerNonFiltering, + &fallback_bindless_resources.non_filtering_sampler, + ), + ( + BindlessResourceType::SamplerComparison, + &fallback_bindless_resources.comparison_sampler, + ), + ] { + let mut sampler_bindings = vec![]; + + match self.samplers.get(&bindless_resource_type) { + Some(sampler_bindless_binding_array) => { + for maybe_bindless_binding in sampler_bindless_binding_array.bindings.iter() { + match *maybe_bindless_binding { + Some(ref bindless_binding) => { + sampler_bindings.push(&*bindless_binding.resource); + } + None => sampler_bindings.push(&**fallback_sampler), + } + } + } + + None => { + // Fill with a single fallback sampler. + sampler_bindings.push(&**fallback_sampler); + } + } + + if let Some(required_binding_array_size) = required_binding_array_size { + sampler_bindings.extend(iter::repeat_n( + &**fallback_sampler, + required_binding_array_size as usize - sampler_bindings.len(), + )); + } + + let binding_number = bindless_resource_type + .binding_number() + .expect("Sampler bindless resource type must have a binding number"); + + binding_resource_arrays.push(( + &**binding_number, + BindingResourceArray::Samplers(sampler_bindings), + )); + } + } + + /// Accumulates texture binding arrays into binding resource arrays suitable + /// for passing to `wgpu`. 
+ fn create_texture_binding_resource_arrays<'a, 'b>( + &'a self, + binding_resource_arrays: &'b mut Vec<(&'a u32, BindingResourceArray<'a>)>, + fallback_image: &'a FallbackImage, + required_binding_array_size: Option, + ) { + for (bindless_resource_type, fallback_image) in [ + (BindlessResourceType::Texture1d, &fallback_image.d1), + (BindlessResourceType::Texture2d, &fallback_image.d2), + ( + BindlessResourceType::Texture2dArray, + &fallback_image.d2_array, + ), + (BindlessResourceType::Texture3d, &fallback_image.d3), + (BindlessResourceType::TextureCube, &fallback_image.cube), + ( + BindlessResourceType::TextureCubeArray, + &fallback_image.cube_array, + ), + ] { + let mut texture_bindings = vec![]; + + let binding_number = bindless_resource_type + .binding_number() + .expect("Texture bindless resource type must have a binding number"); + + match self.textures.get(&bindless_resource_type) { + Some(texture_bindless_binding_array) => { + for maybe_bindless_binding in texture_bindless_binding_array.bindings.iter() { + match *maybe_bindless_binding { + Some(ref bindless_binding) => { + texture_bindings.push(&*bindless_binding.resource); + } + None => texture_bindings.push(&*fallback_image.texture_view), + } + } + } + + None => { + // Fill with a single fallback image. + texture_bindings.push(&*fallback_image.texture_view); + } + } + + if let Some(required_binding_array_size) = required_binding_array_size { + texture_bindings.extend(iter::repeat_n( + &*fallback_image.texture_view, + required_binding_array_size as usize - texture_bindings.len(), + )); + } + + binding_resource_arrays.push(( + binding_number, + BindingResourceArray::TextureViews(texture_bindings), + )); + } + } + + /// Accumulates buffer binding arrays into binding resource arrays suitable + /// for `wgpu`. 
+ fn create_buffer_binding_resource_arrays<'a, 'b>( + &'a self, + binding_resource_arrays: &'b mut Vec<(&'a u32, BindingResourceArray<'a>)>, + fallback_buffers: &'a HashMap, + bindless_descriptor: &'a BindlessDescriptor, + required_binding_array_size: Option, + ) { + for bindless_buffer_descriptor in bindless_descriptor.buffers.iter() { + let Some(buffer_bindless_binding_array) = + self.buffers.get(&bindless_buffer_descriptor.bindless_index) + else { + // This is OK, because index buffers are present in + // `BindlessDescriptor::buffers` but not in + // `BindlessDescriptor::resources`. + continue; + }; + + let fallback_buffer = fallback_buffers + .get(&bindless_buffer_descriptor.bindless_index) + .expect("Fallback buffer should exist"); + + let mut buffer_bindings: Vec<_> = buffer_bindless_binding_array + .bindings + .iter() + .map(|maybe_bindless_binding| { + let buffer = match *maybe_bindless_binding { + None => fallback_buffer, + Some(ref bindless_binding) => &bindless_binding.resource, + }; + BufferBinding { + buffer, + offset: 0, + size: None, + } + }) + .collect(); + + if let Some(required_binding_array_size) = required_binding_array_size { + buffer_bindings.extend(iter::repeat_n( + BufferBinding { + buffer: fallback_buffer, + offset: 0, + size: None, + }, + required_binding_array_size as usize - buffer_bindings.len(), + )); + } + + binding_resource_arrays.push(( + &*buffer_bindless_binding_array.binding_number, + BindingResourceArray::Buffers(buffer_bindings), + )); + } + } + + /// Returns the [`BindGroup`] corresponding to this slab, if it's been + /// prepared. + fn bind_group(&self) -> Option<&BindGroup> { + self.bind_group.as_ref() + } + + /// Returns the bindless index table containing the given bindless index. 
+ fn get_bindless_index_table( + &self, + bindless_index: BindlessIndex, + ) -> Option<&MaterialBindlessIndexTable> { + let table_index = self + .bindless_index_tables + .binary_search_by(|bindless_index_table| { + if bindless_index < bindless_index_table.index_range.start { + Ordering::Less + } else if bindless_index >= bindless_index_table.index_range.end { + Ordering::Greater + } else { + Ordering::Equal + } + }) + .ok()?; + self.bindless_index_tables.get(table_index) + } +} + +impl MaterialBindlessBindingArray +where + R: GetBindingResourceId, +{ + /// Creates a new [`MaterialBindlessBindingArray`] with the given binding + /// number, managing resources of the given type. + fn new( + binding_number: BindingNumber, + resource_type: BindlessResourceType, + ) -> MaterialBindlessBindingArray { + MaterialBindlessBindingArray { + binding_number, + bindings: vec![], + resource_type, + resource_to_slot: HashMap::default(), + free_slots: vec![], + len: 0, + } + } + + /// Returns the slot corresponding to the given resource, if that resource + /// is located in this binding array. + /// + /// If the resource isn't in this binding array, this method returns `None`. + fn find(&self, binding_resource_id: BindingResourceId) -> Option { + self.resource_to_slot.get(&binding_resource_id).copied() + } + + /// Inserts a bindless resource into a binding array and returns the index + /// of the slot it was inserted into. 
+ fn insert(&mut self, binding_resource_id: BindingResourceId, resource: R) -> u32 { + match self.resource_to_slot.entry(binding_resource_id) { + bevy_platform::collections::hash_map::Entry::Occupied(o) => { + let slot = *o.get(); + + self.bindings[slot as usize] + .as_mut() + .expect("A slot in the resource_to_slot map should have a value") + .ref_count += 1; + + slot + } + bevy_platform::collections::hash_map::Entry::Vacant(v) => { + let slot = self.free_slots.pop().unwrap_or(self.len); + v.insert(slot); + + if self.bindings.len() < slot as usize + 1 { + self.bindings.resize_with(slot as usize + 1, || None); + } + self.bindings[slot as usize] = Some(MaterialBindlessBinding::new(resource)); + + self.len += 1; + slot + } + } + } + + /// Removes a reference to an object from the slot. + /// + /// If the reference count dropped to 0 and the object was freed, this + /// method returns true. If the object was still referenced after removing + /// it, returns false. + fn remove(&mut self, slot: u32) -> bool { + let maybe_binding = &mut self.bindings[slot as usize]; + let binding = maybe_binding + .as_mut() + .expect("Attempted to free an already-freed binding"); + + binding.ref_count -= 1; + if binding.ref_count != 0 { + return false; + } + + let binding_resource_id = binding.resource.binding_resource_id(self.resource_type); + self.resource_to_slot.remove(&binding_resource_id); + + *maybe_binding = None; + self.free_slots.push(slot); + self.len -= 1; + true + } +} + +impl MaterialBindlessBinding +where + R: GetBindingResourceId, +{ + /// Creates a new [`MaterialBindlessBinding`] for a freshly-added resource. + /// + /// The reference count is initialized to 1. + fn new(resource: R) -> MaterialBindlessBinding { + MaterialBindlessBinding { + resource, + ref_count: 1, + } + } +} + +/// Returns true if the material will *actually* use bindless resources or false +/// if it won't. 
+/// +/// This takes the platform support (or lack thereof) for bindless resources +/// into account. +pub fn material_uses_bindless_resources(render_device: &RenderDevice) -> bool +where + M: Material, +{ + M::bindless_slot_count().is_some_and(|bindless_slot_count| { + M::bindless_supported(render_device) && bindless_slot_count.resolve() > 1 + }) +} + +impl MaterialBindlessSlab { + /// Creates a new [`MaterialBindlessSlab`] for a material with the given + /// bindless descriptor. + /// + /// We use this when no existing slab could hold a material to be allocated. + fn new(bindless_descriptor: &BindlessDescriptor) -> MaterialBindlessSlab { + let mut buffers = HashMap::default(); + let mut samplers = HashMap::default(); + let mut textures = HashMap::default(); + let mut data_buffers = HashMap::default(); + + for (bindless_index, bindless_resource_type) in + bindless_descriptor.resources.iter().enumerate() + { + let bindless_index = BindlessIndex(bindless_index as u32); + match *bindless_resource_type { + BindlessResourceType::None => {} + BindlessResourceType::Buffer => { + let binding_number = bindless_descriptor + .buffers + .iter() + .find(|bindless_buffer_descriptor| { + bindless_buffer_descriptor.bindless_index == bindless_index + }) + .expect( + "Bindless buffer descriptor matching that bindless index should be \ + present", + ) + .binding_number; + buffers.insert( + bindless_index, + MaterialBindlessBindingArray::new(binding_number, *bindless_resource_type), + ); + } + BindlessResourceType::DataBuffer => { + // Copy the data in. 
+ let buffer_descriptor = bindless_descriptor + .buffers + .iter() + .find(|bindless_buffer_descriptor| { + bindless_buffer_descriptor.bindless_index == bindless_index + }) + .expect( + "Bindless buffer descriptor matching that bindless index should be \ + present", + ); + data_buffers.insert( + bindless_index, + MaterialDataBuffer::new( + buffer_descriptor.binding_number, + buffer_descriptor + .size + .expect("Data buffers should have a size") + as u32, + ), + ); + } + BindlessResourceType::SamplerFiltering + | BindlessResourceType::SamplerNonFiltering + | BindlessResourceType::SamplerComparison => { + samplers.insert( + *bindless_resource_type, + MaterialBindlessBindingArray::new( + *bindless_resource_type.binding_number().unwrap(), + *bindless_resource_type, + ), + ); + } + BindlessResourceType::Texture1d + | BindlessResourceType::Texture2d + | BindlessResourceType::Texture2dArray + | BindlessResourceType::Texture3d + | BindlessResourceType::TextureCube + | BindlessResourceType::TextureCubeArray => { + textures.insert( + *bindless_resource_type, + MaterialBindlessBindingArray::new( + *bindless_resource_type.binding_number().unwrap(), + *bindless_resource_type, + ), + ); + } + } + } + + let bindless_index_tables = bindless_descriptor + .index_tables + .iter() + .map(MaterialBindlessIndexTable::new) + .collect(); + + MaterialBindlessSlab { + bind_group: None, + bindless_index_tables, + samplers, + textures, + buffers, + data_buffers, + free_slots: vec![], + live_allocation_count: 0, + allocated_resource_count: 0, + } + } +} + +pub fn init_fallback_bindless_resources(mut commands: Commands, render_device: Res) { + commands.insert_resource(FallbackBindlessResources { + filtering_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("fallback filtering sampler"), + ..default() + }), + non_filtering_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("fallback non-filtering sampler"), + mag_filter: FilterMode::Nearest, + 
min_filter: FilterMode::Nearest, + mipmap_filter: FilterMode::Nearest, + ..default() + }), + comparison_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("fallback comparison sampler"), + compare: Some(CompareFunction::Always), + ..default() + }), + }); +} + +impl MaterialBindGroupNonBindlessAllocator { + /// Creates a new [`MaterialBindGroupNonBindlessAllocator`] managing the + /// bind groups for a single non-bindless material. + fn new(label: Option<&'static str>) -> MaterialBindGroupNonBindlessAllocator { + MaterialBindGroupNonBindlessAllocator { + label, + bind_groups: vec![], + to_prepare: HashSet::default(), + free_indices: vec![], + } + } + + /// Inserts a bind group, either unprepared or prepared, into this allocator + /// and returns a [`MaterialBindingId`]. + /// + /// The returned [`MaterialBindingId`] can later be used to fetch the bind + /// group. + fn allocate(&mut self, bind_group: MaterialNonBindlessAllocatedBindGroup) -> MaterialBindingId { + let group_id = self + .free_indices + .pop() + .unwrap_or(MaterialBindGroupIndex(self.bind_groups.len() as u32)); + if self.bind_groups.len() < *group_id as usize + 1 { + self.bind_groups + .resize_with(*group_id as usize + 1, || None); + } + + if matches!( + bind_group, + MaterialNonBindlessAllocatedBindGroup::Unprepared { .. } + ) { + self.to_prepare.insert(group_id); + } + + self.bind_groups[*group_id as usize] = Some(bind_group); + + MaterialBindingId { + group: group_id, + slot: default(), + } + } + + /// Inserts an unprepared bind group into this allocator and returns a + /// [`MaterialBindingId`]. 
+ fn allocate_unprepared( + &mut self, + unprepared_bind_group: UnpreparedBindGroup, + bind_group_layout: BindGroupLayout, + ) -> MaterialBindingId { + self.allocate(MaterialNonBindlessAllocatedBindGroup::Unprepared { + bind_group: unprepared_bind_group, + layout: bind_group_layout, + }) + } + + /// Inserts an prepared bind group into this allocator and returns a + /// [`MaterialBindingId`]. + fn allocate_prepared(&mut self, prepared_bind_group: PreparedBindGroup) -> MaterialBindingId { + self.allocate(MaterialNonBindlessAllocatedBindGroup::Prepared { + bind_group: prepared_bind_group, + uniform_buffers: vec![], + }) + } + + /// Deallocates the bind group with the given binding ID. + fn free(&mut self, binding_id: MaterialBindingId) { + debug_assert_eq!(binding_id.slot, MaterialBindGroupSlot(0)); + debug_assert!(self.bind_groups[*binding_id.group as usize].is_some()); + self.bind_groups[*binding_id.group as usize] = None; + self.to_prepare.remove(&binding_id.group); + self.free_indices.push(binding_id.group); + } + + /// Returns a wrapper around the bind group with the given index. + fn get(&self, group: MaterialBindGroupIndex) -> Option> { + self.bind_groups[group.0 as usize] + .as_ref() + .map(|bind_group| match bind_group { + MaterialNonBindlessAllocatedBindGroup::Prepared { bind_group, .. } => { + MaterialNonBindlessSlab::Prepared(bind_group) + } + MaterialNonBindlessAllocatedBindGroup::Unprepared { .. } => { + MaterialNonBindlessSlab::Unprepared + } + }) + } + + /// Prepares any as-yet unprepared bind groups that this allocator is + /// managing. + /// + /// Unprepared bind groups can be added to this allocator with + /// [`Self::allocate_unprepared`]. Such bind groups will defer being + /// prepared until the next time this method is called. 
+ fn prepare_bind_groups(&mut self, render_device: &RenderDevice) { + for bind_group_index in mem::take(&mut self.to_prepare) { + let Some(MaterialNonBindlessAllocatedBindGroup::Unprepared { + bind_group: unprepared_bind_group, + layout: bind_group_layout, + }) = mem::take(&mut self.bind_groups[*bind_group_index as usize]) + else { + panic!("Allocation didn't exist or was already prepared"); + }; + + // Pack any `Data` into uniform buffers. + let mut uniform_buffers = vec![]; + for (index, binding) in unprepared_bind_group.bindings.iter() { + let OwnedBindingResource::Data(ref owned_data) = *binding else { + continue; + }; + let label = format!("material uniform data {}", *index); + let uniform_buffer = render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some(&label), + contents: &owned_data.0, + usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM, + }); + uniform_buffers.push(uniform_buffer); + } + + // Create bind group entries. + let mut bind_group_entries = vec![]; + let mut uniform_buffers_iter = uniform_buffers.iter(); + for (index, binding) in unprepared_bind_group.bindings.iter() { + match *binding { + OwnedBindingResource::Data(_) => { + bind_group_entries.push(BindGroupEntry { + binding: *index, + resource: uniform_buffers_iter + .next() + .expect("We should have created uniform buffers for each `Data`") + .as_entire_binding(), + }); + } + _ => bind_group_entries.push(BindGroupEntry { + binding: *index, + resource: binding.get_binding(), + }), + } + } + + // Create the bind group. 
+ let bind_group = render_device.create_bind_group( + self.label, + &bind_group_layout, + &bind_group_entries, + ); + + self.bind_groups[*bind_group_index as usize] = + Some(MaterialNonBindlessAllocatedBindGroup::Prepared { + bind_group: PreparedBindGroup { + bindings: unprepared_bind_group.bindings, + bind_group, + }, + uniform_buffers, + }); + } + } +} + +impl<'a> MaterialSlab<'a> { + /// Returns the [`BindGroup`] corresponding to this slab, if it's been + /// prepared. + /// + /// You can prepare bind groups by calling + /// [`MaterialBindGroupAllocator::prepare_bind_groups`]. If the bind group + /// isn't ready, this method returns `None`. + pub fn bind_group(&self) -> Option<&'a BindGroup> { + match self.0 { + MaterialSlabImpl::Bindless(material_bindless_slab) => { + material_bindless_slab.bind_group() + } + MaterialSlabImpl::NonBindless(MaterialNonBindlessSlab::Prepared( + prepared_bind_group, + )) => Some(&prepared_bind_group.bind_group), + MaterialSlabImpl::NonBindless(MaterialNonBindlessSlab::Unprepared) => None, + } + } +} + +impl MaterialDataBuffer { + /// Creates a new [`MaterialDataBuffer`] managing a buffer of elements of + /// size `aligned_element_size` that will be bound to the given binding + /// number. + fn new(binding_number: BindingNumber, aligned_element_size: u32) -> MaterialDataBuffer { + MaterialDataBuffer { + binding_number, + buffer: RetainedRawBufferVec::new(BufferUsages::STORAGE), + aligned_element_size, + free_slots: vec![], + len: 0, + } + } + + /// Allocates a slot for a new piece of data, copies the data into that + /// slot, and returns the slot ID. + /// + /// The size of the piece of data supplied to this method must equal the + /// [`Self::aligned_element_size`] provided to [`MaterialDataBuffer::new`]. + fn insert(&mut self, data: &[u8]) -> u32 { + // Make sure the data is of the right length. + debug_assert_eq!(data.len(), self.aligned_element_size as usize); + + // Grab a slot. 
+ let slot = self.free_slots.pop().unwrap_or(self.len); + + // Calculate the range we're going to copy to. + let start = slot as usize * self.aligned_element_size as usize; + let end = (slot as usize + 1) * self.aligned_element_size as usize; + + // Resize the buffer if necessary. + if self.buffer.len() < end { + self.buffer.reserve_internal(end); + } + while self.buffer.values().len() < end { + self.buffer.push(0); + } + + // Copy in the data. + self.buffer.values_mut()[start..end].copy_from_slice(data); + + // Mark the buffer dirty, and finish up. + self.len += 1; + self.buffer.dirty = BufferDirtyState::NeedsReserve; + slot + } + + /// Marks the given slot as free. + fn remove(&mut self, slot: u32) { + self.free_slots.push(slot); + self.len -= 1; + } +} diff --git a/crates/libmarathon/src/render/pbr/mesh_material.rs b/crates/libmarathon/src/render/pbr/mesh_material.rs new file mode 100644 index 0000000..443a3ba --- /dev/null +++ b/crates/libmarathon/src/render/pbr/mesh_material.rs @@ -0,0 +1,75 @@ +use crate::render::pbr::Material; +use bevy_asset::{AsAssetId, AssetId, Handle}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{component::Component, reflect::ReflectComponent}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use derive_more::derive::From; + +/// A [material](Material) used for rendering a [`Mesh3d`]. +/// +/// See [`Material`] for general information about 3D materials and how to implement your own materials. +/// +/// [`Mesh3d`]: bevy_mesh::Mesh3d +/// +/// # Example +/// +/// ``` +/// # use bevy_pbr::{Material, MeshMaterial3d, StandardMaterial}; +/// # use bevy_ecs::prelude::*; +/// # use bevy_mesh::{Mesh, Mesh3d}; +/// # use bevy_color::palettes::basic::RED; +/// # use bevy_asset::Assets; +/// # use bevy_math::primitives::Capsule3d; +/// # +/// // Spawn an entity with a mesh using `StandardMaterial`. 
+/// fn setup( +/// mut commands: Commands, +/// mut meshes: ResMut>, +/// mut materials: ResMut>, +/// ) { +/// commands.spawn(( +/// Mesh3d(meshes.add(Capsule3d::default())), +/// MeshMaterial3d(materials.add(StandardMaterial { +/// base_color: RED.into(), +/// ..Default::default() +/// })), +/// )); +/// } +/// ``` +#[derive(Component, Clone, Debug, Deref, DerefMut, Reflect, From)] +#[reflect(Component, Default, Clone, PartialEq)] +pub struct MeshMaterial3d(pub Handle); + +impl Default for MeshMaterial3d { + fn default() -> Self { + Self(Handle::default()) + } +} + +impl PartialEq for MeshMaterial3d { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for MeshMaterial3d {} + +impl From> for AssetId { + fn from(material: MeshMaterial3d) -> Self { + material.id() + } +} + +impl From<&MeshMaterial3d> for AssetId { + fn from(material: &MeshMaterial3d) -> Self { + material.id() + } +} + +impl AsAssetId for MeshMaterial3d { + type Asset = M; + + fn as_asset_id(&self) -> AssetId { + self.id() + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/asset.rs b/crates/libmarathon/src/render/pbr/meshlet/asset.rs new file mode 100644 index 0000000..6a84dcb --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/asset.rs @@ -0,0 +1,319 @@ +use std::sync::Arc; +use bevy_asset::{ + io::{Reader, Writer}, + saver::{AssetSaver, SavedAsset}, + Asset, AssetLoader, AsyncReadExt, AsyncWriteExt, LoadContext, +}; +use bevy_math::{Vec2, Vec3}; +use bevy_reflect::TypePath; +use crate::render::render_resource::ShaderType; +use bevy_tasks::block_on; +use bytemuck::{Pod, Zeroable}; +use lz4_flex::frame::{FrameDecoder, FrameEncoder}; +use std::io::{Read, Write}; +use thiserror::Error; + +/// Unique identifier for the [`MeshletMesh`] asset format. +const MESHLET_MESH_ASSET_MAGIC: u64 = 1717551717668; + +/// The current version of the [`MeshletMesh`] asset format. 
+pub const MESHLET_MESH_ASSET_VERSION: u64 = 2; + +/// A mesh that has been pre-processed into multiple small clusters of triangles called meshlets. +/// +/// A [`bevy_mesh::Mesh`] can be converted to a [`MeshletMesh`] using `MeshletMesh::from_mesh` when the `meshlet_processor` cargo feature is enabled. +/// The conversion step is very slow, and is meant to be ran once ahead of time, and not during runtime. This type of mesh is not suitable for +/// dynamically generated geometry. +/// +/// There are restrictions on the [`crate::Material`] functionality that can be used with this type of mesh. +/// * Materials have no control over the vertex shader or vertex attributes. +/// * Materials must be opaque. Transparent, alpha masked, and transmissive materials are not supported. +/// * Do not use normal maps baked from higher-poly geometry. Use the high-poly geometry directly and skip the normal map. +/// * If additional detail is needed, a smaller tiling normal map not baked from a mesh is ok. +/// * Material shaders must not use builtin functions that automatically calculate derivatives . +/// * Performing manual arithmetic on texture coordinates (UVs) is forbidden. Use the chain-rule version of arithmetic functions instead (TODO: not yet implemented). +/// * Limited control over [`bevy_render::render_resource::RenderPipelineDescriptor`] attributes. +/// * Materials must use the [`crate::Material::meshlet_mesh_fragment_shader`] method (and similar variants for prepass/deferred shaders) +/// which requires certain shader patterns that differ from the regular material shaders. +/// +/// See also [`super::MeshletMesh3d`] and [`super::MeshletPlugin`]. +#[derive(Asset, TypePath, Clone)] +pub struct MeshletMesh { + /// Quantized and bitstream-packed vertex positions for meshlet vertices. + pub(crate) vertex_positions: Arc<[u32]>, + /// Octahedral-encoded and 2x16snorm packed normals for meshlet vertices. 
+ pub(crate) vertex_normals: Arc<[u32]>, + /// Uncompressed vertex texture coordinates for meshlet vertices. + pub(crate) vertex_uvs: Arc<[Vec2]>, + /// Triangle indices for meshlets. + pub(crate) indices: Arc<[u8]>, + /// The BVH8 used for culling and LOD selection of the meshlets. The root is at index 0. + pub(crate) bvh: Arc<[BvhNode]>, + /// The list of meshlets making up this mesh. + pub(crate) meshlets: Arc<[Meshlet]>, + /// Spherical bounding volumes. + pub(crate) meshlet_cull_data: Arc<[MeshletCullData]>, + /// The tight AABB of the meshlet mesh, used for frustum and occlusion culling at the instance + /// level. + pub(crate) aabb: MeshletAabb, + /// The depth of the culling BVH, used to determine the number of dispatches at runtime. + pub(crate) bvh_depth: u32, +} + +/// A single BVH8 node in the BVH used for culling and LOD selection of a [`MeshletMesh`]. +#[derive(Copy, Clone, Default, Pod, Zeroable)] +#[repr(C)] +pub struct BvhNode { + /// The tight AABBs of this node's children, used for frustum and occlusion during BVH + /// traversal. + pub aabbs: [MeshletAabbErrorOffset; 8], + /// The LOD bounding spheres of this node's children, used for LOD selection during BVH + /// traversal. + pub lod_bounds: [MeshletBoundingSphere; 8], + /// If `u8::MAX`, it indicates that the child of each children is a BVH node, otherwise it is the number of meshlets in the group. + pub child_counts: [u8; 8], + pub _padding: [u32; 2], +} + +/// A single meshlet within a [`MeshletMesh`]. +#[derive(Copy, Clone, Pod, Zeroable)] +#[repr(C)] +pub struct Meshlet { + /// The bit offset within the parent mesh's [`MeshletMesh::vertex_positions`] buffer where the vertex positions for this meshlet begin. + pub start_vertex_position_bit: u32, + /// The offset within the parent mesh's [`MeshletMesh::vertex_normals`] and [`MeshletMesh::vertex_uvs`] buffers + /// where non-position vertex attributes for this meshlet begin. 
+ pub start_vertex_attribute_id: u32, + /// The offset within the parent mesh's [`MeshletMesh::indices`] buffer where the indices for this meshlet begin. + pub start_index_id: u32, + /// The amount of vertices in this meshlet. + pub vertex_count: u8, + /// The amount of triangles in this meshlet. + pub triangle_count: u8, + /// Unused. + pub padding: u16, + /// Number of bits used to store the X channel of vertex positions within this meshlet. + pub bits_per_vertex_position_channel_x: u8, + /// Number of bits used to store the Y channel of vertex positions within this meshlet. + pub bits_per_vertex_position_channel_y: u8, + /// Number of bits used to store the Z channel of vertex positions within this meshlet. + pub bits_per_vertex_position_channel_z: u8, + /// Power of 2 factor used to quantize vertex positions within this meshlet. + pub vertex_position_quantization_factor: u8, + /// Minimum quantized X channel value of vertex positions within this meshlet. + pub min_vertex_position_channel_x: f32, + /// Minimum quantized Y channel value of vertex positions within this meshlet. + pub min_vertex_position_channel_y: f32, + /// Minimum quantized Z channel value of vertex positions within this meshlet. + pub min_vertex_position_channel_z: f32, +} + +/// Bounding spheres used for culling and choosing level of detail for a [`Meshlet`]. +#[derive(Copy, Clone, Pod, Zeroable)] +#[repr(C)] +pub struct MeshletCullData { + /// Tight bounding box, used for frustum and occlusion culling for this meshlet. + pub aabb: MeshletAabbErrorOffset, + /// Bounding sphere used for determining if this meshlet's group is at the correct level of detail for a given view. + pub lod_group_sphere: MeshletBoundingSphere, +} + +/// An axis-aligned bounding box used for a [`Meshlet`]. +#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct MeshletAabb { + pub center: Vec3, + pub half_extent: Vec3, +} + +// An axis-aligned bounding box used for a [`Meshlet`]. 
+#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct MeshletAabbErrorOffset { + pub center: Vec3, + pub error: f32, + pub half_extent: Vec3, + pub child_offset: u32, +} + +/// A spherical bounding volume used for a [`Meshlet`]. +#[derive(Copy, Clone, Default, Pod, Zeroable)] +#[repr(C)] +pub struct MeshletBoundingSphere { + pub center: Vec3, + pub radius: f32, +} + +/// An [`AssetSaver`] for `.meshlet_mesh` [`MeshletMesh`] assets. +pub struct MeshletMeshSaver; + +impl AssetSaver for MeshletMeshSaver { + type Asset = MeshletMesh; + type Settings = (); + type OutputLoader = MeshletMeshLoader; + type Error = MeshletMeshSaveOrLoadError; + + async fn save( + &self, + writer: &mut Writer, + asset: SavedAsset<'_, MeshletMesh>, + _settings: &(), + ) -> Result<(), MeshletMeshSaveOrLoadError> { + // Write asset magic number + writer + .write_all(&MESHLET_MESH_ASSET_MAGIC.to_le_bytes()) + .await?; + + // Write asset version + writer + .write_all(&MESHLET_MESH_ASSET_VERSION.to_le_bytes()) + .await?; + + writer.write_all(bytemuck::bytes_of(&asset.aabb)).await?; + writer + .write_all(bytemuck::bytes_of(&asset.bvh_depth)) + .await?; + + // Compress and write asset data + let mut writer = FrameEncoder::new(AsyncWriteSyncAdapter(writer)); + write_slice(&asset.vertex_positions, &mut writer)?; + write_slice(&asset.vertex_normals, &mut writer)?; + write_slice(&asset.vertex_uvs, &mut writer)?; + write_slice(&asset.indices, &mut writer)?; + write_slice(&asset.bvh, &mut writer)?; + write_slice(&asset.meshlets, &mut writer)?; + write_slice(&asset.meshlet_cull_data, &mut writer)?; + // BUG: Flushing helps with an async_fs bug, but it still fails sometimes. https://github.com/smol-rs/async-fs/issues/45 + // ERROR bevy_asset::server: Failed to load asset with asset loader MeshletMeshLoader: failed to fill whole buffer + writer.flush()?; + writer.finish()?; + + Ok(()) + } +} + +/// An [`AssetLoader`] for `.meshlet_mesh` [`MeshletMesh`] assets. 
+pub struct MeshletMeshLoader; + +impl AssetLoader for MeshletMeshLoader { + type Asset = MeshletMesh; + type Settings = (); + type Error = MeshletMeshSaveOrLoadError; + + async fn load( + &self, + reader: &mut dyn Reader, + _settings: &(), + _load_context: &mut LoadContext<'_>, + ) -> Result { + // Load and check magic number + let magic = async_read_u64(reader).await?; + if magic != MESHLET_MESH_ASSET_MAGIC { + return Err(MeshletMeshSaveOrLoadError::WrongFileType); + } + + // Load and check asset version + let version = async_read_u64(reader).await?; + if version != MESHLET_MESH_ASSET_VERSION { + return Err(MeshletMeshSaveOrLoadError::WrongVersion { found: version }); + } + + let mut bytes = [0u8; size_of::()]; + reader.read_exact(&mut bytes).await?; + let aabb = bytemuck::cast(bytes); + let mut bytes = [0u8; size_of::()]; + reader.read_exact(&mut bytes).await?; + let bvh_depth = u32::from_le_bytes(bytes); + + // Load and decompress asset data + let reader = &mut FrameDecoder::new(AsyncReadSyncAdapter(reader)); + let vertex_positions = read_slice(reader)?; + let vertex_normals = read_slice(reader)?; + let vertex_uvs = read_slice(reader)?; + let indices = read_slice(reader)?; + let bvh = read_slice(reader)?; + let meshlets = read_slice(reader)?; + let meshlet_cull_data = read_slice(reader)?; + + Ok(MeshletMesh { + vertex_positions, + vertex_normals, + vertex_uvs, + indices, + bvh, + meshlets, + meshlet_cull_data, + aabb, + bvh_depth, + }) + } + + fn extensions(&self) -> &[&str] { + &["meshlet_mesh"] + } +} + +#[derive(Error, Debug)] +pub enum MeshletMeshSaveOrLoadError { + #[error("file was not a MeshletMesh asset")] + WrongFileType, + #[error("expected asset version {MESHLET_MESH_ASSET_VERSION} but found version {found}")] + WrongVersion { found: u64 }, + #[error("failed to compress or decompress asset data")] + CompressionOrDecompression(#[from] lz4_flex::frame::Error), + #[error(transparent)] + Io(#[from] std::io::Error), +} + +async fn async_read_u64(reader: 
&mut dyn Reader) -> Result { + let mut bytes = [0u8; 8]; + reader.read_exact(&mut bytes).await?; + Ok(u64::from_le_bytes(bytes)) +} + +fn read_u64(reader: &mut dyn Read) -> Result { + let mut bytes = [0u8; 8]; + reader.read_exact(&mut bytes)?; + Ok(u64::from_le_bytes(bytes)) +} + +fn write_slice( + field: &[T], + writer: &mut dyn Write, +) -> Result<(), MeshletMeshSaveOrLoadError> { + writer.write_all(&(field.len() as u64).to_le_bytes())?; + writer.write_all(bytemuck::cast_slice(field))?; + Ok(()) +} + +fn read_slice(reader: &mut dyn Read) -> Result, std::io::Error> { + let len = read_u64(reader)? as usize; + + let mut data: Arc<[T]> = core::iter::repeat_with(T::zeroed).take(len).collect(); + let slice = Arc::get_mut(&mut data).unwrap(); + reader.read_exact(bytemuck::cast_slice_mut(slice))?; + + Ok(data) +} + +// TODO: Use async for everything and get rid of this adapter +struct AsyncWriteSyncAdapter<'a>(&'a mut Writer); + +impl Write for AsyncWriteSyncAdapter<'_> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + block_on(self.0.write(buf)) + } + + fn flush(&mut self) -> std::io::Result<()> { + block_on(self.0.flush()) + } +} + +// TODO: Use async for everything and get rid of this adapter +struct AsyncReadSyncAdapter<'a>(&'a mut dyn Reader); + +impl Read for AsyncReadSyncAdapter<'_> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + block_on(self.0.read(buf)) + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/clear_visibility_buffer.wgsl b/crates/libmarathon/src/render/pbr/meshlet/clear_visibility_buffer.wgsl new file mode 100644 index 0000000..5956921 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/clear_visibility_buffer.wgsl @@ -0,0 +1,18 @@ +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d; +#else +@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d; +#endif +var view_size: vec2; + +@compute +@workgroup_size(16, 16, 1) 
+fn clear_visibility_buffer(@builtin(global_invocation_id) global_id: vec3) { + if any(global_id.xy >= view_size) { return; } + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0lu)); +#else + textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0u)); +#endif +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/cull_bvh.wgsl b/crates/libmarathon/src/render/pbr/meshlet/cull_bvh.wgsl new file mode 100644 index 0000000..b0bbb5f --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/cull_bvh.wgsl @@ -0,0 +1,110 @@ +#import bevy_pbr::meshlet_bindings::{ + InstancedOffset, + get_aabb, + get_aabb_error, + get_aabb_child_offset, + constants, + meshlet_bvh_nodes, + meshlet_bvh_cull_count_read, + meshlet_bvh_cull_count_write, + meshlet_bvh_cull_dispatch, + meshlet_bvh_cull_queue, + meshlet_meshlet_cull_count_early, + meshlet_meshlet_cull_count_late, + meshlet_meshlet_cull_dispatch_early, + meshlet_meshlet_cull_dispatch_late, + meshlet_meshlet_cull_queue, + meshlet_second_pass_bvh_count, + meshlet_second_pass_bvh_dispatch, + meshlet_second_pass_bvh_queue, +} +#import bevy_pbr::meshlet_cull_shared::{ + lod_error_is_imperceptible, + aabb_in_frustum, + should_occlusion_cull_aabb, +} + +@compute +@workgroup_size(128, 1, 1) // 8 threads per node, 16 nodes per workgroup +fn cull_bvh(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Calculate the queue ID for this thread + let dispatch_id = global_invocation_id.x; + var node = dispatch_id >> 3u; + let subnode = dispatch_id & 7u; + if node >= meshlet_bvh_cull_count_read { return; } + + node = select(node, constants.rightmost_slot - node, constants.read_from_front == 0u); + let instanced_offset = meshlet_bvh_cull_queue[node]; + let instance_id = instanced_offset.instance_id; + let bvh_node = &meshlet_bvh_nodes[instanced_offset.offset]; + + var aabb_error_offset = (*bvh_node).aabbs[subnode]; + let aabb = get_aabb(&aabb_error_offset); + 
let parent_error = get_aabb_error(&aabb_error_offset); + let lod_sphere = (*bvh_node).lod_bounds[subnode]; + + let parent_is_imperceptible = lod_error_is_imperceptible(lod_sphere, parent_error, instance_id); + // Error and frustum cull, in both passes + if parent_is_imperceptible || !aabb_in_frustum(aabb, instance_id) { return; } + + let child_offset = get_aabb_child_offset(&aabb_error_offset); + let index = subnode >> 2u; + let bit_offset = subnode & 3u; + let packed_child_count = (*bvh_node).child_counts[index]; + let child_count = extractBits(packed_child_count, bit_offset * 8u, 8u); + var value = InstancedOffset(instance_id, child_offset); + + // If we pass, try occlusion culling + // If this node was occluded, push it's children to the second pass to check against this frame's HZB + if should_occlusion_cull_aabb(aabb, instance_id) { +#ifdef MESHLET_FIRST_CULLING_PASS + if child_count == 255u { + let id = atomicAdd(&meshlet_second_pass_bvh_count, 1u); + meshlet_second_pass_bvh_queue[id] = value; + if ((id & 15u) == 0u) { + atomicAdd(&meshlet_second_pass_bvh_dispatch.x, 1u); + } + } else { + let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count); + let start = constants.rightmost_slot - base; + for (var i = start; i < start - child_count; i--) { + meshlet_meshlet_cull_queue[i] = value; + value.offset += 1u; + } + let req = (base + child_count + 127u) >> 7u; + atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req); + } +#endif + return; + } + + // If we pass, push the children to the next BVH cull + if child_count == 255u { + let id = atomicAdd(&meshlet_bvh_cull_count_write, 1u); + let index = select(constants.rightmost_slot - id, id, constants.read_from_front == 0u); + meshlet_bvh_cull_queue[index] = value; + if ((id & 15u) == 0u) { + atomicAdd(&meshlet_bvh_cull_dispatch.x, 1u); + } + } else { +#ifdef MESHLET_FIRST_CULLING_PASS + let base = atomicAdd(&meshlet_meshlet_cull_count_early, child_count); + let end = base + child_count; + for (var i = base; 
i < end; i++) { + meshlet_meshlet_cull_queue[i] = value; + value.offset += 1u; + } + let req = (end + 127u) >> 7u; + atomicMax(&meshlet_meshlet_cull_dispatch_early.x, req); +#else + let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count); + let start = constants.rightmost_slot - base; + for (var i = start; i < start - child_count; i--) { + meshlet_meshlet_cull_queue[i] = value; + value.offset += 1u; + } + let req = (base + child_count + 127u) >> 7u; + atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req); +#endif + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/cull_clusters.wgsl b/crates/libmarathon/src/render/pbr/meshlet/cull_clusters.wgsl new file mode 100644 index 0000000..85cbc06 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/cull_clusters.wgsl @@ -0,0 +1,93 @@ +#import bevy_pbr::meshlet_bindings::{ + InstancedOffset, + get_aabb, + get_aabb_error, + constants, + view, + meshlet_instance_uniforms, + meshlet_cull_data, + meshlet_software_raster_indirect_args, + meshlet_hardware_raster_indirect_args, + meshlet_previous_raster_counts, + meshlet_raster_clusters, + meshlet_meshlet_cull_count_read, + meshlet_meshlet_cull_count_write, + meshlet_meshlet_cull_dispatch, + meshlet_meshlet_cull_queue, +} +#import bevy_pbr::meshlet_cull_shared::{ + ScreenAabb, + project_aabb, + lod_error_is_imperceptible, + aabb_in_frustum, + should_occlusion_cull_aabb, +} +#import bevy_render::maths::affine3_to_square + +@compute +@workgroup_size(128, 1, 1) // 1 cluster per thread +fn cull_clusters(@builtin(global_invocation_id) global_invocation_id: vec3) { + if global_invocation_id.x >= meshlet_meshlet_cull_count_read { return; } + +#ifdef MESHLET_FIRST_CULLING_PASS + let meshlet_id = global_invocation_id.x; +#else + let meshlet_id = constants.rightmost_slot - global_invocation_id.x; +#endif + let instanced_offset = meshlet_meshlet_cull_queue[meshlet_id]; + let instance_id = instanced_offset.instance_id; + let cull_data = 
&meshlet_cull_data[instanced_offset.offset]; + var aabb_error_offset = (*cull_data).aabb; + let aabb = get_aabb(&aabb_error_offset); + let error = get_aabb_error(&aabb_error_offset); + let lod_sphere = (*cull_data).lod_group_sphere; + + let is_imperceptible = lod_error_is_imperceptible(lod_sphere, error, instance_id); + // Error and frustum cull, in both passes + if !is_imperceptible || !aabb_in_frustum(aabb, instance_id) { return; } + + // If we pass, try occlusion culling + // If this node was occluded, push it's children to the second pass to check against this frame's HZB + if should_occlusion_cull_aabb(aabb, instance_id) { +#ifdef MESHLET_FIRST_CULLING_PASS + let id = atomicAdd(&meshlet_meshlet_cull_count_write, 1u); + let value = InstancedOffset(instance_id, instanced_offset.offset); + meshlet_meshlet_cull_queue[constants.rightmost_slot - id] = value; + if ((id & 127u) == 0) { + atomicAdd(&meshlet_meshlet_cull_dispatch.x, 1u); + } +#endif + return; + } + + // If we pass, rasterize the meshlet + // Check how big the cluster is in screen space + let world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local); + let clip_from_local = view.clip_from_world * world_from_local; + let projection = view.clip_from_world; + var near: f32; + if projection[3][3] == 1.0 { + near = projection[3][2] / projection[2][2]; + } else { + near = projection[3][2]; + } + var screen_aabb = ScreenAabb(vec3(0.0), vec3(0.0)); + var sw_raster = project_aabb(clip_from_local, near, aabb, &screen_aabb); + if sw_raster { + let aabb_size = (screen_aabb.max.xy - screen_aabb.min.xy) * view.viewport.zw; + sw_raster = all(aabb_size <= vec2(64.0)); + } + + var buffer_slot: u32; + if sw_raster { + // Append this cluster to the list for software rasterization + buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u); + buffer_slot += meshlet_previous_raster_counts[0]; + } else { + // Append this cluster to the list for hardware rasterization + 
buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u); + buffer_slot += meshlet_previous_raster_counts[1]; + buffer_slot = constants.rightmost_slot - buffer_slot; + } + meshlet_raster_clusters[buffer_slot] = InstancedOffset(instance_id, instanced_offset.offset); +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/cull_instances.wgsl b/crates/libmarathon/src/render/pbr/meshlet/cull_instances.wgsl new file mode 100644 index 0000000..5d14d10 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/cull_instances.wgsl @@ -0,0 +1,76 @@ +#import bevy_pbr::meshlet_bindings::{ + InstancedOffset, + constants, + meshlet_view_instance_visibility, + meshlet_instance_aabbs, + meshlet_instance_bvh_root_nodes, + meshlet_bvh_cull_count_write, + meshlet_bvh_cull_dispatch, + meshlet_bvh_cull_queue, + meshlet_second_pass_instance_count, + meshlet_second_pass_instance_dispatch, + meshlet_second_pass_instance_candidates, +} +#import bevy_pbr::meshlet_cull_shared::{ + aabb_in_frustum, + should_occlusion_cull_aabb, +} + +fn instance_count() -> u32 { +#ifdef MESHLET_FIRST_CULLING_PASS + return constants.scene_instance_count; +#else + return meshlet_second_pass_instance_count; +#endif +} + +fn map_instance_id(id: u32) -> u32 { +#ifdef MESHLET_FIRST_CULLING_PASS + return id; +#else + return meshlet_second_pass_instance_candidates[id]; +#endif +} + +fn should_cull_instance(instance_id: u32) -> bool { + let bit_offset = instance_id >> 5u; + let packed_visibility = meshlet_view_instance_visibility[instance_id & 31u]; + return bool(extractBits(packed_visibility, bit_offset, 1u)); +} + +@compute +@workgroup_size(128, 1, 1) // 1 instance per thread +fn cull_instances(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Calculate the instance ID for this thread + let dispatch_id = global_invocation_id.x; + if dispatch_id >= instance_count() { return; } + + let instance_id = map_instance_id(dispatch_id); + let aabb = 
meshlet_instance_aabbs[instance_id]; + + // Visibility and frustum cull, but only in the first pass +#ifdef MESHLET_FIRST_CULLING_PASS + if should_cull_instance(instance_id) || !aabb_in_frustum(aabb, instance_id) { return; } +#endif + + // If we pass, try occlusion culling + // If this instance was occluded, push it to the second pass to check against this frame's HZB + if should_occlusion_cull_aabb(aabb, instance_id) { +#ifdef MESHLET_FIRST_CULLING_PASS + let id = atomicAdd(&meshlet_second_pass_instance_count, 1u); + meshlet_second_pass_instance_candidates[id] = instance_id; + if ((id & 127u) == 0u) { + atomicAdd(&meshlet_second_pass_instance_dispatch.x, 1u); + } +#endif + return; + } + + // If we pass, push the instance's root node to BVH cull + let root_node = meshlet_instance_bvh_root_nodes[instance_id]; + let id = atomicAdd(&meshlet_bvh_cull_count_write, 1u); + meshlet_bvh_cull_queue[id] = InstancedOffset(instance_id, root_node); + if ((id & 15u) == 0u) { + atomicAdd(&meshlet_bvh_cull_dispatch.x, 1u); + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/dummy_visibility_buffer_resolve.wgsl b/crates/libmarathon/src/render/pbr/meshlet/dummy_visibility_buffer_resolve.wgsl new file mode 100644 index 0000000..243a400 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/dummy_visibility_buffer_resolve.wgsl @@ -0,0 +1,4 @@ +#define_import_path bevy_pbr::meshlet_visibility_buffer_resolve + +/// Dummy shader to prevent naga_oil from complaining about missing imports when the MeshletPlugin is not loaded, +/// as naga_oil tries to resolve imports even if they're behind an #ifdef. 
diff --git a/crates/libmarathon/src/render/pbr/meshlet/fill_counts.wgsl b/crates/libmarathon/src/render/pbr/meshlet/fill_counts.wgsl new file mode 100644 index 0000000..f319e39 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/fill_counts.wgsl @@ -0,0 +1,35 @@ +/// Copies the counts of meshlets in the hardware and software buckets, resetting the counters in the process. + +struct DispatchIndirectArgs { + x: u32, + y: u32, + z: u32, +} + +struct DrawIndirectArgs { + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, +} + +@group(0) @binding(0) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; +@group(0) @binding(1) var meshlet_hardware_raster_indirect_args: DrawIndirectArgs; +@group(0) @binding(2) var meshlet_previous_raster_counts: array; +#ifdef MESHLET_2D_DISPATCH +@group(0) @binding(3) var meshlet_software_raster_cluster_count: u32; +#endif + +@compute +@workgroup_size(1, 1, 1) +fn fill_counts() { +#ifdef MESHLET_2D_DISPATCH + meshlet_previous_raster_counts[0] += meshlet_software_raster_cluster_count; +#else + meshlet_previous_raster_counts[0] += meshlet_software_raster_indirect_args.x; +#endif + meshlet_software_raster_indirect_args.x = 0; + + meshlet_previous_raster_counts[1] += meshlet_hardware_raster_indirect_args.instance_count; + meshlet_hardware_raster_indirect_args.instance_count = 0; +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/from_mesh.rs b/crates/libmarathon/src/render/pbr/meshlet/from_mesh.rs new file mode 100644 index 0000000..10a4c99 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/from_mesh.rs @@ -0,0 +1,1109 @@ +use crate::render::pbr::meshlet::asset::{MeshletAabb, MeshletAabbErrorOffset, MeshletCullData}; + +use super::asset::{BvhNode, Meshlet, MeshletBoundingSphere, MeshletMesh}; +use std::borrow::Cow; +use bevy_math::{ + bounding::{Aabb3d, BoundingSphere, BoundingVolume}, + ops::log2, + IVec3, Isometry3d, Vec2, Vec3, Vec3A, Vec3Swizzles, +}; +use 
bevy_mesh::{Indices, Mesh}; +use bevy_platform::collections::HashMap; +use crate::render::render_resource::PrimitiveTopology; +use bevy_tasks::{AsyncComputeTaskPool, ParallelSlice}; +use bitvec::{order::Lsb0, vec::BitVec, view::BitView}; +use core::{f32, ops::Range}; +use itertools::Itertools; +use meshopt::{ + build_meshlets, ffi::meshopt_Meshlet, generate_vertex_remap_multi, + simplify_with_attributes_and_locks, Meshlets, SimplifyOptions, VertexDataAdapter, VertexStream, +}; +use metis::{option::Opt, Graph}; +use smallvec::SmallVec; +use thiserror::Error; +use tracing::debug_span; + +// Aim to have 8 meshlets per group +const TARGET_MESHLETS_PER_GROUP: usize = 8; +// Reject groups that keep over 60% of their original triangles. We'd much rather render a few +// extra triangles than create too many meshlets, increasing cull overhead. +const SIMPLIFICATION_FAILURE_PERCENTAGE: f32 = 0.60; + +/// Default vertex position quantization factor for use with [`MeshletMesh::from_mesh`]. +/// +/// Snaps vertices to the nearest 1/16th of a centimeter (1/2^4). +pub const MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR: u8 = 4; + +const CENTIMETERS_PER_METER: f32 = 100.0; + +impl MeshletMesh { + /// Process a [`Mesh`] to generate a [`MeshletMesh`]. + /// + /// This process is very slow, and should be done ahead of time, and not at runtime. + /// + /// # Requirements + /// + /// This function requires the `meshlet_processor` cargo feature. + /// + /// The input mesh must: + /// 1. Use [`PrimitiveTopology::TriangleList`] + /// 2. Use indices + /// 3. Have the exact following set of vertex attributes: `{POSITION, NORMAL, UV_0}` (tangents can be used in material shaders, but are calculated at runtime and are not stored in the mesh) + /// + /// # Vertex precision + /// + /// `vertex_position_quantization_factor` is the amount of precision to use when quantizing vertex positions. 
+ /// + /// Vertices are snapped to the nearest (1/2^x)th of a centimeter, where x = `vertex_position_quantization_factor`. + /// E.g. if x = 4, then vertices are snapped to the nearest 1/2^4 = 1/16th of a centimeter. + /// + /// Use [`MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR`] as a default, adjusting lower to save memory and disk space, and higher to prevent artifacts if needed. + /// + /// To ensure that two different meshes do not have cracks between them when placed directly next to each other: + /// * Use the same quantization factor when converting each mesh to a meshlet mesh + /// * Ensure that their [`bevy_transform::components::Transform::translation`]s are a multiple of 1/2^x centimeters (note that translations are in meters) + /// * Ensure that their [`bevy_transform::components::Transform::scale`]s are the same + /// * Ensure that their [`bevy_transform::components::Transform::rotation`]s are a multiple of 90 degrees + pub fn from_mesh( + mesh: &Mesh, + vertex_position_quantization_factor: u8, + ) -> Result { + let s = debug_span!("build meshlet mesh"); + let _e = s.enter(); + + // Validate mesh format + let indices = validate_input_mesh(mesh)?; + + // Get meshlet vertices + let vertex_buffer = mesh.create_packed_vertex_buffer_data(); + let vertex_stride = mesh.get_vertex_size() as usize; + let vertices = VertexDataAdapter::new(&vertex_buffer, vertex_stride, 0).unwrap(); + let vertex_normals = bytemuck::cast_slice(&vertex_buffer[12..16]); + + // Generate a position-only vertex buffer for determining triangle/meshlet connectivity + let (position_only_vertex_count, position_only_vertex_remap) = generate_vertex_remap_multi( + vertices.vertex_count, + &[VertexStream::new_with_stride::( + vertex_buffer.as_ptr(), + vertex_stride, + )], + Some(&indices), + ); + + // Split the mesh into an initial list of meshlets (LOD 0) + let (mut meshlets, mut cull_data) = compute_meshlets( + &indices, + &vertices, + &position_only_vertex_remap, + 
position_only_vertex_count, + None, + ); + + let mut vertex_locks = vec![false; vertices.vertex_count]; + + // Build further LODs + let mut bvh = BvhBuilder::default(); + let mut all_groups = Vec::new(); + let mut simplification_queue: Vec<_> = (0..meshlets.len() as u32).collect(); + let mut stuck = Vec::new(); + while !simplification_queue.is_empty() { + let s = debug_span!("simplify lod", meshlets = simplification_queue.len()); + let _e = s.enter(); + + // For each meshlet build a list of connected meshlets (meshlets that share a vertex) + let connected_meshlets_per_meshlet = find_connected_meshlets( + &simplification_queue, + &meshlets, + &position_only_vertex_remap, + position_only_vertex_count, + ); + + // Group meshlets into roughly groups of size TARGET_MESHLETS_PER_GROUP, + // grouping meshlets with a high number of shared vertices + let groups = group_meshlets( + &simplification_queue, + &cull_data, + &connected_meshlets_per_meshlet, + ); + simplification_queue.clear(); + + // Lock borders between groups to prevent cracks when simplifying + lock_group_borders( + &mut vertex_locks, + &groups, + &meshlets, + &position_only_vertex_remap, + position_only_vertex_count, + ); + + let simplified = groups.par_chunk_map(AsyncComputeTaskPool::get(), 1, |_, groups| { + let mut group = groups[0].clone(); + + // If the group only has a single meshlet we can't simplify it + if group.meshlets.len() == 1 { + return Err(group); + } + + let s = debug_span!("simplify group", meshlets = group.meshlets.len()); + let _e = s.enter(); + + // Simplify the group to ~50% triangle count + let Some((simplified_group_indices, mut group_error)) = simplify_meshlet_group( + &group, + &meshlets, + &vertices, + vertex_normals, + vertex_stride, + &vertex_locks, + ) else { + // Couldn't simplify the group enough + return Err(group); + }; + + // Force the group error to be atleast as large as all of its constituent meshlet's + // individual errors. 
+ for &id in group.meshlets.iter() { + group_error = group_error.max(cull_data[id as usize].error); + } + group.parent_error = group_error; + + // Build new meshlets using the simplified group + let new_meshlets = compute_meshlets( + &simplified_group_indices, + &vertices, + &position_only_vertex_remap, + position_only_vertex_count, + Some((group.lod_bounds, group.parent_error)), + ); + + Ok((group, new_meshlets)) + }); + + let first_group = all_groups.len() as u32; + let mut passed_tris = 0; + let mut stuck_tris = 0; + for group in simplified { + match group { + Ok((group, (new_meshlets, new_cull_data))) => { + let start = meshlets.len(); + merge_meshlets(&mut meshlets, new_meshlets); + cull_data.extend(new_cull_data); + let end = meshlets.len(); + let new_meshlet_ids = start as u32..end as u32; + + passed_tris += triangles_in_meshlets(&meshlets, new_meshlet_ids.clone()); + simplification_queue.extend(new_meshlet_ids); + all_groups.push(group); + } + Err(group) => { + stuck_tris += + triangles_in_meshlets(&meshlets, group.meshlets.iter().copied()); + stuck.push(group); + } + } + } + + // If we have enough triangles that passed, we can retry simplifying the stuck + // meshlets. 
+ if passed_tris > stuck_tris / 3 { + simplification_queue.extend(stuck.drain(..).flat_map(|group| group.meshlets)); + } + + bvh.add_lod(first_group, &all_groups); + } + + // If there's any stuck meshlets left, add another LOD level with only them + if !stuck.is_empty() { + let first_group = all_groups.len() as u32; + all_groups.extend(stuck); + bvh.add_lod(first_group, &all_groups); + } + + let (bvh, aabb, depth) = bvh.build(&mut meshlets, all_groups, &mut cull_data); + + // Copy vertex attributes per meshlet and compress + let mut vertex_positions = BitVec::::new(); + let mut vertex_normals = Vec::new(); + let mut vertex_uvs = Vec::new(); + let mut bevy_meshlets = Vec::with_capacity(meshlets.len()); + for (i, meshlet) in meshlets.meshlets.iter().enumerate() { + build_and_compress_per_meshlet_vertex_data( + meshlet, + meshlets.get(i).vertices, + &vertex_buffer, + vertex_stride, + &mut vertex_positions, + &mut vertex_normals, + &mut vertex_uvs, + &mut bevy_meshlets, + vertex_position_quantization_factor, + ); + } + vertex_positions.set_uninitialized(false); + + Ok(Self { + vertex_positions: vertex_positions.into_vec().into(), + vertex_normals: vertex_normals.into(), + vertex_uvs: vertex_uvs.into(), + indices: meshlets.triangles.into(), + bvh: bvh.into(), + meshlets: bevy_meshlets.into(), + meshlet_cull_data: cull_data + .into_iter() + .map(|cull_data| MeshletCullData { + aabb: aabb_to_meshlet(cull_data.aabb, cull_data.error, 0), + lod_group_sphere: sphere_to_meshlet(cull_data.lod_group_sphere), + }) + .collect(), + aabb, + bvh_depth: depth, + }) + } +} + +fn validate_input_mesh(mesh: &Mesh) -> Result, MeshToMeshletMeshConversionError> { + if mesh.primitive_topology() != PrimitiveTopology::TriangleList { + return Err(MeshToMeshletMeshConversionError::WrongMeshPrimitiveTopology); + } + + if mesh.attributes().map(|(attribute, _)| attribute.id).ne([ + Mesh::ATTRIBUTE_POSITION.id, + Mesh::ATTRIBUTE_NORMAL.id, + Mesh::ATTRIBUTE_UV_0.id, + ]) { + return 
Err(MeshToMeshletMeshConversionError::WrongMeshVertexAttributes( + mesh.attributes() + .map(|(attribute, _)| format!("{attribute:?}")) + .collect(), + )); + } + + match mesh.indices() { + Some(Indices::U32(indices)) => Ok(Cow::Borrowed(indices.as_slice())), + Some(Indices::U16(indices)) => Ok(indices.iter().map(|i| *i as u32).collect()), + _ => Err(MeshToMeshletMeshConversionError::MeshMissingIndices), + } +} + +fn triangles_in_meshlets(meshlets: &Meshlets, ids: impl IntoIterator) -> u32 { + ids.into_iter() + .map(|id| meshlets.get(id as _).triangles.len() as u32 / 3) + .sum() +} + +fn compute_meshlets( + indices: &[u32], + vertices: &VertexDataAdapter, + position_only_vertex_remap: &[u32], + position_only_vertex_count: usize, + prev_lod_data: Option<(BoundingSphere, f32)>, +) -> (Meshlets, Vec) { + // For each vertex, build a list of all triangles that use it + let mut vertices_to_triangles = vec![Vec::new(); position_only_vertex_count]; + for (i, index) in indices.iter().enumerate() { + let vertex_id = position_only_vertex_remap[*index as usize]; + let vertex_to_triangles = &mut vertices_to_triangles[vertex_id as usize]; + vertex_to_triangles.push(i / 3); + } + + // For each triangle pair, count how many vertices they share + let mut triangle_pair_to_shared_vertex_count = >::default(); + for vertex_triangle_ids in vertices_to_triangles { + for (triangle_id1, triangle_id2) in vertex_triangle_ids.into_iter().tuple_combinations() { + let count = triangle_pair_to_shared_vertex_count + .entry(( + triangle_id1.min(triangle_id2), + triangle_id1.max(triangle_id2), + )) + .or_insert(0); + *count += 1; + } + } + + // For each triangle, gather all other triangles that share at least one vertex along with their shared vertex count + let triangle_count = indices.len() / 3; + let mut connected_triangles_per_triangle = vec![Vec::new(); triangle_count]; + for ((triangle_id1, triangle_id2), shared_vertex_count) in triangle_pair_to_shared_vertex_count + { + // We record both 
id1->id2 and id2->id1 as adjacency is symmetrical + connected_triangles_per_triangle[triangle_id1].push((triangle_id2, shared_vertex_count)); + connected_triangles_per_triangle[triangle_id2].push((triangle_id1, shared_vertex_count)); + } + + // The order of triangles depends on hash traversal order; to produce deterministic results, sort them + // TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting? + for list in connected_triangles_per_triangle.iter_mut() { + list.sort_unstable(); + } + + let mut xadj = Vec::with_capacity(triangle_count + 1); + let mut adjncy = Vec::new(); + let mut adjwgt = Vec::new(); + for connected_triangles in connected_triangles_per_triangle { + xadj.push(adjncy.len() as i32); + for (connected_triangle_id, shared_vertex_count) in connected_triangles { + adjncy.push(connected_triangle_id as i32); + adjwgt.push(shared_vertex_count); + // TODO: Additional weight based on triangle center spatial proximity? + } + } + xadj.push(adjncy.len() as i32); + + let mut options = [-1; metis::NOPTIONS]; + options[metis::option::Seed::INDEX] = 17; + options[metis::option::UFactor::INDEX] = 1; // Important that there's very little imbalance between partitions + + let mut meshlet_per_triangle = vec![0; triangle_count]; + let partition_count = triangle_count.div_ceil(126); // Need to undershoot to prevent METIS from going over 128 triangles per meshlet + Graph::new(1, partition_count as i32, &xadj, &adjncy) + .unwrap() + .set_options(&options) + .set_adjwgt(&adjwgt) + .part_recursive(&mut meshlet_per_triangle) + .unwrap(); + + let mut indices_per_meshlet = vec![Vec::new(); partition_count]; + for (triangle_id, meshlet) in meshlet_per_triangle.into_iter().enumerate() { + let meshlet_indices = &mut indices_per_meshlet[meshlet as usize]; + let base_index = triangle_id * 3; + meshlet_indices.extend_from_slice(&indices[base_index..(base_index + 3)]); + } + + // Use meshopt to build meshlets from the sets of triangles + let mut 
meshlets = Meshlets { + meshlets: Vec::new(), + vertices: Vec::new(), + triangles: Vec::new(), + }; + let mut cull_data = Vec::new(); + let get_vertex = |&v: &u32| { + *bytemuck::from_bytes::( + &vertices.reader.get_ref() + [vertices.position_offset + v as usize * vertices.vertex_stride..][..12], + ) + }; + for meshlet_indices in &indices_per_meshlet { + let meshlet = build_meshlets(meshlet_indices, vertices, 255, 128, 0.0); + for meshlet in meshlet.iter() { + let (lod_group_sphere, error) = prev_lod_data.unwrap_or_else(|| { + let bounds = meshopt::compute_meshlet_bounds(meshlet, vertices); + (BoundingSphere::new(bounds.center, bounds.radius), 0.0) + }); + + cull_data.push(TempMeshletCullData { + aabb: Aabb3d::from_point_cloud( + Isometry3d::IDENTITY, + meshlet.vertices.iter().map(get_vertex), + ), + lod_group_sphere, + error, + }); + } + merge_meshlets(&mut meshlets, meshlet); + } + (meshlets, cull_data) +} + +fn find_connected_meshlets( + simplification_queue: &[u32], + meshlets: &Meshlets, + position_only_vertex_remap: &[u32], + position_only_vertex_count: usize, +) -> Vec> { + // For each vertex, build a list of all meshlets that use it + let mut vertices_to_meshlets = vec![Vec::new(); position_only_vertex_count]; + for (id_index, &meshlet_id) in simplification_queue.iter().enumerate() { + let meshlet = meshlets.get(meshlet_id as _); + for index in meshlet.triangles { + let vertex_id = position_only_vertex_remap[meshlet.vertices[*index as usize] as usize]; + let vertex_to_meshlets = &mut vertices_to_meshlets[vertex_id as usize]; + // Meshlets are added in order, so we can just check the last element to deduplicate, + // in the case of two triangles sharing the same vertex within a single meshlet + if vertex_to_meshlets.last() != Some(&id_index) { + vertex_to_meshlets.push(id_index); + } + } + } + + // For each meshlet pair, count how many vertices they share + let mut meshlet_pair_to_shared_vertex_count = >::default(); + for vertex_meshlet_ids in 
vertices_to_meshlets { + for (meshlet_id1, meshlet_id2) in vertex_meshlet_ids.into_iter().tuple_combinations() { + let count = meshlet_pair_to_shared_vertex_count + .entry((meshlet_id1.min(meshlet_id2), meshlet_id1.max(meshlet_id2))) + .or_insert(0); + *count += 1; + } + } + + // For each meshlet, gather all other meshlets that share at least one vertex along with their shared vertex count + let mut connected_meshlets_per_meshlet = vec![Vec::new(); simplification_queue.len()]; + for ((meshlet_id1, meshlet_id2), shared_vertex_count) in meshlet_pair_to_shared_vertex_count { + // We record both id1->id2 and id2->id1 as adjacency is symmetrical + connected_meshlets_per_meshlet[meshlet_id1].push((meshlet_id2, shared_vertex_count)); + connected_meshlets_per_meshlet[meshlet_id2].push((meshlet_id1, shared_vertex_count)); + } + + // The order of meshlets depends on hash traversal order; to produce deterministic results, sort them + // TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting? 
+ for list in connected_meshlets_per_meshlet.iter_mut() { + list.sort_unstable(); + } + + connected_meshlets_per_meshlet +} + +// METIS manual: https://github.com/KarypisLab/METIS/blob/e0f1b88b8efcb24ffa0ec55eabb78fbe61e58ae7/manual/manual.pdf +fn group_meshlets( + simplification_queue: &[u32], + meshlet_cull_data: &[TempMeshletCullData], + connected_meshlets_per_meshlet: &[Vec<(usize, usize)>], +) -> Vec { + let mut xadj = Vec::with_capacity(simplification_queue.len() + 1); + let mut adjncy = Vec::new(); + let mut adjwgt = Vec::new(); + for connected_meshlets in connected_meshlets_per_meshlet { + xadj.push(adjncy.len() as i32); + for (connected_meshlet_id, shared_vertex_count) in connected_meshlets { + adjncy.push(*connected_meshlet_id as i32); + adjwgt.push(*shared_vertex_count as i32); + // TODO: Additional weight based on meshlet spatial proximity + } + } + xadj.push(adjncy.len() as i32); + + let mut options = [-1; metis::NOPTIONS]; + options[metis::option::Seed::INDEX] = 17; + options[metis::option::UFactor::INDEX] = 200; + + let mut group_per_meshlet = vec![0; simplification_queue.len()]; + let partition_count = simplification_queue + .len() + .div_ceil(TARGET_MESHLETS_PER_GROUP); // TODO: Nanite uses groups of 8-32, probably based on some kind of heuristic + Graph::new(1, partition_count as i32, &xadj, &adjncy) + .unwrap() + .set_options(&options) + .set_adjwgt(&adjwgt) + .part_recursive(&mut group_per_meshlet) + .unwrap(); + + let mut groups = vec![TempMeshletGroup::default(); partition_count]; + for (i, meshlet_group) in group_per_meshlet.into_iter().enumerate() { + let group = &mut groups[meshlet_group as usize]; + let meshlet_id = simplification_queue[i]; + + group.meshlets.push(meshlet_id); + let data = &meshlet_cull_data[meshlet_id as usize]; + group.aabb = group.aabb.merge(&data.aabb); + group.lod_bounds = merge_spheres(group.lod_bounds, data.lod_group_sphere); + } + groups +} + +fn lock_group_borders( + vertex_locks: &mut [bool], + groups: 
&[TempMeshletGroup], + meshlets: &Meshlets, + position_only_vertex_remap: &[u32], + position_only_vertex_count: usize, +) { + let mut position_only_locks = vec![-1; position_only_vertex_count]; + + // Iterate over position-only based vertices of all meshlets in all groups + for (group_id, group) in groups.iter().enumerate() { + for &meshlet_id in group.meshlets.iter() { + let meshlet = meshlets.get(meshlet_id as usize); + for index in meshlet.triangles { + let vertex_id = + position_only_vertex_remap[meshlet.vertices[*index as usize] as usize] as usize; + + // If the vertex is not yet claimed by any group, or was already claimed by this group + if position_only_locks[vertex_id] == -1 + || position_only_locks[vertex_id] == group_id as i32 + { + position_only_locks[vertex_id] = group_id as i32; // Then claim the vertex for this group + } else { + position_only_locks[vertex_id] = -2; // Else vertex was already claimed by another group or was already locked, lock it + } + } + } + } + + // Lock vertices used by more than 1 group + for i in 0..vertex_locks.len() { + let vertex_id = position_only_vertex_remap[i] as usize; + vertex_locks[i] = position_only_locks[vertex_id] == -2; + } +} + +fn simplify_meshlet_group( + group: &TempMeshletGroup, + meshlets: &Meshlets, + vertices: &VertexDataAdapter<'_>, + vertex_normals: &[f32], + vertex_stride: usize, + vertex_locks: &[bool], +) -> Option<(Vec, f32)> { + // Build a new index buffer into the mesh vertex data by combining all meshlet data in the group + let group_indices = group + .meshlets + .iter() + .flat_map(|&meshlet_id| { + let meshlet = meshlets.get(meshlet_id as _); + meshlet + .triangles + .iter() + .map(|&meshlet_index| meshlet.vertices[meshlet_index as usize]) + }) + .collect::>(); + + // Simplify the group to ~50% triangle count + let mut error = 0.0; + let simplified_group_indices = simplify_with_attributes_and_locks( + &group_indices, + vertices, + vertex_normals, + &[0.5; 3], + vertex_stride, + vertex_locks, + 
group_indices.len() / 2, + f32::MAX, + SimplifyOptions::Sparse | SimplifyOptions::ErrorAbsolute, + Some(&mut error), + ); + + // Check if we were able to simplify + if simplified_group_indices.len() as f32 / group_indices.len() as f32 + > SIMPLIFICATION_FAILURE_PERCENTAGE + { + return None; + } + + Some((simplified_group_indices, error)) +} + +fn merge_meshlets(meshlets: &mut Meshlets, merge: Meshlets) { + let vertex_offset = meshlets.vertices.len() as u32; + let triangle_offset = meshlets.triangles.len() as u32; + meshlets.vertices.extend_from_slice(&merge.vertices); + meshlets.triangles.extend_from_slice(&merge.triangles); + meshlets + .meshlets + .extend(merge.meshlets.into_iter().map(|mut meshlet| { + meshlet.vertex_offset += vertex_offset; + meshlet.triangle_offset += triangle_offset; + meshlet + })); +} + +fn build_and_compress_per_meshlet_vertex_data( + meshlet: &meshopt_Meshlet, + meshlet_vertex_ids: &[u32], + vertex_buffer: &[u8], + vertex_stride: usize, + vertex_positions: &mut BitVec, + vertex_normals: &mut Vec, + vertex_uvs: &mut Vec, + meshlets: &mut Vec, + vertex_position_quantization_factor: u8, +) { + let start_vertex_position_bit = vertex_positions.len() as u32; + let start_vertex_attribute_id = vertex_normals.len() as u32; + + let quantization_factor = + (1 << vertex_position_quantization_factor) as f32 * CENTIMETERS_PER_METER; + + let mut min_quantized_position_channels = IVec3::MAX; + let mut max_quantized_position_channels = IVec3::MIN; + + // Lossy vertex compression + let mut quantized_positions = [IVec3::ZERO; 255]; + for (i, vertex_id) in meshlet_vertex_ids.iter().enumerate() { + // Load source vertex attributes + let vertex_id_byte = *vertex_id as usize * vertex_stride; + let vertex_data = &vertex_buffer[vertex_id_byte..(vertex_id_byte + vertex_stride)]; + let position = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[0..12])); + let normal = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[12..24])); + let uv = 
Vec2::from_slice(bytemuck::cast_slice(&vertex_data[24..32])); + + // Copy uncompressed UV + vertex_uvs.push(uv); + + // Compress normal + vertex_normals.push(pack2x16snorm(octahedral_encode(normal))); + + // Quantize position to a fixed-point IVec3 + let quantized_position = (position * quantization_factor + 0.5).as_ivec3(); + quantized_positions[i] = quantized_position; + + // Compute per X/Y/Z-channel quantized position min/max for this meshlet + min_quantized_position_channels = min_quantized_position_channels.min(quantized_position); + max_quantized_position_channels = max_quantized_position_channels.max(quantized_position); + } + + // Calculate bits needed to encode each quantized vertex position channel based on the range of each channel + let range = max_quantized_position_channels - min_quantized_position_channels + 1; + let bits_per_vertex_position_channel_x = log2(range.x as f32).ceil() as u8; + let bits_per_vertex_position_channel_y = log2(range.y as f32).ceil() as u8; + let bits_per_vertex_position_channel_z = log2(range.z as f32).ceil() as u8; + + // Lossless encoding of vertex positions in the minimum number of bits per channel + for quantized_position in quantized_positions.iter().take(meshlet_vertex_ids.len()) { + // Remap [range_min, range_max] IVec3 to [0, range_max - range_min] UVec3 + let position = (quantized_position - min_quantized_position_channels).as_uvec3(); + + // Store as a packed bitstream + vertex_positions.extend_from_bitslice( + &position.x.view_bits::()[..bits_per_vertex_position_channel_x as usize], + ); + vertex_positions.extend_from_bitslice( + &position.y.view_bits::()[..bits_per_vertex_position_channel_y as usize], + ); + vertex_positions.extend_from_bitslice( + &position.z.view_bits::()[..bits_per_vertex_position_channel_z as usize], + ); + } + + meshlets.push(Meshlet { + start_vertex_position_bit, + start_vertex_attribute_id, + start_index_id: meshlet.triangle_offset, + vertex_count: meshlet.vertex_count as u8, + 
triangle_count: meshlet.triangle_count as u8, + padding: 0, + bits_per_vertex_position_channel_x, + bits_per_vertex_position_channel_y, + bits_per_vertex_position_channel_z, + vertex_position_quantization_factor, + min_vertex_position_channel_x: min_quantized_position_channels.x as f32, + min_vertex_position_channel_y: min_quantized_position_channels.y as f32, + min_vertex_position_channel_z: min_quantized_position_channels.z as f32, + }); +} + +fn merge_spheres(a: BoundingSphere, b: BoundingSphere) -> BoundingSphere { + let sr = a.radius().min(b.radius()); + let br = a.radius().max(b.radius()); + let len = a.center.distance(b.center); + if len + sr <= br || sr == 0.0 || len == 0.0 { + if a.radius() > b.radius() { + a + } else { + b + } + } else { + let radius = (sr + br + len) / 2.0; + let center = + (a.center + b.center + (a.radius() - b.radius()) * (a.center - b.center) / len) / 2.0; + BoundingSphere::new(center, radius) + } +} + +#[derive(Copy, Clone)] +struct TempMeshletCullData { + aabb: Aabb3d, + lod_group_sphere: BoundingSphere, + error: f32, +} + +#[derive(Clone)] +struct TempMeshletGroup { + aabb: Aabb3d, + lod_bounds: BoundingSphere, + parent_error: f32, + meshlets: SmallVec<[u32; TARGET_MESHLETS_PER_GROUP]>, +} + +impl Default for TempMeshletGroup { + fn default() -> Self { + Self { + aabb: aabb_default(), // Default AABB to merge into + lod_bounds: BoundingSphere::new(Vec3A::ZERO, 0.0), + parent_error: f32::MAX, + meshlets: SmallVec::new(), + } + } +} + +// All the BVH build code was stolen from https://github.com/SparkyPotato/radiance/blob/4aa17a3a5be7a0466dc69713e249bbcee9f46057/crates/rad-renderer/src/assets/mesh/virtual_mesh.rs because it works and I'm lazy and don't want to reimplement it +struct TempBvhNode { + group: u32, + aabb: Aabb3d, + children: SmallVec<[u32; 8]>, +} + +#[derive(Default)] +struct BvhBuilder { + nodes: Vec, + lods: Vec>, +} + +impl BvhBuilder { + fn add_lod(&mut self, offset: u32, all_groups: &[TempMeshletGroup]) { + let 
first = self.nodes.len() as u32; + self.nodes.extend( + all_groups + .iter() + .enumerate() + .skip(offset as _) + .map(|(i, group)| TempBvhNode { + group: i as u32, + aabb: group.aabb, + children: SmallVec::new(), + }), + ); + let end = self.nodes.len() as u32; + if first != end { + self.lods.push(first..end); + } + } + + fn surface_area(&self, nodes: &[u32]) -> f32 { + nodes + .iter() + .map(|&x| self.nodes[x as usize].aabb) + .reduce(|a, b| a.merge(&b)) + .expect("cannot find surface area of zero nodes") + .visible_area() + } + + fn sort_nodes_by_sah(&self, nodes: &mut [u32], splits: [usize; 8]) { + // We use a BVH8, so just recursively binary split 3 times for near-optimal SAH + for i in 0..3 { + let parts = 1 << i; // 2^i + let nodes_per_split = 8 >> i; // 8 / 2^i + let half_count = nodes_per_split / 2; + let mut offset = 0; + for p in 0..parts { + let first = p * nodes_per_split; + let mut s0 = 0; + let mut s1 = 0; + for i in 0..half_count { + s0 += splits[first + i]; + s1 += splits[first + half_count + i]; + } + let c = s0 + s1; + let nodes = &mut nodes[offset..(offset + c)]; + offset += c; + + let mut cost = f32::MAX; + let mut axis = 0; + let key = |x, ax| self.nodes[x as usize].aabb.center()[ax]; + for ax in 0..3 { + nodes.sort_unstable_by(|&x, &y| key(x, ax).partial_cmp(&key(y, ax)).unwrap()); + let (left, right) = nodes.split_at(s0); + let c = self.surface_area(left) + self.surface_area(right); + if c < cost { + axis = ax; + cost = c; + } + } + if axis != 2 { + nodes.sort_unstable_by(|&x, &y| { + key(x, axis).partial_cmp(&key(y, axis)).unwrap() + }); + } + } + } + } + + fn build_temp_inner(&mut self, nodes: &mut [u32], optimize: bool) -> u32 { + let count = nodes.len(); + if count == 1 { + nodes[0] + } else if count <= 8 { + let i = self.nodes.len(); + self.nodes.push(TempBvhNode { + group: u32::MAX, + aabb: aabb_default(), + children: nodes.iter().copied().collect(), + }); + i as _ + } else { + // We need to split the nodes into 8 groups, with the 
smallest possible tree depth. + // Additionally, no child should be more than one level deeper than the others. + // At `l` levels, we can fit upto 8^l nodes. + // The `max_child_size` is the largest power of 8 <= `count` (any larger and we'd have + // unfilled nodes). + // The `min_child_size` is thus 1 level (8 times) smaller. + // After distributing `min_child_size` to all children, we have distributed + // `min_child_size * 8` nodes (== `max_child_size`). + // The remaining nodes are then distributed left to right. + let max_child_size = 1 << ((count.ilog2() / 3) * 3); + let min_child_size = max_child_size >> 3; + let max_extra_per_node = max_child_size - min_child_size; + let mut extra = count - max_child_size; // 8 * min_child_size + let splits = core::array::from_fn(|_| { + let size = extra.min(max_extra_per_node); + extra -= size; + min_child_size + size + }); + + if optimize { + self.sort_nodes_by_sah(nodes, splits); + } + + let mut offset = 0; + let children = splits + .into_iter() + .map(|size| { + let i = self.build_temp_inner(&mut nodes[offset..(offset + size)], optimize); + offset += size; + i + }) + .collect(); + + let i = self.nodes.len(); + self.nodes.push(TempBvhNode { + group: u32::MAX, + aabb: aabb_default(), + children, + }); + i as _ + } + } + + fn build_temp(&mut self) -> u32 { + let mut lods = Vec::with_capacity(self.lods.len()); + for lod in core::mem::take(&mut self.lods) { + let mut lod: Vec<_> = lod.collect(); + let root = self.build_temp_inner(&mut lod, true); + let node = &self.nodes[root as usize]; + if node.group != u32::MAX || node.children.len() == 8 { + lods.push(root); + } else { + lods.extend(node.children.iter().copied()); + } + } + self.build_temp_inner(&mut lods, false) + } + + fn build_inner( + &self, + groups: &[TempMeshletGroup], + out: &mut Vec, + max_depth: &mut u32, + node: u32, + depth: u32, + ) -> u32 { + *max_depth = depth.max(*max_depth); + let node = &self.nodes[node as usize]; + let onode = out.len(); + 
out.push(BvhNode::default()); + + for (i, &child_id) in node.children.iter().enumerate() { + let child = &self.nodes[child_id as usize]; + if child.group != u32::MAX { + let group = &groups[child.group as usize]; + let out = &mut out[onode]; + out.aabbs[i] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]); + out.lod_bounds[i] = sphere_to_meshlet(group.lod_bounds); + out.child_counts[i] = group.meshlets[1] as _; + } else { + let child_id = self.build_inner(groups, out, max_depth, child_id, depth + 1); + let child = &out[child_id as usize]; + let mut aabb = aabb_default(); + let mut parent_error = 0.0f32; + let mut lod_bounds = BoundingSphere::new(Vec3A::ZERO, 0.0); + for i in 0..8 { + if child.child_counts[i] == 0 { + break; + } + + aabb = aabb.merge(&Aabb3d::new( + child.aabbs[i].center, + child.aabbs[i].half_extent, + )); + lod_bounds = merge_spheres( + lod_bounds, + BoundingSphere::new(child.lod_bounds[i].center, child.lod_bounds[i].radius), + ); + parent_error = parent_error.max(child.aabbs[i].error); + } + + let out = &mut out[onode]; + out.aabbs[i] = aabb_to_meshlet(aabb, parent_error, child_id); + out.lod_bounds[i] = sphere_to_meshlet(lod_bounds); + out.child_counts[i] = u8::MAX; + } + } + + onode as _ + } + + fn build( + mut self, + meshlets: &mut Meshlets, + mut groups: Vec, + cull_data: &mut Vec, + ) -> (Vec, MeshletAabb, u32) { + // The BVH requires group meshlets to be contiguous, so remap them first. 
+ let mut remap = Vec::with_capacity(meshlets.meshlets.len()); + let mut remapped_cull_data = Vec::with_capacity(cull_data.len()); + for group in groups.iter_mut() { + let first = remap.len() as u32; + let count = group.meshlets.len() as u32; + remap.extend( + group + .meshlets + .iter() + .map(|&m| meshlets.meshlets[m as usize]), + ); + remapped_cull_data.extend(group.meshlets.iter().map(|&m| cull_data[m as usize])); + group.meshlets.resize(2, 0); + group.meshlets[0] = first; + group.meshlets[1] = count; + } + meshlets.meshlets = remap; + *cull_data = remapped_cull_data; + + let mut out = vec![]; + let mut aabb = aabb_default(); + let mut max_depth = 0; + + if self.nodes.len() == 1 { + let mut o = BvhNode::default(); + let group = &groups[0]; + o.aabbs[0] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]); + o.lod_bounds[0] = sphere_to_meshlet(group.lod_bounds); + o.child_counts[0] = group.meshlets[1] as _; + out.push(o); + aabb = group.aabb; + max_depth = 1; + } else { + let root = self.build_temp(); + let root = self.build_inner(&groups, &mut out, &mut max_depth, root, 1); + assert_eq!(root, 0, "root must be 0"); + + let root = &out[0]; + for i in 0..8 { + if root.child_counts[i] == 0 { + break; + } + + aabb = aabb.merge(&Aabb3d::new( + root.aabbs[i].center, + root.aabbs[i].half_extent, + )); + } + } + + let mut reachable = vec![false; meshlets.meshlets.len()]; + verify_bvh(&out, cull_data, &mut reachable, 0); + assert!( + reachable.iter().all(|&x| x), + "all meshlets must be reachable" + ); + + ( + out, + MeshletAabb { + center: aabb.center().into(), + half_extent: aabb.half_size().into(), + }, + max_depth, + ) + } +} + +fn verify_bvh( + out: &[BvhNode], + cull_data: &[TempMeshletCullData], + reachable: &mut [bool], + node: u32, +) { + let node = &out[node as usize]; + for i in 0..8 { + let sphere = node.lod_bounds[i]; + let error = node.aabbs[i].error; + if node.child_counts[i] == u8::MAX { + let child = &out[node.aabbs[i].child_offset as 
usize]; + for i in 0..8 { + if child.child_counts[i] == 0 { + break; + } + assert!( + child.aabbs[i].error <= error, + "BVH errors are not monotonic" + ); + let sphere_error = (sphere.center - child.lod_bounds[i].center).length() + - (sphere.radius - child.lod_bounds[i].radius); + assert!( + sphere_error <= 0.0001, + "BVH lod spheres are not monotonic ({sphere_error})" + ); + } + verify_bvh(out, cull_data, reachable, node.aabbs[i].child_offset); + } else { + for m in 0..node.child_counts[i] as u32 { + let mid = (m + node.aabbs[i].child_offset) as usize; + let meshlet = &cull_data[mid]; + assert!(meshlet.error <= error, "meshlet errors are not monotonic"); + let sphere_error = (Vec3A::from(sphere.center) - meshlet.lod_group_sphere.center) + .length() + - (sphere.radius - meshlet.lod_group_sphere.radius()); + assert!( + sphere_error <= 0.0001, + "meshlet lod spheres are not monotonic: ({sphere_error})" + ); + reachable[mid] = true; + } + } + } +} + +fn aabb_default() -> Aabb3d { + Aabb3d { + min: Vec3A::INFINITY, + max: Vec3A::NEG_INFINITY, + } +} + +fn aabb_to_meshlet(aabb: Aabb3d, error: f32, child_offset: u32) -> MeshletAabbErrorOffset { + MeshletAabbErrorOffset { + center: aabb.center().into(), + error, + half_extent: aabb.half_size().into(), + child_offset, + } +} + +fn sphere_to_meshlet(sphere: BoundingSphere) -> MeshletBoundingSphere { + MeshletBoundingSphere { + center: sphere.center.into(), + radius: sphere.radius(), + } +} + +// TODO: Precise encode variant +fn octahedral_encode(v: Vec3) -> Vec2 { + let n = v / (v.x.abs() + v.y.abs() + v.z.abs()); + let octahedral_wrap = (1.0 - n.yx().abs()) + * Vec2::new( + if n.x >= 0.0 { 1.0 } else { -1.0 }, + if n.y >= 0.0 { 1.0 } else { -1.0 }, + ); + if n.z >= 0.0 { + n.xy() + } else { + octahedral_wrap + } +} + +// https://www.w3.org/TR/WGSL/#pack2x16snorm-builtin +fn pack2x16snorm(v: Vec2) -> u32 { + let v = v.clamp(Vec2::NEG_ONE, Vec2::ONE); + let v = (v * 32767.0 + 0.5).floor().as_i16vec2(); + bytemuck::cast(v) +} 
+ +/// An error produced by [`MeshletMesh::from_mesh`]. +#[derive(Error, Debug)] +pub enum MeshToMeshletMeshConversionError { + #[error("Mesh primitive topology is not TriangleList")] + WrongMeshPrimitiveTopology, + #[error("Mesh vertex attributes are not {{POSITION, NORMAL, UV_0}}: {0:?}")] + WrongMeshVertexAttributes(Vec), + #[error("Mesh has no indices")] + MeshMissingIndices, +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/instance_manager.rs b/crates/libmarathon/src/render/pbr/meshlet/instance_manager.rs new file mode 100644 index 0000000..2035083 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/instance_manager.rs @@ -0,0 +1,295 @@ +use super::{meshlet_mesh_manager::MeshletMeshManager, MeshletMesh, MeshletMesh3d}; +use crate::render::pbr::DUMMY_MESH_MATERIAL; +use crate::render::pbr::{ + meshlet::asset::MeshletAabb, MaterialBindingId, MeshFlags, MeshTransforms, MeshUniform, + PreviousGlobalTransform, RenderMaterialBindings, RenderMaterialInstances, +}; +use bevy_asset::{AssetEvent, AssetServer, Assets, UntypedAssetId}; +use bevy_camera::visibility::RenderLayers; +use bevy_ecs::{ + entity::{Entities, Entity, EntityHashMap}, + message::MessageReader, + query::Has, + resource::Resource, + system::{Local, Query, Res, ResMut, SystemState}, +}; +use bevy_light::{NotShadowCaster, NotShadowReceiver}; +use bevy_platform::collections::{HashMap, HashSet}; +use crate::render::{render_resource::StorageBuffer, sync_world::MainEntity, MainWorld}; +use bevy_transform::components::GlobalTransform; +use core::ops::DerefMut; + +/// Manages data for each entity with a [`MeshletMesh`]. +#[derive(Resource)] +pub struct InstanceManager { + /// Amount of instances in the scene. + pub scene_instance_count: u32, + /// The max BVH depth of any instance in the scene. This is used to control the number of + /// dependent dispatches emitted for BVH traversal. + pub max_bvh_depth: u32, + + /// Per-instance [`MainEntity`], [`RenderLayers`], and [`NotShadowCaster`]. 
+ pub instances: Vec<(MainEntity, RenderLayers, bool)>, + /// Per-instance [`MeshUniform`]. + pub instance_uniforms: StorageBuffer>, + /// Per-instance model-space AABB. + pub instance_aabbs: StorageBuffer>, + /// Per-instance material ID. + pub instance_material_ids: StorageBuffer>, + /// Per-instance index to the root node of the instance's BVH. + pub instance_bvh_root_nodes: StorageBuffer>, + /// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support. + pub view_instance_visibility: EntityHashMap>>, + + /// Next material ID available. + next_material_id: u32, + /// Map of material asset to material ID. + material_id_lookup: HashMap, + /// Set of material IDs used in the scene. + material_ids_present_in_scene: HashSet, +} + +impl InstanceManager { + pub fn new() -> Self { + Self { + scene_instance_count: 0, + max_bvh_depth: 0, + + instances: Vec::new(), + instance_uniforms: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_uniforms")); + buffer + }, + instance_aabbs: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_aabbs")); + buffer + }, + instance_material_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_material_ids")); + buffer + }, + instance_bvh_root_nodes: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_bvh_root_nodes")); + buffer + }, + view_instance_visibility: EntityHashMap::default(), + + next_material_id: 0, + material_id_lookup: HashMap::default(), + material_ids_present_in_scene: HashSet::default(), + } + } + + pub fn add_instance( + &mut self, + instance: MainEntity, + root_bvh_node: u32, + aabb: MeshletAabb, + bvh_depth: u32, + transform: &GlobalTransform, + previous_transform: Option<&PreviousGlobalTransform>, + render_layers: Option<&RenderLayers>, + mesh_material_ids: &RenderMaterialInstances, + render_material_bindings: 
&RenderMaterialBindings, + not_shadow_receiver: bool, + not_shadow_caster: bool, + ) { + // Build a MeshUniform for the instance + let transform = transform.affine(); + let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); + let mut flags = if not_shadow_receiver { + MeshFlags::empty() + } else { + MeshFlags::SHADOW_RECEIVER + }; + if transform.matrix3.determinant().is_sign_positive() { + flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; + } + let transforms = MeshTransforms { + world_from_local: (&transform).into(), + previous_world_from_local: (&previous_transform).into(), + flags: flags.bits(), + }; + + let mesh_material = mesh_material_ids.mesh_material(instance); + let mesh_material_binding_id = if mesh_material != DUMMY_MESH_MATERIAL.untyped() { + render_material_bindings + .get(&mesh_material) + .cloned() + .unwrap_or_default() + } else { + // Use a dummy binding ID if the mesh has no material + MaterialBindingId::default() + }; + + let mesh_uniform = MeshUniform::new( + &transforms, + 0, + mesh_material_binding_id.slot, + None, + None, + None, + ); + + // Append instance data + self.instances.push(( + instance, + render_layers.cloned().unwrap_or(RenderLayers::default()), + not_shadow_caster, + )); + self.instance_uniforms.get_mut().push(mesh_uniform); + self.instance_aabbs.get_mut().push(aabb); + self.instance_material_ids.get_mut().push(0); + self.instance_bvh_root_nodes.get_mut().push(root_bvh_node); + + self.scene_instance_count += 1; + self.max_bvh_depth = self.max_bvh_depth.max(bvh_depth); + } + + /// Get the material ID for a [`crate::Material`]. 
+ pub fn get_material_id(&mut self, material_asset_id: UntypedAssetId) -> u32 { + *self + .material_id_lookup + .entry(material_asset_id) + .or_insert_with(|| { + self.next_material_id += 1; + self.next_material_id + }) + } + + pub fn material_present_in_scene(&self, material_id: &u32) -> bool { + self.material_ids_present_in_scene.contains(material_id) + } + + pub fn reset(&mut self, entities: &Entities) { + self.scene_instance_count = 0; + self.max_bvh_depth = 0; + + self.instances.clear(); + self.instance_uniforms.get_mut().clear(); + self.instance_aabbs.get_mut().clear(); + self.instance_material_ids.get_mut().clear(); + self.instance_bvh_root_nodes.get_mut().clear(); + self.view_instance_visibility + .retain(|view_entity, _| entities.contains(*view_entity)); + self.view_instance_visibility + .values_mut() + .for_each(|b| b.get_mut().clear()); + + self.next_material_id = 0; + self.material_id_lookup.clear(); + self.material_ids_present_in_scene.clear(); + } +} + +pub fn extract_meshlet_mesh_entities( + mut meshlet_mesh_manager: ResMut, + mut instance_manager: ResMut, + // TODO: Replace main_world and system_state when Extract>> is possible + mut main_world: ResMut, + mesh_material_ids: Res, + render_material_bindings: Res, + mut system_state: Local< + Option< + SystemState<( + Query<( + Entity, + &MeshletMesh3d, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + MessageReader>, + )>, + >, + >, + render_entities: &Entities, +) { + // Get instances query + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (instances_query, asset_server, mut assets, mut asset_events) = + system_state.get_mut(&mut main_world); + + // Reset per-frame data + instance_manager.reset(render_entities); + + // Free GPU buffer space for any modified or dropped MeshletMesh assets + for asset_event in asset_events.read() { + 
if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { + meshlet_mesh_manager.remove(id); + } + } + + // Iterate over every instance + // TODO: Switch to change events to not upload every instance every frame. + for ( + instance, + meshlet_mesh, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ) in &instances_query + { + // Skip instances with an unloaded MeshletMesh asset + // TODO: This is a semi-expensive check + if asset_server.is_managed(meshlet_mesh.id()) + && !asset_server.is_loaded_with_dependencies(meshlet_mesh.id()) + { + continue; + } + + // Upload the instance's MeshletMesh asset data if not done already done + let (root_bvh_node, aabb, bvh_depth) = + meshlet_mesh_manager.queue_upload_if_needed(meshlet_mesh.id(), &mut assets); + + // Add the instance's data to the instance manager + instance_manager.add_instance( + instance.into(), + root_bvh_node, + aabb, + bvh_depth, + transform, + previous_transform, + render_layers, + &mesh_material_ids, + &render_material_bindings, + not_shadow_receiver, + not_shadow_caster, + ); + } +} + +/// For each entity in the scene, record what material ID its material was assigned in the `prepare_material_meshlet_meshes` systems, +/// and note that the material is used by at least one entity in the scene. 
+pub fn queue_material_meshlet_meshes( + mut instance_manager: ResMut, + render_material_instances: Res, +) { + let instance_manager = instance_manager.deref_mut(); + + for (i, (instance, _, _)) in instance_manager.instances.iter().enumerate() { + if let Some(material_instance) = render_material_instances.instances.get(instance) + && let Some(material_id) = instance_manager + .material_id_lookup + .get(&material_instance.asset_id) + { + instance_manager + .material_ids_present_in_scene + .insert(*material_id); + instance_manager.instance_material_ids.get_mut()[i] = *material_id; + } + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/material_pipeline_prepare.rs b/crates/libmarathon/src/render/pbr/meshlet/material_pipeline_prepare.rs new file mode 100644 index 0000000..2607d0c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/material_pipeline_prepare.rs @@ -0,0 +1,475 @@ +use super::{ + instance_manager::InstanceManager, pipelines::MeshletPipelines, + resource_manager::ResourceManager, +}; +use crate::render::pbr::*; +use bevy_camera::{Camera3d, Projection}; +use crate::render::{ + prepass::{DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass}, + tonemapping::{DebandDither, Tonemapping}, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_light::{EnvironmentMapLight, IrradianceVolume, ShadowFilteringMethod}; +use bevy_mesh::VertexBufferLayout; +use bevy_mesh::{Mesh, MeshVertexBufferLayout, MeshVertexBufferLayoutRef, MeshVertexBufferLayouts}; +use bevy_platform::collections::{HashMap, HashSet}; +use crate::render::erased_render_asset::ErasedRenderAssets; +use crate::render::{camera::TemporalJitter, render_resource::*, view::ExtractedView}; +use bevy_utils::default; +use core::any::{Any, TypeId}; + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletMainOpaquePass3dNode`]. 
+#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsMainOpaquePass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); + +/// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletMainOpaquePass3dNode`], +/// and register the material with [`InstanceManager`]. +pub fn prepare_material_meshlet_meshes_main_opaque_pass( + resource_manager: ResMut, + mut instance_manager: ResMut, + mut cache: Local>, + pipeline_cache: Res, + material_pipeline: Res, + mesh_pipeline: Res, + render_materials: Res>, + meshlet_pipelines: Res, + render_material_instances: Res, + material_bind_group_allocators: Res, + mut mesh_vertex_buffer_layouts: ResMut, + mut views: Query< + ( + &mut MeshletViewMaterialsMainOpaquePass, + &ExtractedView, + Option<&Tonemapping>, + Option<&DebandDither>, + Option<&ShadowFilteringMethod>, + (Has, Has), + ( + Has, + Has, + Has, + Has, + ), + Has, + Option<&Projection>, + Has>, + Has>, + ), + With, + >, +) { + let fake_vertex_buffer_layout = &fake_vertex_buffer_layout(&mut mesh_vertex_buffer_layouts); + + for ( + mut materials, + view, + tonemapping, + dither, + shadow_filter_method, + (ssao, distance_fog), + (normal_prepass, depth_prepass, motion_vector_prepass, deferred_prepass), + temporal_jitter, + projection, + has_environment_maps, + has_irradiance_volumes, + ) in &mut views + { + let mut view_key = + MeshPipelineKey::from_msaa_samples(1) | MeshPipelineKey::from_hdr(view.hdr); + + if normal_prepass { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + if depth_prepass { + view_key |= MeshPipelineKey::DEPTH_PREPASS; + } + if motion_vector_prepass { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + if deferred_prepass { + view_key |= MeshPipelineKey::DEFERRED_PREPASS; + } + + if temporal_jitter { + view_key |= MeshPipelineKey::TEMPORAL_JITTER; + } + + if has_environment_maps { + view_key |= MeshPipelineKey::ENVIRONMENT_MAP; + } + + if has_irradiance_volumes { + view_key |= 
MeshPipelineKey::IRRADIANCE_VOLUME; + } + + if let Some(projection) = projection { + view_key |= match projection { + Projection::Perspective(_) => MeshPipelineKey::VIEW_PROJECTION_PERSPECTIVE, + Projection::Orthographic(_) => MeshPipelineKey::VIEW_PROJECTION_ORTHOGRAPHIC, + Projection::Custom(_) => MeshPipelineKey::VIEW_PROJECTION_NONSTANDARD, + }; + } + + match shadow_filter_method.unwrap_or(&ShadowFilteringMethod::default()) { + ShadowFilteringMethod::Hardware2x2 => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2; + } + ShadowFilteringMethod::Gaussian => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_GAUSSIAN; + } + ShadowFilteringMethod::Temporal => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_TEMPORAL; + } + } + + if !view.hdr { + if let Some(tonemapping) = tonemapping { + view_key |= MeshPipelineKey::TONEMAP_IN_SHADER; + view_key |= tonemapping_pipeline_key(*tonemapping); + } + if let Some(DebandDither::Enabled) = dither { + view_key |= MeshPipelineKey::DEBAND_DITHER; + } + } + + if ssao { + view_key |= MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION; + } + if distance_fog { + view_key |= MeshPipelineKey::DISTANCE_FOG; + } + + view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); + + for material_id in render_material_instances + .instances + .values() + .map(|instance| instance.asset_id) + .collect::>() + { + let Some(material) = render_materials.get(material_id) else { + continue; + }; + + if material.properties.render_method != OpaqueRendererMethod::Forward + || material.properties.alpha_mode != AlphaMode::Opaque + || material.properties.reads_view_transmission_texture + { + continue; + } + + let erased_key = ErasedMaterialPipelineKey { + mesh_key: view_key, + material_key: material.properties.material_key.clone(), + type_id: material_id.type_id(), + }; + let material_pipeline_specializer = MaterialPipelineSpecializer { + pipeline: material_pipeline.clone(), + properties: 
material.properties.clone(), + }; + let Ok(material_pipeline_descriptor) = + material_pipeline_specializer.specialize(erased_key, fake_vertex_buffer_layout) + else { + continue; + }; + let material_fragment = material_pipeline_descriptor.fragment.unwrap(); + + let mut shader_defs = material_fragment.shader_defs; + shader_defs.push("MESHLET_MESH_MATERIAL_PASS".into()); + + let layout = mesh_pipeline.get_view_layout(view_key.into()); + let layout = vec![ + layout.main_layout.clone(), + layout.binding_array_layout.clone(), + resource_manager.material_shade_bind_group_layout.clone(), + material + .properties + .material_layout + .as_ref() + .unwrap() + .clone(), + ]; + + let pipeline_descriptor = RenderPipelineDescriptor { + label: material_pipeline_descriptor.label, + layout, + push_constant_ranges: vec![], + vertex: VertexState { + shader: meshlet_pipelines.meshlet_mesh_material.clone(), + shader_defs: shader_defs.clone(), + entry_point: material_pipeline_descriptor.vertex.entry_point, + buffers: Vec::new(), + }, + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: false, + depth_compare: CompareFunction::Equal, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: match material.properties.get_shader(MeshletFragmentShader) { + Some(shader) => shader.clone(), + None => meshlet_pipelines.meshlet_mesh_material.clone(), + }, + shader_defs, + entry_point: material_fragment.entry_point, + targets: material_fragment.targets, + }), + zero_initialize_workgroup_memory: false, + }; + let type_id = material_id.type_id(); + let Some(material_bind_group_allocator) = material_bind_group_allocators.get(&type_id) + else { + continue; + }; + let material_id = instance_manager.get_material_id(material_id); + + let pipeline_id = *cache.entry((view_key, type_id)).or_insert_with(|| { + 
pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) + }); + + let Some(material_bind_group) = + material_bind_group_allocator.get(material.binding.group) + else { + continue; + }; + let Some(bind_group) = material_bind_group.bind_group() else { + continue; + }; + + materials.push((material_id, pipeline_id, (*bind_group).clone())); + } + } +} + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletPrepassNode`]. +#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsPrepass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletDeferredGBufferPrepassNode`]. +#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsDeferredGBufferPrepass( + pub Vec<(u32, CachedRenderPipelineId, BindGroup)>, +); + +/// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletPrepassNode`], +/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`InstanceManager`]. 
+pub fn prepare_material_meshlet_meshes_prepass( + resource_manager: ResMut, + mut instance_manager: ResMut, + mut cache: Local>, + pipeline_cache: Res, + prepass_pipeline: Res, + material_bind_group_allocators: Res, + render_materials: Res>, + meshlet_pipelines: Res, + render_material_instances: Res, + mut mesh_vertex_buffer_layouts: ResMut, + mut views: Query< + ( + &mut MeshletViewMaterialsPrepass, + &mut MeshletViewMaterialsDeferredGBufferPrepass, + &ExtractedView, + AnyOf<(&NormalPrepass, &MotionVectorPrepass, &DeferredPrepass)>, + ), + With, + >, +) { + let fake_vertex_buffer_layout = &fake_vertex_buffer_layout(&mut mesh_vertex_buffer_layouts); + + for ( + mut materials, + mut deferred_materials, + view, + (normal_prepass, motion_vector_prepass, deferred_prepass), + ) in &mut views + { + let mut view_key = + MeshPipelineKey::from_msaa_samples(1) | MeshPipelineKey::from_hdr(view.hdr); + + if normal_prepass.is_some() { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + if motion_vector_prepass.is_some() { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + + view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); + + for material_id in render_material_instances + .instances + .values() + .map(|instance| instance.asset_id) + .collect::>() + { + let Some(material) = render_materials.get(material_id) else { + continue; + }; + let Some(material_bind_group_allocator) = + material_bind_group_allocators.get(&material_id.type_id()) + else { + continue; + }; + + if material.properties.alpha_mode != AlphaMode::Opaque + || material.properties.reads_view_transmission_texture + { + continue; + } + + let material_wants_deferred = matches!( + material.properties.render_method, + OpaqueRendererMethod::Deferred + ); + if deferred_prepass.is_some() && material_wants_deferred { + view_key |= MeshPipelineKey::DEFERRED_PREPASS; + } else if normal_prepass.is_none() && motion_vector_prepass.is_none() { + continue; + } + + let erased_key = 
ErasedMaterialPipelineKey { + mesh_key: view_key, + material_key: material.properties.material_key.clone(), + type_id: material_id.type_id(), + }; + let material_pipeline_specializer = PrepassPipelineSpecializer { + pipeline: prepass_pipeline.clone(), + properties: material.properties.clone(), + }; + let Ok(material_pipeline_descriptor) = + material_pipeline_specializer.specialize(erased_key, fake_vertex_buffer_layout) + else { + continue; + }; + let material_fragment = material_pipeline_descriptor.fragment.unwrap(); + + let mut shader_defs = material_fragment.shader_defs; + shader_defs.push("MESHLET_MESH_MATERIAL_PASS".into()); + + let view_layout = if view_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + prepass_pipeline.view_layout_motion_vectors.clone() + } else { + prepass_pipeline.view_layout_no_motion_vectors.clone() + }; + + let fragment_shader = if view_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + material + .properties + .get_shader(MeshletDeferredFragmentShader) + .unwrap_or(meshlet_pipelines.meshlet_mesh_material.clone()) + } else { + material + .properties + .get_shader(MeshletPrepassFragmentShader) + .unwrap_or(meshlet_pipelines.meshlet_mesh_material.clone()) + }; + + let entry_point = if fragment_shader == meshlet_pipelines.meshlet_mesh_material { + material_fragment.entry_point.clone() + } else { + None + }; + + let pipeline_descriptor = RenderPipelineDescriptor { + label: material_pipeline_descriptor.label, + layout: vec![ + view_layout, + prepass_pipeline.empty_layout.clone(), + resource_manager.material_shade_bind_group_layout.clone(), + material + .properties + .material_layout + .as_ref() + .unwrap() + .clone(), + ], + vertex: VertexState { + shader: meshlet_pipelines.meshlet_mesh_material.clone(), + shader_defs: shader_defs.clone(), + entry_point: material_pipeline_descriptor.vertex.entry_point, + ..default() + }, + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: 
TextureFormat::Depth16Unorm, + depth_write_enabled: false, + depth_compare: CompareFunction::Equal, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + fragment: Some(FragmentState { + shader: fragment_shader, + shader_defs, + entry_point, + targets: material_fragment.targets, + }), + ..default() + }; + + let material_id = instance_manager.get_material_id(material_id); + + let pipeline_id = *cache + .entry((view_key, material_id.type_id())) + .or_insert_with(|| { + pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) + }); + + let Some(material_bind_group) = + material_bind_group_allocator.get(material.binding.group) + else { + continue; + }; + let Some(bind_group) = material_bind_group.bind_group() else { + continue; + }; + + let item = (material_id, pipeline_id, (*bind_group).clone()); + if view_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + deferred_materials.push(item); + } else { + materials.push(item); + } + } + } +} + +// Meshlet materials don't use a traditional vertex buffer, but the material specialization requires one. 
+fn fake_vertex_buffer_layout(layouts: &mut MeshVertexBufferLayouts) -> MeshVertexBufferLayoutRef { + layouts.insert(MeshVertexBufferLayout::new( + vec![ + Mesh::ATTRIBUTE_POSITION.id, + Mesh::ATTRIBUTE_NORMAL.id, + Mesh::ATTRIBUTE_UV_0.id, + Mesh::ATTRIBUTE_TANGENT.id, + ], + VertexBufferLayout { + array_stride: 48, + step_mode: VertexStepMode::Vertex, + attributes: vec![ + VertexAttribute { + format: Mesh::ATTRIBUTE_POSITION.format, + offset: 0, + shader_location: 0, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_NORMAL.format, + offset: 12, + shader_location: 1, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_UV_0.format, + offset: 24, + shader_location: 2, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_TANGENT.format, + offset: 32, + shader_location: 3, + }, + ], + }, + )) +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/material_shade_nodes.rs b/crates/libmarathon/src/render/pbr/meshlet/material_shade_nodes.rs new file mode 100644 index 0000000..b363d87 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/material_shade_nodes.rs @@ -0,0 +1,421 @@ +use super::{ + material_pipeline_prepare::{ + MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, + MeshletViewMaterialsPrepass, + }, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, + InstanceManager, +}; +use crate::render::pbr::{ + MeshViewBindGroup, PrepassViewBindGroup, ViewEnvironmentMapUniformOffset, ViewFogUniformOffset, + ViewLightProbesUniformOffset, ViewLightsUniformOffset, ViewScreenSpaceReflectionsUniformOffset, +}; +use bevy_camera::MainPassResolutionOverride; +use bevy_camera::Viewport; +use crate::render::prepass::{ + MotionVectorPrepass, PreviousViewUniformOffset, ViewPrepassTextures, +}; +use bevy_ecs::{ + query::{Has, QueryItem}, + world::World, +}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_resource::{ + LoadOp, 
Operations, PipelineCache, RenderPassDepthStencilAttachment, RenderPassDescriptor, + StoreOp, + }, + renderer::RenderContext, + view::{ViewTarget, ViewUniformOffset}, +}; + +/// Fullscreen shading pass based on the visibility buffer generated from rasterizing meshlets. +#[derive(Default)] +pub struct MeshletMainOpaquePass3dNode; +impl ViewNode for MeshletMainOpaquePass3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewTarget, + &'static MeshViewBindGroup, + &'static ViewUniformOffset, + &'static ViewLightsUniformOffset, + &'static ViewFogUniformOffset, + &'static ViewLightProbesUniformOffset, + &'static ViewScreenSpaceReflectionsUniformOffset, + &'static ViewEnvironmentMapUniformOffset, + Option<&'static MainPassResolutionOverride>, + &'static MeshletViewMaterialsMainOpaquePass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + target, + mesh_view_bind_group, + view_uniform_offset, + view_lights_offset, + view_fog_offset, + view_light_probes_offset, + view_ssr_offset, + view_environment_map_offset, + resolution_override, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(instance_manager), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_shade_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), + ) + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_material_opaque_3d_pass"), + color_attachments: &[Some(target.get_color_attachment())], + 
depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + let pass_span = diagnostics.pass_span(&mut render_pass, "meshlet_material_opaque_3d_pass"); + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + render_pass.set_bind_group( + 0, + &mesh_view_bind_group.main, + &[ + view_uniform_offset.offset, + view_lights_offset.offset, + view_fog_offset.offset, + **view_light_probes_offset, + **view_ssr_offset, + **view_environment_map_offset, + ], + ); + render_pass.set_bind_group(1, &mesh_view_bind_group.binding_array, &[]); + render_pass.set_bind_group(2, meshlet_material_shade_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if instance_manager.material_present_in_scene(material_id) + && let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_render_pipeline(material_pipeline); + render_pass.set_bind_group(3, material_bind_group, &[]); + render_pass.draw(x..(x + 3), 0..1); + } + } + + pass_span.end(&mut render_pass); + + Ok(()) + } +} + +/// Fullscreen pass to generate prepass textures based on the visibility buffer generated from rasterizing meshlets. 
+#[derive(Default)] +pub struct MeshletPrepassNode; +impl ViewNode for MeshletPrepassNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewPrepassTextures, + &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, + Option<&'static MainPassResolutionOverride>, + Has, + &'static MeshletViewMaterialsPrepass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + view_prepass_textures, + view_uniform_offset, + previous_view_uniform_offset, + resolution_override, + view_has_motion_vector_prepass, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(prepass_view_bind_group), + Some(instance_manager), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_shade_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), + ) + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + // Use None in place of Deferred attachments + None, + None, + ]; + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_material_prepass"), + color_attachments: &color_attachments, + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: 
StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + let pass_span = diagnostics.pass_span(&mut render_pass, "meshlet_material_prepass"); + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + if view_has_motion_vector_prepass { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_uniform_offset.offset, + ], + ); + } else { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } + + render_pass.set_bind_group(1, &prepass_view_bind_group.empty_bind_group, &[]); + render_pass.set_bind_group(2, meshlet_material_shade_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if instance_manager.material_present_in_scene(material_id) + && let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_render_pipeline(material_pipeline); + render_pass.set_bind_group(2, material_bind_group, &[]); + render_pass.draw(x..(x + 3), 0..1); + } + } + + pass_span.end(&mut render_pass); + + Ok(()) + } +} + +/// Fullscreen pass to generate a gbuffer based on the visibility buffer generated from rasterizing meshlets. 
+#[derive(Default)] +pub struct MeshletDeferredGBufferPrepassNode; +impl ViewNode for MeshletDeferredGBufferPrepassNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewPrepassTextures, + &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, + Option<&'static MainPassResolutionOverride>, + Has, + &'static MeshletViewMaterialsDeferredGBufferPrepass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + view_prepass_textures, + view_uniform_offset, + previous_view_uniform_offset, + resolution_override, + view_has_motion_vector_prepass, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(prepass_view_bind_group), + Some(instance_manager), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_shade_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), + ) + else { + return Ok(()); + }; + + let color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + view_prepass_textures + .deferred + .as_ref() + .map(|deferred_texture| deferred_texture.get_attachment()), + view_prepass_textures + .deferred_lighting_pass_id + .as_ref() + .map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()), + ]; + + let diagnostics = render_context.diagnostic_recorder(); + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: 
Some("meshlet_material_deferred_prepass"), + color_attachments: &color_attachments, + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + let pass_span = + diagnostics.pass_span(&mut render_pass, "meshlet_material_deferred_prepass"); + if let Some(viewport) = + Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + if view_has_motion_vector_prepass { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_uniform_offset.offset, + ], + ); + } else { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } + + render_pass.set_bind_group(1, &prepass_view_bind_group.empty_bind_group, &[]); + render_pass.set_bind_group(2, meshlet_material_shade_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if instance_manager.material_present_in_scene(material_id) + && let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_render_pipeline(material_pipeline); + render_pass.set_bind_group(2, material_bind_group, &[]); + render_pass.draw(x..(x + 3), 0..1); + } + } + + pass_span.end(&mut render_pass); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/meshlet_bindings.wgsl b/crates/libmarathon/src/render/pbr/meshlet/meshlet_bindings.wgsl new file mode 100644 index 0000000..4533b2b --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/meshlet_bindings.wgsl @@ -0,0 +1,306 @@ 
+#define_import_path bevy_pbr::meshlet_bindings + +#import bevy_pbr::mesh_types::Mesh +#import bevy_render::view::View +#import bevy_pbr::prepass_bindings::PreviousViewUniforms +#import bevy_pbr::utils::octahedral_decode_signed + +struct BvhNode { + aabbs: array, + lod_bounds: array, 8>, + child_counts: array, + _padding: vec2, +} + +struct Meshlet { + start_vertex_position_bit: u32, + start_vertex_attribute_id: u32, + start_index_id: u32, + packed_a: u32, + packed_b: u32, + min_vertex_position_channel_x: f32, + min_vertex_position_channel_y: f32, + min_vertex_position_channel_z: f32, +} + +fn get_meshlet_vertex_count(meshlet: ptr) -> u32 { + return extractBits((*meshlet).packed_a, 0u, 8u); +} + +fn get_meshlet_triangle_count(meshlet: ptr) -> u32 { + return extractBits((*meshlet).packed_a, 8u, 8u); +} + +struct MeshletCullData { + aabb: MeshletAabbErrorOffset, + lod_group_sphere: vec4, +} + +struct MeshletAabb { + center: vec3, + half_extent: vec3, +} + +struct MeshletAabbErrorOffset { + center_and_error: vec4, + half_extent_and_child_offset: vec4, +} + +fn get_aabb(aabb: ptr) -> MeshletAabb { + return MeshletAabb( + (*aabb).center_and_error.xyz, + (*aabb).half_extent_and_child_offset.xyz, + ); +} + +fn get_aabb_error(aabb: ptr) -> f32 { + return (*aabb).center_and_error.w; +} + +fn get_aabb_child_offset(aabb: ptr) -> u32 { + return bitcast((*aabb).half_extent_and_child_offset.w); +} + +struct DispatchIndirectArgs { + x: atomic, + y: u32, + z: u32, +} + +struct DrawIndirectArgs { + vertex_count: u32, + instance_count: atomic, + first_vertex: u32, + first_instance: u32, +} + +// Either a BVH node or a meshlet, along with the instance it is associated with. +// Refers to BVH nodes in `meshlet_bvh_cull_queue` and `meshlet_second_pass_bvh_queue`, where `offset` is the index into `meshlet_bvh_nodes`. +// Refers to meshlets in `meshlet_meshlet_cull_queue` and `meshlet_raster_clusters`. +// In `meshlet_meshlet_cull_queue`, `offset` is the index into `meshlet_cull_data`. 
+// In `meshlet_raster_clusters`, `offset` is the index into `meshlets`. +struct InstancedOffset { + instance_id: u32, + offset: u32, +} + +const CENTIMETERS_PER_METER = 100.0; + +#ifdef MESHLET_INSTANCE_CULLING_PASS +struct Constants { scene_instance_count: u32 } +var constants: Constants; + +// Cull data +@group(0) @binding(0) var depth_pyramid: texture_2d; +@group(0) @binding(1) var view: View; +@group(0) @binding(2) var previous_view: PreviousViewUniforms; + +// Per entity instance data +@group(0) @binding(3) var meshlet_instance_uniforms: array; +@group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask +@group(0) @binding(5) var meshlet_instance_aabbs: array; +@group(0) @binding(6) var meshlet_instance_bvh_root_nodes: array; + +// BVH cull queue data +@group(0) @binding(7) var meshlet_bvh_cull_count_write: atomic; +@group(0) @binding(8) var meshlet_bvh_cull_dispatch: DispatchIndirectArgs; +@group(0) @binding(9) var meshlet_bvh_cull_queue: array; + +// Second pass queue data +#ifdef MESHLET_FIRST_CULLING_PASS +@group(0) @binding(10) var meshlet_second_pass_instance_count: atomic; +@group(0) @binding(11) var meshlet_second_pass_instance_dispatch: DispatchIndirectArgs; +@group(0) @binding(12) var meshlet_second_pass_instance_candidates: array; +#else +@group(0) @binding(10) var meshlet_second_pass_instance_count: u32; +@group(0) @binding(11) var meshlet_second_pass_instance_candidates: array; +#endif +#endif + +#ifdef MESHLET_BVH_CULLING_PASS +struct Constants { read_from_front: u32, rightmost_slot: u32 } +var constants: Constants; + +// Cull data +@group(0) @binding(0) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass +@group(0) @binding(1) var view: View; +@group(0) @binding(2) var previous_view: PreviousViewUniforms; + +// Global mesh data +@group(0) @binding(3) var meshlet_bvh_nodes: array; + +// Per 
entity instance data +@group(0) @binding(4) var meshlet_instance_uniforms: array; + +// BVH cull queue data +@group(0) @binding(5) var meshlet_bvh_cull_count_read: u32; +@group(0) @binding(6) var meshlet_bvh_cull_count_write: atomic; +@group(0) @binding(7) var meshlet_bvh_cull_dispatch: DispatchIndirectArgs; +@group(0) @binding(8) var meshlet_bvh_cull_queue: array; + +// Meshlet cull queue data +@group(0) @binding(9) var meshlet_meshlet_cull_count_early: atomic; +@group(0) @binding(10) var meshlet_meshlet_cull_count_late: atomic; +@group(0) @binding(11) var meshlet_meshlet_cull_dispatch_early: DispatchIndirectArgs; +@group(0) @binding(12) var meshlet_meshlet_cull_dispatch_late: DispatchIndirectArgs; +@group(0) @binding(13) var meshlet_meshlet_cull_queue: array; + +// Second pass queue data +#ifdef MESHLET_FIRST_CULLING_PASS +@group(0) @binding(14) var meshlet_second_pass_bvh_count: atomic; +@group(0) @binding(15) var meshlet_second_pass_bvh_dispatch: DispatchIndirectArgs; +@group(0) @binding(16) var meshlet_second_pass_bvh_queue: array; +#endif +#endif + +#ifdef MESHLET_CLUSTER_CULLING_PASS +struct Constants { rightmost_slot: u32 } +var constants: Constants; + +// Cull data +@group(0) @binding(0) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass +@group(0) @binding(1) var view: View; +@group(0) @binding(2) var previous_view: PreviousViewUniforms; + +// Global mesh data +@group(0) @binding(3) var meshlet_cull_data: array; + +// Per entity instance data +@group(0) @binding(4) var meshlet_instance_uniforms: array; + +// Raster queue data +@group(0) @binding(5) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; +@group(0) @binding(6) var meshlet_hardware_raster_indirect_args: DrawIndirectArgs; +@group(0) @binding(7) var meshlet_previous_raster_counts: array; +@group(0) @binding(8) var meshlet_raster_clusters: array; + +// Meshlet cull queue data 
+@group(0) @binding(9) var meshlet_meshlet_cull_count_read: u32; + +// Second pass queue data +#ifdef MESHLET_FIRST_CULLING_PASS +@group(0) @binding(10) var meshlet_meshlet_cull_count_write: atomic; +@group(0) @binding(11) var meshlet_meshlet_cull_dispatch: DispatchIndirectArgs; +@group(0) @binding(12) var meshlet_meshlet_cull_queue: array; +#else +@group(0) @binding(10) var meshlet_meshlet_cull_queue: array; +#endif +#endif + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS +@group(0) @binding(0) var meshlet_raster_clusters: array; // Per cluster +@group(0) @binding(1) var meshlets: array; // Per meshlet +@group(0) @binding(2) var meshlet_indices: array; // Many per meshlet +@group(0) @binding(3) var meshlet_vertex_positions: array; // Many per meshlet +@group(0) @binding(4) var meshlet_instance_uniforms: array; // Per entity instance +@group(0) @binding(5) var meshlet_previous_raster_counts: array; +@group(0) @binding(6) var meshlet_software_raster_cluster_count: u32; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(7) var meshlet_visibility_buffer: texture_storage_2d; +#else +@group(0) @binding(7) var meshlet_visibility_buffer: texture_storage_2d; +#endif +@group(0) @binding(8) var view: View; + +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? 
+fn get_meshlet_vertex_id(index_id: u32) -> u32 { + let packed_index = meshlet_indices[index_id / 4u]; + let bit_offset = (index_id % 4u) * 8u; + return extractBits(packed_index, bit_offset, 8u); +} + +fn get_meshlet_vertex_position(meshlet: ptr, vertex_id: u32) -> vec3 { + // Get bitstream start for the vertex + let unpacked = unpack4xU8((*meshlet).packed_b); + let bits_per_channel = unpacked.xyz; + let bits_per_vertex = bits_per_channel.x + bits_per_channel.y + bits_per_channel.z; + var start_bit = (*meshlet).start_vertex_position_bit + (vertex_id * bits_per_vertex); + + // Read each vertex channel from the bitstream + var vertex_position_packed = vec3(0u); + for (var i = 0u; i < 3u; i++) { + let lower_word_index = start_bit / 32u; + let lower_word_bit_offset = start_bit & 31u; + var next_32_bits = meshlet_vertex_positions[lower_word_index] >> lower_word_bit_offset; + if lower_word_bit_offset + bits_per_channel[i] > 32u { + next_32_bits |= meshlet_vertex_positions[lower_word_index + 1u] << (32u - lower_word_bit_offset); + } + vertex_position_packed[i] = extractBits(next_32_bits, 0u, bits_per_channel[i]); + start_bit += bits_per_channel[i]; + } + + // Remap [0, range_max - range_min] vec3 to [range_min, range_max] vec3 + var vertex_position = vec3(vertex_position_packed) + vec3( + (*meshlet).min_vertex_position_channel_x, + (*meshlet).min_vertex_position_channel_y, + (*meshlet).min_vertex_position_channel_z, + ); + + // Reverse vertex quantization + let vertex_position_quantization_factor = unpacked.w; + vertex_position /= f32(1u << vertex_position_quantization_factor) * CENTIMETERS_PER_METER; + + return vertex_position; +} +#endif + +#ifdef MESHLET_MESH_MATERIAL_PASS +@group(2) @binding(0) var meshlet_visibility_buffer: texture_storage_2d; +@group(2) @binding(1) var meshlet_raster_clusters: array; // Per cluster +@group(2) @binding(2) var meshlets: array; // Per meshlet +@group(2) @binding(3) var meshlet_indices: array; // Many per meshlet +@group(2) @binding(4) 
var meshlet_vertex_positions: array; // Many per meshlet +@group(2) @binding(5) var meshlet_vertex_normals: array; // Many per meshlet +@group(2) @binding(6) var meshlet_vertex_uvs: array>; // Many per meshlet +@group(2) @binding(7) var meshlet_instance_uniforms: array; // Per entity instance + +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? +fn get_meshlet_vertex_id(index_id: u32) -> u32 { + let packed_index = meshlet_indices[index_id / 4u]; + let bit_offset = (index_id % 4u) * 8u; + return extractBits(packed_index, bit_offset, 8u); +} + +fn get_meshlet_vertex_position(meshlet: ptr, vertex_id: u32) -> vec3 { + // Get bitstream start for the vertex + let unpacked = unpack4xU8((*meshlet).packed_b); + let bits_per_channel = unpacked.xyz; + let bits_per_vertex = bits_per_channel.x + bits_per_channel.y + bits_per_channel.z; + var start_bit = (*meshlet).start_vertex_position_bit + (vertex_id * bits_per_vertex); + + // Read each vertex channel from the bitstream + var vertex_position_packed = vec3(0u); + for (var i = 0u; i < 3u; i++) { + let lower_word_index = start_bit / 32u; + let lower_word_bit_offset = start_bit & 31u; + var next_32_bits = meshlet_vertex_positions[lower_word_index] >> lower_word_bit_offset; + if lower_word_bit_offset + bits_per_channel[i] > 32u { + next_32_bits |= meshlet_vertex_positions[lower_word_index + 1u] << (32u - lower_word_bit_offset); + } + vertex_position_packed[i] = extractBits(next_32_bits, 0u, bits_per_channel[i]); + start_bit += bits_per_channel[i]; + } + + // Remap [0, range_max - range_min] vec3 to [range_min, range_max] vec3 + var vertex_position = vec3(vertex_position_packed) + vec3( + (*meshlet).min_vertex_position_channel_x, + (*meshlet).min_vertex_position_channel_y, + (*meshlet).min_vertex_position_channel_z, + ); + + // Reverse vertex quantization + let vertex_position_quantization_factor = unpacked.w; + vertex_position /= f32(1u << vertex_position_quantization_factor) * 
CENTIMETERS_PER_METER; + + return vertex_position; +} + +fn get_meshlet_vertex_normal(meshlet: ptr, vertex_id: u32) -> vec3 { + let packed_normal = meshlet_vertex_normals[(*meshlet).start_vertex_attribute_id + vertex_id]; + return octahedral_decode_signed(unpack2x16snorm(packed_normal)); +} + +fn get_meshlet_vertex_uv(meshlet: ptr, vertex_id: u32) -> vec2 { + return meshlet_vertex_uvs[(*meshlet).start_vertex_attribute_id + vertex_id]; +} +#endif diff --git a/crates/libmarathon/src/render/pbr/meshlet/meshlet_cull_shared.wgsl b/crates/libmarathon/src/render/pbr/meshlet/meshlet_cull_shared.wgsl new file mode 100644 index 0000000..975dd74 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/meshlet_cull_shared.wgsl @@ -0,0 +1,207 @@ +#define_import_path bevy_pbr::meshlet_cull_shared + +#import bevy_pbr::meshlet_bindings::{ + MeshletAabb, + DispatchIndirectArgs, + InstancedOffset, + depth_pyramid, + view, + previous_view, + meshlet_instance_uniforms, +} +#import bevy_render::maths::affine3_to_square + +// https://github.com/zeux/meshoptimizer/blob/1e48e96c7e8059321de492865165e9ef071bffba/demo/nanite.cpp#L115 +fn lod_error_is_imperceptible(lod_sphere: vec4, simplification_error: f32, instance_id: u32) -> bool { + let world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local); + let world_scale = max(length(world_from_local[0]), max(length(world_from_local[1]), length(world_from_local[2]))); + let camera_pos = view.world_position; + + let projection = view.clip_from_view; + if projection[3][3] == 1.0 { + // Orthographic + let world_error = simplification_error * world_scale; + let proj = projection[1][1]; + let height = 2.0 / proj; + let norm_error = world_error / height; + return norm_error * view.viewport.w < 1.0; + } else { + // Perspective + var near = projection[3][2]; + let world_sphere_center = (world_from_local * vec4(lod_sphere.xyz, 1.0)).xyz; + let world_sphere_radius = lod_sphere.w * world_scale; + let d_pos = 
world_sphere_center - camera_pos; + let d = sqrt(dot(d_pos, d_pos)) - world_sphere_radius; + let norm_error = simplification_error / max(d, near) * projection[1][1] * 0.5; + return norm_error * view.viewport.w < 1.0; + } +} + +fn normalize_plane(p: vec4) -> vec4 { + return p / length(p.xyz); +} + +// https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/ +// https://fgiesen.wordpress.com/2010/10/17/view-frustum-culling/ +fn aabb_in_frustum(aabb: MeshletAabb, instance_id: u32) -> bool { + let world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local); + let clip_from_local = view.clip_from_world * world_from_local; + let row_major = transpose(clip_from_local); + let planes = array( + row_major[3] + row_major[0], + row_major[3] - row_major[0], + row_major[3] + row_major[1], + row_major[3] - row_major[1], + row_major[2], + ); + + for (var i = 0; i < 5; i++) { + let plane = normalize_plane(planes[i]); + let flipped = aabb.half_extent * sign(plane.xyz); + if dot(aabb.center + flipped, plane.xyz) <= -plane.w { + return false; + } + } + return true; +} + +struct ScreenAabb { + min: vec3, + max: vec3, +} + +fn min8(a: vec3, b: vec3, c: vec3, d: vec3, e: vec3, f: vec3, g: vec3, h: vec3) -> vec3 { + return min(min(min(a, b), min(c, d)), min(min(e, f), min(g, h))); +} + +fn max8(a: vec3, b: vec3, c: vec3, d: vec3, e: vec3, f: vec3, g: vec3, h: vec3) -> vec3 { + return max(max(max(a, b), max(c, d)), max(max(e, f), max(g, h))); +} + +fn min8_4(a: vec4, b: vec4, c: vec4, d: vec4, e: vec4, f: vec4, g: vec4, h: vec4) -> vec4 { + return min(min(min(a, b), min(c, d)), min(min(e, f), min(g, h))); +} + +// https://zeux.io/2023/01/12/approximate-projected-bounds/ +fn project_aabb(clip_from_local: mat4x4, near: f32, aabb: MeshletAabb, out: ptr) -> bool { + let extent = aabb.half_extent * 2.0; + let sx = clip_from_local * vec4(extent.x, 0.0, 0.0, 0.0); + let sy = clip_from_local * vec4(0.0, extent.y, 0.0, 0.0); + let sz = 
clip_from_local * vec4(0.0, 0.0, extent.z, 0.0); + + let p0 = clip_from_local * vec4(aabb.center - aabb.half_extent, 1.0); + let p1 = p0 + sz; + let p2 = p0 + sy; + let p3 = p2 + sz; + let p4 = p0 + sx; + let p5 = p4 + sz; + let p6 = p4 + sy; + let p7 = p6 + sz; + + let depth = min8_4(p0, p1, p2, p3, p4, p5, p6, p7).w; + // do not occlusion cull if we are inside the aabb + if depth < near { + return false; + } + + let dp0 = p0.xyz / p0.w; + let dp1 = p1.xyz / p1.w; + let dp2 = p2.xyz / p2.w; + let dp3 = p3.xyz / p3.w; + let dp4 = p4.xyz / p4.w; + let dp5 = p5.xyz / p5.w; + let dp6 = p6.xyz / p6.w; + let dp7 = p7.xyz / p7.w; + let min = min8(dp0, dp1, dp2, dp3, dp4, dp5, dp6, dp7); + let max = max8(dp0, dp1, dp2, dp3, dp4, dp5, dp6, dp7); + var vaabb = vec4(min.xy, max.xy); + // convert ndc to texture coordinates by rescaling and flipping Y + vaabb = vaabb.xwzy * vec4(0.5, -0.5, 0.5, -0.5) + 0.5; + (*out).min = vec3(vaabb.xy, min.z); + (*out).max = vec3(vaabb.zw, max.z); + return true; +} + +fn sample_hzb(smin: vec2, smax: vec2, mip: i32) -> f32 { + let texel = vec4(0, 1, 2, 3); + let sx = min(smin.x + texel, smax.xxxx); + let sy = min(smin.y + texel, smax.yyyy); + // TODO: switch to min samplers when wgpu has them + // sampling 16 times a finer mip is worth the extra cost for better culling + let a = sample_hzb_row(sx, sy.x, mip); + let b = sample_hzb_row(sx, sy.y, mip); + let c = sample_hzb_row(sx, sy.z, mip); + let d = sample_hzb_row(sx, sy.w, mip); + return min(min(a, b), min(c, d)); +} + +fn sample_hzb_row(sx: vec4, sy: u32, mip: i32) -> f32 { + let a = textureLoad(depth_pyramid, vec2(sx.x, sy), mip).x; + let b = textureLoad(depth_pyramid, vec2(sx.y, sy), mip).x; + let c = textureLoad(depth_pyramid, vec2(sx.z, sy), mip).x; + let d = textureLoad(depth_pyramid, vec2(sx.w, sy), mip).x; + return min(min(a, b), min(c, d)); +} + +// TODO: We should probably be using a POT HZB texture? 
+fn occlusion_cull_screen_aabb(aabb: ScreenAabb, screen: vec2) -> bool { + let hzb_size = ceil(screen * 0.5); + let aabb_min = aabb.min.xy * hzb_size; + let aabb_max = aabb.max.xy * hzb_size; + + let min_texel = vec2(max(aabb_min, vec2(0.0))); + let max_texel = vec2(min(aabb_max, hzb_size - 1.0)); + let size = max_texel - min_texel; + let max_size = max(size.x, size.y); + + // note: add 1 before max because the unsigned overflow behavior is intentional + // it wraps around firstLeadingBit(0) = ~0 to 0 + // TODO: we actually sample a 4x4 block, so ideally this would be `max(..., 3u) - 3u`. + // However, since our HZB is not a power of two, we need to be extra-conservative to not over-cull, so we go up a mip. + var mip = max(firstLeadingBit(max_size) + 1u, 2u) - 2u; + + if any((max_texel >> vec2(mip)) > (min_texel >> vec2(mip)) + 3) { + mip += 1u; + } + + let smin = min_texel >> vec2(mip); + let smax = max_texel >> vec2(mip); + + let curr_depth = sample_hzb(smin, smax, i32(mip)); + return aabb.max.z <= curr_depth; +} + +fn occlusion_cull_projection() -> mat4x4 { +#ifdef FIRST_CULLING_PASS + return view.clip_from_world; +#else + return previous_view.clip_from_world; +#endif +} + +fn occlusion_cull_clip_from_local(instance_id: u32) -> mat4x4 { +#ifdef FIRST_CULLING_PASS + let prev_world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].previous_world_from_local); + return previous_view.clip_from_world * prev_world_from_local; +#else + let world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local); + return view.clip_from_world * world_from_local; +#endif +} + +fn should_occlusion_cull_aabb(aabb: MeshletAabb, instance_id: u32) -> bool { + let projection = occlusion_cull_projection(); + var near: f32; + if projection[3][3] == 1.0 { + near = projection[3][2] / projection[2][2]; + } else { + near = projection[3][2]; + } + + let clip_from_local = occlusion_cull_clip_from_local(instance_id); + var screen_aabb = 
ScreenAabb(vec3(0.0), vec3(0.0)); + if project_aabb(clip_from_local, near, aabb, &screen_aabb) { + return occlusion_cull_screen_aabb(screen_aabb, view.viewport.zw); + } + return false; +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_manager.rs b/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_manager.rs new file mode 100644 index 0000000..1af5b42 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_manager.rs @@ -0,0 +1,161 @@ +use crate::render::pbr::meshlet::asset::{BvhNode, MeshletAabb, MeshletCullData}; + +use super::{asset::Meshlet, persistent_buffer::PersistentGpuBuffer, MeshletMesh}; +use std::sync::Arc; +use bevy_asset::{AssetId, Assets}; +use bevy_ecs::{ + resource::Resource, + system::{Commands, Res, ResMut}, +}; +use bevy_math::Vec2; +use bevy_platform::collections::HashMap; +use crate::render::{ + render_resource::BufferAddress, + renderer::{RenderDevice, RenderQueue}, +}; +use core::ops::Range; + +/// Manages uploading [`MeshletMesh`] asset data to the GPU. 
+#[derive(Resource)] +pub struct MeshletMeshManager { + pub vertex_positions: PersistentGpuBuffer>, + pub vertex_normals: PersistentGpuBuffer>, + pub vertex_uvs: PersistentGpuBuffer>, + pub indices: PersistentGpuBuffer>, + pub bvh_nodes: PersistentGpuBuffer>, + pub meshlets: PersistentGpuBuffer>, + pub meshlet_cull_data: PersistentGpuBuffer>, + meshlet_mesh_slices: + HashMap, ([Range; 7], MeshletAabb, u32)>, +} + +pub fn init_meshlet_mesh_manager(mut commands: Commands, render_device: Res) { + commands.insert_resource(MeshletMeshManager { + vertex_positions: PersistentGpuBuffer::new("meshlet_vertex_positions", &render_device), + vertex_normals: PersistentGpuBuffer::new("meshlet_vertex_normals", &render_device), + vertex_uvs: PersistentGpuBuffer::new("meshlet_vertex_uvs", &render_device), + indices: PersistentGpuBuffer::new("meshlet_indices", &render_device), + bvh_nodes: PersistentGpuBuffer::new("meshlet_bvh_nodes", &render_device), + meshlets: PersistentGpuBuffer::new("meshlets", &render_device), + meshlet_cull_data: PersistentGpuBuffer::new("meshlet_cull_data", &render_device), + meshlet_mesh_slices: HashMap::default(), + }); +} + +impl MeshletMeshManager { + // Returns the index of the root BVH node, as well as the depth of the BVH. 
+ pub fn queue_upload_if_needed( + &mut self, + asset_id: AssetId, + assets: &mut Assets, + ) -> (u32, MeshletAabb, u32) { + let queue_meshlet_mesh = |asset_id: &AssetId| { + let meshlet_mesh = assets.remove_untracked(*asset_id).expect( + "MeshletMesh asset was already unloaded but is not registered with MeshletMeshManager", + ); + + let vertex_positions_slice = self + .vertex_positions + .queue_write(Arc::clone(&meshlet_mesh.vertex_positions), ()); + let vertex_normals_slice = self + .vertex_normals + .queue_write(Arc::clone(&meshlet_mesh.vertex_normals), ()); + let vertex_uvs_slice = self + .vertex_uvs + .queue_write(Arc::clone(&meshlet_mesh.vertex_uvs), ()); + let indices_slice = self + .indices + .queue_write(Arc::clone(&meshlet_mesh.indices), ()); + let meshlets_slice = self.meshlets.queue_write( + Arc::clone(&meshlet_mesh.meshlets), + ( + vertex_positions_slice.start, + vertex_normals_slice.start, + indices_slice.start, + ), + ); + let base_meshlet_index = (meshlets_slice.start / size_of::() as u64) as u32; + let bvh_node_slice = self + .bvh_nodes + .queue_write(Arc::clone(&meshlet_mesh.bvh), base_meshlet_index); + let meshlet_cull_data_slice = self + .meshlet_cull_data + .queue_write(Arc::clone(&meshlet_mesh.meshlet_cull_data), ()); + + ( + [ + vertex_positions_slice, + vertex_normals_slice, + vertex_uvs_slice, + indices_slice, + bvh_node_slice, + meshlets_slice, + meshlet_cull_data_slice, + ], + meshlet_mesh.aabb, + meshlet_mesh.bvh_depth, + ) + }; + + // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading + let ([_, _, _, _, bvh_node_slice, _, _], aabb, bvh_depth) = self + .meshlet_mesh_slices + .entry(asset_id) + .or_insert_with_key(queue_meshlet_mesh) + .clone(); + + ( + (bvh_node_slice.start / size_of::() as u64) as u32, + aabb, + bvh_depth, + ) + } + + pub fn remove(&mut self, asset_id: &AssetId) { + if let Some(( + [vertex_positions_slice, vertex_normals_slice, vertex_uvs_slice, indices_slice, bvh_node_slice, 
meshlets_slice, meshlet_cull_data_slice], + _, + _, + )) = self.meshlet_mesh_slices.remove(asset_id) + { + self.vertex_positions + .mark_slice_unused(vertex_positions_slice); + self.vertex_normals.mark_slice_unused(vertex_normals_slice); + self.vertex_uvs.mark_slice_unused(vertex_uvs_slice); + self.indices.mark_slice_unused(indices_slice); + self.bvh_nodes.mark_slice_unused(bvh_node_slice); + self.meshlets.mark_slice_unused(meshlets_slice); + self.meshlet_cull_data + .mark_slice_unused(meshlet_cull_data_slice); + } + } +} + +/// Upload all newly queued [`MeshletMesh`] asset data to the GPU. +pub fn perform_pending_meshlet_mesh_writes( + mut meshlet_mesh_manager: ResMut, + render_queue: Res, + render_device: Res, +) { + meshlet_mesh_manager + .vertex_positions + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .vertex_normals + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .vertex_uvs + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .indices + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .bvh_nodes + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlets + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlet_cull_data + .perform_writes(&render_queue, &render_device); +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_material.wgsl b/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_material.wgsl new file mode 100644 index 0000000..1309c78 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/meshlet_mesh_material.wgsl @@ -0,0 +1,52 @@ +#import bevy_pbr::{ + meshlet_visibility_buffer_resolve::resolve_vertex_output, + view_transformations::uv_to_ndc, + prepass_io, + pbr_prepass_functions, + utils::rand_f, +} + +@vertex +fn vertex(@builtin(vertex_index) vertex_input: u32) -> @builtin(position) vec4 { + let vertex_index = vertex_input % 3u; + let material_id = vertex_input / 3u; 
+ let material_depth = f32(material_id) / 65535.0; + let uv = vec2(vec2(vertex_index >> 1u, vertex_index & 1u)) * 2.0; + return vec4(uv_to_ndc(uv), material_depth, 1.0); +} + +@fragment +fn fragment(@builtin(position) frag_coord: vec4) -> @location(0) vec4 { + let vertex_output = resolve_vertex_output(frag_coord); + var rng = vertex_output.cluster_id; + let color = vec3(rand_f(&rng), rand_f(&rng), rand_f(&rng)); + return vec4(color, 1.0); +} + +#ifdef PREPASS_FRAGMENT +@fragment +fn prepass_fragment(@builtin(position) frag_coord: vec4) -> prepass_io::FragmentOutput { + let vertex_output = resolve_vertex_output(frag_coord); + + var out: prepass_io::FragmentOutput; + +#ifdef NORMAL_PREPASS + out.normal = vec4(vertex_output.world_normal * 0.5 + vec3(0.5), 1.0); +#endif + +#ifdef MOTION_VECTOR_PREPASS + out.motion_vector = vertex_output.motion_vector; +#endif + +#ifdef DEFERRED_PREPASS + // There isn't any material info available for this default prepass shader so we are just writing  + // emissive magenta out to the deferred gbuffer to be rendered by the first deferred lighting pass layer. + // This is here so if the default prepass fragment is used for deferred magenta will be rendered, and also + // as an example to show that a user could write to the deferred gbuffer if they were to start from this shader. 
+ out.deferred = vec4(0u, bevy_pbr::rgb9e5::vec3_to_rgb9e5_(vec3(1.0, 0.0, 1.0)), 0u, 0u); + out.deferred_lighting_pass_id = 1u; +#endif + + return out; +} +#endif diff --git a/crates/libmarathon/src/render/pbr/meshlet/meshlet_preview.png b/crates/libmarathon/src/render/pbr/meshlet/meshlet_preview.png new file mode 100644 index 0000000..2c319a8 Binary files /dev/null and b/crates/libmarathon/src/render/pbr/meshlet/meshlet_preview.png differ diff --git a/crates/libmarathon/src/render/pbr/meshlet/mod.rs b/crates/libmarathon/src/render/pbr/meshlet/mod.rs new file mode 100644 index 0000000..c166299 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/mod.rs @@ -0,0 +1,307 @@ +//! Render high-poly 3d meshes using an efficient GPU-driven method. See [`MeshletPlugin`] and [`MeshletMesh`] for details. + +mod asset; +#[cfg(feature = "meshlet_processor")] +mod from_mesh; +mod instance_manager; +mod material_pipeline_prepare; +mod material_shade_nodes; +mod meshlet_mesh_manager; +mod persistent_buffer; +mod persistent_buffer_impls; +mod pipelines; +mod resource_manager; +mod visibility_buffer_raster_node; + +pub mod graph { + use crate::render::render_graph::RenderLabel; + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + pub enum NodeMeshlet { + VisibilityBufferRasterPass, + Prepass, + DeferredPrepass, + MainOpaquePass, + } +} + +pub(crate) use self::{ + instance_manager::{queue_material_meshlet_meshes, InstanceManager}, + material_pipeline_prepare::{ + prepare_material_meshlet_meshes_main_opaque_pass, prepare_material_meshlet_meshes_prepass, + }, +}; + +pub use self::asset::{ + MeshletMesh, MeshletMeshLoader, MeshletMeshSaver, MESHLET_MESH_ASSET_VERSION, +}; +#[cfg(feature = "meshlet_processor")] +pub use self::from_mesh::{ + MeshToMeshletMeshConversionError, MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR, +}; +use self::{ + graph::NodeMeshlet, + instance_manager::extract_meshlet_mesh_entities, + material_pipeline_prepare::{ + 
MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, + MeshletViewMaterialsPrepass, + }, + material_shade_nodes::{ + MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, + }, + meshlet_mesh_manager::perform_pending_meshlet_mesh_writes, + pipelines::*, + resource_manager::{ + prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, ResourceManager, + }, + visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, +}; +use crate::render::pbr::{ + graph::NodePbr, meshlet::meshlet_mesh_manager::init_meshlet_mesh_manager, + PreviousGlobalTransform, +}; +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, AssetApp, AssetId, Handle}; +use bevy_camera::visibility::{self, Visibility, VisibilityClass}; +use crate::render::{ + core_3d::graph::{Core3d, Node3d}, + prepass::{DeferredPrepass, MotionVectorPrepass, NormalPrepass}, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::Has, + reflect::ReflectComponent, + schedule::IntoScheduleConfigs, + system::{Commands, Query, Res}, +}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + render_graph::{RenderGraphExt, ViewNodeRunner}, + renderer::RenderDevice, + settings::WgpuFeatures, + view::{prepare_view_targets, Msaa}, + ExtractSchedule, Render, RenderApp, RenderStartup, RenderSystems, +}; +use bevy_shader::load_shader_library; +use bevy_transform::components::Transform; +use derive_more::From; +use tracing::error; + +/// Provides a plugin for rendering large amounts of high-poly 3d meshes using an efficient GPU-driven method. See also [`MeshletMesh`]. +/// +/// Rendering dense scenes made of high-poly meshes with thousands or millions of triangles is extremely expensive in Bevy's standard renderer. +/// Once meshes are pre-processed into a [`MeshletMesh`], this plugin can render these kinds of scenes very efficiently. 
+/// +/// In comparison to Bevy's standard renderer: +/// * Much more efficient culling. Meshlets can be culled individually, instead of all or nothing culling for entire meshes at a time. +/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. +/// * Much more efficient batching. All geometry can be rasterized in a single draw. +/// * Scales better with large amounts of dense geometry and overdraw. Bevy's standard renderer will bottleneck sooner. +/// * Near-seamless level of detail (LOD). +/// * Much greater base overhead. Rendering will be slower and use more memory than Bevy's standard renderer +/// with small amounts of geometry and overdraw. +/// * Requires preprocessing meshes. See [`MeshletMesh`] for details. +/// * Limitations on the kinds of materials you can use. See [`MeshletMesh`] for details. +/// +/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::TEXTURE_INT64_ATOMIC`]. +/// +/// This plugin currently works only on the Vulkan and Metal backends. +/// +/// This plugin is not compatible with [`Msaa`]. Any camera rendering a [`MeshletMesh`] must have +/// [`Msaa`] set to [`Msaa::Off`]. +/// +/// Mixing forward+prepass and deferred rendering for opaque materials is not currently supported when using this plugin. +/// You must use one or the other by setting [`crate::DefaultOpaqueRendererMethod`]. +/// Do not override [`crate::Material::opaque_render_method`] for any material when using this plugin. +/// +/// ![A render of the Stanford dragon as a `MeshletMesh`](https://raw.githubusercontent.com/bevyengine/bevy/main/crates/bevy_pbr/src/meshlet/meshlet_preview.png) +pub struct MeshletPlugin { + /// The maximum amount of clusters that can be processed at once, + /// used to control the size of a pre-allocated GPU buffer. + /// + /// If this number is too low, you'll see rendering artifacts like missing or blinking meshes. + /// + /// Each cluster slot costs 4 bytes of VRAM. 
+ /// + /// Must not be greater than 2^25. + pub cluster_buffer_slots: u32, +} + +impl MeshletPlugin { + /// [`WgpuFeatures`] required for this plugin to function. + pub fn required_wgpu_features() -> WgpuFeatures { + WgpuFeatures::TEXTURE_INT64_ATOMIC + | WgpuFeatures::TEXTURE_ATOMIC + | WgpuFeatures::SHADER_INT64 + | WgpuFeatures::SUBGROUP + | WgpuFeatures::DEPTH_CLIP_CONTROL + | WgpuFeatures::PUSH_CONSTANTS + } +} + +impl Plugin for MeshletPlugin { + fn build(&self, app: &mut App) { + #[cfg(target_endian = "big")] + compile_error!("MeshletPlugin is only supported on little-endian processors."); + + if self.cluster_buffer_slots > 2_u32.pow(25) { + error!("MeshletPlugin::cluster_buffer_slots must not be greater than 2^25."); + std::process::exit(1); + } + + load_shader_library!(app, "meshlet_bindings.wgsl"); + load_shader_library!(app, "visibility_buffer_resolve.wgsl"); + load_shader_library!(app, "meshlet_cull_shared.wgsl"); + embedded_asset!(app, "clear_visibility_buffer.wgsl"); + embedded_asset!(app, "cull_instances.wgsl"); + embedded_asset!(app, "cull_bvh.wgsl"); + embedded_asset!(app, "cull_clusters.wgsl"); + embedded_asset!(app, "visibility_buffer_software_raster.wgsl"); + embedded_asset!(app, "visibility_buffer_hardware_raster.wgsl"); + embedded_asset!(app, "meshlet_mesh_material.wgsl"); + embedded_asset!(app, "resolve_render_targets.wgsl"); + embedded_asset!(app, "remap_1d_to_2d_dispatch.wgsl"); + embedded_asset!(app, "fill_counts.wgsl"); + + app.init_asset::() + .register_asset_loader(MeshletMeshLoader); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + // Create a variable here so we can move-capture it. 
+ let cluster_buffer_slots = self.cluster_buffer_slots; + let init_resource_manager_system = + move |mut commands: Commands, render_device: Res| { + commands + .insert_resource(ResourceManager::new(cluster_buffer_slots, &render_device)); + }; + + render_app + .add_render_graph_node::( + Core3d, + NodeMeshlet::VisibilityBufferRasterPass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::Prepass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::DeferredPrepass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::MainOpaquePass, + ) + .add_render_graph_edges( + Core3d, + ( + NodeMeshlet::VisibilityBufferRasterPass, + NodePbr::EarlyShadowPass, + // + NodeMeshlet::Prepass, + // + NodeMeshlet::DeferredPrepass, + Node3d::EndPrepasses, + // + Node3d::StartMainPass, + NodeMeshlet::MainOpaquePass, + Node3d::MainOpaquePass, + Node3d::EndMainPass, + ), + ) + .insert_resource(InstanceManager::new()) + .add_systems( + RenderStartup, + ( + check_meshlet_features, + ( + (init_resource_manager_system, init_meshlet_pipelines).chain(), + init_meshlet_mesh_manager, + ), + ) + .chain(), + ) + .add_systems(ExtractSchedule, extract_meshlet_mesh_entities) + .add_systems( + Render, + ( + perform_pending_meshlet_mesh_writes.in_set(RenderSystems::PrepareAssets), + configure_meshlet_views + .after(prepare_view_targets) + .in_set(RenderSystems::ManageViews), + prepare_meshlet_per_frame_resources.in_set(RenderSystems::PrepareResources), + prepare_meshlet_view_bind_groups.in_set(RenderSystems::PrepareBindGroups), + queue_material_meshlet_meshes.in_set(RenderSystems::QueueMeshes), + prepare_material_meshlet_meshes_main_opaque_pass + .in_set(RenderSystems::QueueMeshes) + .before(queue_material_meshlet_meshes), + ), + ); + } +} + +fn check_meshlet_features(render_device: Res) { + let features = render_device.features(); + if !features.contains(MeshletPlugin::required_wgpu_features()) { + error!( + "MeshletPlugin can't be used. 
GPU lacks support for required features: {:?}.", + MeshletPlugin::required_wgpu_features().difference(features) + ); + std::process::exit(1); + } +} + +/// The meshlet mesh equivalent of [`bevy_mesh::Mesh3d`]. +#[derive(Component, Clone, Debug, Default, Deref, DerefMut, Reflect, PartialEq, Eq, From)] +#[reflect(Component, Default, Clone, PartialEq)] +#[require(Transform, PreviousGlobalTransform, Visibility, VisibilityClass)] +#[component(on_add = visibility::add_visibility_class::)] +pub struct MeshletMesh3d(pub Handle); + +impl From for AssetId { + fn from(mesh: MeshletMesh3d) -> Self { + mesh.id() + } +} + +impl From<&MeshletMesh3d> for AssetId { + fn from(mesh: &MeshletMesh3d) -> Self { + mesh.id() + } +} + +fn configure_meshlet_views( + mut views_3d: Query<( + Entity, + &Msaa, + Has, + Has, + Has, + )>, + mut commands: Commands, +) { + for (entity, msaa, normal_prepass, motion_vector_prepass, deferred_prepass) in &mut views_3d { + if *msaa != Msaa::Off { + error!("MeshletPlugin can't be used with MSAA. Add Msaa::Off to your camera to use this plugin."); + std::process::exit(1); + } + + if !(normal_prepass || motion_vector_prepass || deferred_prepass) { + commands + .entity(entity) + .insert(MeshletViewMaterialsMainOpaquePass::default()); + } else { + // TODO: Should we add both Prepass and DeferredGBufferPrepass materials here, and in other systems/nodes? 
+ commands.entity(entity).insert(( + MeshletViewMaterialsMainOpaquePass::default(), + MeshletViewMaterialsPrepass::default(), + MeshletViewMaterialsDeferredGBufferPrepass::default(), + )); + } + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer.rs b/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer.rs new file mode 100644 index 0000000..216c7e7 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer.rs @@ -0,0 +1,132 @@ +use crate::render::{ + render_resource::{ + BindingResource, Buffer, BufferAddress, BufferDescriptor, BufferUsages, + CommandEncoderDescriptor, COPY_BUFFER_ALIGNMENT, + }, + renderer::{RenderDevice, RenderQueue}, +}; +use core::{num::NonZero, ops::Range}; +use range_alloc::RangeAllocator; + +/// Wrapper for a GPU buffer holding a large amount of data that persists across frames. +pub struct PersistentGpuBuffer { + /// Debug label for the buffer. + label: &'static str, + /// Handle to the GPU buffer. + buffer: Buffer, + /// Tracks free slices of the buffer. + allocation_planner: RangeAllocator, + /// Queue of pending writes, and associated metadata. + write_queue: Vec<(T, T::Metadata, Range)>, +} + +impl PersistentGpuBuffer { + /// Create a new persistent buffer. + pub fn new(label: &'static str, render_device: &RenderDevice) -> Self { + Self { + label, + buffer: render_device.create_buffer(&BufferDescriptor { + label: Some(label), + size: 0, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }), + allocation_planner: RangeAllocator::new(0..0), + write_queue: Vec::new(), + } + } + + /// Queue an item of type T to be added to the buffer, returning the byte range within the buffer that it will be located at. 
+ pub fn queue_write(&mut self, data: T, metadata: T::Metadata) -> Range { + let data_size = data.size_in_bytes() as u64; + debug_assert!(data_size.is_multiple_of(COPY_BUFFER_ALIGNMENT)); + if let Ok(buffer_slice) = self.allocation_planner.allocate_range(data_size) { + self.write_queue + .push((data, metadata, buffer_slice.clone())); + return buffer_slice; + } + + let buffer_size = self.allocation_planner.initial_range(); + let double_buffer_size = (buffer_size.end - buffer_size.start) * 2; + let new_size = double_buffer_size.max(data_size); + self.allocation_planner.grow_to(buffer_size.end + new_size); + + let buffer_slice = self.allocation_planner.allocate_range(data_size).unwrap(); + self.write_queue + .push((data, metadata, buffer_slice.clone())); + buffer_slice + } + + /// Upload all pending data to the GPU buffer. + pub fn perform_writes(&mut self, render_queue: &RenderQueue, render_device: &RenderDevice) { + if self.allocation_planner.initial_range().end > self.buffer.size() { + self.expand_buffer(render_device, render_queue); + } + + let queue_count = self.write_queue.len(); + + for (data, metadata, buffer_slice) in self.write_queue.drain(..) { + let buffer_slice_size = + NonZero::::new(buffer_slice.end - buffer_slice.start).unwrap(); + let mut buffer_view = render_queue + .write_buffer_with(&self.buffer, buffer_slice.start, buffer_slice_size) + .unwrap(); + data.write_bytes_le(metadata, &mut buffer_view, buffer_slice.start); + } + + let queue_saturation = queue_count as f32 / self.write_queue.capacity() as f32; + if queue_saturation < 0.3 { + self.write_queue = Vec::new(); + } + } + + /// Mark a section of the GPU buffer as no longer needed. + pub fn mark_slice_unused(&mut self, buffer_slice: Range) { + self.allocation_planner.free_range(buffer_slice); + } + + pub fn binding(&self) -> BindingResource<'_> { + self.buffer.as_entire_binding() + } + + /// Expand the buffer by creating a new buffer and copying old data over. 
+ fn expand_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + let size = self.allocation_planner.initial_range(); + let new_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some(self.label), + size: size.end - size.start, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("persistent_gpu_buffer_expand"), + }); + command_encoder.copy_buffer_to_buffer(&self.buffer, 0, &new_buffer, 0, self.buffer.size()); + render_queue.submit([command_encoder.finish()]); + + self.buffer = new_buffer; + } +} + +/// A trait representing data that can be written to a [`PersistentGpuBuffer`]. +pub trait PersistentGpuBufferable { + /// Additional metadata associated with each item, made available during `write_bytes_le`. + type Metadata; + + /// The size in bytes of `self`. This will determine the size of the buffer passed into + /// `write_bytes_le`. + /// + /// All data written must be in a multiple of `wgpu::COPY_BUFFER_ALIGNMENT` bytes. Failure to do so will + /// result in a panic when using [`PersistentGpuBuffer`]. + fn size_in_bytes(&self) -> usize; + + /// Convert `self` + `metadata` into bytes (little-endian), and write to the provided buffer slice. + /// Any bytes not written to in the slice will be zeroed out when uploaded to the GPU. 
+ fn write_bytes_le( + &self, + metadata: Self::Metadata, + buffer_slice: &mut [u8], + buffer_offset: BufferAddress, + ); +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer_impls.rs b/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer_impls.rs new file mode 100644 index 0000000..19ae015 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/persistent_buffer_impls.rs @@ -0,0 +1,128 @@ +use crate::render::pbr::meshlet::asset::{BvhNode, MeshletCullData}; + +use super::{asset::Meshlet, persistent_buffer::PersistentGpuBufferable}; +use std::sync::Arc; +use bevy_math::Vec2; +use crate::render::render_resource::BufferAddress; + +impl PersistentGpuBufferable for Arc<[BvhNode]> { + type Metadata = u32; + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le( + &self, + base_meshlet_index: Self::Metadata, + buffer_slice: &mut [u8], + buffer_offset: BufferAddress, + ) { + const SIZE: usize = size_of::(); + for (i, &node) in self.iter().enumerate() { + let bytes: [u8; SIZE] = + bytemuck::cast(node.offset_aabbs(base_meshlet_index, buffer_offset)); + buffer_slice[i * SIZE..(i + 1) * SIZE].copy_from_slice(&bytes); + } + } +} + +impl BvhNode { + fn offset_aabbs(mut self, base_meshlet_index: u32, buffer_offset: BufferAddress) -> Self { + let size = size_of::(); + let base_bvh_node_index = (buffer_offset / size as u64) as u32; + for i in 0..self.aabbs.len() { + self.aabbs[i].child_offset += if self.child_is_bvh_node(i) { + base_bvh_node_index + } else { + base_meshlet_index + }; + } + self + } + + fn child_is_bvh_node(&self, i: usize) -> bool { + self.child_counts[i] == u8::MAX + } +} + +impl PersistentGpuBufferable for Arc<[Meshlet]> { + type Metadata = (u64, u64, u64); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le( + &self, + (vertex_position_offset, vertex_attribute_offset, index_offset): Self::Metadata, + buffer_slice: &mut [u8], + _: BufferAddress, + ) { + 
let vertex_position_offset = (vertex_position_offset * 8) as u32; + let vertex_attribute_offset = (vertex_attribute_offset as usize / size_of::()) as u32; + let index_offset = index_offset as u32; + + for (i, meshlet) in self.iter().enumerate() { + let size = size_of::(); + let i = i * size; + let bytes = bytemuck::cast::<_, [u8; size_of::()]>(Meshlet { + start_vertex_position_bit: meshlet.start_vertex_position_bit + + vertex_position_offset, + start_vertex_attribute_id: meshlet.start_vertex_attribute_id + + vertex_attribute_offset, + start_index_id: meshlet.start_index_id + index_offset, + ..*meshlet + }); + buffer_slice[i..(i + size)].clone_from_slice(&bytes); + } + } +} + +impl PersistentGpuBufferable for Arc<[MeshletCullData]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8], _: BufferAddress) { + buffer_slice.clone_from_slice(bytemuck::cast_slice(self)); + } +} + +impl PersistentGpuBufferable for Arc<[u8]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8], _: BufferAddress) { + buffer_slice.clone_from_slice(self); + } +} + +impl PersistentGpuBufferable for Arc<[u32]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8], _: BufferAddress) { + buffer_slice.clone_from_slice(bytemuck::cast_slice(self)); + } +} + +impl PersistentGpuBufferable for Arc<[Vec2]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8], _: BufferAddress) { + buffer_slice.clone_from_slice(bytemuck::cast_slice(self)); + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/pipelines.rs b/crates/libmarathon/src/render/pbr/meshlet/pipelines.rs new file 
mode 100644 index 0000000..78a675f --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/pipelines.rs @@ -0,0 +1,580 @@ +use super::resource_manager::ResourceManager; +use bevy_asset::{load_embedded_asset, AssetServer, Handle}; +use crate::render::{ + core_3d::CORE_3D_DEPTH_FORMAT, experimental::mip_generation::DownsampleDepthShader, + FullscreenShader, +}; +use bevy_ecs::{ + resource::Resource, + system::{Commands, Res}, + world::World, +}; +use crate::render::render_resource::*; +use bevy_shader::Shader; +use bevy_utils::default; + +#[derive(Resource)] +pub struct MeshletPipelines { + clear_visibility_buffer: CachedComputePipelineId, + clear_visibility_buffer_shadow_view: CachedComputePipelineId, + first_instance_cull: CachedComputePipelineId, + second_instance_cull: CachedComputePipelineId, + first_bvh_cull: CachedComputePipelineId, + second_bvh_cull: CachedComputePipelineId, + first_meshlet_cull: CachedComputePipelineId, + second_meshlet_cull: CachedComputePipelineId, + downsample_depth_first: CachedComputePipelineId, + downsample_depth_second: CachedComputePipelineId, + downsample_depth_first_shadow_view: CachedComputePipelineId, + downsample_depth_second_shadow_view: CachedComputePipelineId, + visibility_buffer_software_raster: CachedComputePipelineId, + visibility_buffer_software_raster_shadow_view: CachedComputePipelineId, + visibility_buffer_hardware_raster: CachedRenderPipelineId, + visibility_buffer_hardware_raster_shadow_view: CachedRenderPipelineId, + visibility_buffer_hardware_raster_shadow_view_unclipped: CachedRenderPipelineId, + resolve_depth: CachedRenderPipelineId, + resolve_depth_shadow_view: CachedRenderPipelineId, + resolve_material_depth: CachedRenderPipelineId, + remap_1d_to_2d_dispatch: Option, + fill_counts: CachedComputePipelineId, + pub(crate) meshlet_mesh_material: Handle, +} + +pub fn init_meshlet_pipelines( + mut commands: Commands, + resource_manager: Res, + fullscreen_shader: Res, + downsample_depth_shader: Res, + 
pipeline_cache: Res, + asset_server: Res, +) { + let clear_visibility_buffer_bind_group_layout = resource_manager + .clear_visibility_buffer_bind_group_layout + .clone(); + let clear_visibility_buffer_shadow_view_bind_group_layout = resource_manager + .clear_visibility_buffer_shadow_view_bind_group_layout + .clone(); + let first_instance_cull_bind_group_layout = resource_manager + .first_instance_cull_bind_group_layout + .clone(); + let second_instance_cull_bind_group_layout = resource_manager + .second_instance_cull_bind_group_layout + .clone(); + let first_bvh_cull_bind_group_layout = + resource_manager.first_bvh_cull_bind_group_layout.clone(); + let second_bvh_cull_bind_group_layout = + resource_manager.second_bvh_cull_bind_group_layout.clone(); + let first_meshlet_cull_bind_group_layout = resource_manager + .first_meshlet_cull_bind_group_layout + .clone(); + let second_meshlet_cull_bind_group_layout = resource_manager + .second_meshlet_cull_bind_group_layout + .clone(); + let downsample_depth_layout = resource_manager.downsample_depth_bind_group_layout.clone(); + let downsample_depth_shadow_view_layout = resource_manager + .downsample_depth_shadow_view_bind_group_layout + .clone(); + let visibility_buffer_raster_layout = resource_manager + .visibility_buffer_raster_bind_group_layout + .clone(); + let visibility_buffer_raster_shadow_view_layout = resource_manager + .visibility_buffer_raster_shadow_view_bind_group_layout + .clone(); + let resolve_depth_layout = resource_manager.resolve_depth_bind_group_layout.clone(); + let resolve_depth_shadow_view_layout = resource_manager + .resolve_depth_shadow_view_bind_group_layout + .clone(); + let resolve_material_depth_layout = resource_manager + .resolve_material_depth_bind_group_layout + .clone(); + let remap_1d_to_2d_dispatch_layout = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .clone(); + + let downsample_depth_shader = (*downsample_depth_shader).clone(); + let vertex_state = 
fullscreen_shader.to_vertex_state(); + let fill_counts_layout = resource_manager.fill_counts_bind_group_layout.clone(); + + let clear_visibility_buffer = + load_embedded_asset!(asset_server.as_ref(), "clear_visibility_buffer.wgsl"); + let cull_instances = load_embedded_asset!(asset_server.as_ref(), "cull_instances.wgsl"); + let cull_bvh = load_embedded_asset!(asset_server.as_ref(), "cull_bvh.wgsl"); + let cull_clusters = load_embedded_asset!(asset_server.as_ref(), "cull_clusters.wgsl"); + let visibility_buffer_software_raster = load_embedded_asset!( + asset_server.as_ref(), + "visibility_buffer_software_raster.wgsl" + ); + let visibility_buffer_hardware_raster = load_embedded_asset!( + asset_server.as_ref(), + "visibility_buffer_hardware_raster.wgsl" + ); + let resolve_render_targets = + load_embedded_asset!(asset_server.as_ref(), "resolve_render_targets.wgsl"); + let remap_1d_to_2d_dispatch = + load_embedded_asset!(asset_server.as_ref(), "remap_1d_to_2d_dispatch.wgsl"); + let fill_counts = load_embedded_asset!(asset_server.as_ref(), "fill_counts.wgsl"); + let meshlet_mesh_material = + load_embedded_asset!(asset_server.as_ref(), "meshlet_mesh_material.wgsl"); + + commands.insert_resource(MeshletPipelines { + clear_visibility_buffer: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_clear_visibility_buffer_pipeline".into()), + layout: vec![clear_visibility_buffer_bind_group_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: clear_visibility_buffer.clone(), + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + ..default() + }), + + clear_visibility_buffer_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_clear_visibility_buffer_shadow_view_pipeline".into()), + layout: vec![clear_visibility_buffer_shadow_view_bind_group_layout], + push_constant_ranges: vec![PushConstantRange { + 
stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: clear_visibility_buffer, + ..default() + }, + ), + + first_instance_cull: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_first_instance_cull_pipeline".into()), + layout: vec![first_instance_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: cull_instances.clone(), + shader_defs: vec![ + "MESHLET_INSTANCE_CULLING_PASS".into(), + "MESHLET_FIRST_CULLING_PASS".into(), + ], + ..default() + }), + + second_instance_cull: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_second_instance_cull_pipeline".into()), + layout: vec![second_instance_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: cull_instances, + shader_defs: vec![ + "MESHLET_INSTANCE_CULLING_PASS".into(), + "MESHLET_SECOND_CULLING_PASS".into(), + ], + ..default() + }), + + first_bvh_cull: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_first_bvh_cull_pipeline".into()), + layout: vec![first_bvh_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: cull_bvh.clone(), + shader_defs: vec![ + "MESHLET_BVH_CULLING_PASS".into(), + "MESHLET_FIRST_CULLING_PASS".into(), + ], + ..default() + }), + + second_bvh_cull: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_second_bvh_cull_pipeline".into()), + layout: vec![second_bvh_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: cull_bvh, + shader_defs: vec![ + "MESHLET_BVH_CULLING_PASS".into(), + "MESHLET_SECOND_CULLING_PASS".into(), + ], + ..default() + }), + + first_meshlet_cull: 
pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_first_meshlet_cull_pipeline".into()), + layout: vec![first_meshlet_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: cull_clusters.clone(), + shader_defs: vec![ + "MESHLET_CLUSTER_CULLING_PASS".into(), + "MESHLET_FIRST_CULLING_PASS".into(), + ], + ..default() + }), + + second_meshlet_cull: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_second_meshlet_cull_pipeline".into()), + layout: vec![second_meshlet_cull_bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: cull_clusters, + shader_defs: vec![ + "MESHLET_CLUSTER_CULLING_PASS".into(), + "MESHLET_SECOND_CULLING_PASS".into(), + ], + ..default() + }), + + downsample_depth_first: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_first_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: downsample_depth_shader.clone(), + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + "MESHLET".into(), + ], + entry_point: Some("downsample_depth_first".into()), + ..default() + }), + + downsample_depth_second: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_second_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: downsample_depth_shader.clone(), + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + "MESHLET".into(), + ], + entry_point: Some("downsample_depth_second".into()), + ..default() + }), + + 
downsample_depth_first_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_first_pipeline".into()), + layout: vec![downsample_depth_shadow_view_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: downsample_depth_shader.clone(), + shader_defs: vec!["MESHLET".into()], + entry_point: Some("downsample_depth_first".into()), + ..default() + }, + ), + + downsample_depth_second_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_second_pipeline".into()), + layout: vec![downsample_depth_shadow_view_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: downsample_depth_shader, + shader_defs: vec!["MESHLET".into()], + entry_point: Some("downsample_depth_second".into()), + zero_initialize_workgroup_memory: false, + }, + ), + + visibility_buffer_software_raster: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![], + shader: visibility_buffer_software_raster.clone(), + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + ..default() + }, + ), + + visibility_buffer_software_raster_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_software_raster_shadow_view_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_shadow_view_layout.clone()], + push_constant_ranges: vec![], + shader: visibility_buffer_software_raster, + shader_defs: vec![ + 
"MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + ..default() + }, + ), + + visibility_buffer_hardware_raster: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], + vertex: VertexState { + shader: visibility_buffer_hardware_raster.clone(), + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + ], + ..default() + }, + fragment: Some(FragmentState { + shader: visibility_buffer_hardware_raster.clone(), + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + ], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + ..default() + }), + ..default() + }, + ), + + visibility_buffer_hardware_raster_shadow_view: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_hardware_raster_shadow_view_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_shadow_view_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], + vertex: VertexState { + shader: visibility_buffer_hardware_raster.clone(), + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + ..default() + }, + fragment: Some(FragmentState { + shader: visibility_buffer_hardware_raster.clone(), + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + ..default() + }), 
+ ..default() + }, + ), + + visibility_buffer_hardware_raster_shadow_view_unclipped: pipeline_cache + .queue_render_pipeline(RenderPipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline" + .into(), + ), + layout: vec![visibility_buffer_raster_shadow_view_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], + vertex: VertexState { + shader: visibility_buffer_hardware_raster.clone(), + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + ..default() + }, + fragment: Some(FragmentState { + shader: visibility_buffer_hardware_raster, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + ..default() + }), + ..default() + }), + + resolve_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_layout], + vertex: vertex_state.clone(), + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + fragment: Some(FragmentState { + shader: resolve_render_targets.clone(), + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: Some("resolve_depth".into()), + ..default() + }), + ..default() + }), + + resolve_depth_shadow_view: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_shadow_view_layout], + vertex: vertex_state.clone(), + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: 
DepthBiasState::default(), + }), + fragment: Some(FragmentState { + shader: resolve_render_targets.clone(), + entry_point: Some("resolve_depth".into()), + ..default() + }), + ..default() + }), + + resolve_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_resolve_material_depth_pipeline".into()), + layout: vec![resolve_material_depth_layout], + vertex: vertex_state, + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + fragment: Some(FragmentState { + shader: resolve_render_targets, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: Some("resolve_material_depth".into()), + targets: vec![], + }), + ..default() + }), + + fill_counts: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_fill_counts_pipeline".into()), + layout: vec![fill_counts_layout], + shader: fill_counts, + shader_defs: vec![if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into()], + ..default() + }), + + remap_1d_to_2d_dispatch: remap_1d_to_2d_dispatch_layout.map(|layout| { + pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_remap_1d_to_2d_dispatch_pipeline".into()), + layout: vec![layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: remap_1d_to_2d_dispatch, + ..default() + }) + }), + + meshlet_mesh_material, + }); +} + +impl MeshletPipelines { + pub fn get( + world: &World, + ) -> Option<( + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + 
&ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + Option<&ComputePipeline>, + &ComputePipeline, + )> { + let pipeline_cache = world.get_resource::()?; + let pipeline = world.get_resource::()?; + Some(( + pipeline_cache.get_compute_pipeline(pipeline.clear_visibility_buffer)?, + pipeline_cache.get_compute_pipeline(pipeline.clear_visibility_buffer_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.first_instance_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.second_instance_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.first_bvh_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.second_bvh_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.first_meshlet_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.second_meshlet_cull)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.visibility_buffer_software_raster)?, + pipeline_cache + .get_compute_pipeline(pipeline.visibility_buffer_software_raster_shadow_view)?, + pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_hardware_raster)?, + pipeline_cache + .get_render_pipeline(pipeline.visibility_buffer_hardware_raster_shadow_view)?, + pipeline_cache.get_render_pipeline( + pipeline.visibility_buffer_hardware_raster_shadow_view_unclipped, + )?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth_shadow_view)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_material_depth)?, + match pipeline.remap_1d_to_2d_dispatch { + Some(id) => 
Some(pipeline_cache.get_compute_pipeline(id)?), + None => None, + }, + pipeline_cache.get_compute_pipeline(pipeline.fill_counts)?, + )) + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/remap_1d_to_2d_dispatch.wgsl b/crates/libmarathon/src/render/pbr/meshlet/remap_1d_to_2d_dispatch.wgsl new file mode 100644 index 0000000..b9970c4 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/remap_1d_to_2d_dispatch.wgsl @@ -0,0 +1,24 @@ +/// Remaps an indirect 1d to 2d dispatch for devices with low dispatch size limit. + +struct DispatchIndirectArgs { + x: u32, + y: u32, + z: u32, +} + +@group(0) @binding(0) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; +@group(0) @binding(1) var meshlet_software_raster_cluster_count: u32; +var max_compute_workgroups_per_dimension: u32; + +@compute +@workgroup_size(1, 1, 1) +fn remap_dispatch() { + let cluster_count = meshlet_software_raster_indirect_args.x; + + if cluster_count > max_compute_workgroups_per_dimension { + let n = u32(ceil(sqrt(f32(cluster_count)))); + meshlet_software_raster_indirect_args.x = n; + meshlet_software_raster_indirect_args.y = n; + meshlet_software_raster_cluster_count = cluster_count; + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/resolve_render_targets.wgsl b/crates/libmarathon/src/render/pbr/meshlet/resolve_render_targets.wgsl new file mode 100644 index 0000000..6fef0cc --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/resolve_render_targets.wgsl @@ -0,0 +1,41 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput +#import bevy_pbr::meshlet_bindings::InstancedOffset + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d; +#else +@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d; +#endif +@group(0) @binding(1) var meshlet_raster_clusters: array; // Per cluster +@group(0) @binding(2) var meshlet_instance_material_ids: array; // 
Per entity instance + +/// This pass writes out the depth texture. +@fragment +fn resolve_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let visibility = textureLoad(meshlet_visibility_buffer, vec2(in.position.xy)).r; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let depth = u32(visibility >> 32u); +#else + let depth = visibility; +#endif + + if depth == 0u { discard; } + + return bitcast(depth); +} + +/// This pass writes out the material depth texture. +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@fragment +fn resolve_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let visibility = textureLoad(meshlet_visibility_buffer, vec2(in.position.xy)).r; + + let depth = visibility >> 32u; + if depth == 0lu { discard; } + + let cluster_id = u32(visibility) >> 7u; + let instance_id = meshlet_raster_clusters[cluster_id].instance_id; + let material_id = meshlet_instance_material_ids[instance_id]; + return f32(material_id) / 65535.0; +} +#endif diff --git a/crates/libmarathon/src/render/pbr/meshlet/resource_manager.rs b/crates/libmarathon/src/render/pbr/meshlet/resource_manager.rs new file mode 100644 index 0000000..9785d04 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/resource_manager.rs @@ -0,0 +1,1224 @@ +use super::{instance_manager::InstanceManager, meshlet_mesh_manager::MeshletMeshManager}; +use crate::render::pbr::ShadowView; +use bevy_camera::{visibility::RenderLayers, Camera3d}; +use crate::render::{ + experimental::mip_generation::{self, ViewDepthPyramid}, + prepass::{PreviousViewData, PreviousViewUniforms}, +}; +use bevy_ecs::{ + component::Component, + entity::{Entity, EntityHashMap}, + query::AnyOf, + resource::Resource, + system::{Commands, Query, Res, ResMut}, +}; +use bevy_image::ToExtents; +use bevy_math::{UVec2, Vec4Swizzles}; +use crate::render::{ + render_resource::*, + renderer::{RenderDevice, RenderQueue}, + texture::{CachedTexture, TextureCache}, + view::{ExtractedView, 
ViewUniform, ViewUniforms}, +}; +use binding_types::*; +use core::iter; + +/// Manages per-view and per-cluster GPU resources for [`super::MeshletPlugin`]. +#[derive(Resource)] +pub struct ResourceManager { + /// Intermediate buffer of cluster IDs for use with rasterizing the visibility buffer + visibility_buffer_raster_clusters: Buffer, + /// Intermediate buffer of previous counts of clusters in rasterizer buckets + pub visibility_buffer_raster_cluster_prev_counts: Buffer, + /// Intermediate buffer of count of clusters to software rasterize + software_raster_cluster_count: Buffer, + /// BVH traversal queues + bvh_traversal_queues: [Buffer; 2], + /// Cluster cull candidate queue + cluster_cull_candidate_queue: Buffer, + /// Rightmost slot index of [`Self::visibility_buffer_raster_clusters`], [`Self::bvh_traversal_queues`], and [`Self::cluster_cull_candidate_queue`] + cull_queue_rightmost_slot: u32, + + /// Second pass instance candidates + second_pass_candidates: Option, + /// Sampler for a depth pyramid + depth_pyramid_sampler: Sampler, + /// Dummy texture view for binding depth pyramids with less than the maximum amount of mips + depth_pyramid_dummy_texture: TextureView, + + // TODO + previous_depth_pyramids: EntityHashMap, + + // Bind group layouts + pub clear_visibility_buffer_bind_group_layout: BindGroupLayout, + pub clear_visibility_buffer_shadow_view_bind_group_layout: BindGroupLayout, + pub first_instance_cull_bind_group_layout: BindGroupLayout, + pub second_instance_cull_bind_group_layout: BindGroupLayout, + pub first_bvh_cull_bind_group_layout: BindGroupLayout, + pub second_bvh_cull_bind_group_layout: BindGroupLayout, + pub first_meshlet_cull_bind_group_layout: BindGroupLayout, + pub second_meshlet_cull_bind_group_layout: BindGroupLayout, + pub visibility_buffer_raster_bind_group_layout: BindGroupLayout, + pub visibility_buffer_raster_shadow_view_bind_group_layout: BindGroupLayout, + pub downsample_depth_bind_group_layout: BindGroupLayout, + pub 
downsample_depth_shadow_view_bind_group_layout: BindGroupLayout, + pub resolve_depth_bind_group_layout: BindGroupLayout, + pub resolve_depth_shadow_view_bind_group_layout: BindGroupLayout, + pub resolve_material_depth_bind_group_layout: BindGroupLayout, + pub material_shade_bind_group_layout: BindGroupLayout, + pub fill_counts_bind_group_layout: BindGroupLayout, + pub remap_1d_to_2d_dispatch_bind_group_layout: Option, +} + +impl ResourceManager { + pub fn new(cluster_buffer_slots: u32, render_device: &RenderDevice) -> Self { + let needs_dispatch_remap = + cluster_buffer_slots > render_device.limits().max_compute_workgroups_per_dimension; + // The IDs are a (u32, u32) of instance and index. + let cull_queue_size = 2 * cluster_buffer_slots as u64 * size_of::() as u64; + + Self { + visibility_buffer_raster_clusters: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer_raster_clusters"), + size: cull_queue_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + visibility_buffer_raster_cluster_prev_counts: render_device.create_buffer( + &BufferDescriptor { + label: Some("meshlet_visibility_buffer_raster_cluster_prev_counts"), + size: size_of::() as u64 * 2, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }, + ), + software_raster_cluster_count: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_software_raster_cluster_count"), + size: size_of::() as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + bvh_traversal_queues: [ + render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_bvh_traversal_queue_0"), + size: cull_queue_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_bvh_traversal_queue_1"), + size: cull_queue_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + ], + cluster_cull_candidate_queue: 
render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_cluster_cull_candidate_queue"), + size: cull_queue_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + cull_queue_rightmost_slot: cluster_buffer_slots - 1, + + second_pass_candidates: None, + depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("meshlet_depth_pyramid_sampler"), + ..SamplerDescriptor::default() + }), + depth_pyramid_dummy_texture: mip_generation::create_depth_pyramid_dummy_texture( + render_device, + "meshlet_depth_pyramid_dummy_texture", + "meshlet_depth_pyramid_dummy_texture_view", + ), + + previous_depth_pyramids: EntityHashMap::default(), + + // TODO: Buffer min sizes + clear_visibility_buffer_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_clear_visibility_buffer_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::COMPUTE, + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::WriteOnly), + ), + ), + clear_visibility_buffer_shadow_view_bind_group_layout: render_device + .create_bind_group_layout( + "meshlet_clear_visibility_buffer_shadow_view_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::COMPUTE, + texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::WriteOnly), + ), + ), + first_instance_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_first_instance_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + 
storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + second_instance_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_second_instance_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + first_bvh_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_first_bvh_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + second_bvh_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_second_bvh_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + 
storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + first_meshlet_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_first_meshlet_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + second_meshlet_cull_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_second_meshlet_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + downsample_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_downsample_depth_bind_group_layout", + 
&BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { + let write_only_r32float = || { + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) + }; + ( + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + texture_storage_2d( + TextureFormat::R32Float, + StorageTextureAccess::ReadWrite, + ), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + sampler(SamplerBindingType::NonFiltering), + ) + }), + ), + downsample_depth_shadow_view_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_downsample_depth_shadow_view_bind_group_layout", + &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { + let write_only_r32float = || { + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) + }; + ( + texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::ReadOnly), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + texture_storage_2d( + TextureFormat::R32Float, + StorageTextureAccess::ReadWrite, + ), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + sampler(SamplerBindingType::NonFiltering), + ) + }), + ), + visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_visibility_buffer_raster_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + 
storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::Atomic), + uniform_buffer::(true), + ), + ), + ), + visibility_buffer_raster_shadow_view_bind_group_layout: render_device + .create_bind_group_layout( + "meshlet_visibility_buffer_raster_shadow_view_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + texture_storage_2d( + TextureFormat::R32Uint, + StorageTextureAccess::Atomic, + ), + uniform_buffer::(true), + ), + ), + ), + resolve_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_depth_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly), + ), + ), + resolve_depth_shadow_view_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_depth_shadow_view_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::ReadOnly), + ), + ), + resolve_material_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_material_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + material_shade_bind_group_layout: 
render_device.create_bind_group_layout( + "meshlet_mesh_material_shade_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + fill_counts_bind_group_layout: if needs_dispatch_remap { + render_device.create_bind_group_layout( + "meshlet_fill_counts_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ) + } else { + render_device.create_bind_group_layout( + "meshlet_fill_counts_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ) + }, + remap_1d_to_2d_dispatch_bind_group_layout: needs_dispatch_remap.then(|| { + render_device.create_bind_group_layout( + "meshlet_remap_1d_to_2d_dispatch_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ) + }), + } + } +} + +// ------------ TODO: Everything under here needs to be rewritten and cached ------------ + +#[derive(Component)] +pub struct MeshletViewResources { + pub scene_instance_count: u32, + pub rightmost_slot: u32, + pub max_bvh_depth: u32, + instance_visibility: Buffer, + pub dummy_render_target: CachedTexture, + pub visibility_buffer: CachedTexture, + pub second_pass_count: Buffer, + pub 
second_pass_dispatch: Buffer, + pub second_pass_candidates: Buffer, + pub first_bvh_cull_count_front: Buffer, + pub first_bvh_cull_dispatch_front: Buffer, + pub first_bvh_cull_count_back: Buffer, + pub first_bvh_cull_dispatch_back: Buffer, + pub first_bvh_cull_queue: Buffer, + pub second_bvh_cull_count_front: Buffer, + pub second_bvh_cull_dispatch_front: Buffer, + pub second_bvh_cull_count_back: Buffer, + pub second_bvh_cull_dispatch_back: Buffer, + pub second_bvh_cull_queue: Buffer, + pub front_meshlet_cull_count: Buffer, + pub front_meshlet_cull_dispatch: Buffer, + pub back_meshlet_cull_count: Buffer, + pub back_meshlet_cull_dispatch: Buffer, + pub meshlet_cull_queue: Buffer, + pub visibility_buffer_software_raster_indirect_args: Buffer, + pub visibility_buffer_hardware_raster_indirect_args: Buffer, + pub depth_pyramid: ViewDepthPyramid, + previous_depth_pyramid: TextureView, + pub material_depth: Option, + pub view_size: UVec2, + not_shadow_view: bool, +} + +#[derive(Component)] +pub struct MeshletViewBindGroups { + pub clear_visibility_buffer: BindGroup, + pub first_instance_cull: BindGroup, + pub second_instance_cull: BindGroup, + pub first_bvh_cull_ping: BindGroup, + pub first_bvh_cull_pong: BindGroup, + pub second_bvh_cull_ping: BindGroup, + pub second_bvh_cull_pong: BindGroup, + pub first_meshlet_cull: BindGroup, + pub second_meshlet_cull: BindGroup, + pub downsample_depth: BindGroup, + pub visibility_buffer_raster: BindGroup, + pub resolve_depth: BindGroup, + pub resolve_material_depth: Option, + pub material_shade: Option, + pub remap_1d_to_2d_dispatch: Option, + pub fill_counts: BindGroup, +} + +// TODO: Cache things per-view and skip running this system / optimize this system +pub fn prepare_meshlet_per_frame_resources( + mut resource_manager: ResMut, + mut instance_manager: ResMut, + views: Query<( + Entity, + &ExtractedView, + Option<&RenderLayers>, + AnyOf<(&Camera3d, &ShadowView)>, + )>, + mut texture_cache: ResMut, + render_queue: Res, + 
render_device: Res, + mut commands: Commands, +) { + if instance_manager.scene_instance_count == 0 { + return; + } + + let instance_manager = instance_manager.as_mut(); + + // TODO: Move this and the submit to a separate system and remove pub from the fields + instance_manager + .instance_uniforms + .write_buffer(&render_device, &render_queue); + instance_manager + .instance_aabbs + .write_buffer(&render_device, &render_queue); + instance_manager + .instance_material_ids + .write_buffer(&render_device, &render_queue); + instance_manager + .instance_bvh_root_nodes + .write_buffer(&render_device, &render_queue); + + let needed_buffer_size = 4 * instance_manager.scene_instance_count as u64; + let second_pass_candidates = match &mut resource_manager.second_pass_candidates { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_second_pass_candidates"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + for (view_entity, view, render_layers, (_, shadow_view)) in &views { + let not_shadow_view = shadow_view.is_none(); + + let instance_visibility = instance_manager + .view_instance_visibility + .entry(view_entity) + .or_insert_with(|| { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_view_instance_visibility")); + buffer + }); + for (instance_index, (_, layers, not_shadow_caster)) in + instance_manager.instances.iter().enumerate() + { + // If either the layers don't match the view's layers or this is a shadow view + // and the instance is not a shadow caster, hide the instance for this view + if !render_layers + .unwrap_or(&RenderLayers::default()) + .intersects(layers) + || (shadow_view.is_some() && *not_shadow_caster) + { + let vec = instance_visibility.get_mut(); + let index = instance_index / 32; + let bit = instance_index - 
index * 32; + if vec.len() <= index { + vec.extend(iter::repeat_n(0, index - vec.len() + 1)); + } + vec[index] |= 1 << bit; + } + } + instance_visibility.write_buffer(&render_device, &render_queue); + let instance_visibility = instance_visibility.buffer().unwrap().clone(); + + // TODO: Remove this once wgpu allows render passes with no attachments + let dummy_render_target = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_dummy_render_target"), + size: view.viewport.zw().to_extents(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R8Uint, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }, + ); + + let visibility_buffer = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_visibility_buffer"), + size: view.viewport.zw().to_extents(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: if not_shadow_view { + TextureFormat::R64Uint + } else { + TextureFormat::R32Uint + }, + usage: TextureUsages::STORAGE_ATOMIC | TextureUsages::STORAGE_BINDING, + view_formats: &[], + }, + ); + + let second_pass_count = render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_pass_count"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE, + }); + let second_pass_dispatch = render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_pass_dispatch"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let first_bvh_cull_count_front = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_first_bvh_cull_count_front"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + }); + let first_bvh_cull_dispatch_front = + render_device.create_buffer_with_data(&BufferInitDescriptor { + 
label: Some("meshlet_first_bvh_cull_dispatch_front"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT | BufferUsages::COPY_DST, + }); + let first_bvh_cull_count_back = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_first_bvh_cull_count_back"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + }); + let first_bvh_cull_dispatch_back = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_first_bvh_cull_dispatch_back"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT | BufferUsages::COPY_DST, + }); + + let second_bvh_cull_count_front = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_bvh_cull_count_front"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + }); + let second_bvh_cull_dispatch_front = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_bvh_cull_dispatch_front"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT | BufferUsages::COPY_DST, + }); + let second_bvh_cull_count_back = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_bvh_cull_count_back"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + }); + let second_bvh_cull_dispatch_back = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_second_bvh_cull_dispatch_back"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT | BufferUsages::COPY_DST, + }); + + let front_meshlet_cull_count = + 
render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_front_meshlet_cull_count"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE, + }); + let front_meshlet_cull_dispatch = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_front_meshlet_cull_dispatch"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let back_meshlet_cull_count = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_back_meshlet_cull_count"), + contents: bytemuck::bytes_of(&0u32), + usage: BufferUsages::STORAGE, + }); + let back_meshlet_cull_dispatch = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_back_meshlet_cull_dispatch"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let visibility_buffer_software_raster_indirect_args = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_indirect_args"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let visibility_buffer_hardware_raster_indirect_args = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_indirect_args"), + contents: DrawIndirectArgs { + vertex_count: 128 * 3, + instance_count: 0, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let depth_pyramid = ViewDepthPyramid::new( + &render_device, + &mut texture_cache, + &resource_manager.depth_pyramid_dummy_texture, + view.viewport.zw(), + "meshlet_depth_pyramid", + "meshlet_depth_pyramid_texture_view", + ); + + let previous_depth_pyramid = + match 
resource_manager.previous_depth_pyramids.get(&view_entity) { + Some(texture_view) => texture_view.clone(), + None => depth_pyramid.all_mips.clone(), + }; + resource_manager + .previous_depth_pyramids + .insert(view_entity, depth_pyramid.all_mips.clone()); + + let material_depth = TextureDescriptor { + label: Some("meshlet_material_depth"), + size: view.viewport.zw().to_extents(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Depth16Unorm, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }; + + commands.entity(view_entity).insert(MeshletViewResources { + scene_instance_count: instance_manager.scene_instance_count, + rightmost_slot: resource_manager.cull_queue_rightmost_slot, + max_bvh_depth: instance_manager.max_bvh_depth, + instance_visibility, + dummy_render_target, + visibility_buffer, + second_pass_count, + second_pass_dispatch, + second_pass_candidates: second_pass_candidates.clone(), + first_bvh_cull_count_front, + first_bvh_cull_dispatch_front, + first_bvh_cull_count_back, + first_bvh_cull_dispatch_back, + first_bvh_cull_queue: resource_manager.bvh_traversal_queues[0].clone(), + second_bvh_cull_count_front, + second_bvh_cull_dispatch_front, + second_bvh_cull_count_back, + second_bvh_cull_dispatch_back, + second_bvh_cull_queue: resource_manager.bvh_traversal_queues[1].clone(), + front_meshlet_cull_count, + front_meshlet_cull_dispatch, + back_meshlet_cull_count, + back_meshlet_cull_dispatch, + meshlet_cull_queue: resource_manager.cluster_cull_candidate_queue.clone(), + visibility_buffer_software_raster_indirect_args, + visibility_buffer_hardware_raster_indirect_args, + depth_pyramid, + previous_depth_pyramid, + material_depth: not_shadow_view + .then(|| texture_cache.get(&render_device, material_depth)), + view_size: view.viewport.zw(), + not_shadow_view, + }); + } +} + +pub fn prepare_meshlet_view_bind_groups( + meshlet_mesh_manager: Res, + resource_manager: Res, + instance_manager: Res, + 
views: Query<(Entity, &MeshletViewResources)>, + view_uniforms: Res, + previous_view_uniforms: Res, + render_device: Res, + mut commands: Commands, +) { + let (Some(view_uniforms), Some(previous_view_uniforms)) = ( + view_uniforms.uniforms.binding(), + previous_view_uniforms.uniforms.binding(), + ) else { + return; + }; + + // TODO: Some of these bind groups can be reused across multiple views + for (view_entity, view_resources) in &views { + let clear_visibility_buffer = render_device.create_bind_group( + "meshlet_clear_visibility_buffer_bind_group", + if view_resources.not_shadow_view { + &resource_manager.clear_visibility_buffer_bind_group_layout + } else { + &resource_manager.clear_visibility_buffer_shadow_view_bind_group_layout + }, + &BindGroupEntries::single(&view_resources.visibility_buffer.default_view), + ); + + let first_instance_cull = render_device.create_bind_group( + "meshlet_first_instance_cull_bind_group", + &resource_manager.first_instance_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + instance_manager.instance_aabbs.binding().unwrap(), + instance_manager.instance_bvh_root_nodes.binding().unwrap(), + view_resources + .first_bvh_cull_count_front + .as_entire_binding(), + view_resources + .first_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.first_bvh_cull_queue.as_entire_binding(), + view_resources.second_pass_count.as_entire_binding(), + view_resources.second_pass_dispatch.as_entire_binding(), + view_resources.second_pass_candidates.as_entire_binding(), + )), + ); + + let second_instance_cull = render_device.create_bind_group( + "meshlet_second_instance_cull_bind_group", + &resource_manager.second_instance_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, 
+ view_uniforms.clone(), + previous_view_uniforms.clone(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + instance_manager.instance_aabbs.binding().unwrap(), + instance_manager.instance_bvh_root_nodes.binding().unwrap(), + view_resources + .second_bvh_cull_count_front + .as_entire_binding(), + view_resources + .second_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.second_bvh_cull_queue.as_entire_binding(), + view_resources.second_pass_count.as_entire_binding(), + view_resources.second_pass_candidates.as_entire_binding(), + )), + ); + + let first_bvh_cull_ping = render_device.create_bind_group( + "meshlet_first_bvh_cull_ping_bind_group", + &resource_manager.first_bvh_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.bvh_nodes.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources + .first_bvh_cull_count_front + .as_entire_binding(), + view_resources.first_bvh_cull_count_back.as_entire_binding(), + view_resources + .first_bvh_cull_dispatch_back + .as_entire_binding(), + view_resources.first_bvh_cull_queue.as_entire_binding(), + view_resources.front_meshlet_cull_count.as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources + .front_meshlet_cull_dispatch + .as_entire_binding(), + view_resources + .back_meshlet_cull_dispatch + .as_entire_binding(), + view_resources.meshlet_cull_queue.as_entire_binding(), + view_resources + .second_bvh_cull_count_front + .as_entire_binding(), + view_resources + .second_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.second_bvh_cull_queue.as_entire_binding(), + )), + ); + + let first_bvh_cull_pong = render_device.create_bind_group( + "meshlet_first_bvh_cull_pong_bind_group", + &resource_manager.first_bvh_cull_bind_group_layout, + 
&BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.bvh_nodes.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.first_bvh_cull_count_back.as_entire_binding(), + view_resources + .first_bvh_cull_count_front + .as_entire_binding(), + view_resources + .first_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.first_bvh_cull_queue.as_entire_binding(), + view_resources.front_meshlet_cull_count.as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources + .front_meshlet_cull_dispatch + .as_entire_binding(), + view_resources + .back_meshlet_cull_dispatch + .as_entire_binding(), + view_resources.meshlet_cull_queue.as_entire_binding(), + view_resources + .second_bvh_cull_count_front + .as_entire_binding(), + view_resources + .second_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.second_bvh_cull_queue.as_entire_binding(), + )), + ); + + let second_bvh_cull_ping = render_device.create_bind_group( + "meshlet_second_bvh_cull_ping_bind_group", + &resource_manager.second_bvh_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.bvh_nodes.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources + .second_bvh_cull_count_front + .as_entire_binding(), + view_resources + .second_bvh_cull_count_back + .as_entire_binding(), + view_resources + .second_bvh_cull_dispatch_back + .as_entire_binding(), + view_resources.second_bvh_cull_queue.as_entire_binding(), + view_resources.front_meshlet_cull_count.as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources + .front_meshlet_cull_dispatch + .as_entire_binding(), + view_resources + .back_meshlet_cull_dispatch + .as_entire_binding(), + 
view_resources.meshlet_cull_queue.as_entire_binding(), + )), + ); + + let second_bvh_cull_pong = render_device.create_bind_group( + "meshlet_second_bvh_cull_pong_bind_group", + &resource_manager.second_bvh_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.bvh_nodes.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources + .second_bvh_cull_count_back + .as_entire_binding(), + view_resources + .second_bvh_cull_count_front + .as_entire_binding(), + view_resources + .second_bvh_cull_dispatch_front + .as_entire_binding(), + view_resources.second_bvh_cull_queue.as_entire_binding(), + view_resources.front_meshlet_cull_count.as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources + .front_meshlet_cull_dispatch + .as_entire_binding(), + view_resources + .back_meshlet_cull_dispatch + .as_entire_binding(), + view_resources.meshlet_cull_queue.as_entire_binding(), + )), + ); + + let first_meshlet_cull = render_device.create_bind_group( + "meshlet_first_meshlet_cull_bind_group", + &resource_manager.first_meshlet_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.meshlet_cull_data.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources + .visibility_buffer_software_raster_indirect_args + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_cluster_prev_counts + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + view_resources.front_meshlet_cull_count.as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources + 
.back_meshlet_cull_dispatch + .as_entire_binding(), + view_resources.meshlet_cull_queue.as_entire_binding(), + )), + ); + + let second_meshlet_cull = render_device.create_bind_group( + "meshlet_second_meshlet_cull_bind_group", + &resource_manager.second_meshlet_cull_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + meshlet_mesh_manager.meshlet_cull_data.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources + .visibility_buffer_software_raster_indirect_args + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_cluster_prev_counts + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + view_resources.back_meshlet_cull_count.as_entire_binding(), + view_resources.meshlet_cull_queue.as_entire_binding(), + )), + ); + + let downsample_depth = view_resources.depth_pyramid.create_bind_group( + &render_device, + "meshlet_downsample_depth_bind_group", + if view_resources.not_shadow_view { + &resource_manager.downsample_depth_bind_group_layout + } else { + &resource_manager.downsample_depth_shadow_view_bind_group_layout + }, + &view_resources.visibility_buffer.default_view, + &resource_manager.depth_pyramid_sampler, + ); + + let visibility_buffer_raster = render_device.create_bind_group( + "meshlet_visibility_raster_buffer_bind_group", + if view_resources.not_shadow_view { + &resource_manager.visibility_buffer_raster_bind_group_layout + } else { + &resource_manager.visibility_buffer_raster_shadow_view_bind_group_layout + }, + &BindGroupEntries::sequential(( + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_positions.binding(), + 
instance_manager.instance_uniforms.binding().unwrap(), + resource_manager + .visibility_buffer_raster_cluster_prev_counts + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + &view_resources.visibility_buffer.default_view, + view_uniforms.clone(), + )), + ); + + let resolve_depth = render_device.create_bind_group( + "meshlet_resolve_depth_bind_group", + if view_resources.not_shadow_view { + &resource_manager.resolve_depth_bind_group_layout + } else { + &resource_manager.resolve_depth_shadow_view_bind_group_layout + }, + &BindGroupEntries::single(&view_resources.visibility_buffer.default_view), + ); + + let resolve_material_depth = view_resources.material_depth.as_ref().map(|_| { + render_device.create_bind_group( + "meshlet_resolve_material_depth_bind_group", + &resource_manager.resolve_material_depth_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.visibility_buffer.default_view, + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + instance_manager.instance_material_ids.binding().unwrap(), + )), + ) + }); + + let material_shade = view_resources.material_depth.as_ref().map(|_| { + render_device.create_bind_group( + "meshlet_mesh_material_shade_bind_group", + &resource_manager.material_shade_bind_group_layout, + &BindGroupEntries::sequential(( + &view_resources.visibility_buffer.default_view, + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_positions.binding(), + meshlet_mesh_manager.vertex_normals.binding(), + meshlet_mesh_manager.vertex_uvs.binding(), + instance_manager.instance_uniforms.binding().unwrap(), + )), + ) + }); + + let remap_1d_to_2d_dispatch = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .as_ref() + .map(|layout| { + render_device.create_bind_group( + 
"meshlet_remap_1d_to_2d_dispatch_bind_group", + layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ) + }); + + let fill_counts = if resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .is_some() + { + render_device.create_bind_group( + "meshlet_fill_counts_bind_group", + &resource_manager.fill_counts_bind_group_layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_cluster_prev_counts + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ) + } else { + render_device.create_bind_group( + "meshlet_fill_counts_bind_group", + &resource_manager.fill_counts_bind_group_layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_cluster_prev_counts + .as_entire_binding(), + )), + ) + }; + + commands.entity(view_entity).insert(MeshletViewBindGroups { + clear_visibility_buffer, + first_instance_cull, + second_instance_cull, + first_bvh_cull_ping, + first_bvh_cull_pong, + second_bvh_cull_ping, + second_bvh_cull_pong, + first_meshlet_cull, + second_meshlet_cull, + downsample_depth, + visibility_buffer_raster, + resolve_depth, + resolve_material_depth, + material_shade, + remap_1d_to_2d_dispatch, + fill_counts, + }); + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_hardware_raster.wgsl b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_hardware_raster.wgsl new file mode 100644 index 
0000000..2a25144 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_hardware_raster.wgsl @@ -0,0 +1,79 @@ +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_cluster_meshlet_ids, + meshlets, + meshlet_cluster_instance_ids, + meshlet_instance_uniforms, + meshlet_raster_clusters, + meshlet_previous_raster_counts, + meshlet_visibility_buffer, + view, + get_meshlet_triangle_count, + get_meshlet_vertex_id, + get_meshlet_vertex_position, + }, + mesh_functions::mesh_position_local_to_world, +} +#import bevy_render::maths::affine3_to_square +var meshlet_raster_cluster_rightmost_slot: u32; + +/// Vertex/fragment shader for rasterizing large clusters into a visibility buffer. + +struct VertexOutput { + @builtin(position) position: vec4, +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + @location(0) @interpolate(flat) packed_ids: u32, +#endif +} + +@vertex +fn vertex(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) vertex_index: u32) -> VertexOutput { + let cluster_in_draw = meshlet_previous_raster_counts[1] + instance_index; + let cluster_id = meshlet_raster_cluster_rightmost_slot - cluster_in_draw; + let instanced_offset = meshlet_raster_clusters[cluster_id]; + var meshlet = meshlets[instanced_offset.offset]; + + let triangle_id = vertex_index / 3u; + if triangle_id >= get_meshlet_triangle_count(&meshlet) { return dummy_vertex(); } + let index_id = vertex_index; + let vertex_id = get_meshlet_vertex_id(meshlet.start_index_id + index_id); + + let instance_uniform = meshlet_instance_uniforms[instanced_offset.instance_id]; + + let vertex_position = get_meshlet_vertex_position(&meshlet, vertex_id); + let world_from_local = affine3_to_square(instance_uniform.world_from_local); + let world_position = mesh_position_local_to_world(world_from_local, vec4(vertex_position, 1.0)); + let clip_position = view.clip_from_world * vec4(world_position.xyz, 1.0); + + return VertexOutput( + clip_position, +#ifdef 
MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + (cluster_id << 7u) | triangle_id, +#endif + ); +} + +@fragment +fn fragment(vertex_output: VertexOutput) { + let depth = bitcast(vertex_output.position.z); +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let visibility = (u64(depth) << 32u) | u64(vertex_output.packed_ids); +#else + let visibility = depth; +#endif + textureAtomicMax(meshlet_visibility_buffer, vec2(vertex_output.position.xy), visibility); +} + +fn dummy_vertex() -> VertexOutput { + return VertexOutput( + vec4(divide(0.0, 0.0)), // NaN vertex position +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + 0u, +#endif + ); +} + +// Naga doesn't allow divide by zero literals, but this lets us work around it +fn divide(a: f32, b: f32) -> f32 { + return a / b; +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_raster_node.rs b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_raster_node.rs new file mode 100644 index 0000000..60d8bed --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_raster_node.rs @@ -0,0 +1,706 @@ +use super::{ + pipelines::MeshletPipelines, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, +}; +use crate::render::pbr::{ + meshlet::resource_manager::ResourceManager, LightEntity, ShadowView, ViewLightEntities, +}; +use bevy_color::LinearRgba; +use crate::render::prepass::PreviousViewUniformOffset; +use bevy_ecs::{ + query::QueryState, + world::{FromWorld, World}, +}; +use bevy_math::UVec2; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{Node, NodeRunError, RenderGraphContext}, + render_resource::*, + renderer::RenderContext, + view::{ViewDepthTexture, ViewUniformOffset}, +}; + +/// Rasterize meshlets into a depth buffer, and optional visibility buffer + material depth buffer for shading passes. 
+pub struct MeshletVisibilityBufferRasterPassNode { + main_view_query: QueryState<( + &'static ExtractedCamera, + &'static ViewDepthTexture, + &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + &'static ViewLightEntities, + )>, + view_light_query: QueryState<( + &'static ShadowView, + &'static LightEntity, + &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + )>, +} + +impl FromWorld for MeshletVisibilityBufferRasterPassNode { + fn from_world(world: &mut World) -> Self { + Self { + main_view_query: QueryState::new(world), + view_light_query: QueryState::new(world), + } + } +} + +impl Node for MeshletVisibilityBufferRasterPassNode { + fn update(&mut self, world: &mut World) { + self.main_view_query.update_archetypes(world); + self.view_light_query.update_archetypes(world); + } + + // TODO: Reuse compute/render passes between logical passes where possible, as they're expensive + fn run( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + world: &World, + ) -> Result<(), NodeRunError> { + let Ok(( + camera, + view_depth, + view_offset, + previous_view_offset, + meshlet_view_bind_groups, + meshlet_view_resources, + lights, + )) = self.main_view_query.get_manual(world, graph.view_entity()) + else { + return Ok(()); + }; + + let Some(( + clear_visibility_buffer_pipeline, + clear_visibility_buffer_shadow_view_pipeline, + first_instance_cull_pipeline, + second_instance_cull_pipeline, + first_bvh_cull_pipeline, + second_bvh_cull_pipeline, + first_meshlet_cull_pipeline, + second_meshlet_cull_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_software_raster_shadow_view_pipeline, + 
visibility_buffer_hardware_raster_pipeline, + visibility_buffer_hardware_raster_shadow_view_pipeline, + visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline, + resolve_depth_pipeline, + resolve_depth_shadow_view_pipeline, + resolve_material_depth_pipeline, + remap_1d_to_2d_dispatch_pipeline, + fill_counts_pipeline, + )) = MeshletPipelines::get(world) + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + render_context + .command_encoder() + .push_debug_group("meshlet_visibility_buffer_raster"); + let time_span = diagnostics.time_span( + render_context.command_encoder(), + "meshlet_visibility_buffer_raster", + ); + + let resource_manager = world.get_resource::().unwrap(); + render_context.command_encoder().clear_buffer( + &resource_manager.visibility_buffer_raster_cluster_prev_counts, + 0, + None, + ); + + clear_visibility_buffer_pass( + render_context, + &meshlet_view_bind_groups.clear_visibility_buffer, + clear_visibility_buffer_pipeline, + meshlet_view_resources.view_size, + ); + + render_context + .command_encoder() + .push_debug_group("meshlet_first_pass"); + first_cull( + render_context, + meshlet_view_bind_groups, + meshlet_view_resources, + view_offset, + previous_view_offset, + first_instance_cull_pipeline, + first_bvh_cull_pipeline, + first_meshlet_cull_pipeline, + remap_1d_to_2d_dispatch_pipeline, + ); + raster_pass( + true, + render_context, + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args, + &meshlet_view_resources.dummy_render_target.default_view, + meshlet_view_bind_groups, + view_offset, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, + fill_counts_pipeline, + Some(camera), + meshlet_view_resources.rightmost_slot, + ); + render_context.command_encoder().pop_debug_group(); + + meshlet_view_resources.depth_pyramid.downsample_depth( + "downsample_depth", + 
render_context, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, + ); + + render_context + .command_encoder() + .push_debug_group("meshlet_second_pass"); + second_cull( + render_context, + meshlet_view_bind_groups, + meshlet_view_resources, + view_offset, + previous_view_offset, + second_instance_cull_pipeline, + second_bvh_cull_pipeline, + second_meshlet_cull_pipeline, + remap_1d_to_2d_dispatch_pipeline, + ); + raster_pass( + false, + render_context, + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args, + &meshlet_view_resources.dummy_render_target.default_view, + meshlet_view_bind_groups, + view_offset, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, + fill_counts_pipeline, + Some(camera), + meshlet_view_resources.rightmost_slot, + ); + render_context.command_encoder().pop_debug_group(); + + resolve_depth( + render_context, + view_depth.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + resolve_depth_pipeline, + camera, + ); + resolve_material_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + resolve_material_depth_pipeline, + camera, + ); + meshlet_view_resources.depth_pyramid.downsample_depth( + "downsample_depth", + render_context, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, + ); + render_context.command_encoder().pop_debug_group(); + + for light_entity in &lights.lights { + let Ok(( + shadow_view, + light_type, + view_offset, + previous_view_offset, + meshlet_view_bind_groups, + meshlet_view_resources, + )) = self.view_light_query.get_manual(world, *light_entity) + else { + continue; + }; + + let shadow_visibility_buffer_hardware_raster_pipeline = + if let 
LightEntity::Directional { .. } = light_type { + visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline + } else { + visibility_buffer_hardware_raster_shadow_view_pipeline + }; + + render_context.command_encoder().push_debug_group(&format!( + "meshlet_visibility_buffer_raster: {}", + shadow_view.pass_name + )); + let time_span_shadow = diagnostics.time_span( + render_context.command_encoder(), + shadow_view.pass_name.clone(), + ); + clear_visibility_buffer_pass( + render_context, + &meshlet_view_bind_groups.clear_visibility_buffer, + clear_visibility_buffer_shadow_view_pipeline, + meshlet_view_resources.view_size, + ); + + render_context + .command_encoder() + .push_debug_group("meshlet_first_pass"); + first_cull( + render_context, + meshlet_view_bind_groups, + meshlet_view_resources, + view_offset, + previous_view_offset, + first_instance_cull_pipeline, + first_bvh_cull_pipeline, + first_meshlet_cull_pipeline, + remap_1d_to_2d_dispatch_pipeline, + ); + raster_pass( + true, + render_context, + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args, + &meshlet_view_resources.dummy_render_target.default_view, + meshlet_view_bind_groups, + view_offset, + visibility_buffer_software_raster_shadow_view_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, + fill_counts_pipeline, + None, + meshlet_view_resources.rightmost_slot, + ); + render_context.command_encoder().pop_debug_group(); + + meshlet_view_resources.depth_pyramid.downsample_depth( + "downsample_depth", + render_context, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, + ); + + render_context + .command_encoder() + .push_debug_group("meshlet_second_pass"); + second_cull( + render_context, + meshlet_view_bind_groups, + meshlet_view_resources, + view_offset, + 
previous_view_offset, + second_instance_cull_pipeline, + second_bvh_cull_pipeline, + second_meshlet_cull_pipeline, + remap_1d_to_2d_dispatch_pipeline, + ); + raster_pass( + false, + render_context, + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args, + &meshlet_view_resources.dummy_render_target.default_view, + meshlet_view_bind_groups, + view_offset, + visibility_buffer_software_raster_shadow_view_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, + fill_counts_pipeline, + None, + meshlet_view_resources.rightmost_slot, + ); + render_context.command_encoder().pop_debug_group(); + + resolve_depth( + render_context, + shadow_view.depth_attachment.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + resolve_depth_shadow_view_pipeline, + camera, + ); + meshlet_view_resources.depth_pyramid.downsample_depth( + "downsample_depth", + render_context, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, + ); + render_context.command_encoder().pop_debug_group(); + time_span_shadow.end(render_context.command_encoder()); + } + + time_span.end(render_context.command_encoder()); + + Ok(()) + } +} + +// TODO: Replace this with vkCmdClearColorImage once wgpu supports it +fn clear_visibility_buffer_pass( + render_context: &mut RenderContext, + clear_visibility_buffer_bind_group: &BindGroup, + clear_visibility_buffer_pipeline: &ComputePipeline, + view_size: UVec2, +) { + let command_encoder = render_context.command_encoder(); + let mut clear_visibility_buffer_pass = + command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some("clear_visibility_buffer"), + timestamp_writes: None, + }); + clear_visibility_buffer_pass.set_pipeline(clear_visibility_buffer_pipeline); + clear_visibility_buffer_pass.set_push_constants(0, 
bytemuck::bytes_of(&view_size)); + clear_visibility_buffer_pass.set_bind_group(0, clear_visibility_buffer_bind_group, &[]); + clear_visibility_buffer_pass.dispatch_workgroups( + view_size.x.div_ceil(16), + view_size.y.div_ceil(16), + 1, + ); +} + +fn first_cull( + render_context: &mut RenderContext, + meshlet_view_bind_groups: &MeshletViewBindGroups, + meshlet_view_resources: &MeshletViewResources, + view_offset: &ViewUniformOffset, + previous_view_offset: &PreviousViewUniformOffset, + first_instance_cull_pipeline: &ComputePipeline, + first_bvh_cull_pipeline: &ComputePipeline, + first_meshlet_cull_pipeline: &ComputePipeline, + remap_1d_to_2d_pipeline: Option<&ComputePipeline>, +) { + let workgroups = meshlet_view_resources.scene_instance_count.div_ceil(128); + cull_pass( + "meshlet_first_instance_cull", + render_context, + &meshlet_view_bind_groups.first_instance_cull, + view_offset, + previous_view_offset, + first_instance_cull_pipeline, + &[meshlet_view_resources.scene_instance_count], + ) + .dispatch_workgroups(workgroups, 1, 1); + + render_context + .command_encoder() + .push_debug_group("meshlet_first_bvh_cull"); + let mut ping = true; + for _ in 0..meshlet_view_resources.max_bvh_depth { + cull_pass( + "meshlet_first_bvh_cull_dispatch", + render_context, + if ping { + &meshlet_view_bind_groups.first_bvh_cull_ping + } else { + &meshlet_view_bind_groups.first_bvh_cull_pong + }, + view_offset, + previous_view_offset, + first_bvh_cull_pipeline, + &[ping as u32, meshlet_view_resources.rightmost_slot], + ) + .dispatch_workgroups_indirect( + if ping { + &meshlet_view_resources.first_bvh_cull_dispatch_front + } else { + &meshlet_view_resources.first_bvh_cull_dispatch_back + }, + 0, + ); + render_context.command_encoder().clear_buffer( + if ping { + &meshlet_view_resources.first_bvh_cull_count_front + } else { + &meshlet_view_resources.first_bvh_cull_count_back + }, + 0, + Some(4), + ); + render_context.command_encoder().clear_buffer( + if ping { + 
&meshlet_view_resources.first_bvh_cull_dispatch_front + } else { + &meshlet_view_resources.first_bvh_cull_dispatch_back + }, + 0, + Some(4), + ); + ping = !ping; + } + render_context.command_encoder().pop_debug_group(); + + let mut pass = cull_pass( + "meshlet_first_meshlet_cull", + render_context, + &meshlet_view_bind_groups.first_meshlet_cull, + view_offset, + previous_view_offset, + first_meshlet_cull_pipeline, + &[meshlet_view_resources.rightmost_slot], + ); + pass.dispatch_workgroups_indirect(&meshlet_view_resources.front_meshlet_cull_dispatch, 0); + remap_1d_to_2d( + pass, + remap_1d_to_2d_pipeline, + meshlet_view_bind_groups.remap_1d_to_2d_dispatch.as_ref(), + ); +} + +fn second_cull( + render_context: &mut RenderContext, + meshlet_view_bind_groups: &MeshletViewBindGroups, + meshlet_view_resources: &MeshletViewResources, + view_offset: &ViewUniformOffset, + previous_view_offset: &PreviousViewUniformOffset, + second_instance_cull_pipeline: &ComputePipeline, + second_bvh_cull_pipeline: &ComputePipeline, + second_meshlet_cull_pipeline: &ComputePipeline, + remap_1d_to_2d_pipeline: Option<&ComputePipeline>, +) { + cull_pass( + "meshlet_second_instance_cull", + render_context, + &meshlet_view_bind_groups.second_instance_cull, + view_offset, + previous_view_offset, + second_instance_cull_pipeline, + &[meshlet_view_resources.scene_instance_count], + ) + .dispatch_workgroups_indirect(&meshlet_view_resources.second_pass_dispatch, 0); + + render_context + .command_encoder() + .push_debug_group("meshlet_second_bvh_cull"); + let mut ping = true; + for _ in 0..meshlet_view_resources.max_bvh_depth { + cull_pass( + "meshlet_second_bvh_cull_dispatch", + render_context, + if ping { + &meshlet_view_bind_groups.second_bvh_cull_ping + } else { + &meshlet_view_bind_groups.second_bvh_cull_pong + }, + view_offset, + previous_view_offset, + second_bvh_cull_pipeline, + &[ping as u32, meshlet_view_resources.rightmost_slot], + ) + .dispatch_workgroups_indirect( + if ping { + 
&meshlet_view_resources.second_bvh_cull_dispatch_front + } else { + &meshlet_view_resources.second_bvh_cull_dispatch_back + }, + 0, + ); + ping = !ping; + } + render_context.command_encoder().pop_debug_group(); + + let mut pass = cull_pass( + "meshlet_second_meshlet_cull", + render_context, + &meshlet_view_bind_groups.second_meshlet_cull, + view_offset, + previous_view_offset, + second_meshlet_cull_pipeline, + &[meshlet_view_resources.rightmost_slot], + ); + pass.dispatch_workgroups_indirect(&meshlet_view_resources.back_meshlet_cull_dispatch, 0); + remap_1d_to_2d( + pass, + remap_1d_to_2d_pipeline, + meshlet_view_bind_groups.remap_1d_to_2d_dispatch.as_ref(), + ); +} + +fn cull_pass<'a>( + label: &'static str, + render_context: &'a mut RenderContext, + bind_group: &'a BindGroup, + view_offset: &'a ViewUniformOffset, + previous_view_offset: &'a PreviousViewUniformOffset, + pipeline: &'a ComputePipeline, + push_constants: &[u32], +) -> ComputePass<'a> { + let command_encoder = render_context.command_encoder(); + let mut pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + pass.set_pipeline(pipeline); + pass.set_bind_group( + 0, + bind_group, + &[view_offset.offset, previous_view_offset.offset], + ); + pass.set_push_constants(0, bytemuck::cast_slice(push_constants)); + pass +} + +fn remap_1d_to_2d( + mut pass: ComputePass, + pipeline: Option<&ComputePipeline>, + bind_group: Option<&BindGroup>, +) { + if let (Some(pipeline), Some(bind_group)) = (pipeline, bind_group) { + pass.set_pipeline(pipeline); + pass.set_bind_group(0, bind_group, &[]); + pass.dispatch_workgroups(1, 1, 1); + } +} + +fn raster_pass( + first_pass: bool, + render_context: &mut RenderContext, + visibility_buffer_software_raster_indirect_args: &Buffer, + visibility_buffer_hardware_raster_indirect_args: &Buffer, + dummy_render_target: &TextureView, + meshlet_view_bind_groups: &MeshletViewBindGroups, + view_offset: &ViewUniformOffset, + 
visibility_buffer_software_raster_pipeline: &ComputePipeline, + visibility_buffer_hardware_raster_pipeline: &RenderPipeline, + fill_counts_pipeline: &ComputePipeline, + camera: Option<&ExtractedCamera>, + raster_cluster_rightmost_slot: u32, +) { + let mut software_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some(if first_pass { + "raster_software_first" + } else { + "raster_software_second" + }), + timestamp_writes: None, + }); + software_pass.set_pipeline(visibility_buffer_software_raster_pipeline); + software_pass.set_bind_group( + 0, + &meshlet_view_bind_groups.visibility_buffer_raster, + &[view_offset.offset], + ); + software_pass.dispatch_workgroups_indirect(visibility_buffer_software_raster_indirect_args, 0); + drop(software_pass); + + let mut hardware_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some(if first_pass { + "raster_hardware_first" + } else { + "raster_hardware_second" + }), + color_attachments: &[Some(RenderPassColorAttachment { + view: dummy_render_target, + depth_slice: None, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Discard, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.and_then(|camera| camera.viewport.as_ref()) { + hardware_pass.set_camera_viewport(viewport); + } + hardware_pass.set_render_pipeline(visibility_buffer_hardware_raster_pipeline); + hardware_pass.set_push_constants( + ShaderStages::VERTEX, + 0, + &raster_cluster_rightmost_slot.to_le_bytes(), + ); + hardware_pass.set_bind_group( + 0, + &meshlet_view_bind_groups.visibility_buffer_raster, + &[view_offset.offset], + ); + hardware_pass.draw_indirect(visibility_buffer_hardware_raster_indirect_args, 0); + drop(hardware_pass); + + let mut fill_counts_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor 
{ + label: Some("fill_counts"), + timestamp_writes: None, + }); + fill_counts_pass.set_pipeline(fill_counts_pipeline); + fill_counts_pass.set_bind_group(0, &meshlet_view_bind_groups.fill_counts, &[]); + fill_counts_pass.dispatch_workgroups(1, 1, 1); +} + +fn resolve_depth( + render_context: &mut RenderContext, + depth_stencil_attachment: RenderPassDepthStencilAttachment, + meshlet_view_bind_groups: &MeshletViewBindGroups, + resolve_depth_pipeline: &RenderPipeline, + camera: &ExtractedCamera, +) { + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_depth"), + color_attachments: &[], + depth_stencil_attachment: Some(depth_stencil_attachment), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = &camera.viewport { + resolve_pass.set_camera_viewport(viewport); + } + resolve_pass.set_render_pipeline(resolve_depth_pipeline); + resolve_pass.set_bind_group(0, &meshlet_view_bind_groups.resolve_depth, &[]); + resolve_pass.draw(0..3, 0..1); +} + +fn resolve_material_depth( + render_context: &mut RenderContext, + meshlet_view_resources: &MeshletViewResources, + meshlet_view_bind_groups: &MeshletViewBindGroups, + resolve_material_depth_pipeline: &RenderPipeline, + camera: &ExtractedCamera, +) { + if let (Some(material_depth), Some(resolve_material_depth_bind_group)) = ( + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.resolve_material_depth.as_ref(), + ) { + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_material_depth"), + color_attachments: &[], + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Clear(0.0), + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = &camera.viewport { + 
resolve_pass.set_camera_viewport(viewport); + } + resolve_pass.set_render_pipeline(resolve_material_depth_pipeline); + resolve_pass.set_bind_group(0, resolve_material_depth_bind_group, &[]); + resolve_pass.draw(0..3, 0..1); + } +} diff --git a/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_resolve.wgsl b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_resolve.wgsl new file mode 100644 index 0000000..8d8a22b --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_resolve.wgsl @@ -0,0 +1,240 @@ +#define_import_path bevy_pbr::meshlet_visibility_buffer_resolve + +#import bevy_pbr::{ + meshlet_bindings::{ + Meshlet, + meshlet_visibility_buffer, + meshlet_raster_clusters, + meshlets, + meshlet_instance_uniforms, + get_meshlet_vertex_id, + get_meshlet_vertex_position, + get_meshlet_vertex_normal, + get_meshlet_vertex_uv, + }, + mesh_view_bindings::view, + mesh_functions::mesh_position_local_to_world, + mesh_types::Mesh, + view_transformations::{position_world_to_clip, frag_coord_to_ndc}, +} +#import bevy_render::maths::{affine3_to_square, mat2x4_f32_to_mat3x3_unpack} + +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS +#import bevy_pbr::{ + prepass_bindings::previous_view_uniforms, + pbr_prepass_functions::calculate_motion_vector, +} +#endif +#endif + +/// Functions to be used by materials for reading from a meshlet visibility buffer texture. 
+ +#ifdef MESHLET_MESH_MATERIAL_PASS +struct PartialDerivatives { + barycentrics: vec3, + ddx: vec3, + ddy: vec3, +} + +// https://github.com/ConfettiFX/The-Forge/blob/9d43e69141a9cd0ce2ce2d2db5122234d3a2d5b5/Common_3/Renderer/VisibilityBuffer2/Shaders/FSL/vb_shading_utilities.h.fsl#L90-L150 +fn compute_partial_derivatives(vertex_world_positions: array, 3>, ndc_uv: vec2, half_screen_size: vec2) -> PartialDerivatives { + var result: PartialDerivatives; + + let vertex_clip_position_0 = position_world_to_clip(vertex_world_positions[0].xyz); + let vertex_clip_position_1 = position_world_to_clip(vertex_world_positions[1].xyz); + let vertex_clip_position_2 = position_world_to_clip(vertex_world_positions[2].xyz); + + let inv_w = 1.0 / vec3(vertex_clip_position_0.w, vertex_clip_position_1.w, vertex_clip_position_2.w); + let ndc_0 = vertex_clip_position_0.xy * inv_w[0]; + let ndc_1 = vertex_clip_position_1.xy * inv_w[1]; + let ndc_2 = vertex_clip_position_2.xy * inv_w[2]; + + let inv_det = 1.0 / determinant(mat2x2(ndc_2 - ndc_1, ndc_0 - ndc_1)); + result.ddx = vec3(ndc_1.y - ndc_2.y, ndc_2.y - ndc_0.y, ndc_0.y - ndc_1.y) * inv_det * inv_w; + result.ddy = vec3(ndc_2.x - ndc_1.x, ndc_0.x - ndc_2.x, ndc_1.x - ndc_0.x) * inv_det * inv_w; + + var ddx_sum = dot(result.ddx, vec3(1.0)); + var ddy_sum = dot(result.ddy, vec3(1.0)); + + let delta_v = ndc_uv - ndc_0; + let interp_inv_w = inv_w.x + delta_v.x * ddx_sum + delta_v.y * ddy_sum; + let interp_w = 1.0 / interp_inv_w; + + result.barycentrics = vec3( + interp_w * (inv_w[0] + delta_v.x * result.ddx.x + delta_v.y * result.ddy.x), + interp_w * (delta_v.x * result.ddx.y + delta_v.y * result.ddy.y), + interp_w * (delta_v.x * result.ddx.z + delta_v.y * result.ddy.z), + ); + + result.ddx *= half_screen_size.x; + result.ddy *= half_screen_size.y; + ddx_sum *= half_screen_size.x; + ddy_sum *= half_screen_size.y; + + result.ddy *= -1.0; + ddy_sum *= -1.0; + + let interp_ddx_w = 1.0 / (interp_inv_w + ddx_sum); + let interp_ddy_w = 1.0 / 
(interp_inv_w + ddy_sum); + + result.ddx = interp_ddx_w * (result.barycentrics * interp_inv_w + result.ddx) - result.barycentrics; + result.ddy = interp_ddy_w * (result.barycentrics * interp_inv_w + result.ddy) - result.barycentrics; + return result; +} + +struct VertexOutput { + position: vec4, + world_position: vec4, + world_normal: vec3, + uv: vec2, + ddx_uv: vec2, + ddy_uv: vec2, + world_tangent: vec4, + mesh_flags: u32, + cluster_id: u32, + material_bind_group_slot: u32, +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + motion_vector: vec2, +#endif +#endif +} + +/// Load the visibility buffer texture and resolve it into a VertexOutput. +fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { + let packed_ids = u32(textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy)).r); + let cluster_id = packed_ids >> 7u; + let instanced_offset = meshlet_raster_clusters[cluster_id]; + let meshlet_id = instanced_offset.offset; + var meshlet = meshlets[meshlet_id]; + + let triangle_id = extractBits(packed_ids, 0u, 7u); + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); + let vertex_ids = vec3(get_meshlet_vertex_id(index_ids[0]), get_meshlet_vertex_id(index_ids[1]), get_meshlet_vertex_id(index_ids[2])); + let vertex_0 = load_vertex(&meshlet, vertex_ids[0]); + let vertex_1 = load_vertex(&meshlet, vertex_ids[1]); + let vertex_2 = load_vertex(&meshlet, vertex_ids[2]); + + let instance_id = instanced_offset.instance_id; + var instance_uniform = meshlet_instance_uniforms[instance_id]; + + let world_from_local = affine3_to_square(instance_uniform.world_from_local); + let world_position_0 = mesh_position_local_to_world(world_from_local, vec4(vertex_0.position, 1.0)); + let world_position_1 = mesh_position_local_to_world(world_from_local, vec4(vertex_1.position, 1.0)); + let world_position_2 = mesh_position_local_to_world(world_from_local, vec4(vertex_2.position, 1.0)); + + let frag_coord_ndc = frag_coord_to_ndc(frag_coord).xy; + let 
partial_derivatives = compute_partial_derivatives( + array(world_position_0, world_position_1, world_position_2), + frag_coord_ndc, + view.viewport.zw / 2.0, + ); + + let world_position = mat3x4(world_position_0, world_position_1, world_position_2) * partial_derivatives.barycentrics; + let world_positions_camera_relative = mat3x3( + world_position_0.xyz - view.world_position, + world_position_1.xyz - view.world_position, + world_position_2.xyz - view.world_position, + ); + let ddx_world_position = world_positions_camera_relative * partial_derivatives.ddx; + let ddy_world_position = world_positions_camera_relative * partial_derivatives.ddy; + + let world_normal = mat3x3( + normal_local_to_world(vertex_0.normal, &instance_uniform), + normal_local_to_world(vertex_1.normal, &instance_uniform), + normal_local_to_world(vertex_2.normal, &instance_uniform), + ) * partial_derivatives.barycentrics; + + let uv = mat3x2(vertex_0.uv, vertex_1.uv, vertex_2.uv) * partial_derivatives.barycentrics; + let ddx_uv = mat3x2(vertex_0.uv, vertex_1.uv, vertex_2.uv) * partial_derivatives.ddx; + let ddy_uv = mat3x2(vertex_0.uv, vertex_1.uv, vertex_2.uv) * partial_derivatives.ddy; + + let world_tangent = calculate_world_tangent(world_normal, ddx_world_position, ddy_world_position, ddx_uv, ddy_uv); + +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + let previous_world_from_local = affine3_to_square(instance_uniform.previous_world_from_local); + let previous_world_position_0 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_0.position, 1.0)); + let previous_world_position_1 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_1.position, 1.0)); + let previous_world_position_2 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_2.position, 1.0)); + let previous_world_position = mat3x4(previous_world_position_0, previous_world_position_1, previous_world_position_2) * partial_derivatives.barycentrics; + let motion_vector = 
calculate_motion_vector(world_position, previous_world_position); +#endif +#endif + + return VertexOutput( + frag_coord, + world_position, + world_normal, + uv, + ddx_uv, + ddy_uv, + world_tangent, + instance_uniform.flags, + instance_id ^ meshlet_id, + instance_uniform.material_and_lightmap_bind_group_slot & 0xffffu, +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + motion_vector, +#endif +#endif + ); +} + +struct MeshletVertex { + position: vec3, + normal: vec3, + uv: vec2, +} + +fn load_vertex(meshlet: ptr, vertex_id: u32) -> MeshletVertex { + return MeshletVertex( + get_meshlet_vertex_position(meshlet, vertex_id), + get_meshlet_vertex_normal(meshlet, vertex_id), + get_meshlet_vertex_uv(meshlet, vertex_id), + ); +} + +fn normal_local_to_world(vertex_normal: vec3, instance_uniform: ptr) -> vec3 { + if any(vertex_normal != vec3(0.0)) { + return normalize( + mat2x4_f32_to_mat3x3_unpack( + (*instance_uniform).local_from_world_transpose_a, + (*instance_uniform).local_from_world_transpose_b, + ) * vertex_normal + ); + } else { + return vertex_normal; + } +} + +// https://www.jeremyong.com/graphics/2023/12/16/surface-gradient-bump-mapping/#surface-gradient-from-a-tangent-space-normal-vector-without-an-explicit-tangent-basis +fn calculate_world_tangent( + world_normal: vec3, + ddx_world_position: vec3, + ddy_world_position: vec3, + ddx_uv: vec2, + ddy_uv: vec2, +) -> vec4 { + // Project the position gradients onto the tangent plane + let ddx_world_position_s = ddx_world_position - dot(ddx_world_position, world_normal) * world_normal; + let ddy_world_position_s = ddy_world_position - dot(ddy_world_position, world_normal) * world_normal; + + // Compute the jacobian matrix to leverage the chain rule + let jacobian_sign = sign(ddx_uv.x * ddy_uv.y - ddx_uv.y * ddy_uv.x); + + var world_tangent = jacobian_sign * (ddy_uv.y * ddx_world_position_s - ddx_uv.y * ddy_world_position_s); + + // The sign intrinsic returns 0 if the argument is 0 + if jacobian_sign != 0.0 { + 
world_tangent = normalize(world_tangent); + } + + // The second factor here ensures a consistent handedness between + // the tangent frame and surface basis w.r.t. screenspace. + let w = jacobian_sign * sign(dot(ddy_world_position, cross(world_normal, ddx_world_position))); + + return vec4(world_tangent, -w); // TODO: Unclear why we need to negate this to match mikktspace generated tangents +} +#endif diff --git a/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_software_raster.wgsl b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_software_raster.wgsl new file mode 100644 index 0000000..0ddfff8 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/meshlet/visibility_buffer_software_raster.wgsl @@ -0,0 +1,189 @@ +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_cluster_meshlet_ids, + meshlets, + meshlet_cluster_instance_ids, + meshlet_instance_uniforms, + meshlet_raster_clusters, + meshlet_previous_raster_counts, + meshlet_software_raster_cluster_count, + meshlet_visibility_buffer, + view, + get_meshlet_vertex_count, + get_meshlet_triangle_count, + get_meshlet_vertex_id, + get_meshlet_vertex_position, + }, + mesh_functions::mesh_position_local_to_world, + view_transformations::ndc_to_uv, +} +#import bevy_render::maths::affine3_to_square + +/// Compute shader for rasterizing small clusters into a visibility buffer. 
+ +// TODO: Fixed-point math and top-left rule + +var viewport_vertices: array; + +@compute +@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1-2 vertices per thread, 1 triangle per thread, 1 cluster per workgroup +fn rasterize_cluster( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(local_invocation_index) local_invocation_index: u32, +#ifdef MESHLET_2D_DISPATCH + @builtin(num_workgroups) num_workgroups: vec3, +#endif +) { + var workgroup_id_1d = workgroup_id.x; + +#ifdef MESHLET_2D_DISPATCH + workgroup_id_1d += workgroup_id.y * num_workgroups.x; + if workgroup_id_1d >= meshlet_software_raster_cluster_count { return; } +#endif + + let cluster_id = workgroup_id_1d + meshlet_previous_raster_counts[0]; + let instanced_offset = meshlet_raster_clusters[cluster_id]; + var meshlet = meshlets[instanced_offset.offset]; + + let instance_uniform = meshlet_instance_uniforms[instanced_offset.instance_id]; + let world_from_local = affine3_to_square(instance_uniform.world_from_local); + + // Load and project 1 vertex per thread, and then again if there are more than 128 vertices in the meshlet + for (var i = 0u; i <= 128u; i += 128u) { + let vertex_id = local_invocation_index + i; + if vertex_id < get_meshlet_vertex_count(&meshlet) { + let vertex_position = get_meshlet_vertex_position(&meshlet, vertex_id); + + // Project vertex to viewport space + let world_position = mesh_position_local_to_world(world_from_local, vec4(vertex_position, 1.0)); + let clip_position = view.clip_from_world * vec4(world_position.xyz, 1.0); + let ndc_position = clip_position.xyz / clip_position.w; + let viewport_position_xy = ndc_to_uv(ndc_position.xy) * view.viewport.zw; + + // Write vertex to workgroup shared memory + viewport_vertices[vertex_id] = vec3(viewport_position_xy, ndc_position.z); + } + } + workgroupBarrier(); + + // Load 1 triangle's worth of vertex data per thread + let triangle_id = local_invocation_index; + if triangle_id >= get_meshlet_triangle_count(&meshlet) { 
return; } + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); + let vertex_ids = vec3(get_meshlet_vertex_id(index_ids[0]), get_meshlet_vertex_id(index_ids[1]), get_meshlet_vertex_id(index_ids[2])); + let vertex_0 = viewport_vertices[vertex_ids[2]]; + let vertex_1 = viewport_vertices[vertex_ids[1]]; + let vertex_2 = viewport_vertices[vertex_ids[0]]; + let packed_ids = (cluster_id << 7u) | triangle_id; + + // Backface culling + let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); + if triangle_double_area <= 0.0 { return; } + + // Setup triangle gradients + let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y); + let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x); + let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area; + let z_x = dot(vertices_z, w_x); + let z_y = dot(vertices_z, w_y); + + // Compute triangle bounding box + var min_x = floor(min3(vertex_0.x, vertex_1.x, vertex_2.x)); + var min_y = floor(min3(vertex_0.y, vertex_1.y, vertex_2.y)); + var max_x = ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x)); + var max_y = ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y)); + min_x = max(min_x, 0.0); + min_y = max(min_y, 0.0); + max_x = min(max_x, view.viewport.z - 1.0); + max_y = min(max_y, view.viewport.w - 1.0); + + // Setup initial triangle equations + let starting_pixel = vec2(min_x, min_y) + 0.5; + var w_row = vec3( + edge_function(vertex_1.xy, vertex_2.xy, starting_pixel), + edge_function(vertex_2.xy, vertex_0.xy, starting_pixel), + edge_function(vertex_0.xy, vertex_1.xy, starting_pixel), + ); + var z_row = dot(vertices_z, w_row); + + // Rasterize triangle + if subgroupAny(max_x - min_x > 4.0) { + // Scanline setup + let edge_012 = -w_x; + let open_edge = edge_012 < vec3(0.0); + let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0)); + let max_x_diff = vec3(max_x - min_x); + for (var 
y = min_y; y <= max_y; y += 1.0) { + // Calculate start and end X interval for pixels in this row within the triangle + let cross_x = w_row * inverse_edge_012; + let min_x2 = select(vec3(0.0), cross_x, open_edge); + let max_x2 = select(cross_x, max_x_diff, open_edge); + var x0 = ceil(max3(min_x2[0], min_x2[1], min_x2[2])); + var x1 = min3(max_x2[0], max_x2[1], max_x2[2]); + + var w = w_row + w_x * x0; + var z = z_row + z_x * x0; + x0 += min_x; + x1 += min_x; + + // Iterate scanline X interval + for (var x = x0; x <= x1; x += 1.0) { + // Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it) + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(x, y, z, packed_ids); + } + + // Increment triangle equations along the X-axis + w += w_x; + z += z_x; + } + + // Increment triangle equations along the Y-axis + w_row += w_y; + z_row += z_y; + } + } else { + // Iterate over every pixel in the triangle's bounding box + for (var y = min_y; y <= max_y; y += 1.0) { + var w = w_row; + var z = z_row; + + for (var x = min_x; x <= max_x; x += 1.0) { + // Check if point at pixel is within triangle + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(x, y, z, packed_ids); + } + + // Increment triangle equations along the X-axis + w += w_x; + z += z_x; + } + + // Increment triangle equations along the Y-axis + w_row += w_y; + z_row += z_y; + } + } +} + +fn write_visibility_buffer_pixel(x: f32, y: f32, z: f32, packed_ids: u32) { + let depth = bitcast(z); +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let visibility = (u64(depth) << 32u) | u64(packed_ids); +#else + let visibility = depth; +#endif + textureAtomicMax(meshlet_visibility_buffer, vec2(u32(x), u32(y)), visibility); +} + +fn edge_function(a: vec2, b: vec2, c: vec2) -> f32 { + return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x); +} + +fn min3(a: f32, b: f32, c: f32) -> f32 { + return min(a, min(b, c)); +} + +fn max3(a: f32, b: f32, 
c: f32) -> f32 { + return max(a, max(b, c)); +} diff --git a/crates/libmarathon/src/render/pbr/mod.rs b/crates/libmarathon/src/render/pbr/mod.rs new file mode 100644 index 0000000..a329455 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/mod.rs @@ -0,0 +1,390 @@ +#![expect(missing_docs, reason = "Not all docs are written yet, see #3492.")] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![forbid(unsafe_code)] +// Doc attributes removed - these need to be at crate level if needed + +extern crate alloc; + +#[cfg(feature = "meshlet")] +mod meshlet; +pub mod wireframe; + +/// Experimental features that are not yet finished. Please report any issues you encounter! +/// +/// Expect bugs, missing features, compatibility issues, low performance, and/or future breaking changes. +#[cfg(feature = "meshlet")] +pub mod experimental { + /// Render high-poly 3d meshes using an efficient GPU-driven method. + /// See [`MeshletPlugin`](meshlet::MeshletPlugin) and [`MeshletMesh`](meshlet::MeshletMesh) for details. 
+ pub mod meshlet { + pub use crate::render::pbr::meshlet::*; + } +} + +mod atmosphere; +mod cluster; +mod components; +pub mod decal; +pub mod deferred; +mod extended_material; +mod fog; +mod light_probe; +mod lightmap; +mod material; +mod material_bind_groups; +mod mesh_material; +mod parallax; +mod pbr_material; +mod prepass; +mod render; +mod ssao; +mod ssr; +mod volumetric_fog; + +use bevy_color::{Color, LinearRgba}; + +pub use atmosphere::*; +use bevy_light::{ + AmbientLight, DirectionalLight, PointLight, ShadowFilteringMethod, SimulationLightSystems, + SpotLight, +}; +use bevy_shader::{load_shader_library, ShaderRef}; +pub use cluster::*; +pub use components::*; +pub use decal::clustered::ClusteredDecalPlugin; +pub use extended_material::*; +pub use fog::*; +pub use light_probe::*; +pub use lightmap::*; +pub use material::*; +pub use material_bind_groups::*; +pub use mesh_material::*; +pub use parallax::*; +pub use pbr_material::*; +pub use prepass::*; +pub use render::*; +pub use ssao::*; +pub use ssr::*; +pub use volumetric_fog::VolumetricFogPlugin; + +/// The PBR prelude. +/// +/// This includes the most common types in this crate, re-exported for your convenience. +pub mod prelude { + #[doc(hidden)] + pub use crate::render::pbr::{ + fog::{DistanceFog, FogFalloff}, + material::{Material, MaterialPlugin}, + mesh_material::MeshMaterial3d, + parallax::ParallaxMappingMethod, + pbr_material::StandardMaterial, + ssao::ScreenSpaceAmbientOcclusionPlugin, + }; +} + +pub mod graph { + use crate::render::render_graph::RenderLabel; + + /// Render graph nodes specific to 3D PBR rendering. + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + pub enum NodePbr { + /// Label for the shadow pass node that draws meshes that were visible + /// from the light last frame. + EarlyShadowPass, + /// Label for the shadow pass node that draws meshes that became visible + /// from the light this frame. 
+ LateShadowPass, + /// Label for the screen space ambient occlusion render node. + ScreenSpaceAmbientOcclusion, + DeferredLightingPass, + /// Label for the volumetric lighting pass. + VolumetricFog, + /// Label for the shader that transforms and culls meshes that were + /// visible last frame. + EarlyGpuPreprocess, + /// Label for the shader that transforms and culls meshes that became + /// visible this frame. + LateGpuPreprocess, + /// Label for the screen space reflections pass. + ScreenSpaceReflections, + /// Label for the node that builds indirect draw parameters for meshes + /// that were visible last frame. + EarlyPrepassBuildIndirectParameters, + /// Label for the node that builds indirect draw parameters for meshes + /// that became visible this frame. + LatePrepassBuildIndirectParameters, + /// Label for the node that builds indirect draw parameters for the main + /// rendering pass, containing all meshes that are visible this frame. + MainBuildIndirectParameters, + ClearIndirectParametersMetadata, + } +} + +use crate::render::pbr::{deferred::DeferredPbrLightingPlugin, graph::NodePbr}; +use bevy_app::prelude::*; +use bevy_asset::{AssetApp, AssetPath, Assets, Handle, RenderAssetUsages}; +use crate::render::core_3d::graph::{Core3d, Node3d}; +use bevy_ecs::prelude::*; +#[cfg(feature = "bluenoise_texture")] +use bevy_image::{CompressedImageFormats, ImageType}; +use bevy_image::{Image, ImageSampler}; +use crate::render::{ + alpha::AlphaMode, + camera::sort_cameras, + extract_resource::ExtractResourcePlugin, + render_graph::RenderGraph, + render_resource::{ + Extent3d, TextureDataOrder, TextureDescriptor, TextureDimension, TextureFormat, + TextureUsages, + }, + sync_component::SyncComponentPlugin, + ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderStartup, RenderSystems, +}; + +use std::path::PathBuf; + +fn shader_ref(path: PathBuf) -> ShaderRef { + ShaderRef::Path(AssetPath::from_path_buf(path).with_source("embedded")) +} + +pub const 
TONEMAPPING_LUT_TEXTURE_BINDING_INDEX: u32 = 18; +pub const TONEMAPPING_LUT_SAMPLER_BINDING_INDEX: u32 = 19; + +/// Sets up the entire PBR infrastructure of bevy. +pub struct PbrPlugin { + /// Controls if the prepass is enabled for the [`StandardMaterial`]. + /// For more information about what a prepass is, see the [`bevy_core_pipeline::prepass`] docs. + pub prepass_enabled: bool, + /// Controls if [`DeferredPbrLightingPlugin`] is added. + pub add_default_deferred_lighting_plugin: bool, + /// Controls if GPU [`MeshUniform`] building is enabled. + /// + /// This requires compute shader support and so will be forcibly disabled if + /// the platform doesn't support those. + pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +impl Default for PbrPlugin { + fn default() -> Self { + Self { + prepass_enabled: true, + add_default_deferred_lighting_plugin: true, + use_gpu_instance_buffer_builder: true, + debug_flags: RenderDebugFlags::default(), + } + } +} + +/// A resource that stores the spatio-temporal blue noise texture. 
+#[derive(Resource)] +pub struct Bluenoise { + /// Texture handle for spatio-temporal blue noise + pub texture: Handle, +} + +impl Plugin for PbrPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "render/pbr_types.wgsl"); + load_shader_library!(app, "render/pbr_bindings.wgsl"); + load_shader_library!(app, "render/utils.wgsl"); + load_shader_library!(app, "render/clustered_forward.wgsl"); + load_shader_library!(app, "render/pbr_lighting.wgsl"); + load_shader_library!(app, "render/pbr_transmission.wgsl"); + load_shader_library!(app, "render/shadows.wgsl"); + load_shader_library!(app, "deferred/pbr_deferred_types.wgsl"); + load_shader_library!(app, "deferred/pbr_deferred_functions.wgsl"); + load_shader_library!(app, "render/shadow_sampling.wgsl"); + load_shader_library!(app, "render/pbr_functions.wgsl"); + load_shader_library!(app, "render/rgb9e5.wgsl"); + load_shader_library!(app, "render/pbr_ambient.wgsl"); + load_shader_library!(app, "render/pbr_fragment.wgsl"); + load_shader_library!(app, "render/pbr.wgsl"); + load_shader_library!(app, "render/pbr_prepass_functions.wgsl"); + load_shader_library!(app, "render/pbr_prepass.wgsl"); + load_shader_library!(app, "render/parallax_mapping.wgsl"); + load_shader_library!(app, "render/view_transformations.wgsl"); + + // Setup dummy shaders for when MeshletPlugin is not used to prevent shader import errors. 
+ load_shader_library!(app, "meshlet/dummy_visibility_buffer_resolve.wgsl"); + + app.register_asset_reflect::() + .init_resource::() + .add_plugins(( + MeshRenderPlugin { + use_gpu_instance_buffer_builder: self.use_gpu_instance_buffer_builder, + debug_flags: self.debug_flags, + }, + MaterialsPlugin { + debug_flags: self.debug_flags, + }, + MaterialPlugin:: { + prepass_enabled: self.prepass_enabled, + debug_flags: self.debug_flags, + ..Default::default() + }, + ScreenSpaceAmbientOcclusionPlugin, + FogPlugin, + ExtractResourcePlugin::::default(), + SyncComponentPlugin::::default(), + LightmapPlugin, + LightProbePlugin, + GpuMeshPreprocessPlugin { + use_gpu_instance_buffer_builder: self.use_gpu_instance_buffer_builder, + }, + VolumetricFogPlugin, + ScreenSpaceReflectionsPlugin, + ClusteredDecalPlugin, + )) + .add_plugins(( + decal::ForwardDecalPlugin, + SyncComponentPlugin::::default(), + SyncComponentPlugin::::default(), + SyncComponentPlugin::::default(), + SyncComponentPlugin::::default(), + )) + .add_plugins(AtmospherePlugin) + .configure_sets( + PostUpdate, + ( + SimulationLightSystems::AddClusters, + SimulationLightSystems::AssignLightsToClusters, + ) + .chain(), + ); + + if self.add_default_deferred_lighting_plugin { + app.add_plugins(DeferredPbrLightingPlugin); + } + + // Initialize the default material handle. 
+ app.world_mut() + .resource_mut::>() + .insert( + &Handle::::default(), + StandardMaterial { + base_color: Color::srgb(1.0, 0.0, 0.5), + ..Default::default() + }, + ) + .unwrap(); + + let has_bluenoise = app + .get_sub_app(RenderApp) + .is_some_and(|render_app| render_app.world().is_resource_added::()); + + if !has_bluenoise { + let mut images = app.world_mut().resource_mut::>(); + #[cfg(feature = "bluenoise_texture")] + let handle = { + let image = Image::from_buffer( + include_bytes!("bluenoise/stbn.ktx2"), + ImageType::Extension("ktx2"), + CompressedImageFormats::NONE, + false, + ImageSampler::Default, + RenderAssetUsages::RENDER_WORLD, + ) + .expect("Failed to decode embedded blue-noise texture"); + images.add(image) + }; + + #[cfg(not(feature = "bluenoise_texture"))] + let handle = { images.add(stbn_placeholder()) }; + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .world_mut() + .insert_resource(Bluenoise { texture: handle }); + } + } + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + // Extract the required data from the main world + render_app + .add_systems( + RenderStartup, + ( + init_shadow_samplers, + init_global_clusterable_object_meta, + init_fallback_bindless_resources, + ), + ) + .add_systems( + ExtractSchedule, + ( + extract_clusters, + extract_lights, + extract_ambient_light_resource, + extract_ambient_light, + extract_shadow_filtering_method, + late_sweep_material_instances, + ), + ) + .add_systems( + Render, + ( + prepare_lights + .in_set(RenderSystems::ManageViews) + .after(sort_cameras), + prepare_clusters.in_set(RenderSystems::PrepareResources), + ), + ) + .init_resource::() + .init_resource::(); + + render_app.world_mut().add_observer(add_light_view_entities); + render_app + .world_mut() + .add_observer(remove_light_view_entities); + render_app.world_mut().add_observer(extracted_light_removed); + + let early_shadow_pass_node = 
EarlyShadowPassNode::from_world(render_app.world_mut()); + let late_shadow_pass_node = LateShadowPassNode::from_world(render_app.world_mut()); + let mut graph = render_app.world_mut().resource_mut::(); + let draw_3d_graph = graph.get_sub_graph_mut(Core3d).unwrap(); + draw_3d_graph.add_node(NodePbr::EarlyShadowPass, early_shadow_pass_node); + draw_3d_graph.add_node(NodePbr::LateShadowPass, late_shadow_pass_node); + draw_3d_graph.add_node_edges(( + NodePbr::EarlyShadowPass, + NodePbr::LateShadowPass, + Node3d::StartMainPass, + )); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + let global_cluster_settings = make_global_cluster_settings(render_app.world()); + app.insert_resource(global_cluster_settings); + } +} + +pub fn stbn_placeholder() -> Image { + let format = TextureFormat::Rgba8Unorm; + let data = vec![255, 0, 255, 255]; + Image { + data: Some(data), + data_order: TextureDataOrder::default(), + texture_descriptor: TextureDescriptor { + size: Extent3d::default(), + format, + dimension: TextureDimension::D2, + label: None, + mip_level_count: 1, + sample_count: 1, + usage: TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + sampler: ImageSampler::Default, + texture_view_descriptor: None, + asset_usage: RenderAssetUsages::RENDER_WORLD, + copy_on_resize: false, + } +} diff --git a/crates/libmarathon/src/render/pbr/parallax.rs b/crates/libmarathon/src/render/pbr/parallax.rs new file mode 100644 index 0000000..be588ca --- /dev/null +++ b/crates/libmarathon/src/render/pbr/parallax.rs @@ -0,0 +1,47 @@ +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; + +/// The [parallax mapping] method to use to compute depth based on the +/// material's [`depth_map`]. +/// +/// Parallax Mapping uses a depth map texture to give the illusion of depth +/// variation on a mesh surface that is geometrically flat. 
+/// +/// See the `parallax_mapping.wgsl` shader code for implementation details +/// and explanation of the methods used. +/// +/// [`depth_map`]: crate::StandardMaterial::depth_map +/// [parallax mapping]: https://en.wikipedia.org/wiki/Parallax_mapping +#[derive(Debug, Copy, Clone, PartialEq, Eq, Default, Reflect)] +#[reflect(Default, Clone, PartialEq)] +pub enum ParallaxMappingMethod { + /// A simple linear interpolation, using a single texture sample. + /// + /// This method is named "Parallax Occlusion Mapping". + /// + /// Unlike [`ParallaxMappingMethod::Relief`], only requires a single lookup, + /// but may skip small details and result in writhing material artifacts. + #[default] + Occlusion, + /// Discovers the best depth value based on binary search. + /// + /// Each iteration incurs a texture sample. + /// The result has fewer visual artifacts than [`ParallaxMappingMethod::Occlusion`]. + /// + /// This method is named "Relief Mapping". + Relief { + /// How many additional steps to use at most to find the depth value. + max_steps: u32, + }, +} + +impl ParallaxMappingMethod { + /// [`ParallaxMappingMethod::Relief`] with a 5 steps, a reasonable default. 
+ pub const DEFAULT_RELIEF_MAPPING: Self = ParallaxMappingMethod::Relief { max_steps: 5 }; + + pub(crate) fn max_steps(&self) -> u32 { + match self { + ParallaxMappingMethod::Occlusion => 0, + ParallaxMappingMethod::Relief { max_steps } => *max_steps, + } + } +} diff --git a/crates/libmarathon/src/render/pbr/pbr_material.rs b/crates/libmarathon/src/render/pbr/pbr_material.rs new file mode 100644 index 0000000..c16a441 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/pbr_material.rs @@ -0,0 +1,1554 @@ +use bevy_asset::Asset; +use bevy_color::{Alpha, ColorToComponents}; +use bevy_math::{Affine2, Affine3, Mat2, Mat3, Vec2, Vec3, Vec4}; +use bevy_mesh::MeshVertexBufferLayoutRef; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{render_asset::RenderAssets, render_resource::*, texture::GpuImage}; +use bitflags::bitflags; + +use crate::render::pbr::{deferred::DEFAULT_PBR_DEFERRED_LIGHTING_PASS_ID, *}; + +/// An enum to define which UV attribute to use for a texture. +/// +/// It is used for every texture in the [`StandardMaterial`]. +/// It only supports two UV attributes, [`bevy_mesh::Mesh::ATTRIBUTE_UV_0`] and +/// [`bevy_mesh::Mesh::ATTRIBUTE_UV_1`]. +/// The default is [`UvChannel::Uv0`]. +#[derive(Reflect, Default, Debug, Clone, PartialEq, Eq)] +#[reflect(Default, Debug, Clone, PartialEq)] +pub enum UvChannel { + #[default] + Uv0, + Uv1, +} + +/// A material with "standard" properties used in PBR lighting. +/// Standard property values with pictures here: +/// . +/// +/// May be created directly from a [`Color`] or an [`Image`]. +#[derive(Asset, AsBindGroup, Reflect, Debug, Clone)] +#[bind_group_data(StandardMaterialKey)] +#[data(0, StandardMaterialUniform, binding_array(10))] +#[bindless(index_table(range(0..31)))] +#[reflect(Default, Debug, Clone)] +pub struct StandardMaterial { + /// The color of the surface of the material before lighting. 
+ /// + /// Doubles as diffuse albedo for non-metallic, specular for metallic and a mix for everything + /// in between. If used together with a `base_color_texture`, this is factored into the final + /// base color as `base_color * base_color_texture_value`. + /// + /// Defaults to [`Color::WHITE`]. + pub base_color: Color, + + /// The UV channel to use for the [`StandardMaterial::base_color_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + pub base_color_channel: UvChannel, + + /// The texture component of the material's color before lighting. + /// The actual pre-lighting color is `base_color * this_texture`. + /// + /// See [`base_color`] for details. + /// + /// You should set `base_color` to [`Color::WHITE`] (the default) + /// if you want the texture to show as-is. + /// + /// Setting `base_color` to something else than white will tint + /// the texture. For example, setting `base_color` to pure red will + /// tint the texture red. + /// + /// [`base_color`]: StandardMaterial::base_color + #[texture(1)] + #[sampler(2)] + #[dependency] + pub base_color_texture: Option>, + + // Use a color for user friendliness even though we technically don't use the alpha channel + // Might be used in the future for exposure correction in HDR + /// Color the material "emits" to the camera. + /// + /// This is typically used for monitor screens or LED lights. + /// Anything that can be visible even in darkness. + /// + /// The emissive color is added to what would otherwise be the material's visible color. + /// This means that for a light emissive value, in darkness, + /// you will mostly see the emissive component. + /// + /// The default emissive color is [`LinearRgba::BLACK`], which doesn't add anything to the material color. + /// + /// Emissive strength is controlled by the value of the color channels, + /// while the hue is controlled by their relative values. + /// + /// As a result, channel values for `emissive` + /// colors can exceed `1.0`. 
For instance, a `base_color` of + /// `LinearRgba::rgb(1.0, 0.0, 0.0)` represents the brightest + /// red for objects that reflect light, but an emissive color + /// like `LinearRgba::rgb(1000.0, 0.0, 0.0)` can be used to create + /// intensely bright red emissive effects. + /// + /// This results in a final luminance value when multiplied + /// by the value of the greyscale emissive texture (which ranges from 0 for black to 1 for white). + /// Luminance is a measure of the amount of light emitted per unit area, + /// and can be thought of as the "brightness" of the effect. + /// In Bevy, we treat these luminance values as the physical units of cd/m², aka nits. + /// + /// Increasing the emissive strength of the color will impact visual effects + /// like bloom, but it's important to note that **an emissive material won't + /// typically light up surrounding areas like a light source**, + /// it just adds a value to the color seen on screen. + pub emissive: LinearRgba, + + /// The weight in which the camera exposure influences the emissive color. + /// A value of `0.0` means the emissive color is not affected by the camera exposure. + /// In opposition, a value of `1.0` means the emissive color is multiplied by the camera exposure. + /// + /// Defaults to `0.0` + pub emissive_exposure_weight: f32, + + /// The UV channel to use for the [`StandardMaterial::emissive_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + pub emissive_channel: UvChannel, + + /// The emissive map, multiplies pixels with [`emissive`] + /// to get the final "emitting" color of a surface. + /// + /// This color is multiplied by [`emissive`] to get the final emitted color. + /// Meaning that you should set [`emissive`] to [`Color::WHITE`] + /// if you want to use the full range of color of the emissive texture. 
+ /// + /// [`emissive`]: StandardMaterial::emissive + #[texture(3)] + #[sampler(4)] + #[dependency] + pub emissive_texture: Option>, + + /// Linear perceptual roughness, clamped to `[0.089, 1.0]` in the shader. + /// + /// Defaults to `0.5`. + /// + /// Low values result in a "glossy" material with specular highlights, + /// while values close to `1` result in rough materials. + /// + /// If used together with a roughness/metallic texture, this is factored into the final base + /// color as `roughness * roughness_texture_value`. + /// + /// 0.089 is the minimum floating point value that won't be rounded down to 0 in the + /// calculations used. + // Technically for 32-bit floats, 0.045 could be used. + // See + pub perceptual_roughness: f32, + + /// How "metallic" the material appears, within `[0.0, 1.0]`. + /// + /// This should be set to 0.0 for dielectric materials or 1.0 for metallic materials. + /// For a hybrid surface such as corroded metal, you may need to use in-between values. + /// + /// Defaults to `0.00`, for dielectric. + /// + /// If used together with a roughness/metallic texture, this is factored into the final base + /// color as `metallic * metallic_texture_value`. + pub metallic: f32, + + /// The UV channel to use for the [`StandardMaterial::metallic_roughness_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + pub metallic_roughness_channel: UvChannel, + + /// Metallic and roughness maps, stored as a single texture. + /// + /// The blue channel contains metallic values, + /// and the green channel contains the roughness values. + /// Other channels are unused. + /// + /// Those values are multiplied by the scalar ones of the material, + /// see [`metallic`] and [`perceptual_roughness`] for details. + /// + /// Note that with the default values of [`metallic`] and [`perceptual_roughness`], + /// setting this texture has no effect. 
If you want to exclusively use the + /// `metallic_roughness_texture` values for your material, make sure to set [`metallic`] + /// and [`perceptual_roughness`] to `1.0`. + /// + /// [`metallic`]: StandardMaterial::metallic + /// [`perceptual_roughness`]: StandardMaterial::perceptual_roughness + #[texture(5)] + #[sampler(6)] + #[dependency] + pub metallic_roughness_texture: Option>, + + /// Specular intensity for non-metals on a linear scale of `[0.0, 1.0]`. + /// + /// Use the value as a way to control the intensity of the + /// specular highlight of the material, i.e. how reflective is the material, + /// rather than the physical property "reflectance." + /// + /// Set to `0.0`, no specular highlight is visible, the highlight is strongest + /// when `reflectance` is set to `1.0`. + /// + /// Defaults to `0.5` which is mapped to 4% reflectance in the shader. + #[doc(alias = "specular_intensity")] + pub reflectance: f32, + + /// A color with which to modulate the [`StandardMaterial::reflectance`] for + /// non-metals. + /// + /// The specular highlights and reflection are tinted with this color. Note + /// that it has no effect for non-metals. + /// + /// This feature is currently unsupported in the deferred rendering path, in + /// order to reduce the size of the geometry buffers. + /// + /// Defaults to [`Color::WHITE`]. + #[doc(alias = "specular_color")] + pub specular_tint: Color, + + /// The amount of light transmitted _diffusely_ through the material (i.e. “translucency”). + /// + /// Implemented as a second, flipped [Lambertian diffuse](https://en.wikipedia.org/wiki/Lambertian_reflectance) lobe, + /// which provides an inexpensive but plausible approximation of translucency for thin dielectric objects (e.g. paper, + /// leaves, some fabrics) or thicker volumetric materials with short scattering distances (e.g. porcelain, wax). + /// + /// For specular transmission usecases with refraction (e.g. 
glass) use the [`StandardMaterial::specular_transmission`] and + /// [`StandardMaterial::ior`] properties instead. + /// + /// - When set to `0.0` (the default) no diffuse light is transmitted; + /// - When set to `1.0` all diffuse light is transmitted through the material; + /// - Values higher than `0.5` will cause more diffuse light to be transmitted than reflected, resulting in a “darker” + /// appearance on the side facing the light than the opposite side. (e.g. plant leaves) + /// + /// ## Notes + /// + /// - The material's [`StandardMaterial::base_color`] also modulates the transmitted light; + /// - To receive transmitted shadows on the diffuse transmission lobe (i.e. the “backside”) of the material, + /// use the [`TransmittedShadowReceiver`](bevy_light::TransmittedShadowReceiver) component. + #[doc(alias = "translucency")] + pub diffuse_transmission: f32, + + /// The UV channel to use for the [`StandardMaterial::diffuse_transmission_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_transmission_textures")] + pub diffuse_transmission_channel: UvChannel, + + /// A map that modulates diffuse transmission via its alpha channel. Multiplied by [`StandardMaterial::diffuse_transmission`] + /// to obtain the final result. + /// + /// **Important:** The [`StandardMaterial::diffuse_transmission`] property must be set to a value higher than 0.0, + /// or this texture won't have any effect. + #[cfg_attr(feature = "pbr_transmission_textures", texture(19))] + #[cfg_attr(feature = "pbr_transmission_textures", sampler(20))] + #[cfg(feature = "pbr_transmission_textures")] + pub diffuse_transmission_texture: Option>, + + /// The amount of light transmitted _specularly_ through the material (i.e. via refraction). + /// + /// - When set to `0.0` (the default) no light is transmitted. + /// - When set to `1.0` all light is transmitted through the material. 
+ /// + /// The material's [`StandardMaterial::base_color`] also modulates the transmitted light. + /// + /// **Note:** Typically used in conjunction with [`StandardMaterial::thickness`], [`StandardMaterial::ior`] and [`StandardMaterial::perceptual_roughness`]. + /// + /// ## Performance + /// + /// Specular transmission is implemented as a relatively expensive screen-space effect that allows occluded objects to be seen through the material, + /// with distortion and blur effects. + /// + /// - [`Camera3d::screen_space_specular_transmission_steps`](bevy_camera::Camera3d::screen_space_specular_transmission_steps) can be used to enable transmissive objects + /// to be seen through other transmissive objects, at the cost of additional draw calls and texture copies; (Use with caution!) + /// - If a simplified approximation of specular transmission using only environment map lighting is sufficient, consider setting + /// [`Camera3d::screen_space_specular_transmission_steps`](bevy_camera::Camera3d::screen_space_specular_transmission_steps) to `0`. + /// - If purely diffuse light transmission is needed, (i.e. “translucency”) consider using [`StandardMaterial::diffuse_transmission`] instead, + /// for a much less expensive effect. + /// - Specular transmission is rendered before alpha blending, so any material with [`AlphaMode::Blend`], [`AlphaMode::Premultiplied`], [`AlphaMode::Add`] or [`AlphaMode::Multiply`] + /// won't be visible through specular transmissive materials. + #[doc(alias = "refraction")] + pub specular_transmission: f32, + + /// The UV channel to use for the [`StandardMaterial::specular_transmission_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_transmission_textures")] + pub specular_transmission_channel: UvChannel, + + /// A map that modulates specular transmission via its red channel. Multiplied by [`StandardMaterial::specular_transmission`] + /// to obtain the final result. 
+ /// + /// **Important:** The [`StandardMaterial::specular_transmission`] property must be set to a value higher than 0.0, + /// or this texture won't have any effect. + #[cfg_attr(feature = "pbr_transmission_textures", texture(15))] + #[cfg_attr(feature = "pbr_transmission_textures", sampler(16))] + #[cfg(feature = "pbr_transmission_textures")] + pub specular_transmission_texture: Option>, + + /// Thickness of the volume beneath the material surface. + /// + /// When set to `0.0` (the default) the material appears as an infinitely-thin film, + /// transmitting light without distorting it. + /// + /// When set to any other value, the material distorts light like a thick lens. + /// + /// **Note:** Typically used in conjunction with [`StandardMaterial::specular_transmission`] and [`StandardMaterial::ior`], or with + /// [`StandardMaterial::diffuse_transmission`]. + #[doc(alias = "volume")] + #[doc(alias = "thin_walled")] + pub thickness: f32, + + /// The UV channel to use for the [`StandardMaterial::thickness_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_transmission_textures")] + pub thickness_channel: UvChannel, + + /// A map that modulates thickness via its green channel. Multiplied by [`StandardMaterial::thickness`] + /// to obtain the final result. + /// + /// **Important:** The [`StandardMaterial::thickness`] property must be set to a value higher than 0.0, + /// or this texture won't have any effect. + #[cfg_attr(feature = "pbr_transmission_textures", texture(17))] + #[cfg_attr(feature = "pbr_transmission_textures", sampler(18))] + #[cfg(feature = "pbr_transmission_textures")] + pub thickness_texture: Option>, + + /// The [index of refraction](https://en.wikipedia.org/wiki/Refractive_index) of the material. + /// + /// Defaults to 1.5. 
+ /// + /// | Material | Index of Refraction | + /// |:----------------|:---------------------| + /// | Vacuum | 1 | + /// | Air | 1.00 | + /// | Ice | 1.31 | + /// | Water | 1.33 | + /// | Eyes | 1.38 | + /// | Quartz | 1.46 | + /// | Olive Oil | 1.47 | + /// | Honey | 1.49 | + /// | Acrylic | 1.49 | + /// | Window Glass | 1.52 | + /// | Polycarbonate | 1.58 | + /// | Flint Glass | 1.69 | + /// | Ruby | 1.71 | + /// | Glycerine | 1.74 | + /// | Sapphire | 1.77 | + /// | Cubic Zirconia | 2.15 | + /// | Diamond | 2.42 | + /// | Moissanite | 2.65 | + /// + /// **Note:** Typically used in conjunction with [`StandardMaterial::specular_transmission`] and [`StandardMaterial::thickness`]. + #[doc(alias = "index_of_refraction")] + #[doc(alias = "refraction_index")] + #[doc(alias = "refractive_index")] + pub ior: f32, + + /// How far, on average, light travels through the volume beneath the material's + /// surface before being absorbed. + /// + /// Defaults to [`f32::INFINITY`], i.e. light is never absorbed. + /// + /// **Note:** To have any effect, must be used in conjunction with: + /// - [`StandardMaterial::attenuation_color`]; + /// - [`StandardMaterial::thickness`]; + /// - [`StandardMaterial::diffuse_transmission`] or [`StandardMaterial::specular_transmission`]. + #[doc(alias = "absorption_distance")] + #[doc(alias = "extinction_distance")] + pub attenuation_distance: f32, + + /// The resulting (non-absorbed) color after white light travels through the attenuation distance. + /// + /// Defaults to [`Color::WHITE`], i.e. no change. + /// + /// **Note:** To have any effect, must be used in conjunction with: + /// - [`StandardMaterial::attenuation_distance`]; + /// - [`StandardMaterial::thickness`]; + /// - [`StandardMaterial::diffuse_transmission`] or [`StandardMaterial::specular_transmission`]. 
+ #[doc(alias = "absorption_color")] + #[doc(alias = "extinction_color")] + pub attenuation_color: Color, + + /// The UV channel to use for the [`StandardMaterial::normal_map_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + pub normal_map_channel: UvChannel, + + /// Used to fake the lighting of bumps and dents on a material. + /// + /// A typical usage would be faking cobblestones on a flat plane mesh in 3D. + /// + /// # Notes + /// + /// Normal mapping with `StandardMaterial` and the core bevy PBR shaders requires: + /// - A normal map texture + /// - Vertex UVs + /// - Vertex tangents + /// - Vertex normals + /// + /// Tangents do not have to be stored in your model, + /// they can be generated using the [`Mesh::generate_tangents`] or + /// [`Mesh::with_generated_tangents`] methods. + /// If your material has a normal map, but still renders as a flat surface, + /// make sure your meshes have their tangents set. + /// + /// [`Mesh::generate_tangents`]: bevy_mesh::Mesh::generate_tangents + /// [`Mesh::with_generated_tangents`]: bevy_mesh::Mesh::with_generated_tangents + /// + /// # Usage + /// + /// ``` + /// # use bevy_asset::{AssetServer, Handle}; + /// # use bevy_ecs::change_detection::Res; + /// # use bevy_image::{Image, ImageLoaderSettings}; + /// # + /// fn load_normal_map(asset_server: Res) { + /// let normal_handle: Handle = asset_server.load_with_settings( + /// "textures/parallax_example/cube_normal.png", + /// // The normal map texture is in linear color space. Lighting won't look correct + /// // if `is_srgb` is `true`, which is the default. + /// |settings: &mut ImageLoaderSettings| settings.is_srgb = false, + /// ); + /// } + /// ``` + #[texture(9)] + #[sampler(10)] + #[dependency] + pub normal_map_texture: Option>, + + /// Normal map textures authored for DirectX have their y-component flipped. Set this to flip + /// it to right-handed conventions. 
+ pub flip_normal_map_y: bool, + + /// The UV channel to use for the [`StandardMaterial::occlusion_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + pub occlusion_channel: UvChannel, + + /// Specifies the level of exposure to ambient light. + /// + /// This is usually generated and stored automatically ("baked") by 3D-modeling software. + /// + /// Typically, steep concave parts of a model (such as the armpit of a shirt) are darker, + /// because they have little exposure to light. + /// An occlusion map specifies those parts of the model that light doesn't reach well. + /// + /// The material will be less lit in places where this texture is dark. + /// This is similar to ambient occlusion, but built into the model. + #[texture(7)] + #[sampler(8)] + #[dependency] + pub occlusion_texture: Option>, + + /// The UV channel to use for the [`StandardMaterial::specular_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_specular_textures")] + pub specular_channel: UvChannel, + + /// A map that specifies reflectance for non-metallic materials. + /// + /// Alpha values from [0.0, 1.0] in this texture are linearly mapped to + /// reflectance values of [0.0, 0.5] and multiplied by the constant + /// [`StandardMaterial::reflectance`] value. This follows the + /// `KHR_materials_specular` specification. The map will have no effect if + /// the material is fully metallic. + /// + /// When using this map, you may wish to set the + /// [`StandardMaterial::reflectance`] value to 2.0 so that this map can + /// express the full [0.0, 1.0] range of values. + /// + /// Note that, because the reflectance is stored in the alpha channel, and + /// the [`StandardMaterial::specular_tint_texture`] has no alpha value, it + /// may be desirable to pack the values together and supply the same + /// texture to both fields. 
+ #[cfg_attr(feature = "pbr_specular_textures", texture(27))] + #[cfg_attr(feature = "pbr_specular_textures", sampler(28))] + #[cfg(feature = "pbr_specular_textures")] + pub specular_texture: Option>, + + /// The UV channel to use for the + /// [`StandardMaterial::specular_tint_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_specular_textures")] + pub specular_tint_channel: UvChannel, + + /// A map that specifies color adjustment to be applied to the specular + /// reflection for non-metallic materials. + /// + /// The RGB values of this texture modulate the + /// [`StandardMaterial::specular_tint`] value. See the documentation for + /// that field for more information. + /// + /// Like the fixed specular tint value, this texture map isn't supported in + /// the deferred renderer. + #[cfg_attr(feature = "pbr_specular_textures", texture(29))] + #[cfg_attr(feature = "pbr_specular_textures", sampler(30))] + #[cfg(feature = "pbr_specular_textures")] + pub specular_tint_texture: Option>, + + /// An extra thin translucent layer on top of the main PBR layer. This is + /// typically used for painted surfaces. + /// + /// This value specifies the strength of the layer, which affects how + /// visible the clearcoat layer will be. + /// + /// Defaults to zero, specifying no clearcoat layer. + pub clearcoat: f32, + + /// The UV channel to use for the [`StandardMaterial::clearcoat_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_channel: UvChannel, + + /// An image texture that specifies the strength of the clearcoat layer in + /// the red channel. Values sampled from this texture are multiplied by the + /// main [`StandardMaterial::clearcoat`] factor. + /// + /// As this is a non-color map, it must not be loaded as sRGB. 
+ #[cfg_attr(feature = "pbr_multi_layer_material_textures", texture(21))] + #[cfg_attr(feature = "pbr_multi_layer_material_textures", sampler(22))] + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_texture: Option>, + + /// The roughness of the clearcoat material. This is specified in exactly + /// the same way as the [`StandardMaterial::perceptual_roughness`]. + /// + /// If the [`StandardMaterial::clearcoat`] value if zero, this has no + /// effect. + /// + /// Defaults to 0.5. + pub clearcoat_perceptual_roughness: f32, + + /// The UV channel to use for the [`StandardMaterial::clearcoat_roughness_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_roughness_channel: UvChannel, + + /// An image texture that specifies the roughness of the clearcoat level in + /// the green channel. Values from this texture are multiplied by the main + /// [`StandardMaterial::clearcoat_perceptual_roughness`] factor. + /// + /// As this is a non-color map, it must not be loaded as sRGB. + #[cfg_attr(feature = "pbr_multi_layer_material_textures", texture(23))] + #[cfg_attr(feature = "pbr_multi_layer_material_textures", sampler(24))] + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_roughness_texture: Option>, + + /// The UV channel to use for the [`StandardMaterial::clearcoat_normal_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_normal_channel: UvChannel, + + /// An image texture that specifies a normal map that is to be applied to + /// the clearcoat layer. This can be used to simulate, for example, + /// scratches on an outer layer of varnish. Normal maps are in the same + /// format as [`StandardMaterial::normal_map_texture`]. + /// + /// Note that, if a clearcoat normal map isn't specified, the main normal + /// map, if any, won't be applied to the clearcoat. 
If you want a normal map + /// that applies to both the main material and to the clearcoat, specify it + /// in both [`StandardMaterial::normal_map_texture`] and this field. + /// + /// As this is a non-color map, it must not be loaded as sRGB. + #[cfg_attr(feature = "pbr_multi_layer_material_textures", texture(25))] + #[cfg_attr(feature = "pbr_multi_layer_material_textures", sampler(26))] + #[cfg(feature = "pbr_multi_layer_material_textures")] + pub clearcoat_normal_texture: Option>, + + /// Increases the roughness along a specific direction, so that the specular + /// highlight will be stretched instead of being a circular lobe. + /// + /// This value ranges from 0 (perfectly circular) to 1 (maximally + /// stretched). The default direction (corresponding to a + /// [`StandardMaterial::anisotropy_rotation`] of 0) aligns with the + /// *tangent* of the mesh; thus mesh tangents must be specified in order for + /// this parameter to have any meaning. The direction can be changed using + /// the [`StandardMaterial::anisotropy_rotation`] parameter. + /// + /// This is typically used for modeling surfaces such as brushed metal and + /// hair, in which one direction of the surface but not the other is smooth. + /// + /// See the [`KHR_materials_anisotropy` specification] for more details. + /// + /// [`KHR_materials_anisotropy` specification]: + /// https://github.com/KhronosGroup/glTF/blob/main/extensions/2.0/Khronos/KHR_materials_anisotropy/README.md + pub anisotropy_strength: f32, + + /// The direction of increased roughness, in radians relative to the mesh + /// tangent. + /// + /// This parameter causes the roughness to vary according to the + /// [`StandardMaterial::anisotropy_strength`]. The rotation is applied in + /// tangent-bitangent space; thus, mesh tangents must be present for this + /// parameter to have any meaning. + /// + /// This parameter has no effect if + /// [`StandardMaterial::anisotropy_strength`] is zero. 
Its value can + /// optionally be adjusted across the mesh with the + /// [`StandardMaterial::anisotropy_texture`]. + /// + /// See the [`KHR_materials_anisotropy` specification] for more details. + /// + /// [`KHR_materials_anisotropy` specification]: + /// https://github.com/KhronosGroup/glTF/blob/main/extensions/2.0/Khronos/KHR_materials_anisotropy/README.md + pub anisotropy_rotation: f32, + + /// The UV channel to use for the [`StandardMaterial::anisotropy_texture`]. + /// + /// Defaults to [`UvChannel::Uv0`]. + #[cfg(feature = "pbr_anisotropy_texture")] + pub anisotropy_channel: UvChannel, + + /// An image texture that allows the + /// [`StandardMaterial::anisotropy_strength`] and + /// [`StandardMaterial::anisotropy_rotation`] to vary across the mesh. + /// + /// The [`KHR_materials_anisotropy` specification] defines the format that + /// this texture must take. To summarize: the direction vector is encoded in + /// the red and green channels, while the strength is encoded in the blue + /// channels. For the direction vector, the red and green channels map the + /// color range [0, 1] to the vector range [-1, 1]. The direction vector + /// encoded in this texture modifies the default rotation direction in + /// tangent-bitangent space, before the + /// [`StandardMaterial::anisotropy_rotation`] parameter is applied. The + /// value in the blue channel is multiplied by the + /// [`StandardMaterial::anisotropy_strength`] value to produce the final + /// anisotropy strength. + /// + /// As the texel values don't represent colors, this texture must be in + /// linear color space, not sRGB. 
+ /// + /// [`KHR_materials_anisotropy` specification]: + /// https://github.com/KhronosGroup/glTF/blob/main/extensions/2.0/Khronos/KHR_materials_anisotropy/README.md + #[cfg_attr(feature = "pbr_anisotropy_texture", texture(13))] + #[cfg_attr(feature = "pbr_anisotropy_texture", sampler(14))] + #[cfg(feature = "pbr_anisotropy_texture")] + pub anisotropy_texture: Option>, + + /// Support two-sided lighting by automatically flipping the normals for "back" faces + /// within the PBR lighting shader. + /// + /// Defaults to `false`. + /// This does not automatically configure backface culling, + /// which can be done via `cull_mode`. + pub double_sided: bool, + + /// Whether to cull the "front", "back" or neither side of a mesh. + /// If set to `None`, the two sides of the mesh are visible. + /// + /// Defaults to `Some(Face::Back)`. + /// In bevy, the order of declaration of a triangle's vertices + /// in [`Mesh`] defines the triangle's front face. + /// + /// When a triangle is in a viewport, + /// if its vertices appear counter-clockwise from the viewport's perspective, + /// then the viewport is seeing the triangle's front face. + /// Conversely, if the vertices appear clockwise, you are seeing the back face. + /// + /// In short, in bevy, front faces winds counter-clockwise. + /// + /// Your 3D editing software should manage all of that. + /// + /// [`Mesh`]: bevy_mesh::Mesh + // TODO: include this in reflection somehow (maybe via remote types like serde https://serde.rs/remote-derive.html) + #[reflect(ignore, clone)] + pub cull_mode: Option, + + /// Whether to apply only the base color to this material. + /// + /// Normals, occlusion textures, roughness, metallic, reflectance, emissive, + /// shadows, alpha mode and ambient light are ignored if this is set to `true`. + pub unlit: bool, + + /// Whether to enable fog for this material. + pub fog_enabled: bool, + + /// How to apply the alpha channel of the `base_color_texture`. 
+ /// + /// See [`AlphaMode`] for details. Defaults to [`AlphaMode::Opaque`]. + pub alpha_mode: AlphaMode, + + /// Adjust rendered depth. + /// + /// A material with a positive depth bias will render closer to the + /// camera while negative values cause the material to render behind + /// other objects. This is independent of the viewport. + /// + /// `depth_bias` affects render ordering and depth write operations + /// using the `wgpu::DepthBiasState::Constant` field. + /// + /// [z-fighting]: https://en.wikipedia.org/wiki/Z-fighting + pub depth_bias: f32, + + /// The depth map used for [parallax mapping]. + /// + /// It is a grayscale image where white represents bottom and black the top. + /// If this field is set, bevy will apply [parallax mapping]. + /// Parallax mapping, unlike simple normal maps, will move the texture + /// coordinate according to the current perspective, + /// giving actual depth to the texture. + /// + /// The visual result is similar to a displacement map, + /// but does not require additional geometry. + /// + /// Use the [`parallax_depth_scale`] field to control the depth of the parallax. + /// + /// ## Limitations + /// + /// - It will look weird on bent/non-planar surfaces. + /// - The depth of the pixel does not reflect its visual position, resulting + /// in artifacts for depth-dependent features such as fog or SSAO. + /// - For the same reason, the geometry silhouette will always be + /// the one of the actual geometry, not the parallaxed version, resulting + /// in awkward looks on intersecting parallaxed surfaces. + /// + /// ## Performance + /// + /// Parallax mapping requires multiple texture lookups, proportional to + /// [`max_parallax_layer_count`], which might be costly. + /// + /// Use the [`parallax_mapping_method`] and [`max_parallax_layer_count`] fields + /// to tweak the shader, trading graphical quality for performance. 
+ /// + /// To improve performance, set your `depth_map`'s [`Image::sampler`] + /// filter mode to `FilterMode::Nearest`, as [this paper] indicates, it improves + /// performance a bit. + /// + /// To reduce artifacts, avoid steep changes in depth, blurring the depth + /// map helps with this. + /// + /// Larger depth maps haves a disproportionate performance impact. + /// + /// [this paper]: https://www.diva-portal.org/smash/get/diva2:831762/FULLTEXT01.pdf + /// [parallax mapping]: https://en.wikipedia.org/wiki/Parallax_mapping + /// [`parallax_depth_scale`]: StandardMaterial::parallax_depth_scale + /// [`parallax_mapping_method`]: StandardMaterial::parallax_mapping_method + /// [`max_parallax_layer_count`]: StandardMaterial::max_parallax_layer_count + #[texture(11)] + #[sampler(12)] + #[dependency] + pub depth_map: Option>, + + /// How deep the offset introduced by the depth map should be. + /// + /// Default is `0.1`, anything over that value may look distorted. + /// Lower values lessen the effect. + /// + /// The depth is relative to texture size. This means that if your texture + /// occupies a surface of `1` world unit, and `parallax_depth_scale` is `0.1`, then + /// the in-world depth will be of `0.1` world units. + /// If the texture stretches for `10` world units, then the final depth + /// will be of `1` world unit. + pub parallax_depth_scale: f32, + + /// Which parallax mapping method to use. + /// + /// We recommend that all objects use the same [`ParallaxMappingMethod`], to avoid + /// duplicating and running two shaders. + pub parallax_mapping_method: ParallaxMappingMethod, + + /// In how many layers to split the depth maps for parallax mapping. + /// + /// If you are seeing jaggy edges, increase this value. + /// However, this incurs a performance cost. 
+ /// + /// Dependent on the situation, switching to [`ParallaxMappingMethod::Relief`] + /// and keeping this value low might have better performance than increasing the + /// layer count while using [`ParallaxMappingMethod::Occlusion`]. + /// + /// Default is `16.0`. + pub max_parallax_layer_count: f32, + + /// The exposure (brightness) level of the lightmap, if present. + pub lightmap_exposure: f32, + + /// Render method used for opaque materials. (Where `alpha_mode` is [`AlphaMode::Opaque`] or [`AlphaMode::Mask`]) + pub opaque_render_method: OpaqueRendererMethod, + + /// Used for selecting the deferred lighting pass for deferred materials. + /// Default is [`DEFAULT_PBR_DEFERRED_LIGHTING_PASS_ID`] for default + /// PBR deferred lighting pass. Ignored in the case of forward materials. + pub deferred_lighting_pass_id: u8, + + /// The transform applied to the UVs corresponding to `ATTRIBUTE_UV_0` on the mesh before sampling. Default is identity. + pub uv_transform: Affine2, +} + +impl StandardMaterial { + /// Horizontal flipping transform + /// + /// Multiplying this with another Affine2 returns transformation with horizontally flipped texture coords + pub const FLIP_HORIZONTAL: Affine2 = Affine2 { + matrix2: Mat2::from_cols(Vec2::new(-1.0, 0.0), Vec2::Y), + translation: Vec2::X, + }; + + /// Vertical flipping transform + /// + /// Multiplying this with another Affine2 returns transformation with vertically flipped texture coords + pub const FLIP_VERTICAL: Affine2 = Affine2 { + matrix2: Mat2::from_cols(Vec2::X, Vec2::new(0.0, -1.0)), + translation: Vec2::Y, + }; + + /// Flipping X 3D transform + /// + /// Multiplying this with another Affine3 returns transformation with flipped X coords + pub const FLIP_X: Affine3 = Affine3 { + matrix3: Mat3::from_cols(Vec3::new(-1.0, 0.0, 0.0), Vec3::Y, Vec3::Z), + translation: Vec3::X, + }; + + /// Flipping Y 3D transform + /// + /// Multiplying this with another Affine3 returns transformation with flipped Y coords + pub const 
FLIP_Y: Affine3 = Affine3 { + matrix3: Mat3::from_cols(Vec3::X, Vec3::new(0.0, -1.0, 0.0), Vec3::Z), + translation: Vec3::Y, + }; + + /// Flipping Z 3D transform + /// + /// Multiplying this with another Affine3 returns transformation with flipped Z coords + pub const FLIP_Z: Affine3 = Affine3 { + matrix3: Mat3::from_cols(Vec3::X, Vec3::Y, Vec3::new(0.0, 0.0, -1.0)), + translation: Vec3::Z, + }; + + /// Flip the texture coordinates of the material. + pub fn flip(&mut self, horizontal: bool, vertical: bool) { + if horizontal { + // Multiplication of `Affine2` is order dependent, which is why + // we do not use the `*=` operator. + self.uv_transform = Self::FLIP_HORIZONTAL * self.uv_transform; + } + if vertical { + self.uv_transform = Self::FLIP_VERTICAL * self.uv_transform; + } + } + + /// Consumes the material and returns a material with flipped texture coordinates + pub fn flipped(mut self, horizontal: bool, vertical: bool) -> Self { + self.flip(horizontal, vertical); + self + } + + /// Creates a new material from a given color + pub fn from_color(color: impl Into) -> Self { + Self::from(color.into()) + } +} + +impl Default for StandardMaterial { + fn default() -> Self { + StandardMaterial { + // White because it gets multiplied with texture values if someone uses + // a texture. + base_color: Color::WHITE, + base_color_channel: UvChannel::Uv0, + base_color_texture: None, + emissive: LinearRgba::BLACK, + emissive_exposure_weight: 0.0, + emissive_channel: UvChannel::Uv0, + emissive_texture: None, + // Matches Blender's default roughness. + perceptual_roughness: 0.5, + // Metallic should generally be set to 0.0 or 1.0. 
+ metallic: 0.0, + metallic_roughness_channel: UvChannel::Uv0, + metallic_roughness_texture: None, + // Minimum real-world reflectance is 2%, most materials between 2-5% + // Expressed in a linear scale and equivalent to 4% reflectance see + // + reflectance: 0.5, + diffuse_transmission: 0.0, + #[cfg(feature = "pbr_transmission_textures")] + diffuse_transmission_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_transmission_textures")] + diffuse_transmission_texture: None, + specular_transmission: 0.0, + #[cfg(feature = "pbr_transmission_textures")] + specular_transmission_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_transmission_textures")] + specular_transmission_texture: None, + thickness: 0.0, + #[cfg(feature = "pbr_transmission_textures")] + thickness_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_transmission_textures")] + thickness_texture: None, + ior: 1.5, + attenuation_color: Color::WHITE, + attenuation_distance: f32::INFINITY, + occlusion_channel: UvChannel::Uv0, + occlusion_texture: None, + normal_map_channel: UvChannel::Uv0, + normal_map_texture: None, + #[cfg(feature = "pbr_specular_textures")] + specular_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_specular_textures")] + specular_texture: None, + specular_tint: Color::WHITE, + #[cfg(feature = "pbr_specular_textures")] + specular_tint_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_specular_textures")] + specular_tint_texture: None, + clearcoat: 0.0, + clearcoat_perceptual_roughness: 0.5, + #[cfg(feature = "pbr_multi_layer_material_textures")] + clearcoat_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_multi_layer_material_textures")] + clearcoat_texture: None, + #[cfg(feature = "pbr_multi_layer_material_textures")] + clearcoat_roughness_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_multi_layer_material_textures")] + clearcoat_roughness_texture: None, + #[cfg(feature = "pbr_multi_layer_material_textures")] + clearcoat_normal_channel: UvChannel::Uv0, + #[cfg(feature = 
"pbr_multi_layer_material_textures")] + clearcoat_normal_texture: None, + anisotropy_strength: 0.0, + anisotropy_rotation: 0.0, + #[cfg(feature = "pbr_anisotropy_texture")] + anisotropy_channel: UvChannel::Uv0, + #[cfg(feature = "pbr_anisotropy_texture")] + anisotropy_texture: None, + flip_normal_map_y: false, + double_sided: false, + cull_mode: Some(Face::Back), + unlit: false, + fog_enabled: true, + alpha_mode: AlphaMode::Opaque, + depth_bias: 0.0, + depth_map: None, + parallax_depth_scale: 0.1, + max_parallax_layer_count: 16.0, + lightmap_exposure: 1.0, + parallax_mapping_method: ParallaxMappingMethod::Occlusion, + opaque_render_method: OpaqueRendererMethod::Auto, + deferred_lighting_pass_id: DEFAULT_PBR_DEFERRED_LIGHTING_PASS_ID, + uv_transform: Affine2::IDENTITY, + } + } +} + +impl From for StandardMaterial { + fn from(color: Color) -> Self { + StandardMaterial { + base_color: color, + alpha_mode: if color.alpha() < 1.0 { + AlphaMode::Blend + } else { + AlphaMode::Opaque + }, + ..Default::default() + } + } +} + +impl From> for StandardMaterial { + fn from(texture: Handle) -> Self { + StandardMaterial { + base_color_texture: Some(texture), + ..Default::default() + } + } +} + +// NOTE: These must match the bit flags in bevy_pbr/src/render/pbr_types.wgsl! +bitflags::bitflags! { + /// Bitflags info about the material a shader is currently rendering. 
+ /// This is accessible in the shader in the [`StandardMaterialUniform`] + #[repr(transparent)] + pub struct StandardMaterialFlags: u32 { + const BASE_COLOR_TEXTURE = 1 << 0; + const EMISSIVE_TEXTURE = 1 << 1; + const METALLIC_ROUGHNESS_TEXTURE = 1 << 2; + const OCCLUSION_TEXTURE = 1 << 3; + const DOUBLE_SIDED = 1 << 4; + const UNLIT = 1 << 5; + const TWO_COMPONENT_NORMAL_MAP = 1 << 6; + const FLIP_NORMAL_MAP_Y = 1 << 7; + const FOG_ENABLED = 1 << 8; + const DEPTH_MAP = 1 << 9; // Used for parallax mapping + const SPECULAR_TRANSMISSION_TEXTURE = 1 << 10; + const THICKNESS_TEXTURE = 1 << 11; + const DIFFUSE_TRANSMISSION_TEXTURE = 1 << 12; + const ATTENUATION_ENABLED = 1 << 13; + const CLEARCOAT_TEXTURE = 1 << 14; + const CLEARCOAT_ROUGHNESS_TEXTURE = 1 << 15; + const CLEARCOAT_NORMAL_TEXTURE = 1 << 16; + const ANISOTROPY_TEXTURE = 1 << 17; + const SPECULAR_TEXTURE = 1 << 18; + const SPECULAR_TINT_TEXTURE = 1 << 19; + const ALPHA_MODE_RESERVED_BITS = Self::ALPHA_MODE_MASK_BITS << Self::ALPHA_MODE_SHIFT_BITS; // ← Bitmask reserving bits for the `AlphaMode` + const ALPHA_MODE_OPAQUE = 0 << Self::ALPHA_MODE_SHIFT_BITS; // ← Values are just sequential values bitshifted into + const ALPHA_MODE_MASK = 1 << Self::ALPHA_MODE_SHIFT_BITS; // the bitmask, and can range from 0 to 7. 
+ const ALPHA_MODE_BLEND = 2 << Self::ALPHA_MODE_SHIFT_BITS; // + const ALPHA_MODE_PREMULTIPLIED = 3 << Self::ALPHA_MODE_SHIFT_BITS; // + const ALPHA_MODE_ADD = 4 << Self::ALPHA_MODE_SHIFT_BITS; // Right now only values 0–5 are used, which still gives + const ALPHA_MODE_MULTIPLY = 5 << Self::ALPHA_MODE_SHIFT_BITS; // ← us "room" for two more modes without adding more bits + const ALPHA_MODE_ALPHA_TO_COVERAGE = 6 << Self::ALPHA_MODE_SHIFT_BITS; + const NONE = 0; + const UNINITIALIZED = 0xFFFF; + } +} + +impl StandardMaterialFlags { + const ALPHA_MODE_MASK_BITS: u32 = 0b111; + const ALPHA_MODE_SHIFT_BITS: u32 = 32 - Self::ALPHA_MODE_MASK_BITS.count_ones(); +} + +/// The GPU representation of the uniform data of a [`StandardMaterial`]. +#[derive(Clone, Default, ShaderType)] +pub struct StandardMaterialUniform { + /// Doubles as diffuse albedo for non-metallic, specular for metallic and a mix for everything + /// in between. + pub base_color: Vec4, + // Use a color for user-friendliness even though we technically don't use the alpha channel + // Might be used in the future for exposure correction in HDR + pub emissive: Vec4, + /// Color white light takes after traveling through the attenuation distance underneath the material surface + pub attenuation_color: Vec4, + /// The transform applied to the UVs corresponding to `ATTRIBUTE_UV_0` on the mesh before sampling. Default is identity. 
+ pub uv_transform: Mat3, + /// Specular intensity for non-metals on a linear scale of [0.0, 1.0] + /// defaults to 0.5 which is mapped to 4% reflectance in the shader + pub reflectance: Vec3, + /// Linear perceptual roughness, clamped to [0.089, 1.0] in the shader + /// Defaults to minimum of 0.089 + pub roughness: f32, + /// From [0.0, 1.0], dielectric to pure metallic + pub metallic: f32, + /// Amount of diffuse light transmitted through the material + pub diffuse_transmission: f32, + /// Amount of specular light transmitted through the material + pub specular_transmission: f32, + /// Thickness of the volume underneath the material surface + pub thickness: f32, + /// Index of Refraction + pub ior: f32, + /// How far light travels through the volume underneath the material surface before being absorbed + pub attenuation_distance: f32, + pub clearcoat: f32, + pub clearcoat_perceptual_roughness: f32, + pub anisotropy_strength: f32, + pub anisotropy_rotation: Vec2, + /// The [`StandardMaterialFlags`] accessible in the `wgsl` shader. + pub flags: u32, + /// When the alpha mode mask flag is set, any base color alpha above this cutoff means fully opaque, + /// and any below means fully transparent. + pub alpha_cutoff: f32, + /// The depth of the [`StandardMaterial::depth_map`] to apply. + pub parallax_depth_scale: f32, + /// In how many layers to split the depth maps for Steep parallax mapping. + /// + /// If your `parallax_depth_scale` is >0.1 and you are seeing jaggy edges, + /// increase this value. However, this incurs a performance cost. + pub max_parallax_layer_count: f32, + /// The exposure (brightness) level of the lightmap, if present. + pub lightmap_exposure: f32, + /// Using [`ParallaxMappingMethod::Relief`], how many additional + /// steps to use at most to find the depth value. + pub max_relief_mapping_search_steps: u32, + /// ID for specifying which deferred lighting pass should be used for rendering this material, if any. 
+ pub deferred_lighting_pass_id: u32, +} + +impl AsBindGroupShaderType for StandardMaterial { + fn as_bind_group_shader_type( + &self, + images: &RenderAssets, + ) -> StandardMaterialUniform { + let mut flags = StandardMaterialFlags::NONE; + if self.base_color_texture.is_some() { + flags |= StandardMaterialFlags::BASE_COLOR_TEXTURE; + } + if self.emissive_texture.is_some() { + flags |= StandardMaterialFlags::EMISSIVE_TEXTURE; + } + if self.metallic_roughness_texture.is_some() { + flags |= StandardMaterialFlags::METALLIC_ROUGHNESS_TEXTURE; + } + if self.occlusion_texture.is_some() { + flags |= StandardMaterialFlags::OCCLUSION_TEXTURE; + } + if self.double_sided { + flags |= StandardMaterialFlags::DOUBLE_SIDED; + } + if self.unlit { + flags |= StandardMaterialFlags::UNLIT; + } + if self.fog_enabled { + flags |= StandardMaterialFlags::FOG_ENABLED; + } + if self.depth_map.is_some() { + flags |= StandardMaterialFlags::DEPTH_MAP; + } + #[cfg(feature = "pbr_transmission_textures")] + { + if self.specular_transmission_texture.is_some() { + flags |= StandardMaterialFlags::SPECULAR_TRANSMISSION_TEXTURE; + } + if self.thickness_texture.is_some() { + flags |= StandardMaterialFlags::THICKNESS_TEXTURE; + } + if self.diffuse_transmission_texture.is_some() { + flags |= StandardMaterialFlags::DIFFUSE_TRANSMISSION_TEXTURE; + } + } + + #[cfg(feature = "pbr_anisotropy_texture")] + { + if self.anisotropy_texture.is_some() { + flags |= StandardMaterialFlags::ANISOTROPY_TEXTURE; + } + } + + #[cfg(feature = "pbr_specular_textures")] + { + if self.specular_texture.is_some() { + flags |= StandardMaterialFlags::SPECULAR_TEXTURE; + } + if self.specular_tint_texture.is_some() { + flags |= StandardMaterialFlags::SPECULAR_TINT_TEXTURE; + } + } + + #[cfg(feature = "pbr_multi_layer_material_textures")] + { + if self.clearcoat_texture.is_some() { + flags |= StandardMaterialFlags::CLEARCOAT_TEXTURE; + } + if self.clearcoat_roughness_texture.is_some() { + flags |= 
StandardMaterialFlags::CLEARCOAT_ROUGHNESS_TEXTURE; + } + if self.clearcoat_normal_texture.is_some() { + flags |= StandardMaterialFlags::CLEARCOAT_NORMAL_TEXTURE; + } + } + + let has_normal_map = self.normal_map_texture.is_some(); + if has_normal_map { + let normal_map_id = self.normal_map_texture.as_ref().map(Handle::id).unwrap(); + if let Some(texture) = images.get(normal_map_id) { + match texture.texture_format { + // All 2-component unorm formats + TextureFormat::Rg8Unorm + | TextureFormat::Rg16Unorm + | TextureFormat::Bc5RgUnorm + | TextureFormat::EacRg11Unorm => { + flags |= StandardMaterialFlags::TWO_COMPONENT_NORMAL_MAP; + } + _ => {} + } + } + if self.flip_normal_map_y { + flags |= StandardMaterialFlags::FLIP_NORMAL_MAP_Y; + } + } + // NOTE: 0.5 is from the glTF default - do we want this? + let mut alpha_cutoff = 0.5; + match self.alpha_mode { + AlphaMode::Opaque => flags |= StandardMaterialFlags::ALPHA_MODE_OPAQUE, + AlphaMode::Mask(c) => { + alpha_cutoff = c; + flags |= StandardMaterialFlags::ALPHA_MODE_MASK; + } + AlphaMode::Blend => flags |= StandardMaterialFlags::ALPHA_MODE_BLEND, + AlphaMode::Premultiplied => flags |= StandardMaterialFlags::ALPHA_MODE_PREMULTIPLIED, + AlphaMode::Add => flags |= StandardMaterialFlags::ALPHA_MODE_ADD, + AlphaMode::Multiply => flags |= StandardMaterialFlags::ALPHA_MODE_MULTIPLY, + AlphaMode::AlphaToCoverage => { + flags |= StandardMaterialFlags::ALPHA_MODE_ALPHA_TO_COVERAGE; + } + }; + + if self.attenuation_distance.is_finite() { + flags |= StandardMaterialFlags::ATTENUATION_ENABLED; + } + + let mut emissive = self.emissive.to_vec4(); + emissive[3] = self.emissive_exposure_weight; + + // Doing this up front saves having to do this repeatedly in the fragment shader. 
+ let anisotropy_rotation = Vec2::from_angle(self.anisotropy_rotation); + + StandardMaterialUniform { + base_color: LinearRgba::from(self.base_color).to_vec4(), + emissive, + roughness: self.perceptual_roughness, + metallic: self.metallic, + reflectance: LinearRgba::from(self.specular_tint).to_vec3() * self.reflectance, + clearcoat: self.clearcoat, + clearcoat_perceptual_roughness: self.clearcoat_perceptual_roughness, + anisotropy_strength: self.anisotropy_strength, + anisotropy_rotation, + diffuse_transmission: self.diffuse_transmission, + specular_transmission: self.specular_transmission, + thickness: self.thickness, + ior: self.ior, + attenuation_distance: self.attenuation_distance, + attenuation_color: LinearRgba::from(self.attenuation_color) + .to_f32_array() + .into(), + flags: flags.bits(), + alpha_cutoff, + parallax_depth_scale: self.parallax_depth_scale, + max_parallax_layer_count: self.max_parallax_layer_count, + lightmap_exposure: self.lightmap_exposure, + max_relief_mapping_search_steps: self.parallax_mapping_method.max_steps(), + deferred_lighting_pass_id: self.deferred_lighting_pass_id as u32, + uv_transform: self.uv_transform.into(), + } + } +} + +bitflags! { + /// The pipeline key for `StandardMaterial`, packed into 64 bits. 
+ #[repr(C)] + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct StandardMaterialKey: u64 { + const CULL_FRONT = 0x000001; + const CULL_BACK = 0x000002; + const NORMAL_MAP = 0x000004; + const RELIEF_MAPPING = 0x000008; + const DIFFUSE_TRANSMISSION = 0x000010; + const SPECULAR_TRANSMISSION = 0x000020; + const CLEARCOAT = 0x000040; + const CLEARCOAT_NORMAL_MAP = 0x000080; + const ANISOTROPY = 0x000100; + const BASE_COLOR_UV = 0x000200; + const EMISSIVE_UV = 0x000400; + const METALLIC_ROUGHNESS_UV = 0x000800; + const OCCLUSION_UV = 0x001000; + const SPECULAR_TRANSMISSION_UV = 0x002000; + const THICKNESS_UV = 0x004000; + const DIFFUSE_TRANSMISSION_UV = 0x008000; + const NORMAL_MAP_UV = 0x010000; + const ANISOTROPY_UV = 0x020000; + const CLEARCOAT_UV = 0x040000; + const CLEARCOAT_ROUGHNESS_UV = 0x080000; + const CLEARCOAT_NORMAL_UV = 0x100000; + const SPECULAR_UV = 0x200000; + const SPECULAR_TINT_UV = 0x400000; + const DEPTH_BIAS = 0xffffffff_00000000; + } +} + +const STANDARD_MATERIAL_KEY_DEPTH_BIAS_SHIFT: u64 = 32; + +impl From<&StandardMaterial> for StandardMaterialKey { + fn from(material: &StandardMaterial) -> Self { + let mut key = StandardMaterialKey::empty(); + key.set( + StandardMaterialKey::CULL_FRONT, + material.cull_mode == Some(Face::Front), + ); + key.set( + StandardMaterialKey::CULL_BACK, + material.cull_mode == Some(Face::Back), + ); + key.set( + StandardMaterialKey::NORMAL_MAP, + material.normal_map_texture.is_some(), + ); + key.set( + StandardMaterialKey::RELIEF_MAPPING, + matches!( + material.parallax_mapping_method, + ParallaxMappingMethod::Relief { .. 
} + ), + ); + key.set( + StandardMaterialKey::DIFFUSE_TRANSMISSION, + material.diffuse_transmission > 0.0, + ); + key.set( + StandardMaterialKey::SPECULAR_TRANSMISSION, + material.specular_transmission > 0.0, + ); + + key.set(StandardMaterialKey::CLEARCOAT, material.clearcoat > 0.0); + + #[cfg(feature = "pbr_multi_layer_material_textures")] + key.set( + StandardMaterialKey::CLEARCOAT_NORMAL_MAP, + material.clearcoat > 0.0 && material.clearcoat_normal_texture.is_some(), + ); + + key.set( + StandardMaterialKey::ANISOTROPY, + material.anisotropy_strength > 0.0, + ); + + key.set( + StandardMaterialKey::BASE_COLOR_UV, + material.base_color_channel != UvChannel::Uv0, + ); + + key.set( + StandardMaterialKey::EMISSIVE_UV, + material.emissive_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::METALLIC_ROUGHNESS_UV, + material.metallic_roughness_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::OCCLUSION_UV, + material.occlusion_channel != UvChannel::Uv0, + ); + #[cfg(feature = "pbr_transmission_textures")] + { + key.set( + StandardMaterialKey::SPECULAR_TRANSMISSION_UV, + material.specular_transmission_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::THICKNESS_UV, + material.thickness_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::DIFFUSE_TRANSMISSION_UV, + material.diffuse_transmission_channel != UvChannel::Uv0, + ); + } + + key.set( + StandardMaterialKey::NORMAL_MAP_UV, + material.normal_map_channel != UvChannel::Uv0, + ); + + #[cfg(feature = "pbr_anisotropy_texture")] + { + key.set( + StandardMaterialKey::ANISOTROPY_UV, + material.anisotropy_channel != UvChannel::Uv0, + ); + } + + #[cfg(feature = "pbr_specular_textures")] + { + key.set( + StandardMaterialKey::SPECULAR_UV, + material.specular_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::SPECULAR_TINT_UV, + material.specular_tint_channel != UvChannel::Uv0, + ); + } + + #[cfg(feature = "pbr_multi_layer_material_textures")] + { + key.set( 
+ StandardMaterialKey::CLEARCOAT_UV, + material.clearcoat_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::CLEARCOAT_ROUGHNESS_UV, + material.clearcoat_roughness_channel != UvChannel::Uv0, + ); + key.set( + StandardMaterialKey::CLEARCOAT_NORMAL_UV, + material.clearcoat_normal_channel != UvChannel::Uv0, + ); + } + + key.insert(StandardMaterialKey::from_bits_retain( + // Casting to i32 first to ensure the full i32 range is preserved. + // (wgpu expects the depth_bias as an i32 when this is extracted in a later step) + (material.depth_bias as i32 as u64) << STANDARD_MATERIAL_KEY_DEPTH_BIAS_SHIFT, + )); + key + } +} + +impl Material for StandardMaterial { + fn fragment_shader() -> ShaderRef { + shader_ref(bevy_asset::embedded_path!("render/pbr.wgsl")) + } + + #[inline] + fn alpha_mode(&self) -> AlphaMode { + self.alpha_mode + } + + #[inline] + fn opaque_render_method(&self) -> OpaqueRendererMethod { + match self.opaque_render_method { + // For now, diffuse transmission doesn't work under deferred rendering as we don't pack + // the required data into the GBuffer. If this material is set to `Auto`, we report it as + // `Forward` so that it's rendered correctly, even when the `DefaultOpaqueRendererMethod` + // is set to `Deferred`. + // + // If the developer explicitly sets the `OpaqueRendererMethod` to `Deferred`, we assume + // they know what they're doing and don't override it. 
+ OpaqueRendererMethod::Auto if self.diffuse_transmission > 0.0 => { + OpaqueRendererMethod::Forward + } + other => other, + } + } + + #[inline] + fn depth_bias(&self) -> f32 { + self.depth_bias + } + + #[inline] + fn reads_view_transmission_texture(&self) -> bool { + self.specular_transmission > 0.0 + } + + fn prepass_fragment_shader() -> ShaderRef { + shader_ref(bevy_asset::embedded_path!("render/pbr_prepass.wgsl")) + } + + fn deferred_fragment_shader() -> ShaderRef { + shader_ref(bevy_asset::embedded_path!("render/pbr.wgsl")) + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_fragment_shader() -> ShaderRef { + Self::fragment_shader() + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef { + Self::prepass_fragment_shader() + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef { + Self::deferred_fragment_shader() + } + + fn specialize( + _pipeline: &MaterialPipeline, + descriptor: &mut RenderPipelineDescriptor, + _layout: &MeshVertexBufferLayoutRef, + key: MaterialPipelineKey, + ) -> Result<(), SpecializedMeshPipelineError> { + if let Some(fragment) = descriptor.fragment.as_mut() { + let shader_defs = &mut fragment.shader_defs; + + for (flags, shader_def) in [ + ( + StandardMaterialKey::NORMAL_MAP, + "STANDARD_MATERIAL_NORMAL_MAP", + ), + (StandardMaterialKey::RELIEF_MAPPING, "RELIEF_MAPPING"), + ( + StandardMaterialKey::DIFFUSE_TRANSMISSION, + "STANDARD_MATERIAL_DIFFUSE_TRANSMISSION", + ), + ( + StandardMaterialKey::SPECULAR_TRANSMISSION, + "STANDARD_MATERIAL_SPECULAR_TRANSMISSION", + ), + ( + StandardMaterialKey::DIFFUSE_TRANSMISSION + | StandardMaterialKey::SPECULAR_TRANSMISSION, + "STANDARD_MATERIAL_DIFFUSE_OR_SPECULAR_TRANSMISSION", + ), + ( + StandardMaterialKey::CLEARCOAT, + "STANDARD_MATERIAL_CLEARCOAT", + ), + ( + StandardMaterialKey::CLEARCOAT_NORMAL_MAP, + "STANDARD_MATERIAL_CLEARCOAT_NORMAL_MAP", + ), + ( + StandardMaterialKey::ANISOTROPY, + 
"STANDARD_MATERIAL_ANISOTROPY", + ), + ( + StandardMaterialKey::BASE_COLOR_UV, + "STANDARD_MATERIAL_BASE_COLOR_UV_B", + ), + ( + StandardMaterialKey::EMISSIVE_UV, + "STANDARD_MATERIAL_EMISSIVE_UV_B", + ), + ( + StandardMaterialKey::METALLIC_ROUGHNESS_UV, + "STANDARD_MATERIAL_METALLIC_ROUGHNESS_UV_B", + ), + ( + StandardMaterialKey::OCCLUSION_UV, + "STANDARD_MATERIAL_OCCLUSION_UV_B", + ), + ( + StandardMaterialKey::SPECULAR_TRANSMISSION_UV, + "STANDARD_MATERIAL_SPECULAR_TRANSMISSION_UV_B", + ), + ( + StandardMaterialKey::THICKNESS_UV, + "STANDARD_MATERIAL_THICKNESS_UV_B", + ), + ( + StandardMaterialKey::DIFFUSE_TRANSMISSION_UV, + "STANDARD_MATERIAL_DIFFUSE_TRANSMISSION_UV_B", + ), + ( + StandardMaterialKey::NORMAL_MAP_UV, + "STANDARD_MATERIAL_NORMAL_MAP_UV_B", + ), + ( + StandardMaterialKey::CLEARCOAT_UV, + "STANDARD_MATERIAL_CLEARCOAT_UV_B", + ), + ( + StandardMaterialKey::CLEARCOAT_ROUGHNESS_UV, + "STANDARD_MATERIAL_CLEARCOAT_ROUGHNESS_UV_B", + ), + ( + StandardMaterialKey::CLEARCOAT_NORMAL_UV, + "STANDARD_MATERIAL_CLEARCOAT_NORMAL_UV_B", + ), + ( + StandardMaterialKey::ANISOTROPY_UV, + "STANDARD_MATERIAL_ANISOTROPY_UV_B", + ), + ( + StandardMaterialKey::SPECULAR_UV, + "STANDARD_MATERIAL_SPECULAR_UV_B", + ), + ( + StandardMaterialKey::SPECULAR_TINT_UV, + "STANDARD_MATERIAL_SPECULAR_TINT_UV_B", + ), + ] { + if key.bind_group_data.intersects(flags) { + shader_defs.push(shader_def.into()); + } + } + } + + descriptor.primitive.cull_mode = if key + .bind_group_data + .contains(StandardMaterialKey::CULL_FRONT) + { + Some(Face::Front) + } else if key.bind_group_data.contains(StandardMaterialKey::CULL_BACK) { + Some(Face::Back) + } else { + None + }; + + if let Some(label) = &mut descriptor.label { + *label = format!("pbr_{}", *label).into(); + } + if let Some(depth_stencil) = descriptor.depth_stencil.as_mut() { + depth_stencil.bias.constant = + (key.bind_group_data.bits() >> STANDARD_MATERIAL_KEY_DEPTH_BIAS_SHIFT) as i32; + } + Ok(()) + } +} diff --git 
a/crates/libmarathon/src/render/pbr/prepass/mod.rs b/crates/libmarathon/src/render/pbr/prepass/mod.rs new file mode 100644 index 0000000..248ca34 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/mod.rs @@ -0,0 +1,1282 @@ +mod prepass_bindings; + +use crate::render::pbr::{ + alpha_mode_pipeline_key, binding_arrays_are_usable, buffer_layout, + collect_meshes_for_gpu_building, init_material_pipeline, set_mesh_motion_vector_flags, + setup_morph_and_skinning_defs, skin, DeferredDrawFunction, DeferredFragmentShader, + DeferredVertexShader, DrawMesh, EntitySpecializationTicks, ErasedMaterialPipelineKey, Material, + MaterialPipeline, MaterialProperties, MeshLayouts, MeshPipeline, MeshPipelineKey, + OpaqueRendererMethod, PreparedMaterial, PrepassDrawFunction, PrepassFragmentShader, + PrepassVertexShader, RenderLightmaps, RenderMaterialInstances, RenderMeshInstanceFlags, + RenderMeshInstances, RenderPhaseType, SetMaterialBindGroup, SetMeshBindGroup, ShadowView, +}; +use bevy_app::{App, Plugin, PreUpdate}; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle}; +use bevy_camera::{Camera, Camera3d}; +use crate::render::{core_3d::CORE_3D_DEPTH_FORMAT, deferred::*, prepass::*}; +use bevy_ecs::{ + prelude::*, + system::{ + lifetimeless::{Read, SRes}, + SystemParamItem, + }, +}; +use bevy_math::{Affine3A, Mat4, Vec4}; +use bevy_mesh::{Mesh, Mesh3d, MeshVertexBufferLayoutRef}; +use crate::render::{ + alpha::AlphaMode, + batching::gpu_preprocessing::GpuPreprocessingSupport, + globals::{GlobalsBuffer, GlobalsUniform}, + mesh::{allocator::MeshAllocator, RenderMesh}, + render_asset::{prepare_assets, RenderAssets}, + render_phase::*, + render_resource::{binding_types::uniform_buffer, *}, + renderer::{RenderAdapter, RenderDevice, RenderQueue}, + sync_world::RenderEntity, + view::{ + ExtractedView, Msaa, RenderVisibilityRanges, RetainedViewEntity, ViewUniform, + ViewUniformOffset, ViewUniforms, VISIBILITY_RANGES_STORAGE_BUFFER_COUNT, + }, + Extract, 
ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderStartup, RenderSystems, +}; +use bevy_shader::{load_shader_library, Shader, ShaderDefVal}; +use bevy_transform::prelude::GlobalTransform; +pub use prepass_bindings::*; +use tracing::{error, warn}; + +#[cfg(feature = "meshlet")] +use crate::render::pbr::meshlet::{ + prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, InstanceManager, + MeshletMesh3d, +}; + +use std::sync::Arc; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{component::Tick, system::SystemChangeTick}; +use bevy_platform::collections::HashMap; +use crate::render::{ + erased_render_asset::ErasedRenderAssets, + sync_world::MainEntityHashMap, + view::RenderVisibleEntities, + RenderSystems::{PrepareAssets, PrepareResources}, +}; +use bevy_utils::default; +use core::marker::PhantomData; + +/// Sets up everything required to use the prepass pipeline. +/// +/// This does not add the actual prepasses, see [`PrepassPlugin`] for that. +pub struct PrepassPipelinePlugin; + +impl Plugin for PrepassPipelinePlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "prepass.wgsl"); + + load_shader_library!(app, "prepass_bindings.wgsl"); + load_shader_library!(app, "prepass_utils.wgsl"); + load_shader_library!(app, "prepass_io.wgsl"); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .add_systems( + RenderStartup, + ( + init_prepass_pipeline.after(init_material_pipeline), + init_prepass_view_bind_group, + ) + .chain(), + ) + .add_systems( + Render, + prepare_prepass_view_bind_group.in_set(RenderSystems::PrepareBindGroups), + ) + .init_resource::>(); + } +} + +/// Sets up the prepasses for a material. +/// +/// This depends on the [`PrepassPipelinePlugin`]. +pub struct PrepassPlugin { + /// Debugging flags that can optionally be set when constructing the renderer. 
+ pub debug_flags: RenderDebugFlags, +} + +impl PrepassPlugin { + /// Creates a new [`PrepassPlugin`] with the given debug flags. + pub fn new(debug_flags: RenderDebugFlags) -> Self { + PrepassPlugin { debug_flags } + } +} + +impl Plugin for PrepassPlugin { + fn build(&self, app: &mut App) { + let no_prepass_plugin_loaded = app + .world() + .get_resource::() + .is_none(); + + if no_prepass_plugin_loaded { + app.insert_resource(AnyPrepassPluginLoaded) + // At the start of each frame, last frame's GlobalTransforms become this frame's PreviousGlobalTransforms + // and last frame's view projection matrices become this frame's PreviousViewProjections + .add_systems( + PreUpdate, + ( + update_mesh_previous_global_transforms, + update_previous_view_data, + ), + ) + .add_plugins(( + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new( + self.debug_flags, + ), + )); + } + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + if no_prepass_plugin_loaded { + render_app + .add_systems(ExtractSchedule, extract_camera_previous_view_data) + .add_systems( + Render, + prepare_previous_view_uniforms.in_set(PrepareResources), + ); + } + + render_app + .init_resource::() + .init_resource::() + .init_resource::() + .add_render_command::() + .add_render_command::() + .add_render_command::() + .add_render_command::() + .add_systems( + Render, + ( + check_prepass_views_need_specialization.in_set(PrepareAssets), + specialize_prepass_material_meshes + .in_set(RenderSystems::PrepareMeshes) + .after(prepare_assets::) + .after(collect_meshes_for_gpu_building) + .after(set_mesh_motion_vector_flags), + queue_prepass_material_meshes.in_set(RenderSystems::QueueMeshes), + ), + ); + + #[cfg(feature = "meshlet")] + render_app.add_systems( + Render, + prepare_material_meshlet_meshes_prepass + .in_set(RenderSystems::QueueMeshes) + .before(queue_material_meshlet_meshes) + .run_if(resource_exists::), + ); + } +} + +/// Marker resource for whether 
prepass is enabled globally for this material type +#[derive(Resource, Debug)] +pub struct PrepassEnabled(PhantomData); + +impl Default for PrepassEnabled { + fn default() -> Self { + PrepassEnabled(PhantomData) + } +} + +#[derive(Resource)] +struct AnyPrepassPluginLoaded; + +pub fn update_previous_view_data( + mut commands: Commands, + query: Query<(Entity, &Camera, &GlobalTransform), Or<(With, With)>>, +) { + for (entity, camera, camera_transform) in &query { + let world_from_view = camera_transform.affine(); + let view_from_world = Mat4::from(world_from_view.inverse()); + let view_from_clip = camera.clip_from_view().inverse(); + + commands.entity(entity).try_insert(PreviousViewData { + view_from_world, + clip_from_world: camera.clip_from_view() * view_from_world, + clip_from_view: camera.clip_from_view(), + world_from_clip: Mat4::from(world_from_view) * view_from_clip, + view_from_clip, + }); + } +} + +#[derive(Component, PartialEq, Default)] +pub struct PreviousGlobalTransform(pub Affine3A); + +#[cfg(not(feature = "meshlet"))] +type PreviousMeshFilter = With; +#[cfg(feature = "meshlet")] +type PreviousMeshFilter = Or<(With, With)>; + +pub fn update_mesh_previous_global_transforms( + mut commands: Commands, + views: Query<&Camera, Or<(With, With)>>, + new_meshes: Query< + (Entity, &GlobalTransform), + (PreviousMeshFilter, Without), + >, + mut meshes: Query<(&GlobalTransform, &mut PreviousGlobalTransform), PreviousMeshFilter>, +) { + let should_run = views.iter().any(|camera| camera.is_active); + + if should_run { + for (entity, transform) in &new_meshes { + let new_previous_transform = PreviousGlobalTransform(transform.affine()); + commands.entity(entity).try_insert(new_previous_transform); + } + meshes.par_iter_mut().for_each(|(transform, mut previous)| { + previous.set_if_neq(PreviousGlobalTransform(transform.affine())); + }); + } +} + +#[derive(Resource, Clone)] +pub struct PrepassPipeline { + pub view_layout_motion_vectors: BindGroupLayout, + pub 
view_layout_no_motion_vectors: BindGroupLayout, + pub mesh_layouts: MeshLayouts, + pub empty_layout: BindGroupLayout, + pub default_prepass_shader: Handle, + + /// Whether skins will use uniform buffers on account of storage buffers + /// being unavailable on this platform. + pub skins_use_uniform_buffers: bool, + + pub depth_clip_control_supported: bool, + + /// Whether binding arrays (a.k.a. bindless textures) are usable on the + /// current render device. + pub binding_arrays_are_usable: bool, + pub material_pipeline: MaterialPipeline, +} + +pub fn init_prepass_pipeline( + mut commands: Commands, + render_device: Res, + render_adapter: Res, + mesh_pipeline: Res, + material_pipeline: Res, + asset_server: Res, +) { + let visibility_ranges_buffer_binding_type = + render_device.get_supported_read_only_binding_type(VISIBILITY_RANGES_STORAGE_BUFFER_COUNT); + + let view_layout_motion_vectors = render_device.create_bind_group_layout( + "prepass_view_layout_motion_vectors", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX_FRAGMENT, + ( + // View + (0, uniform_buffer::(true)), + // Globals + (1, uniform_buffer::(false)), + // PreviousViewUniforms + (2, uniform_buffer::(true)), + // VisibilityRanges + ( + 14, + buffer_layout( + visibility_ranges_buffer_binding_type, + false, + Some(Vec4::min_size()), + ) + .visibility(ShaderStages::VERTEX), + ), + ), + ), + ); + + let view_layout_no_motion_vectors = render_device.create_bind_group_layout( + "prepass_view_layout_no_motion_vectors", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX_FRAGMENT, + ( + // View + (0, uniform_buffer::(true)), + // Globals + (1, uniform_buffer::(false)), + // VisibilityRanges + ( + 14, + buffer_layout( + visibility_ranges_buffer_binding_type, + false, + Some(Vec4::min_size()), + ) + .visibility(ShaderStages::VERTEX), + ), + ), + ), + ); + + let depth_clip_control_supported = render_device + .features() + .contains(WgpuFeatures::DEPTH_CLIP_CONTROL); + 
commands.insert_resource(PrepassPipeline { + view_layout_motion_vectors, + view_layout_no_motion_vectors, + mesh_layouts: mesh_pipeline.mesh_layouts.clone(), + default_prepass_shader: load_embedded_asset!(asset_server.as_ref(), "prepass.wgsl"), + skins_use_uniform_buffers: skin::skins_use_uniform_buffers(&render_device), + depth_clip_control_supported, + binding_arrays_are_usable: binding_arrays_are_usable(&render_device, &render_adapter), + empty_layout: render_device.create_bind_group_layout("prepass_empty_layout", &[]), + material_pipeline: material_pipeline.clone(), + }); +} + +pub struct PrepassPipelineSpecializer { + pub(crate) pipeline: PrepassPipeline, + pub(crate) properties: Arc, +} + +impl SpecializedMeshPipeline for PrepassPipelineSpecializer { + type Key = ErasedMaterialPipelineKey; + + fn specialize( + &self, + key: Self::Key, + layout: &MeshVertexBufferLayoutRef, + ) -> Result { + let mut shader_defs = Vec::new(); + if self.properties.bindless { + shader_defs.push("BINDLESS".into()); + } + let mut descriptor = + self.pipeline + .specialize(key.mesh_key, shader_defs, layout, &self.properties)?; + + // This is a bit risky because it's possible to change something that would + // break the prepass but be fine in the main pass. + // Since this api is pretty low-level it doesn't matter that much, but it is a potential issue. 
+ if let Some(specialize) = self.properties.specialize { + specialize( + &self.pipeline.material_pipeline, + &mut descriptor, + layout, + key, + )?; + } + + Ok(descriptor) + } +} + +impl PrepassPipeline { + fn specialize( + &self, + mesh_key: MeshPipelineKey, + shader_defs: Vec, + layout: &MeshVertexBufferLayoutRef, + material_properties: &MaterialProperties, + ) -> Result { + let mut shader_defs = shader_defs; + let mut bind_group_layouts = vec![ + if mesh_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + self.view_layout_motion_vectors.clone() + } else { + self.view_layout_no_motion_vectors.clone() + }, + self.empty_layout.clone(), + ]; + let mut vertex_attributes = Vec::new(); + + // Let the shader code know that it's running in a prepass pipeline. + // (PBR code will use this to detect that it's running in deferred mode, + // since that's the only time it gets called from a prepass pipeline.) + shader_defs.push("PREPASS_PIPELINE".into()); + + shader_defs.push(ShaderDefVal::UInt( + "MATERIAL_BIND_GROUP".into(), + crate::render::pbr::material::MATERIAL_BIND_GROUP_INDEX as u32, + )); + // NOTE: Eventually, it would be nice to only add this when the shaders are overloaded by the Material. + // The main limitation right now is that bind group order is hardcoded in shaders. 
+ bind_group_layouts.push( + material_properties + .material_layout + .as_ref() + .unwrap() + .clone(), + ); + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + shader_defs.push("WEBGL2".into()); + shader_defs.push("VERTEX_OUTPUT_INSTANCE_INDEX".into()); + if mesh_key.contains(MeshPipelineKey::DEPTH_PREPASS) { + shader_defs.push("DEPTH_PREPASS".into()); + } + if mesh_key.contains(MeshPipelineKey::MAY_DISCARD) { + shader_defs.push("MAY_DISCARD".into()); + } + let blend_key = mesh_key.intersection(MeshPipelineKey::BLEND_RESERVED_BITS); + if blend_key == MeshPipelineKey::BLEND_PREMULTIPLIED_ALPHA { + shader_defs.push("BLEND_PREMULTIPLIED_ALPHA".into()); + } + if blend_key == MeshPipelineKey::BLEND_ALPHA { + shader_defs.push("BLEND_ALPHA".into()); + } + if layout.0.contains(Mesh::ATTRIBUTE_POSITION) { + shader_defs.push("VERTEX_POSITIONS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_POSITION.at_shader_location(0)); + } + // For directional light shadow map views, use unclipped depth via either the native GPU feature, + // or emulated by setting depth in the fragment shader for GPUs that don't support it natively. + let emulate_unclipped_depth = mesh_key.contains(MeshPipelineKey::UNCLIPPED_DEPTH_ORTHO) + && !self.depth_clip_control_supported; + if emulate_unclipped_depth { + shader_defs.push("UNCLIPPED_DEPTH_ORTHO_EMULATION".into()); + // PERF: This line forces the "prepass fragment shader" to always run in + // common scenarios like "directional light calculation". Doing so resolves + // a pretty nasty depth clamping bug, but it also feels a bit excessive. + // We should try to find a way to resolve this without forcing the fragment + // shader to run. 
+ // https://github.com/bevyengine/bevy/pull/8877 + shader_defs.push("PREPASS_FRAGMENT".into()); + } + let unclipped_depth = mesh_key.contains(MeshPipelineKey::UNCLIPPED_DEPTH_ORTHO) + && self.depth_clip_control_supported; + if layout.0.contains(Mesh::ATTRIBUTE_UV_0) { + shader_defs.push("VERTEX_UVS".into()); + shader_defs.push("VERTEX_UVS_A".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_UV_0.at_shader_location(1)); + } + if layout.0.contains(Mesh::ATTRIBUTE_UV_1) { + shader_defs.push("VERTEX_UVS".into()); + shader_defs.push("VERTEX_UVS_B".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_UV_1.at_shader_location(2)); + } + if mesh_key.contains(MeshPipelineKey::NORMAL_PREPASS) { + shader_defs.push("NORMAL_PREPASS".into()); + } + if mesh_key.intersects(MeshPipelineKey::NORMAL_PREPASS | MeshPipelineKey::DEFERRED_PREPASS) + { + shader_defs.push("NORMAL_PREPASS_OR_DEFERRED_PREPASS".into()); + if layout.0.contains(Mesh::ATTRIBUTE_NORMAL) { + shader_defs.push("VERTEX_NORMALS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_NORMAL.at_shader_location(3)); + } else if mesh_key.contains(MeshPipelineKey::NORMAL_PREPASS) { + warn!( + "The default normal prepass expects the mesh to have vertex normal attributes." 
+ ); + } + if layout.0.contains(Mesh::ATTRIBUTE_TANGENT) { + shader_defs.push("VERTEX_TANGENTS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_TANGENT.at_shader_location(4)); + } + } + if mesh_key + .intersects(MeshPipelineKey::MOTION_VECTOR_PREPASS | MeshPipelineKey::DEFERRED_PREPASS) + { + shader_defs.push("MOTION_VECTOR_PREPASS_OR_DEFERRED_PREPASS".into()); + } + if mesh_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + shader_defs.push("DEFERRED_PREPASS".into()); + } + if mesh_key.contains(MeshPipelineKey::LIGHTMAPPED) { + shader_defs.push("LIGHTMAP".into()); + } + if mesh_key.contains(MeshPipelineKey::LIGHTMAP_BICUBIC_SAMPLING) { + shader_defs.push("LIGHTMAP_BICUBIC_SAMPLING".into()); + } + if layout.0.contains(Mesh::ATTRIBUTE_COLOR) { + shader_defs.push("VERTEX_COLORS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_COLOR.at_shader_location(7)); + } + if mesh_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + shader_defs.push("MOTION_VECTOR_PREPASS".into()); + } + if mesh_key.contains(MeshPipelineKey::HAS_PREVIOUS_SKIN) { + shader_defs.push("HAS_PREVIOUS_SKIN".into()); + } + if mesh_key.contains(MeshPipelineKey::HAS_PREVIOUS_MORPH) { + shader_defs.push("HAS_PREVIOUS_MORPH".into()); + } + if self.binding_arrays_are_usable { + shader_defs.push("MULTIPLE_LIGHTMAPS_IN_ARRAY".into()); + } + if mesh_key.contains(MeshPipelineKey::VISIBILITY_RANGE_DITHER) { + shader_defs.push("VISIBILITY_RANGE_DITHER".into()); + } + if mesh_key.intersects( + MeshPipelineKey::NORMAL_PREPASS + | MeshPipelineKey::MOTION_VECTOR_PREPASS + | MeshPipelineKey::DEFERRED_PREPASS, + ) { + shader_defs.push("PREPASS_FRAGMENT".into()); + } + let bind_group = setup_morph_and_skinning_defs( + &self.mesh_layouts, + layout, + 5, + &mesh_key, + &mut shader_defs, + &mut vertex_attributes, + self.skins_use_uniform_buffers, + ); + bind_group_layouts.insert(2, bind_group); + let vertex_buffer_layout = layout.0.get_layout(&vertex_attributes)?; + // Setup prepass fragment targets - normals in 
slot 0 (or None if not needed), motion vectors in slot 1 + let mut targets = prepass_target_descriptors( + mesh_key.contains(MeshPipelineKey::NORMAL_PREPASS), + mesh_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS), + mesh_key.contains(MeshPipelineKey::DEFERRED_PREPASS), + ); + + if targets.iter().all(Option::is_none) { + // if no targets are required then clear the list, so that no fragment shader is required + // (though one may still be used for discarding depth buffer writes) + targets.clear(); + } + + // The fragment shader is only used when the normal prepass or motion vectors prepass + // is enabled, the material uses alpha cutoff values and doesn't rely on the standard + // prepass shader, or we are emulating unclipped depth in the fragment shader. + let fragment_required = !targets.is_empty() + || emulate_unclipped_depth + || (mesh_key.contains(MeshPipelineKey::MAY_DISCARD) + && material_properties + .get_shader(PrepassFragmentShader) + .is_some()); + + let fragment = fragment_required.then(|| { + // Use the fragment shader from the material + let frag_shader_handle = if mesh_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + match material_properties.get_shader(DeferredFragmentShader) { + Some(frag_shader_handle) => frag_shader_handle, + None => self.default_prepass_shader.clone(), + } + } else { + match material_properties.get_shader(PrepassFragmentShader) { + Some(frag_shader_handle) => frag_shader_handle, + None => self.default_prepass_shader.clone(), + } + }; + + FragmentState { + shader: frag_shader_handle, + shader_defs: shader_defs.clone(), + targets, + ..default() + } + }); + + // Use the vertex shader from the material if present + let vert_shader_handle = if mesh_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + if let Some(handle) = material_properties.get_shader(DeferredVertexShader) { + handle + } else { + self.default_prepass_shader.clone() + } + } else if let Some(handle) = material_properties.get_shader(PrepassVertexShader) { + 
handle + } else { + self.default_prepass_shader.clone() + }; + let descriptor = RenderPipelineDescriptor { + vertex: VertexState { + shader: vert_shader_handle, + shader_defs, + buffers: vec![vertex_buffer_layout], + ..default() + }, + fragment, + layout: bind_group_layouts, + primitive: PrimitiveState { + topology: mesh_key.primitive_topology(), + unclipped_depth, + ..default() + }, + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState { + front: StencilFaceState::IGNORE, + back: StencilFaceState::IGNORE, + read_mask: 0, + write_mask: 0, + }, + bias: DepthBiasState { + constant: 0, + slope_scale: 0.0, + clamp: 0.0, + }, + }), + multisample: MultisampleState { + count: mesh_key.msaa_samples(), + mask: !0, + alpha_to_coverage_enabled: false, + }, + label: Some("prepass_pipeline".into()), + ..default() + }; + Ok(descriptor) + } +} + +// Extract the render phases for the prepass +pub fn extract_camera_previous_view_data( + mut commands: Commands, + cameras_3d: Extract), With>>, +) { + for (entity, camera, maybe_previous_view_data) in cameras_3d.iter() { + let mut entity = commands + .get_entity(entity) + .expect("Camera entity wasn't synced."); + if camera.is_active { + if let Some(previous_view_data) = maybe_previous_view_data { + entity.insert(previous_view_data.clone()); + } + } else { + entity.remove::(); + } + } +} + +pub fn prepare_previous_view_uniforms( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mut previous_view_uniforms: ResMut, + views: Query< + (Entity, &ExtractedView, Option<&PreviousViewData>), + Or<(With, With)>, + >, +) { + let views_iter = views.iter(); + let view_count = views_iter.len(); + let Some(mut writer) = + previous_view_uniforms + .uniforms + .get_writer(view_count, &render_device, &render_queue) + else { + return; + }; + + for (entity, camera, maybe_previous_view_uniforms) in views_iter { + let 
prev_view_data = match maybe_previous_view_uniforms { + Some(previous_view) => previous_view.clone(), + None => { + let world_from_view = camera.world_from_view.affine(); + let view_from_world = Mat4::from(world_from_view.inverse()); + let view_from_clip = camera.clip_from_view.inverse(); + + PreviousViewData { + view_from_world, + clip_from_world: camera.clip_from_view * view_from_world, + clip_from_view: camera.clip_from_view, + world_from_clip: Mat4::from(world_from_view) * view_from_clip, + view_from_clip, + } + } + }; + + commands.entity(entity).insert(PreviousViewUniformOffset { + offset: writer.write(&prev_view_data), + }); + } +} + +#[derive(Resource)] +pub struct PrepassViewBindGroup { + pub motion_vectors: Option, + pub no_motion_vectors: Option, + pub empty_bind_group: BindGroup, +} + +pub fn init_prepass_view_bind_group( + mut commands: Commands, + render_device: Res, + pipeline: Res, +) { + let empty_bind_group = render_device.create_bind_group( + "prepass_view_empty_bind_group", + &pipeline.empty_layout, + &[], + ); + commands.insert_resource(PrepassViewBindGroup { + motion_vectors: None, + no_motion_vectors: None, + empty_bind_group, + }); +} + +pub fn prepare_prepass_view_bind_group( + render_device: Res, + prepass_pipeline: Res, + view_uniforms: Res, + globals_buffer: Res, + previous_view_uniforms: Res, + visibility_ranges: Res, + mut prepass_view_bind_group: ResMut, +) { + if let (Some(view_binding), Some(globals_binding), Some(visibility_ranges_buffer)) = ( + view_uniforms.uniforms.binding(), + globals_buffer.buffer.binding(), + visibility_ranges.buffer().buffer(), + ) { + prepass_view_bind_group.no_motion_vectors = Some(render_device.create_bind_group( + "prepass_view_no_motion_vectors_bind_group", + &prepass_pipeline.view_layout_no_motion_vectors, + &BindGroupEntries::with_indices(( + (0, view_binding.clone()), + (1, globals_binding.clone()), + (14, visibility_ranges_buffer.as_entire_binding()), + )), + )); + + if let 
Some(previous_view_uniforms_binding) = previous_view_uniforms.uniforms.binding() { + prepass_view_bind_group.motion_vectors = Some(render_device.create_bind_group( + "prepass_view_motion_vectors_bind_group", + &prepass_pipeline.view_layout_motion_vectors, + &BindGroupEntries::with_indices(( + (0, view_binding), + (1, globals_binding), + (2, previous_view_uniforms_binding), + (14, visibility_ranges_buffer.as_entire_binding()), + )), + )); + } + } +} + +/// Stores the [`SpecializedPrepassMaterialViewPipelineCache`] for each view. +#[derive(Resource, Deref, DerefMut, Default)] +pub struct SpecializedPrepassMaterialPipelineCache { + // view_entity -> view pipeline cache + #[deref] + map: HashMap, +} + +/// Stores the cached render pipeline ID for each entity in a single view, as +/// well as the last time it was changed. +#[derive(Deref, DerefMut, Default)] +pub struct SpecializedPrepassMaterialViewPipelineCache { + // material entity -> (tick, pipeline_id) + #[deref] + map: MainEntityHashMap<(Tick, CachedRenderPipelineId)>, +} + +#[derive(Resource, Deref, DerefMut, Default, Clone)] +pub struct ViewKeyPrepassCache(HashMap); + +#[derive(Resource, Deref, DerefMut, Default, Clone)] +pub struct ViewPrepassSpecializationTicks(HashMap); + +pub fn check_prepass_views_need_specialization( + mut view_key_cache: ResMut, + mut view_specialization_ticks: ResMut, + mut views: Query<( + &ExtractedView, + &Msaa, + Option<&DepthPrepass>, + Option<&NormalPrepass>, + Option<&MotionVectorPrepass>, + )>, + ticks: SystemChangeTick, +) { + for (view, msaa, depth_prepass, normal_prepass, motion_vector_prepass) in views.iter_mut() { + let mut view_key = MeshPipelineKey::from_msaa_samples(msaa.samples()); + if depth_prepass.is_some() { + view_key |= MeshPipelineKey::DEPTH_PREPASS; + } + if normal_prepass.is_some() { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + if motion_vector_prepass.is_some() { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + + if let Some(current_key) = 
view_key_cache.get_mut(&view.retained_view_entity) { + if *current_key != view_key { + view_key_cache.insert(view.retained_view_entity, view_key); + view_specialization_ticks.insert(view.retained_view_entity, ticks.this_run()); + } + } else { + view_key_cache.insert(view.retained_view_entity, view_key); + view_specialization_ticks.insert(view.retained_view_entity, ticks.this_run()); + } + } +} + +pub fn specialize_prepass_material_meshes( + render_meshes: Res>, + render_materials: Res>, + render_mesh_instances: Res, + render_material_instances: Res, + render_lightmaps: Res, + render_visibility_ranges: Res, + view_key_cache: Res, + views: Query<( + &ExtractedView, + &RenderVisibleEntities, + &Msaa, + Option<&MotionVectorPrepass>, + Option<&DeferredPrepass>, + )>, + ( + opaque_prepass_render_phases, + alpha_mask_prepass_render_phases, + opaque_deferred_render_phases, + alpha_mask_deferred_render_phases, + ): ( + Res>, + Res>, + Res>, + Res>, + ), + ( + mut specialized_material_pipeline_cache, + ticks, + prepass_pipeline, + mut pipelines, + pipeline_cache, + view_specialization_ticks, + entity_specialization_ticks, + ): ( + ResMut, + SystemChangeTick, + Res, + ResMut>, + Res, + Res, + Res, + ), +) { + for (extracted_view, visible_entities, msaa, motion_vector_prepass, deferred_prepass) in &views + { + if !opaque_deferred_render_phases.contains_key(&extracted_view.retained_view_entity) + && !alpha_mask_deferred_render_phases.contains_key(&extracted_view.retained_view_entity) + && !opaque_prepass_render_phases.contains_key(&extracted_view.retained_view_entity) + && !alpha_mask_prepass_render_phases.contains_key(&extracted_view.retained_view_entity) + { + continue; + } + + let Some(view_key) = view_key_cache.get(&extracted_view.retained_view_entity) else { + continue; + }; + + let view_tick = view_specialization_ticks + .get(&extracted_view.retained_view_entity) + .unwrap(); + let view_specialized_material_pipeline_cache = specialized_material_pipeline_cache + 
.entry(extracted_view.retained_view_entity) + .or_default(); + + for (_, visible_entity) in visible_entities.iter::() { + let Some(material_instance) = render_material_instances.instances.get(visible_entity) + else { + continue; + }; + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let entity_tick = entity_specialization_ticks.get(visible_entity).unwrap(); + let last_specialized_tick = view_specialized_material_pipeline_cache + .get(visible_entity) + .map(|(tick, _)| *tick); + let needs_specialization = last_specialized_tick.is_none_or(|tick| { + view_tick.is_newer_than(tick, ticks.this_run()) + || entity_tick.is_newer_than(tick, ticks.this_run()) + }); + if !needs_specialization { + continue; + } + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + if !material.properties.prepass_enabled && !material.properties.shadows_enabled { + // If the material was previously specialized for prepass, remove it + view_specialized_material_pipeline_cache.remove(visible_entity); + continue; + } + let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else { + continue; + }; + + let mut mesh_key = *view_key | MeshPipelineKey::from_bits_retain(mesh.key_bits.bits()); + + let alpha_mode = material.properties.alpha_mode; + match alpha_mode { + AlphaMode::Opaque | AlphaMode::AlphaToCoverage | AlphaMode::Mask(_) => { + mesh_key |= alpha_mode_pipeline_key(alpha_mode, msaa); + } + AlphaMode::Blend + | AlphaMode::Premultiplied + | AlphaMode::Add + | AlphaMode::Multiply => { + // In case this material was previously in a valid alpha_mode, remove it to + // stop the queue system from assuming its retained cache to be valid. 
+ view_specialized_material_pipeline_cache.remove(visible_entity); + continue; + } + } + + if material.properties.reads_view_transmission_texture { + // No-op: Materials reading from `ViewTransmissionTexture` are not rendered in the `Opaque3d` + // phase, and are therefore also excluded from the prepass much like alpha-blended materials. + view_specialized_material_pipeline_cache.remove(visible_entity); + continue; + } + + let forward = match material.properties.render_method { + OpaqueRendererMethod::Forward => true, + OpaqueRendererMethod::Deferred => false, + OpaqueRendererMethod::Auto => unreachable!(), + }; + + let deferred = deferred_prepass.is_some() && !forward; + + if deferred { + mesh_key |= MeshPipelineKey::DEFERRED_PREPASS; + } + + if let Some(lightmap) = render_lightmaps.render_lightmaps.get(visible_entity) { + // Even though we don't use the lightmap in the forward prepass, the + // `SetMeshBindGroup` render command will bind the data for it. So + // we need to include the appropriate flag in the mesh pipeline key + // to ensure that the necessary bind group layout entries are + // present. + mesh_key |= MeshPipelineKey::LIGHTMAPPED; + + if lightmap.bicubic_sampling && deferred { + mesh_key |= MeshPipelineKey::LIGHTMAP_BICUBIC_SAMPLING; + } + } + + if render_visibility_ranges.entity_has_crossfading_visibility_ranges(*visible_entity) { + mesh_key |= MeshPipelineKey::VISIBILITY_RANGE_DITHER; + } + + // If the previous frame has skins or morph targets, note that. 
+ if motion_vector_prepass.is_some() { + if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_SKIN; + } + if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_MORPH) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_MORPH; + } + } + + let erased_key = ErasedMaterialPipelineKey { + mesh_key, + material_key: material.properties.material_key.clone(), + type_id: material_instance.asset_id.type_id(), + }; + let prepass_pipeline_specializer = PrepassPipelineSpecializer { + pipeline: prepass_pipeline.clone(), + properties: material.properties.clone(), + }; + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &prepass_pipeline_specializer, + erased_key, + &mesh.layout, + ); + let pipeline_id = match pipeline_id { + Ok(id) => id, + Err(err) => { + error!("{}", err); + continue; + } + }; + + view_specialized_material_pipeline_cache + .insert(*visible_entity, (ticks.this_run(), pipeline_id)); + } + } +} + +pub fn queue_prepass_material_meshes( + render_mesh_instances: Res, + render_materials: Res>, + render_material_instances: Res, + mesh_allocator: Res, + gpu_preprocessing_support: Res, + mut opaque_prepass_render_phases: ResMut>, + mut alpha_mask_prepass_render_phases: ResMut>, + mut opaque_deferred_render_phases: ResMut>, + mut alpha_mask_deferred_render_phases: ResMut>, + views: Query<(&ExtractedView, &RenderVisibleEntities)>, + specialized_material_pipeline_cache: Res, +) { + for (extracted_view, visible_entities) in &views { + let ( + mut opaque_phase, + mut alpha_mask_phase, + mut opaque_deferred_phase, + mut alpha_mask_deferred_phase, + ) = ( + opaque_prepass_render_phases.get_mut(&extracted_view.retained_view_entity), + alpha_mask_prepass_render_phases.get_mut(&extracted_view.retained_view_entity), + opaque_deferred_render_phases.get_mut(&extracted_view.retained_view_entity), + alpha_mask_deferred_render_phases.get_mut(&extracted_view.retained_view_entity), + 
); + + let Some(view_specialized_material_pipeline_cache) = + specialized_material_pipeline_cache.get(&extracted_view.retained_view_entity) + else { + continue; + }; + + // Skip if there's no place to put the mesh. + if opaque_phase.is_none() + && alpha_mask_phase.is_none() + && opaque_deferred_phase.is_none() + && alpha_mask_deferred_phase.is_none() + { + continue; + } + + for (render_entity, visible_entity) in visible_entities.iter::() { + let Some((current_change_tick, pipeline_id)) = + view_specialized_material_pipeline_cache.get(visible_entity) + else { + continue; + }; + + // Skip the entity if it's cached in a bin and up to date. + if opaque_phase.as_mut().is_some_and(|phase| { + phase.validate_cached_entity(*visible_entity, *current_change_tick) + }) || alpha_mask_phase.as_mut().is_some_and(|phase| { + phase.validate_cached_entity(*visible_entity, *current_change_tick) + }) || opaque_deferred_phase.as_mut().is_some_and(|phase| { + phase.validate_cached_entity(*visible_entity, *current_change_tick) + }) || alpha_mask_deferred_phase.as_mut().is_some_and(|phase| { + phase.validate_cached_entity(*visible_entity, *current_change_tick) + }) { + continue; + } + + let Some(material_instance) = render_material_instances.instances.get(visible_entity) + else { + continue; + }; + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + + let deferred = match material.properties.render_method { + OpaqueRendererMethod::Forward => false, + OpaqueRendererMethod::Deferred => true, + OpaqueRendererMethod::Auto => unreachable!(), + }; + + match material.properties.render_phase_type { + RenderPhaseType::Opaque => { + if deferred { + opaque_deferred_phase.as_mut().unwrap().add( + OpaqueNoLightmap3dBatchSetKey { + draw_function: material 
+ .properties + .get_draw_function(DeferredDrawFunction) + .unwrap(), + pipeline: *pipeline_id, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }, + OpaqueNoLightmap3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + *current_change_tick, + ); + } else if let Some(opaque_phase) = opaque_phase.as_mut() { + let (vertex_slab, index_slab) = + mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + opaque_phase.add( + OpaqueNoLightmap3dBatchSetKey { + draw_function: material + .properties + .get_draw_function(PrepassDrawFunction) + .unwrap(), + pipeline: *pipeline_id, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }, + OpaqueNoLightmap3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + *current_change_tick, + ); + } + } + RenderPhaseType::AlphaMask => { + if deferred { + let (vertex_slab, index_slab) = + mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + let batch_set_key = OpaqueNoLightmap3dBatchSetKey { + draw_function: material + .properties + .get_draw_function(DeferredDrawFunction) + .unwrap(), + pipeline: *pipeline_id, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }; + let bin_key = OpaqueNoLightmap3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }; + alpha_mask_deferred_phase.as_mut().unwrap().add( + batch_set_key, + bin_key, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + 
mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + *current_change_tick, + ); + } else if let Some(alpha_mask_phase) = alpha_mask_phase.as_mut() { + let (vertex_slab, index_slab) = + mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + let batch_set_key = OpaqueNoLightmap3dBatchSetKey { + draw_function: material + .properties + .get_draw_function(PrepassDrawFunction) + .unwrap(), + pipeline: *pipeline_id, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }; + let bin_key = OpaqueNoLightmap3dBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }; + alpha_mask_phase.add( + batch_set_key, + bin_key, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + *current_change_tick, + ); + } + } + _ => {} + } + } + } +} + +pub struct SetPrepassViewBindGroup; +impl RenderCommand

for SetPrepassViewBindGroup { + type Param = SRes; + type ViewQuery = ( + Read, + Has, + Option>, + ); + type ItemQuery = (); + + #[inline] + fn render<'w>( + _item: &P, + (view_uniform_offset, has_motion_vector_prepass, previous_view_uniform_offset): ( + &'_ ViewUniformOffset, + bool, + Option<&'_ PreviousViewUniformOffset>, + ), + _entity: Option<()>, + prepass_view_bind_group: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let prepass_view_bind_group = prepass_view_bind_group.into_inner(); + + match previous_view_uniform_offset { + Some(previous_view_uniform_offset) if has_motion_vector_prepass => { + pass.set_bind_group( + I, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_uniform_offset.offset, + ], + ); + } + _ => { + pass.set_bind_group( + I, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } + } + RenderCommandResult::Success + } +} + +pub struct SetPrepassViewEmptyBindGroup; +impl RenderCommand

for SetPrepassViewEmptyBindGroup { + type Param = SRes; + type ViewQuery = (); + type ItemQuery = (); + + #[inline] + fn render<'w>( + _item: &P, + _view: (), + _entity: Option<()>, + prepass_view_bind_group: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let prepass_view_bind_group = prepass_view_bind_group.into_inner(); + pass.set_bind_group(I, &prepass_view_bind_group.empty_bind_group, &[]); + RenderCommandResult::Success + } +} + +pub type DrawPrepass = ( + SetItemPipeline, + SetPrepassViewBindGroup<0>, + SetPrepassViewEmptyBindGroup<1>, + SetMeshBindGroup<2>, + SetMaterialBindGroup<3>, + DrawMesh, +); diff --git a/crates/libmarathon/src/render/pbr/prepass/prepass.wgsl b/crates/libmarathon/src/render/pbr/prepass/prepass.wgsl new file mode 100644 index 0000000..52dd9bf --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/prepass.wgsl @@ -0,0 +1,219 @@ +#import bevy_pbr::{ + prepass_bindings, + mesh_bindings::mesh, + mesh_functions, + prepass_io::{Vertex, VertexOutput, FragmentOutput}, + skinning, + morph, + mesh_view_bindings::view, + view_transformations::position_world_to_clip, +} + +#ifdef DEFERRED_PREPASS +#import bevy_pbr::rgb9e5 +#endif + +#ifdef MORPH_TARGETS +fn morph_vertex(vertex_in: Vertex) -> Vertex { + var vertex = vertex_in; + let first_vertex = mesh[vertex.instance_index].first_vertex_index; + let vertex_index = vertex.index - first_vertex; + + let weight_count = morph::layer_count(); + for (var i: u32 = 0u; i < weight_count; i ++) { + let weight = morph::weight_at(i); + if weight == 0.0 { + continue; + } + vertex.position += weight * morph::morph(vertex_index, morph::position_offset, i); +#ifdef VERTEX_NORMALS + vertex.normal += weight * morph::morph(vertex_index, morph::normal_offset, i); +#endif +#ifdef VERTEX_TANGENTS + vertex.tangent += vec4(weight * morph::morph(vertex_index, morph::tangent_offset, i), 0.0); +#endif + } + return vertex; +} + +// Returns the morphed position 
of the given vertex from the previous frame. +// +// This function is used for motion vector calculation, and, as such, it doesn't +// bother morphing the normals and tangents. +fn morph_prev_vertex(vertex_in: Vertex) -> Vertex { + var vertex = vertex_in; + let weight_count = morph::layer_count(); + for (var i: u32 = 0u; i < weight_count; i ++) { + let weight = morph::prev_weight_at(i); + if weight == 0.0 { + continue; + } + vertex.position += weight * morph::morph(vertex.index, morph::position_offset, i); + // Don't bother morphing normals and tangents; we don't need them for + // motion vector calculation. + } + return vertex; +} +#endif // MORPH_TARGETS + +@vertex +fn vertex(vertex_no_morph: Vertex) -> VertexOutput { + var out: VertexOutput; + +#ifdef MORPH_TARGETS + var vertex = morph_vertex(vertex_no_morph); +#else + var vertex = vertex_no_morph; +#endif + + let mesh_world_from_local = mesh_functions::get_world_from_local(vertex_no_morph.instance_index); + +#ifdef SKINNED + var world_from_local = skinning::skin_model( + vertex.joint_indices, + vertex.joint_weights, + vertex_no_morph.instance_index + ); +#else // SKINNED + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
+ // See https://github.com/gfx-rs/naga/issues/2416 + var world_from_local = mesh_world_from_local; +#endif // SKINNED + + out.world_position = mesh_functions::mesh_position_local_to_world(world_from_local, vec4(vertex.position, 1.0)); + out.position = position_world_to_clip(out.world_position.xyz); +#ifdef UNCLIPPED_DEPTH_ORTHO_EMULATION + out.unclipped_depth = out.position.z; + out.position.z = min(out.position.z, 1.0); // Clamp depth to avoid clipping +#endif // UNCLIPPED_DEPTH_ORTHO_EMULATION + +#ifdef VERTEX_UVS_A + out.uv = vertex.uv; +#endif // VERTEX_UVS_A + +#ifdef VERTEX_UVS_B + out.uv_b = vertex.uv_b; +#endif // VERTEX_UVS_B + +#ifdef NORMAL_PREPASS_OR_DEFERRED_PREPASS +#ifdef VERTEX_NORMALS +#ifdef SKINNED + out.world_normal = skinning::skin_normals(world_from_local, vertex.normal); +#else // SKINNED + out.world_normal = mesh_functions::mesh_normal_local_to_world( + vertex.normal, + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. + // See https://github.com/gfx-rs/naga/issues/2416 + vertex_no_morph.instance_index + ); +#endif // SKINNED +#endif // VERTEX_NORMALS + +#ifdef VERTEX_TANGENTS + out.world_tangent = mesh_functions::mesh_tangent_local_to_world( + world_from_local, + vertex.tangent, + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. + // See https://github.com/gfx-rs/naga/issues/2416 + vertex_no_morph.instance_index + ); +#endif // VERTEX_TANGENTS +#endif // NORMAL_PREPASS_OR_DEFERRED_PREPASS + +#ifdef VERTEX_COLORS + out.color = vertex.color; +#endif + + // Compute the motion vector for TAA among other purposes. For this we need + // to know where the vertex was last frame. +#ifdef MOTION_VECTOR_PREPASS + + // Take morph targets into account. 
+#ifdef MORPH_TARGETS + +#ifdef HAS_PREVIOUS_MORPH + let prev_vertex = morph_prev_vertex(vertex_no_morph); +#else // HAS_PREVIOUS_MORPH + let prev_vertex = vertex_no_morph; +#endif // HAS_PREVIOUS_MORPH + +#else // MORPH_TARGETS + let prev_vertex = vertex_no_morph; +#endif // MORPH_TARGETS + + // Take skinning into account. +#ifdef SKINNED + +#ifdef HAS_PREVIOUS_SKIN + let prev_model = skinning::skin_prev_model( + prev_vertex.joint_indices, + prev_vertex.joint_weights, + vertex_no_morph.instance_index + ); +#else // HAS_PREVIOUS_SKIN + let prev_model = mesh_functions::get_previous_world_from_local(prev_vertex.instance_index); +#endif // HAS_PREVIOUS_SKIN + +#else // SKINNED + let prev_model = mesh_functions::get_previous_world_from_local(prev_vertex.instance_index); +#endif // SKINNED + + out.previous_world_position = mesh_functions::mesh_position_local_to_world( + prev_model, + vec4(prev_vertex.position, 1.0) + ); +#endif // MOTION_VECTOR_PREPASS + +#ifdef VERTEX_OUTPUT_INSTANCE_INDEX + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
+ // See https://github.com/gfx-rs/naga/issues/2416 + out.instance_index = vertex_no_morph.instance_index; +#endif + +#ifdef VISIBILITY_RANGE_DITHER + out.visibility_range_dither = mesh_functions::get_visibility_range_dither_level( + vertex_no_morph.instance_index, mesh_world_from_local[3]); +#endif // VISIBILITY_RANGE_DITHER + + return out; +} + +#ifdef PREPASS_FRAGMENT +@fragment +fn fragment(in: VertexOutput) -> FragmentOutput { + var out: FragmentOutput; + +#ifdef NORMAL_PREPASS + out.normal = vec4(in.world_normal * 0.5 + vec3(0.5), 1.0); +#endif + +#ifdef UNCLIPPED_DEPTH_ORTHO_EMULATION + out.frag_depth = in.unclipped_depth; +#endif // UNCLIPPED_DEPTH_ORTHO_EMULATION + +#ifdef MOTION_VECTOR_PREPASS + let clip_position_t = view.unjittered_clip_from_world * in.world_position; + let clip_position = clip_position_t.xy / clip_position_t.w; + let previous_clip_position_t = prepass_bindings::previous_view_uniforms.clip_from_world * in.previous_world_position; + let previous_clip_position = previous_clip_position_t.xy / previous_clip_position_t.w; + // These motion vectors are used as offsets to UV positions and are stored + // in the range -1,1 to allow offsetting from the one corner to the + // diagonally-opposite corner in UV coordinates, in either direction. + // A difference between diagonally-opposite corners of clip space is in the + // range -2,2, so this needs to be scaled by 0.5. And the V direction goes + // down where clip space y goes up, so y needs to be flipped. + out.motion_vector = (clip_position - previous_clip_position) * vec2(0.5, -0.5); +#endif // MOTION_VECTOR_PREPASS + +#ifdef DEFERRED_PREPASS + // There isn't any material info available for this default prepass shader so we are just writing  + // emissive magenta out to the deferred gbuffer to be rendered by the first deferred lighting pass layer. 
+ // This is here so if the default prepass fragment is used for deferred magenta will be rendered, and also + // as an example to show that a user could write to the deferred gbuffer if they were to start from this shader. + out.deferred = vec4(0u, bevy_pbr::rgb9e5::vec3_to_rgb9e5_(vec3(1.0, 0.0, 1.0)), 0u, 0u); + out.deferred_lighting_pass_id = 1u; +#endif + + return out; +} +#endif // PREPASS_FRAGMENT diff --git a/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.rs b/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.rs new file mode 100644 index 0000000..f3b9ca4 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.rs @@ -0,0 +1,75 @@ +use crate::render::prepass::ViewPrepassTextures; +use crate::render::render_resource::{ + binding_types::{ + texture_2d, texture_2d_multisampled, texture_depth_2d, texture_depth_2d_multisampled, + }, + BindGroupLayoutEntryBuilder, TextureAspect, TextureSampleType, TextureView, + TextureViewDescriptor, +}; +use bevy_utils::default; + +use crate::render::pbr::MeshPipelineViewLayoutKey; + +pub fn get_bind_group_layout_entries( + layout_key: MeshPipelineViewLayoutKey, +) -> [Option; 4] { + let mut entries: [Option; 4] = [None; 4]; + + let multisampled = layout_key.contains(MeshPipelineViewLayoutKey::MULTISAMPLED); + + if layout_key.contains(MeshPipelineViewLayoutKey::DEPTH_PREPASS) { + // Depth texture + entries[0] = if multisampled { + Some(texture_depth_2d_multisampled()) + } else { + Some(texture_depth_2d()) + }; + } + + if layout_key.contains(MeshPipelineViewLayoutKey::NORMAL_PREPASS) { + // Normal texture + entries[1] = if multisampled { + Some(texture_2d_multisampled(TextureSampleType::Float { + filterable: false, + })) + } else { + Some(texture_2d(TextureSampleType::Float { filterable: false })) + }; + } + + if layout_key.contains(MeshPipelineViewLayoutKey::MOTION_VECTOR_PREPASS) { + // Motion Vectors texture + entries[2] = if multisampled { + 
Some(texture_2d_multisampled(TextureSampleType::Float { + filterable: false, + })) + } else { + Some(texture_2d(TextureSampleType::Float { filterable: false })) + }; + } + + if layout_key.contains(MeshPipelineViewLayoutKey::DEFERRED_PREPASS) { + // Deferred texture + entries[3] = Some(texture_2d(TextureSampleType::Uint)); + } + + entries +} + +pub fn get_bindings(prepass_textures: Option<&ViewPrepassTextures>) -> [Option; 4] { + let depth_desc = TextureViewDescriptor { + label: Some("prepass_depth"), + aspect: TextureAspect::DepthOnly, + ..default() + }; + let depth_view = prepass_textures + .and_then(|x| x.depth.as_ref()) + .map(|texture| texture.texture.texture.create_view(&depth_desc)); + + [ + depth_view, + prepass_textures.and_then(|pt| pt.normal_view().cloned()), + prepass_textures.and_then(|pt| pt.motion_vectors_view().cloned()), + prepass_textures.and_then(|pt| pt.deferred_view().cloned()), + ] +} diff --git a/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.wgsl b/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.wgsl new file mode 100644 index 0000000..141f7d7 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/prepass_bindings.wgsl @@ -0,0 +1,13 @@ +#define_import_path bevy_pbr::prepass_bindings + +struct PreviousViewUniforms { + view_from_world: mat4x4, + clip_from_world: mat4x4, + clip_from_view: mat4x4, + world_from_clip: mat4x4, + view_from_clip: mat4x4, +} + +@group(0) @binding(2) var previous_view_uniforms: PreviousViewUniforms; + +// Material bindings will be in @group(2) diff --git a/crates/libmarathon/src/render/pbr/prepass/prepass_io.wgsl b/crates/libmarathon/src/render/pbr/prepass/prepass_io.wgsl new file mode 100644 index 0000000..c3c0e55 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/prepass_io.wgsl @@ -0,0 +1,100 @@ +#define_import_path bevy_pbr::prepass_io + +// Most of these attributes are not used in the default prepass fragment shader, but they are still needed so we can +// pass them to 
custom prepass shaders like pbr_prepass.wgsl. +struct Vertex { + @builtin(instance_index) instance_index: u32, + @location(0) position: vec3, + +#ifdef VERTEX_UVS_A + @location(1) uv: vec2, +#endif + +#ifdef VERTEX_UVS_B + @location(2) uv_b: vec2, +#endif + +#ifdef NORMAL_PREPASS_OR_DEFERRED_PREPASS +#ifdef VERTEX_NORMALS + @location(3) normal: vec3, +#endif +#ifdef VERTEX_TANGENTS + @location(4) tangent: vec4, +#endif +#endif // NORMAL_PREPASS_OR_DEFERRED_PREPASS + +#ifdef SKINNED + @location(5) joint_indices: vec4, + @location(6) joint_weights: vec4, +#endif + +#ifdef VERTEX_COLORS + @location(7) color: vec4, +#endif + +#ifdef MORPH_TARGETS + @builtin(vertex_index) index: u32, +#endif // MORPH_TARGETS +} + +struct VertexOutput { + // This is `clip position` when the struct is used as a vertex stage output + // and `frag coord` when used as a fragment stage input + @builtin(position) position: vec4, + +#ifdef VERTEX_UVS_A + @location(0) uv: vec2, +#endif + +#ifdef VERTEX_UVS_B + @location(1) uv_b: vec2, +#endif + +#ifdef NORMAL_PREPASS_OR_DEFERRED_PREPASS + @location(2) world_normal: vec3, +#ifdef VERTEX_TANGENTS + @location(3) world_tangent: vec4, +#endif +#endif // NORMAL_PREPASS_OR_DEFERRED_PREPASS + + @location(4) world_position: vec4, +#ifdef MOTION_VECTOR_PREPASS + @location(5) previous_world_position: vec4, +#endif + +#ifdef UNCLIPPED_DEPTH_ORTHO_EMULATION + @location(6) unclipped_depth: f32, +#endif // UNCLIPPED_DEPTH_ORTHO_EMULATION +#ifdef VERTEX_OUTPUT_INSTANCE_INDEX + @location(7) instance_index: u32, +#endif + +#ifdef VERTEX_COLORS + @location(8) color: vec4, +#endif + +#ifdef VISIBILITY_RANGE_DITHER + @location(9) @interpolate(flat) visibility_range_dither: i32, +#endif // VISIBILITY_RANGE_DITHER +} + +#ifdef PREPASS_FRAGMENT +struct FragmentOutput { +#ifdef NORMAL_PREPASS + @location(0) normal: vec4, +#endif + +#ifdef MOTION_VECTOR_PREPASS + @location(1) motion_vector: vec2, +#endif + +#ifdef DEFERRED_PREPASS + @location(2) deferred: vec4, + 
@location(3) deferred_lighting_pass_id: u32, +#endif + +#ifdef UNCLIPPED_DEPTH_ORTHO_EMULATION + @builtin(frag_depth) frag_depth: f32, +#endif // UNCLIPPED_DEPTH_ORTHO_EMULATION +} +#endif //PREPASS_FRAGMENT diff --git a/crates/libmarathon/src/render/pbr/prepass/prepass_utils.wgsl b/crates/libmarathon/src/render/pbr/prepass/prepass_utils.wgsl new file mode 100644 index 0000000..42f403c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/prepass/prepass_utils.wgsl @@ -0,0 +1,35 @@ +#define_import_path bevy_pbr::prepass_utils + +#import bevy_pbr::mesh_view_bindings as view_bindings + +#ifdef DEPTH_PREPASS +fn prepass_depth(frag_coord: vec4, sample_index: u32) -> f32 { +#ifdef MULTISAMPLED + return textureLoad(view_bindings::depth_prepass_texture, vec2(frag_coord.xy), i32(sample_index)); +#else // MULTISAMPLED + return textureLoad(view_bindings::depth_prepass_texture, vec2(frag_coord.xy), 0); +#endif // MULTISAMPLED +} +#endif // DEPTH_PREPASS + +#ifdef NORMAL_PREPASS +fn prepass_normal(frag_coord: vec4, sample_index: u32) -> vec3 { +#ifdef MULTISAMPLED + let normal_sample = textureLoad(view_bindings::normal_prepass_texture, vec2(frag_coord.xy), i32(sample_index)); +#else + let normal_sample = textureLoad(view_bindings::normal_prepass_texture, vec2(frag_coord.xy), 0); +#endif // MULTISAMPLED + return normalize(normal_sample.xyz * 2.0 - vec3(1.0)); +} +#endif // NORMAL_PREPASS + +#ifdef MOTION_VECTOR_PREPASS +fn prepass_motion_vector(frag_coord: vec4, sample_index: u32) -> vec2 { +#ifdef MULTISAMPLED + let motion_vector_sample = textureLoad(view_bindings::motion_vector_prepass_texture, vec2(frag_coord.xy), i32(sample_index)); +#else + let motion_vector_sample = textureLoad(view_bindings::motion_vector_prepass_texture, vec2(frag_coord.xy), 0); +#endif + return motion_vector_sample.rg; +} +#endif // MOTION_VECTOR_PREPASS diff --git a/crates/libmarathon/src/render/pbr/render/build_indirect_params.wgsl b/crates/libmarathon/src/render/pbr/render/build_indirect_params.wgsl 
new file mode 100644 index 0000000..5ca6d4c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/build_indirect_params.wgsl @@ -0,0 +1,142 @@ +// Builds GPU indirect draw parameters from metadata. +// +// This only runs when indirect drawing is enabled. It takes the output of +// `mesh_preprocess.wgsl` and creates indirect parameters for the GPU. +// +// This shader runs separately for indexed and non-indexed meshes. Unlike +// `mesh_preprocess.wgsl`, which runs one instance per mesh *instance*, one +// instance of this shader corresponds to a single *batch* which could contain +// arbitrarily many instances of a single mesh. + +#import bevy_pbr::mesh_preprocess_types::{ + IndirectBatchSet, + IndirectParametersIndexed, + IndirectParametersNonIndexed, + IndirectParametersCpuMetadata, + IndirectParametersGpuMetadata, + MeshInput +} + +// The data for each mesh that the CPU supplied to the GPU. +@group(0) @binding(0) var current_input: array; + +// Data that we use to generate the indirect parameters. +// +// The `mesh_preprocess.wgsl` shader emits these. +@group(0) @binding(1) var indirect_parameters_cpu_metadata: + array; + +@group(0) @binding(2) var indirect_parameters_gpu_metadata: + array; + +// Information about each batch set. +// +// A *batch set* is a set of meshes that might be multi-drawn together. +@group(0) @binding(3) var indirect_batch_sets: array; + +#ifdef INDEXED +// The buffer of indirect draw parameters that we generate, and that the GPU +// reads to issue the draws. +// +// This buffer is for indexed meshes. +@group(0) @binding(4) var indirect_parameters: + array; +#else // INDEXED +// The buffer of indirect draw parameters that we generate, and that the GPU +// reads to issue the draws. +// +// This buffer is for non-indexed meshes. 
+@group(0) @binding(4) var indirect_parameters: + array; +#endif // INDEXED + +@compute +@workgroup_size(64) +fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Figure out our instance index (i.e. batch index). If this thread doesn't + // correspond to any index, bail. + let instance_index = global_invocation_id.x; + if (instance_index >= arrayLength(&indirect_parameters_cpu_metadata)) { + return; + } + + // Unpack the metadata for this batch. + let base_output_index = indirect_parameters_cpu_metadata[instance_index].base_output_index; + let batch_set_index = indirect_parameters_cpu_metadata[instance_index].batch_set_index; + let mesh_index = indirect_parameters_gpu_metadata[instance_index].mesh_index; + + // If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed + // assignment of batches to slots in the indirect parameters buffer, so we + // can just use the instance index as the index of our indirect parameters. + let early_instance_count = + indirect_parameters_gpu_metadata[instance_index].early_instance_count; + let late_instance_count = indirect_parameters_gpu_metadata[instance_index].late_instance_count; + + // If in the early phase, we draw only the early meshes. If in the late + // phase, we draw only the late meshes. If in the main phase, draw all the + // meshes. +#ifdef EARLY_PHASE + let instance_count = early_instance_count; +#else // EARLY_PHASE +#ifdef LATE_PHASE + let instance_count = late_instance_count; +#else // LATE_PHASE + let instance_count = early_instance_count + late_instance_count; +#endif // LATE_PHASE +#endif // EARLY_PHASE + + var indirect_parameters_index = instance_index; + + // If the current hardware and driver support `multi_draw_indirect_count`, + // dynamically reserve an index for the indirect parameters we're to + // generate. +#ifdef MULTI_DRAW_INDIRECT_COUNT_SUPPORTED + // If this batch belongs to a batch set, then allocate space for the + // indirect commands in that batch set. 
+ if (batch_set_index != 0xffffffffu) { + // Bail out now if there are no instances. Note that we can only bail if + // we're in a batch set. That's because only batch sets are drawn using + // `multi_draw_indirect_count`. If we aren't using + // `multi_draw_indirect_count`, then we need to continue in order to + // zero out the instance count; otherwise, it'll have garbage data in + // it. + if (instance_count == 0u) { + return; + } + + let indirect_parameters_base = + indirect_batch_sets[batch_set_index].indirect_parameters_base; + let indirect_parameters_offset = + atomicAdd(&indirect_batch_sets[batch_set_index].indirect_parameters_count, 1u); + + indirect_parameters_index = indirect_parameters_base + indirect_parameters_offset; + } +#endif // MULTI_DRAW_INDIRECT_COUNT_SUPPORTED + + // Build up the indirect parameters. The structures for indexed and + // non-indexed meshes are slightly different. + + indirect_parameters[indirect_parameters_index].instance_count = instance_count; + +#ifdef LATE_PHASE + // The late mesh instances are stored after the early mesh instances, so we + // offset the output index by the number of early mesh instances. 
+ indirect_parameters[indirect_parameters_index].first_instance = + base_output_index + early_instance_count; +#else // LATE_PHASE + indirect_parameters[indirect_parameters_index].first_instance = base_output_index; +#endif // LATE_PHASE + + indirect_parameters[indirect_parameters_index].base_vertex = + current_input[mesh_index].first_vertex_index; + +#ifdef INDEXED + indirect_parameters[indirect_parameters_index].index_count = + current_input[mesh_index].index_count; + indirect_parameters[indirect_parameters_index].first_index = + current_input[mesh_index].first_index_index; +#else // INDEXED + indirect_parameters[indirect_parameters_index].vertex_count = + current_input[mesh_index].index_count; +#endif // INDEXED +} diff --git a/crates/libmarathon/src/render/pbr/render/clustered_forward.wgsl b/crates/libmarathon/src/render/pbr/render/clustered_forward.wgsl new file mode 100644 index 0000000..aa3fb4f --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/clustered_forward.wgsl @@ -0,0 +1,193 @@ +#define_import_path bevy_pbr::clustered_forward + +#import bevy_pbr::{ + mesh_view_bindings as bindings, + utils::rand_f, +} + +#import bevy_render::{ + color_operations::hsv_to_rgb, + maths::PI_2, +} + +// Offsets within the `cluster_offsets_and_counts` buffer for a single cluster. +// +// These offsets must be monotonically nondecreasing. That is, indices are +// always sorted into the following order: point lights, spot lights, reflection +// probes, irradiance volumes. +struct ClusterableObjectIndexRanges { + // The offset of the index of the first point light. + first_point_light_index_offset: u32, + // The offset of the index of the first spot light, which also terminates + // the list of point lights. + first_spot_light_index_offset: u32, + // The offset of the index of the first reflection probe, which also + // terminates the list of spot lights. 
+ first_reflection_probe_index_offset: u32, + // The offset of the index of the first irradiance volumes, which also + // terminates the list of reflection probes. + first_irradiance_volume_index_offset: u32, + first_decal_offset: u32, + // One past the offset of the index of the final clusterable object for this + // cluster. + last_clusterable_object_index_offset: u32, +} + +// NOTE: Keep in sync with bevy_pbr/src/light.rs +fn view_z_to_z_slice(view_z: f32, is_orthographic: bool) -> u32 { + var z_slice: u32 = 0u; + if is_orthographic { + // NOTE: view_z is correct in the orthographic case + z_slice = u32(floor((view_z - bindings::lights.cluster_factors.z) * bindings::lights.cluster_factors.w)); + } else { + // NOTE: had to use -view_z to make it positive else log(negative) is nan + z_slice = u32(log(-view_z) * bindings::lights.cluster_factors.z - bindings::lights.cluster_factors.w + 1.0); + } + // NOTE: We use min as we may limit the far z plane used for clustering to be closer than + // the furthest thing being drawn. This means that we need to limit to the maximum cluster. + return min(z_slice, bindings::lights.cluster_dimensions.z - 1u); +} + +fn fragment_cluster_index(frag_coord: vec2, view_z: f32, is_orthographic: bool) -> u32 { + let xy = vec2(floor((frag_coord - bindings::view.viewport.xy) * bindings::lights.cluster_factors.xy)); + let z_slice = view_z_to_z_slice(view_z, is_orthographic); + // NOTE: Restricting cluster index to avoid undefined behavior when accessing uniform buffer + // arrays based on the cluster index. + return min( + (xy.y * bindings::lights.cluster_dimensions.x + xy.x) * bindings::lights.cluster_dimensions.z + z_slice, + bindings::lights.cluster_dimensions.w - 1u + ); +} + +// this must match CLUSTER_COUNT_SIZE in light.rs +const CLUSTER_COUNT_SIZE = 9u; + +// Returns the indices of clusterable objects belonging to the given cluster. 
+// +// Note that if fewer than 3 SSBO bindings are available (in WebGL 2, +// primarily), light probes aren't clustered, and therefore both light probe +// index ranges will be empty. +fn unpack_clusterable_object_index_ranges(cluster_index: u32) -> ClusterableObjectIndexRanges { +#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3 + + let offset_and_counts_a = bindings::cluster_offsets_and_counts.data[cluster_index][0]; + let offset_and_counts_b = bindings::cluster_offsets_and_counts.data[cluster_index][1]; + + // Sum up the counts to produce the range brackets. + // + // We could have stored the range brackets in `cluster_offsets_and_counts` + // directly, but doing it this way makes the logic in this path more + // consistent with the WebGL 2 path below. + let point_light_offset = offset_and_counts_a.x; + let spot_light_offset = point_light_offset + offset_and_counts_a.y; + let reflection_probe_offset = spot_light_offset + offset_and_counts_a.z; + let irradiance_volume_offset = reflection_probe_offset + offset_and_counts_a.w; + let decal_offset = irradiance_volume_offset + offset_and_counts_b.x; + let last_clusterable_offset = decal_offset + offset_and_counts_b.y; + return ClusterableObjectIndexRanges( + point_light_offset, + spot_light_offset, + reflection_probe_offset, + irradiance_volume_offset, + decal_offset, + last_clusterable_offset + ); + +#else // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3 + + let raw_offset_and_counts = bindings::cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)]; + // [ 31 .. 18 | 17 .. 9 | 8 .. 
0 ] + // [ offset | point light count | spot light count ] + let offset_and_counts = vec3( + (raw_offset_and_counts >> (CLUSTER_COUNT_SIZE * 2u)) & ((1u << (32u - (CLUSTER_COUNT_SIZE * 2u))) - 1u), + (raw_offset_and_counts >> CLUSTER_COUNT_SIZE) & ((1u << CLUSTER_COUNT_SIZE) - 1u), + raw_offset_and_counts & ((1u << CLUSTER_COUNT_SIZE) - 1u), + ); + + // We don't cluster reflection probes or irradiance volumes on this + // platform, as there's no room in the UBO. Thus, those offset ranges + // (corresponding to `offset_d` and `offset_e` above) are empty and are + // simply copies of `offset_c`. + + let offset_a = offset_and_counts.x; + let offset_b = offset_a + offset_and_counts.y; + let offset_c = offset_b + offset_and_counts.z; + + return ClusterableObjectIndexRanges(offset_a, offset_b, offset_c, offset_c, offset_c, offset_c); + +#endif // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3 +} + +// Returns the index of the clusterable object at the given offset. +// +// Note that, in the case of a light probe, the index refers to an element in +// one of the two `light_probes` sublists, not the `clusterable_objects` list. 
+fn get_clusterable_object_id(index: u32) -> u32 { +#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3 + return bindings::clusterable_object_index_lists.data[index]; +#else + // The index is correct but in clusterable_object_index_lists we pack 4 u8s into a u32 + // This means the index into clusterable_object_index_lists is index / 4 + let indices = bindings::clusterable_object_index_lists.data[index >> 4u][(index >> 2u) & + ((1u << 2u) - 1u)]; + // And index % 4 gives the sub-index of the u8 within the u32 so we shift by 8 * sub-index + return (indices >> (8u * (index & ((1u << 2u) - 1u)))) & ((1u << 8u) - 1u); +#endif +} + +fn cluster_debug_visualization( + input_color: vec4, + view_z: f32, + is_orthographic: bool, + clusterable_object_index_ranges: ClusterableObjectIndexRanges, + cluster_index: u32, +) -> vec4 { + var output_color = input_color; + + // Cluster allocation debug (using 'over' alpha blending) +#ifdef CLUSTERED_FORWARD_DEBUG_Z_SLICES + // NOTE: This debug mode visualizes the z-slices + let cluster_overlay_alpha = 0.1; + var z_slice: u32 = view_z_to_z_slice(view_z, is_orthographic); + // A hack to make the colors alternate a bit more + if (z_slice & 1u) == 1u { + z_slice = z_slice + bindings::lights.cluster_dimensions.z / 2u; + } + let slice_color_hsv = vec3( + f32(z_slice) / f32(bindings::lights.cluster_dimensions.z + 1u) * PI_2, + 1.0, + 0.5 + ); + let slice_color = hsv_to_rgb(slice_color_hsv); + output_color = vec4( + (1.0 - cluster_overlay_alpha) * output_color.rgb + cluster_overlay_alpha * slice_color, + output_color.a + ); +#endif // CLUSTERED_FORWARD_DEBUG_Z_SLICES +#ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COMPLEXITY + // NOTE: This debug mode visualizes the number of clusterable objects within + // the cluster that contains the fragment. It shows a sort of cluster + // complexity measure. 
+ let cluster_overlay_alpha = 0.1; + let max_complexity_per_cluster = 64.0; + let object_count = clusterable_object_index_ranges.first_reflection_probe_index_offset - + clusterable_object_index_ranges.first_point_light_index_offset; + output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r + cluster_overlay_alpha * + smoothstep(0.0, max_complexity_per_cluster, f32(object_count)); + output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g + cluster_overlay_alpha * + (1.0 - smoothstep(0.0, max_complexity_per_cluster, f32(object_count))); +#endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_COMPLEXITY +#ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY + // NOTE: Visualizes the cluster to which the fragment belongs + let cluster_overlay_alpha = 0.1; + var rng = cluster_index; + let cluster_color_hsv = vec3(rand_f(&rng) * PI_2, 1.0, 0.5); + let cluster_color = hsv_to_rgb(cluster_color_hsv); + output_color = vec4( + (1.0 - cluster_overlay_alpha) * output_color.rgb + cluster_overlay_alpha * cluster_color, + output_color.a + ); +#endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY + + return output_color; +} diff --git a/crates/libmarathon/src/render/pbr/render/fog.rs b/crates/libmarathon/src/render/pbr/render/fog.rs new file mode 100644 index 0000000..d8e31cd --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/fog.rs @@ -0,0 +1,144 @@ +use bevy_app::{App, Plugin}; +use bevy_color::{ColorToComponents, LinearRgba}; +use bevy_ecs::prelude::*; +use bevy_math::{Vec3, Vec4}; +use crate::render::{ + extract_component::ExtractComponentPlugin, + render_resource::{DynamicUniformBuffer, ShaderType}, + renderer::{RenderDevice, RenderQueue}, + view::ExtractedView, + Render, RenderApp, RenderSystems, +}; +use bevy_shader::load_shader_library; + +use crate::render::pbr::{DistanceFog, FogFalloff}; + +/// The GPU-side representation of the fog configuration that's sent as a uniform to the shader +#[derive(Copy, Clone, ShaderType, Default, Debug)] +pub struct GpuFog { + /// Fog 
color + base_color: Vec4, + /// The color used for the fog where the view direction aligns with directional lights + directional_light_color: Vec4, + /// Allocated differently depending on fog mode. + /// See `mesh_view_types.wgsl` for a detailed explanation + be: Vec3, + /// The exponent applied to the directional light alignment calculation + directional_light_exponent: f32, + /// Allocated differently depending on fog mode. + /// See `mesh_view_types.wgsl` for a detailed explanation + bi: Vec3, + /// Unsigned int representation of the active fog falloff mode + mode: u32, +} + +// Important: These must be kept in sync with `mesh_view_types.wgsl` +const GPU_FOG_MODE_OFF: u32 = 0; +const GPU_FOG_MODE_LINEAR: u32 = 1; +const GPU_FOG_MODE_EXPONENTIAL: u32 = 2; +const GPU_FOG_MODE_EXPONENTIAL_SQUARED: u32 = 3; +const GPU_FOG_MODE_ATMOSPHERIC: u32 = 4; + +/// Metadata for fog +#[derive(Default, Resource)] +pub struct FogMeta { + pub gpu_fogs: DynamicUniformBuffer, +} + +/// Prepares fog metadata and writes the fog-related uniform buffers to the GPU +pub fn prepare_fog( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mut fog_meta: ResMut, + views: Query<(Entity, Option<&DistanceFog>), With>, +) { + let views_iter = views.iter(); + let view_count = views_iter.len(); + let Some(mut writer) = fog_meta + .gpu_fogs + .get_writer(view_count, &render_device, &render_queue) + else { + return; + }; + for (entity, fog) in views_iter { + let gpu_fog = if let Some(fog) = fog { + match &fog.falloff { + FogFalloff::Linear { start, end } => GpuFog { + mode: GPU_FOG_MODE_LINEAR, + base_color: LinearRgba::from(fog.color).to_vec4(), + directional_light_color: LinearRgba::from(fog.directional_light_color) + .to_vec4(), + directional_light_exponent: fog.directional_light_exponent, + be: Vec3::new(*start, *end, 0.0), + ..Default::default() + }, + FogFalloff::Exponential { density } => GpuFog { + mode: GPU_FOG_MODE_EXPONENTIAL, + base_color: 
LinearRgba::from(fog.color).to_vec4(), + directional_light_color: LinearRgba::from(fog.directional_light_color) + .to_vec4(), + directional_light_exponent: fog.directional_light_exponent, + be: Vec3::new(*density, 0.0, 0.0), + ..Default::default() + }, + FogFalloff::ExponentialSquared { density } => GpuFog { + mode: GPU_FOG_MODE_EXPONENTIAL_SQUARED, + base_color: LinearRgba::from(fog.color).to_vec4(), + directional_light_color: LinearRgba::from(fog.directional_light_color) + .to_vec4(), + directional_light_exponent: fog.directional_light_exponent, + be: Vec3::new(*density, 0.0, 0.0), + ..Default::default() + }, + FogFalloff::Atmospheric { + extinction, + inscattering, + } => GpuFog { + mode: GPU_FOG_MODE_ATMOSPHERIC, + base_color: LinearRgba::from(fog.color).to_vec4(), + directional_light_color: LinearRgba::from(fog.directional_light_color) + .to_vec4(), + directional_light_exponent: fog.directional_light_exponent, + be: *extinction, + bi: *inscattering, + }, + } + } else { + // If no fog is added to a camera, by default it's off + GpuFog { + mode: GPU_FOG_MODE_OFF, + ..Default::default() + } + }; + + // This is later read by `SetMeshViewBindGroup` + commands.entity(entity).insert(ViewFogUniformOffset { + offset: writer.write(&gpu_fog), + }); + } +} + +/// Inserted on each `Entity` with an `ExtractedView` to keep track of its offset +/// in the `gpu_fogs` `DynamicUniformBuffer` within `FogMeta` +#[derive(Component)] +pub struct ViewFogUniformOffset { + pub offset: u32, +} + +/// A plugin that consolidates fog extraction, preparation and related resources/assets +pub struct FogPlugin; + +impl Plugin for FogPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "fog.wgsl"); + + app.add_plugins(ExtractComponentPlugin::::default()); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .add_systems(Render, prepare_fog.in_set(RenderSystems::PrepareResources)); + } + } +} diff --git 
a/crates/libmarathon/src/render/pbr/render/fog.wgsl b/crates/libmarathon/src/render/pbr/render/fog.wgsl new file mode 100644 index 0000000..a9e28ae --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/fog.wgsl @@ -0,0 +1,79 @@ +#define_import_path bevy_pbr::fog + +#import bevy_pbr::{ + mesh_view_bindings::fog, + mesh_view_types::Fog, +} + +// Fog formulas adapted from: +// https://learn.microsoft.com/en-us/windows/win32/direct3d9/fog-formulas +// https://catlikecoding.com/unity/tutorials/rendering/part-14/ +// https://iquilezles.org/articles/fog/ (Atmospheric Fog and Scattering) + +fn scattering_adjusted_fog_color( + fog_params: Fog, + scattering: vec3, +) -> vec4 { + if (fog_params.directional_light_color.a > 0.0) { + return vec4( + fog_params.base_color.rgb + + scattering * fog_params.directional_light_color.rgb * fog_params.directional_light_color.a, + fog_params.base_color.a, + ); + } else { + return fog_params.base_color; + } +} + +fn linear_fog( + fog_params: Fog, + input_color: vec4, + distance: f32, + scattering: vec3, +) -> vec4 { + var fog_color = scattering_adjusted_fog_color(fog_params, scattering); + let start = fog_params.be.x; + let end = fog_params.be.y; + fog_color.a *= 1.0 - clamp((end - distance) / (end - start), 0.0, 1.0); + return vec4(mix(input_color.rgb, fog_color.rgb, fog_color.a), input_color.a); +} + +fn exponential_fog( + fog_params: Fog, + input_color: vec4, + distance: f32, + scattering: vec3, +) -> vec4 { + var fog_color = scattering_adjusted_fog_color(fog_params, scattering); + let density = fog_params.be.x; + fog_color.a *= 1.0 - 1.0 / exp(distance * density); + return vec4(mix(input_color.rgb, fog_color.rgb, fog_color.a), input_color.a); +} + +fn exponential_squared_fog( + fog_params: Fog, + input_color: vec4, + distance: f32, + scattering: vec3, +) -> vec4 { + var fog_color = scattering_adjusted_fog_color(fog_params, scattering); + let distance_times_density = distance * fog_params.be.x; + fog_color.a *= 1.0 - 1.0 / 
exp(distance_times_density * distance_times_density); + return vec4(mix(input_color.rgb, fog_color.rgb, fog_color.a), input_color.a); +} + +fn atmospheric_fog( + fog_params: Fog, + input_color: vec4, + distance: f32, + scattering: vec3, +) -> vec4 { + var fog_color = scattering_adjusted_fog_color(fog_params, scattering); + let extinction_factor = 1.0 - 1.0 / exp(distance * fog_params.be); + let inscattering_factor = 1.0 - 1.0 / exp(distance * fog_params.bi); + return vec4( + input_color.rgb * (1.0 - extinction_factor * fog_color.a) + + fog_color.rgb * inscattering_factor * fog_color.a, + input_color.a + ); +} diff --git a/crates/libmarathon/src/render/pbr/render/forward_io.wgsl b/crates/libmarathon/src/render/pbr/render/forward_io.wgsl new file mode 100644 index 0000000..99f2ecc --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/forward_io.wgsl @@ -0,0 +1,60 @@ +#define_import_path bevy_pbr::forward_io + +struct Vertex { + @builtin(instance_index) instance_index: u32, +#ifdef VERTEX_POSITIONS + @location(0) position: vec3, +#endif +#ifdef VERTEX_NORMALS + @location(1) normal: vec3, +#endif +#ifdef VERTEX_UVS_A + @location(2) uv: vec2, +#endif +#ifdef VERTEX_UVS_B + @location(3) uv_b: vec2, +#endif +#ifdef VERTEX_TANGENTS + @location(4) tangent: vec4, +#endif +#ifdef VERTEX_COLORS + @location(5) color: vec4, +#endif +#ifdef SKINNED + @location(6) joint_indices: vec4, + @location(7) joint_weights: vec4, +#endif +#ifdef MORPH_TARGETS + @builtin(vertex_index) index: u32, +#endif +}; + +struct VertexOutput { + // This is `clip position` when the struct is used as a vertex stage output + // and `frag coord` when used as a fragment stage input + @builtin(position) position: vec4, + @location(0) world_position: vec4, + @location(1) world_normal: vec3, +#ifdef VERTEX_UVS_A + @location(2) uv: vec2, +#endif +#ifdef VERTEX_UVS_B + @location(3) uv_b: vec2, +#endif +#ifdef VERTEX_TANGENTS + @location(4) world_tangent: vec4, +#endif +#ifdef VERTEX_COLORS + @location(5) 
color: vec4, +#endif +#ifdef VERTEX_OUTPUT_INSTANCE_INDEX + @location(6) @interpolate(flat) instance_index: u32, +#endif +#ifdef VISIBILITY_RANGE_DITHER + @location(7) @interpolate(flat) visibility_range_dither: i32, +#endif +} + +struct FragmentOutput { + @location(0) color: vec4, +} diff --git a/crates/libmarathon/src/render/pbr/render/gpu_preprocess.rs b/crates/libmarathon/src/render/pbr/render/gpu_preprocess.rs new file mode 100644 index 0000000..3cbaa4a --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/gpu_preprocess.rs @@ -0,0 +1,2704 @@ +//! GPU mesh preprocessing. +//! +//! This is an optional pass that uses a compute shader to reduce the amount of +//! data that has to be transferred from the CPU to the GPU. When enabled, +//! instead of transferring [`MeshUniform`]s to the GPU, we transfer the smaller +//! [`MeshInputUniform`]s instead and use the GPU to calculate the remaining +//! derived fields in [`MeshUniform`]. + +use core::num::{NonZero, NonZeroU64}; + +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, load_embedded_asset, Handle}; +use crate::render::{ + core_3d::graph::{Core3d, Node3d}, + experimental::mip_generation::ViewDepthPyramid, + prepass::{DepthPrepass, PreviousViewData, PreviousViewUniformOffset, PreviousViewUniforms}, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + prelude::resource_exists, + query::{Has, Or, QueryState, With, Without}, + resource::Resource, + schedule::IntoScheduleConfigs as _, + system::{lifetimeless::Read, Commands, Query, Res, ResMut}, + world::{FromWorld, World}, +}; +use crate::render::{ + batching::gpu_preprocessing::{ + BatchedInstanceBuffers, GpuOcclusionCullingWorkItemBuffers, GpuPreprocessingMode, + GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, + IndirectParametersCpuMetadata, IndirectParametersGpuMetadata, IndirectParametersIndexed, + IndirectParametersNonIndexed, LatePreprocessWorkItemIndirectParameters, 
PreprocessWorkItem, + PreprocessWorkItemBuffers, UntypedPhaseBatchedInstanceBuffers, + UntypedPhaseIndirectParametersBuffers, + }, + diagnostic::RecordDiagnostics, + experimental::occlusion_culling::OcclusionCulling, + render_graph::{Node, NodeRunError, RenderGraphContext, RenderGraphExt}, + render_resource::{ + binding_types::{storage_buffer, storage_buffer_read_only, texture_2d, uniform_buffer}, + BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding, + CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor, + DynamicBindGroupLayoutEntries, PipelineCache, PushConstantRange, RawBufferVec, + ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines, + TextureSampleType, UninitBufferVec, + }, + renderer::{RenderContext, RenderDevice, RenderQueue}, + settings::WgpuFeatures, + view::{ExtractedView, NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms}, + Render, RenderApp, RenderSystems, +}; +use bevy_shader::Shader; +use bevy_utils::{default, TypeIdMap}; +use bitflags::bitflags; +use smallvec::{smallvec, SmallVec}; +use tracing::warn; + +use crate::render::pbr::{ + graph::NodePbr, MeshCullingData, MeshCullingDataBuffer, MeshInputUniform, MeshUniform, +}; + +use super::{ShadowView, ViewLightEntities}; + +/// The GPU workgroup size. +const WORKGROUP_SIZE: usize = 64; + +/// A plugin that builds mesh uniforms on GPU. +/// +/// This will only be added if the platform supports compute shaders (e.g. not +/// on WebGL 2). +pub struct GpuMeshPreprocessPlugin { + /// Whether we're building [`MeshUniform`]s on GPU. + /// + /// This requires compute shader support and so will be forcibly disabled if + /// the platform doesn't support those. + pub use_gpu_instance_buffer_builder: bool, +} + +/// The render node that clears out the GPU-side indirect metadata buffers. +/// +/// This is only used when indirect drawing is enabled. 
+#[derive(Default)] +pub struct ClearIndirectParametersMetadataNode; + +/// The render node for the first mesh preprocessing pass. +/// +/// This pass runs a compute shader to cull meshes outside the view frustum (if +/// that wasn't done by the CPU), cull meshes that weren't visible last frame +/// (if occlusion culling is on), transform them, and, if indirect drawing is +/// on, populate indirect draw parameter metadata for the subsequent +/// [`EarlyPrepassBuildIndirectParametersNode`]. +pub struct EarlyGpuPreprocessNode { + view_query: QueryState< + ( + Read, + Option>, + Option>, + Has, + Has, + ), + Without, + >, + main_view_query: QueryState>, +} + +/// The render node for the second mesh preprocessing pass. +/// +/// This pass runs a compute shader to cull meshes outside the view frustum (if +/// that wasn't done by the CPU), cull meshes that were neither visible last +/// frame nor visible this frame (if occlusion culling is on), transform them, +/// and, if indirect drawing is on, populate the indirect draw parameter +/// metadata for the subsequent [`LatePrepassBuildIndirectParametersNode`]. +pub struct LateGpuPreprocessNode { + view_query: QueryState< + ( + Read, + Read, + Read, + ), + ( + Without, + Without, + With, + With, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws the meshes visible from the previous frame. +/// +/// This node runs a compute shader on the output of the +/// [`EarlyGpuPreprocessNode`] in order to transform the +/// [`IndirectParametersGpuMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. 
+pub struct EarlyPrepassBuildIndirectParametersNode { + view_query: QueryState< + Read, + ( + Without, + Without, + Or<(With, With)>, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws the meshes that are potentially visible on this frame but weren't +/// visible on the previous frame. +/// +/// This node runs a compute shader on the output of the +/// [`LateGpuPreprocessNode`] in order to transform the +/// [`IndirectParametersGpuMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. +pub struct LatePrepassBuildIndirectParametersNode { + view_query: QueryState< + Read, + ( + Without, + Without, + Or<(With, With)>, + With, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws all meshes, both those that are newly-visible on this frame and those +/// that were visible last frame. +/// +/// This node runs a compute shader on the output of the +/// [`EarlyGpuPreprocessNode`] and [`LateGpuPreprocessNode`] in order to +/// transform the [`IndirectParametersGpuMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. +pub struct MainBuildIndirectParametersNode { + view_query: QueryState< + Read, + (Without, Without), + >, +} + +/// The compute shader pipelines for the GPU mesh preprocessing and indirect +/// parameter building passes. +#[derive(Resource)] +pub struct PreprocessPipelines { + /// The pipeline used for CPU culling. This pipeline doesn't populate + /// indirect parameter metadata. + pub direct_preprocess: PreprocessPipeline, + /// The pipeline used for mesh preprocessing when GPU frustum culling is in + /// use, but occlusion culling isn't. + /// + /// This pipeline populates indirect parameter metadata. + pub gpu_frustum_culling_preprocess: PreprocessPipeline, + /// The pipeline used for the first phase of occlusion culling. 
+ /// + /// This pipeline culls, transforms meshes, and populates indirect parameter + /// metadata. + pub early_gpu_occlusion_culling_preprocess: PreprocessPipeline, + /// The pipeline used for the second phase of occlusion culling. + /// + /// This pipeline culls, transforms meshes, and populates indirect parameter + /// metadata. + pub late_gpu_occlusion_culling_preprocess: PreprocessPipeline, + /// The pipeline that builds indirect draw parameters for indexed meshes, + /// when frustum culling is enabled but occlusion culling *isn't* enabled. + pub gpu_frustum_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline, + /// The pipeline that builds indirect draw parameters for non-indexed + /// meshes, when frustum culling is enabled but occlusion culling *isn't* + /// enabled. + pub gpu_frustum_culling_build_non_indexed_indirect_params: BuildIndirectParametersPipeline, + /// Compute shader pipelines for the early prepass phase that draws meshes + /// visible in the previous frame. + pub early_phase: PreprocessPhasePipelines, + /// Compute shader pipelines for the late prepass phase that draws meshes + /// that weren't visible in the previous frame, but became visible this + /// frame. + pub late_phase: PreprocessPhasePipelines, + /// Compute shader pipelines for the main color phase. + pub main_phase: PreprocessPhasePipelines, +} + +/// Compute shader pipelines for a specific phase: early, late, or main. +/// +/// The distinction between these phases is relevant for occlusion culling. +#[derive(Clone)] +pub struct PreprocessPhasePipelines { + /// The pipeline that resets the indirect draw counts used in + /// `multi_draw_indirect_count` to 0 in preparation for a new pass. + pub reset_indirect_batch_sets: ResetIndirectBatchSetsPipeline, + /// The pipeline used for indexed indirect parameter building. + /// + /// This pipeline converts indirect parameter metadata into indexed indirect + /// parameters. 
+ pub gpu_occlusion_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline, + /// The pipeline used for non-indexed indirect parameter building. + /// + /// This pipeline converts indirect parameter metadata into non-indexed + /// indirect parameters. + pub gpu_occlusion_culling_build_non_indexed_indirect_params: BuildIndirectParametersPipeline, +} + +/// The pipeline for the GPU mesh preprocessing shader. +pub struct PreprocessPipeline { + /// The bind group layout for the compute shader. + pub bind_group_layout: BindGroupLayout, + /// The shader asset handle. + pub shader: Handle, + /// The pipeline ID for the compute shader. + /// + /// This gets filled in `prepare_preprocess_pipelines`. + pub pipeline_id: Option, +} + +/// The pipeline for the batch set count reset shader. +/// +/// This shader resets the indirect batch set count to 0 for each view. It runs +/// in between every phase (early, late, and main). +#[derive(Clone)] +pub struct ResetIndirectBatchSetsPipeline { + /// The bind group layout for the compute shader. + pub bind_group_layout: BindGroupLayout, + /// The shader asset handle. + pub shader: Handle, + /// The pipeline ID for the compute shader. + /// + /// This gets filled in `prepare_preprocess_pipelines`. + pub pipeline_id: Option, +} + +/// The pipeline for the indirect parameter building shader. +#[derive(Clone)] +pub struct BuildIndirectParametersPipeline { + /// The bind group layout for the compute shader. + pub bind_group_layout: BindGroupLayout, + /// The shader asset handle. + pub shader: Handle, + /// The pipeline ID for the compute shader. + /// + /// This gets filled in `prepare_preprocess_pipelines`. + pub pipeline_id: Option, +} + +bitflags! { + /// Specifies variants of the mesh preprocessing shader. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct PreprocessPipelineKey: u8 { + /// Whether GPU frustum culling is in use. + /// + /// This `#define`'s `FRUSTUM_CULLING` in the shader. 
+ const FRUSTUM_CULLING = 1; + /// Whether GPU two-phase occlusion culling is in use. + /// + /// This `#define`'s `OCCLUSION_CULLING` in the shader. + const OCCLUSION_CULLING = 2; + /// Whether this is the early phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `EARLY_PHASE` in the shader. + const EARLY_PHASE = 4; + } + + /// Specifies variants of the indirect parameter building shader. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct BuildIndirectParametersPipelineKey: u8 { + /// Whether the indirect parameter building shader is processing indexed + /// meshes (those that have index buffers). + /// + /// This defines `INDEXED` in the shader. + const INDEXED = 1; + /// Whether the GPU and driver supports `multi_draw_indirect_count`. + /// + /// This defines `MULTI_DRAW_INDIRECT_COUNT_SUPPORTED` in the shader. + const MULTI_DRAW_INDIRECT_COUNT_SUPPORTED = 2; + /// Whether GPU two-phase occlusion culling is in use. + /// + /// This `#define`'s `OCCLUSION_CULLING` in the shader. + const OCCLUSION_CULLING = 4; + /// Whether this is the early phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `EARLY_PHASE` in the shader. + const EARLY_PHASE = 8; + /// Whether this is the late phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `LATE_PHASE` in the shader. + const LATE_PHASE = 16; + /// Whether this is the phase that runs after the early and late phases, + /// and right before the main drawing logic, when GPU two-phase + /// occlusion culling is in use. + /// + /// This `#define`'s `MAIN_PHASE` in the shader. + const MAIN_PHASE = 32; + } +} + +/// The compute shader bind group for the mesh preprocessing pass for each +/// render phase. +/// +/// This goes on the view. It maps the [`core::any::TypeId`] of a render phase +/// (e.g. [`bevy_core_pipeline::core_3d::Opaque3d`]) to the +/// [`PhasePreprocessBindGroups`] for that phase. 
+#[derive(Component, Clone, Deref, DerefMut)] +pub struct PreprocessBindGroups(pub TypeIdMap); + +/// The compute shader bind group for the mesh preprocessing step for a single +/// render phase on a single view. +#[derive(Clone)] +pub enum PhasePreprocessBindGroups { + /// The bind group used for the single invocation of the compute shader when + /// indirect drawing is *not* being used. + /// + /// Because direct drawing doesn't require splitting the meshes into indexed + /// and non-indexed meshes, there's only one bind group in this case. + Direct(BindGroup), + + /// The bind groups used for the compute shader when indirect drawing is + /// being used, but occlusion culling isn't being used. + /// + /// Because indirect drawing requires splitting the meshes into indexed and + /// non-indexed meshes, there are two bind groups here. + IndirectFrustumCulling { + /// The bind group for indexed meshes. + indexed: Option, + /// The bind group for non-indexed meshes. + non_indexed: Option, + }, + + /// The bind groups used for the compute shader when indirect drawing is + /// being used, but occlusion culling isn't being used. + /// + /// Because indirect drawing requires splitting the meshes into indexed and + /// non-indexed meshes, and because occlusion culling requires splitting + /// this phase into early and late versions, there are four bind groups + /// here. + IndirectOcclusionCulling { + /// The bind group for indexed meshes during the early mesh + /// preprocessing phase. + early_indexed: Option, + /// The bind group for non-indexed meshes during the early mesh + /// preprocessing phase. + early_non_indexed: Option, + /// The bind group for indexed meshes during the late mesh preprocessing + /// phase. + late_indexed: Option, + /// The bind group for non-indexed meshes during the late mesh + /// preprocessing phase. 
+ late_non_indexed: Option, + }, +} + +/// The bind groups for the compute shaders that reset indirect draw counts and +/// build indirect parameters. +/// +/// There's one set of bind group for each phase. Phases are keyed off their +/// [`core::any::TypeId`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct BuildIndirectParametersBindGroups(pub TypeIdMap); + +impl BuildIndirectParametersBindGroups { + /// Creates a new, empty [`BuildIndirectParametersBindGroups`] table. + pub fn new() -> BuildIndirectParametersBindGroups { + Self::default() + } +} + +/// The per-phase set of bind groups for the compute shaders that reset indirect +/// draw counts and build indirect parameters. +pub struct PhaseBuildIndirectParametersBindGroups { + /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for + /// indexed meshes. + reset_indexed_indirect_batch_sets: Option, + /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for + /// non-indexed meshes. + reset_non_indexed_indirect_batch_sets: Option, + /// The bind group for the `build_indirect_params.wgsl` shader, for indexed + /// meshes. + build_indexed_indirect: Option, + /// The bind group for the `build_indirect_params.wgsl` shader, for + /// non-indexed meshes. + build_non_indexed_indirect: Option, +} + +/// Stops the `GpuPreprocessNode` attempting to generate the buffer for this view +/// useful to avoid duplicating effort if the bind group is shared between views +#[derive(Component, Default)] +pub struct SkipGpuPreprocess; + +impl Plugin for GpuMeshPreprocessPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "mesh_preprocess.wgsl"); + embedded_asset!(app, "reset_indirect_batch_sets.wgsl"); + embedded_asset!(app, "build_indirect_params.wgsl"); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + // This plugin does nothing if GPU instance buffer building isn't in + // use. 
+ let gpu_preprocessing_support = render_app.world().resource::(); + if !self.use_gpu_instance_buffer_builder || !gpu_preprocessing_support.is_available() { + return; + } + + render_app + .init_resource::() + .init_resource::>() + .init_resource::>() + .init_resource::>() + .add_systems( + Render, + ( + prepare_preprocess_pipelines.in_set(RenderSystems::Prepare), + prepare_preprocess_bind_groups + .run_if(resource_exists::>) + .in_set(RenderSystems::PrepareBindGroups), + write_mesh_culling_data_buffer.in_set(RenderSystems::PrepareResourcesFlush), + ), + ) + .add_render_graph_node::( + Core3d, + NodePbr::ClearIndirectParametersMetadata + ) + .add_render_graph_node::(Core3d, NodePbr::EarlyGpuPreprocess) + .add_render_graph_node::(Core3d, NodePbr::LateGpuPreprocess) + .add_render_graph_node::( + Core3d, + NodePbr::EarlyPrepassBuildIndirectParameters, + ) + .add_render_graph_node::( + Core3d, + NodePbr::LatePrepassBuildIndirectParameters, + ) + .add_render_graph_node::( + Core3d, + NodePbr::MainBuildIndirectParameters, + ) + .add_render_graph_edges( + Core3d, + ( + NodePbr::ClearIndirectParametersMetadata, + NodePbr::EarlyGpuPreprocess, + NodePbr::EarlyPrepassBuildIndirectParameters, + Node3d::EarlyPrepass, + Node3d::EarlyDeferredPrepass, + Node3d::EarlyDownsampleDepth, + NodePbr::LateGpuPreprocess, + NodePbr::LatePrepassBuildIndirectParameters, + Node3d::LatePrepass, + Node3d::LateDeferredPrepass, + NodePbr::MainBuildIndirectParameters, + Node3d::StartMainPass, + ), + ).add_render_graph_edges( + Core3d, + ( + NodePbr::EarlyPrepassBuildIndirectParameters, + NodePbr::EarlyShadowPass, + Node3d::EarlyDownsampleDepth, + ) + ).add_render_graph_edges( + Core3d, + ( + NodePbr::LatePrepassBuildIndirectParameters, + NodePbr::LateShadowPass, + NodePbr::MainBuildIndirectParameters, + ) + ); + } +} + +impl Node for ClearIndirectParametersMetadataNode { + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> 
Result<(), NodeRunError> { + let Some(indirect_parameters_buffers) = world.get_resource::() + else { + return Ok(()); + }; + + // Clear out each indexed and non-indexed GPU-side buffer. + for phase_indirect_parameters_buffers in indirect_parameters_buffers.values() { + if let Some(indexed_gpu_metadata_buffer) = phase_indirect_parameters_buffers + .indexed + .gpu_metadata_buffer() + { + render_context.command_encoder().clear_buffer( + indexed_gpu_metadata_buffer, + 0, + Some( + phase_indirect_parameters_buffers.indexed.batch_count() as u64 + * size_of::() as u64, + ), + ); + } + + if let Some(non_indexed_gpu_metadata_buffer) = phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata_buffer() + { + render_context.command_encoder().clear_buffer( + non_indexed_gpu_metadata_buffer, + 0, + Some( + phase_indirect_parameters_buffers.non_indexed.batch_count() as u64 + * size_of::() as u64, + ), + ); + } + } + + Ok(()) + } +} + +impl FromWorld for EarlyGpuPreprocessNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + main_view_query: QueryState::new(world), + } + } +} + +impl Node for EarlyGpuPreprocessNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + self.main_view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let diagnostics = render_context.diagnostic_recorder(); + + // Grab the [`BatchedInstanceBuffers`]. 
+ let batched_instance_buffers = + world.resource::>(); + + let pipeline_cache = world.resource::(); + let preprocess_pipelines = world.resource::(); + + let mut compute_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some("early_mesh_preprocessing"), + timestamp_writes: None, + }); + let pass_span = diagnostics.pass_span(&mut compute_pass, "early_mesh_preprocessing"); + + let mut all_views: SmallVec<[_; 8]> = SmallVec::new(); + all_views.push(graph.view_entity()); + if let Ok(shadow_cascade_views) = + self.main_view_query.get_manual(world, graph.view_entity()) + { + all_views.extend(shadow_cascade_views.lights.iter().copied()); + } + + // Run the compute passes. + + for view_entity in all_views { + let Ok(( + view, + bind_groups, + view_uniform_offset, + no_indirect_drawing, + occlusion_culling, + )) = self.view_query.get_manual(world, view_entity) + else { + continue; + }; + + let Some(bind_groups) = bind_groups else { + continue; + }; + let Some(view_uniform_offset) = view_uniform_offset else { + continue; + }; + + // Select the right pipeline, depending on whether GPU culling is in + // use. + let maybe_pipeline_id = if no_indirect_drawing { + preprocess_pipelines.direct_preprocess.pipeline_id + } else if occlusion_culling { + preprocess_pipelines + .early_gpu_occlusion_culling_preprocess + .pipeline_id + } else { + preprocess_pipelines + .gpu_frustum_culling_preprocess + .pipeline_id + }; + + // Fetch the pipeline. + let Some(preprocess_pipeline_id) = maybe_pipeline_id else { + warn!("The build mesh uniforms pipeline wasn't ready"); + continue; + }; + + let Some(preprocess_pipeline) = + pipeline_cache.get_compute_pipeline(preprocess_pipeline_id) + else { + // This will happen while the pipeline is being compiled and is fine. + continue; + }; + + compute_pass.set_pipeline(preprocess_pipeline); + + // Loop over each render phase. 
+ for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + // Grab the work item buffers for this view. + let Some(work_item_buffers) = batched_phase_instance_buffers + .work_item_buffers + .get(&view.retained_view_entity) + else { + continue; + }; + + // Fetch the bind group for the render phase. + let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else { + continue; + }; + + // Make sure the mesh preprocessing shader has access to the + // view info it needs to do culling and motion vector + // computation. + let dynamic_offsets = [view_uniform_offset.offset]; + + // Are we drawing directly or indirectly? + match *phase_bind_groups { + PhasePreprocessBindGroups::Direct(ref bind_group) => { + // Invoke the mesh preprocessing shader to transform + // meshes only, but not cull. + let PreprocessWorkItemBuffers::Direct(work_item_buffer) = work_item_buffers + else { + continue; + }; + compute_pass.set_bind_group(0, bind_group, &dynamic_offsets); + let workgroup_count = work_item_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + PhasePreprocessBindGroups::IndirectFrustumCulling { + indexed: ref maybe_indexed_bind_group, + non_indexed: ref maybe_non_indexed_bind_group, + } + | PhasePreprocessBindGroups::IndirectOcclusionCulling { + early_indexed: ref maybe_indexed_bind_group, + early_non_indexed: ref maybe_non_indexed_bind_group, + .. + } => { + // Invoke the mesh preprocessing shader to transform and + // cull the meshes. + let PreprocessWorkItemBuffers::Indirect { + indexed: indexed_buffer, + non_indexed: non_indexed_buffer, + .. + } = work_item_buffers + else { + continue; + }; + + // Transform and cull indexed meshes if there are any. 
+ if let Some(indexed_bind_group) = maybe_indexed_bind_group { + if let PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_indexed_offset, + .. + }), + .. + } = *work_item_buffers + { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(&late_indirect_parameters_indexed_offset), + ); + } + + compute_pass.set_bind_group(0, indexed_bind_group, &dynamic_offsets); + let workgroup_count = indexed_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + // Transform and cull non-indexed meshes if there are any. + if let Some(non_indexed_bind_group) = maybe_non_indexed_bind_group { + if let PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_non_indexed_offset, + .. + }), + .. + } = *work_item_buffers + { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of( + &late_indirect_parameters_non_indexed_offset, + ), + ); + } + + compute_pass.set_bind_group( + 0, + non_indexed_bind_group, + &dynamic_offsets, + ); + let workgroup_count = non_indexed_buffer.len().div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + } + } + } + } + + pass_span.end(&mut compute_pass); + + Ok(()) + } +} + +impl FromWorld for EarlyPrepassBuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl FromWorld for LatePrepassBuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl FromWorld for MainBuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl FromWorld for LateGpuPreprocessNode { + fn from_world(world: &mut 
World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl Node for LateGpuPreprocessNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let diagnostics = render_context.diagnostic_recorder(); + + // Grab the [`BatchedInstanceBuffers`]. + let batched_instance_buffers = + world.resource::>(); + + let pipeline_cache = world.resource::(); + let preprocess_pipelines = world.resource::(); + + let mut compute_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some("late_mesh_preprocessing"), + timestamp_writes: None, + }); + let pass_span = diagnostics.pass_span(&mut compute_pass, "late_mesh_preprocessing"); + + // Run the compute passes. + for (view, bind_groups, view_uniform_offset) in self.view_query.iter_manual(world) { + let maybe_pipeline_id = preprocess_pipelines + .late_gpu_occlusion_culling_preprocess + .pipeline_id; + + // Fetch the pipeline. + let Some(preprocess_pipeline_id) = maybe_pipeline_id else { + warn!("The build mesh uniforms pipeline wasn't ready"); + return Ok(()); + }; + + let Some(preprocess_pipeline) = + pipeline_cache.get_compute_pipeline(preprocess_pipeline_id) + else { + // This will happen while the pipeline is being compiled and is fine. + return Ok(()); + }; + + compute_pass.set_pipeline(preprocess_pipeline); + + // Loop over each phase. Because we built the phases in parallel, + // each phase has a separate set of instance buffers. + for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + let UntypedPhaseBatchedInstanceBuffers { + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + .. 
+ } = *batched_phase_instance_buffers; + + // Grab the work item buffers for this view. + let Some(phase_work_item_buffers) = + work_item_buffers.get(&view.retained_view_entity) + else { + continue; + }; + + let ( + PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_indexed_offset, + late_indirect_parameters_non_indexed_offset, + .. + }), + .. + }, + Some(PhasePreprocessBindGroups::IndirectOcclusionCulling { + late_indexed: maybe_late_indexed_bind_group, + late_non_indexed: maybe_late_non_indexed_bind_group, + .. + }), + Some(late_indexed_indirect_parameters_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) = ( + phase_work_item_buffers, + bind_groups.get(phase_type_id), + late_indexed_indirect_parameters_buffer.buffer(), + late_non_indexed_indirect_parameters_buffer.buffer(), + ) + else { + continue; + }; + + let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; + dynamic_offsets.push(view_uniform_offset.offset); + + // If there's no space reserved for work items, then don't + // bother doing the dispatch, as there can't possibly be any + // meshes of the given class (indexed or non-indexed) in this + // phase. + + // Transform and cull indexed meshes if there are any. + if let Some(late_indexed_bind_group) = maybe_late_indexed_bind_group { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(late_indirect_parameters_indexed_offset), + ); + + compute_pass.set_bind_group(0, late_indexed_bind_group, &dynamic_offsets); + compute_pass.dispatch_workgroups_indirect( + late_indexed_indirect_parameters_buffer, + (*late_indirect_parameters_indexed_offset as u64) + * (size_of::() as u64), + ); + } + + // Transform and cull non-indexed meshes if there are any. 
+ if let Some(late_non_indexed_bind_group) = maybe_late_non_indexed_bind_group { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(late_indirect_parameters_non_indexed_offset), + ); + + compute_pass.set_bind_group(0, late_non_indexed_bind_group, &dynamic_offsets); + compute_pass.dispatch_workgroups_indirect( + late_non_indexed_indirect_parameters_buffer, + (*late_indirect_parameters_non_indexed_offset as u64) + * (size_of::() as u64), + ); + } + } + } + + pass_span.end(&mut compute_pass); + + Ok(()) + } +} + +impl Node for EarlyPrepassBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + // If there are no views with a depth prepass enabled, we don't need to + // run this. + if self.view_query.iter_manual(world).next().is_none() { + return Ok(()); + } + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.early_phase, + "early_prepass_indirect_parameters_building", + ) + } +} + +impl Node for LatePrepassBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + // If there are no views with occlusion culling enabled, we don't need + // to run this. 
+ if self.view_query.iter_manual(world).next().is_none() { + return Ok(()); + } + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.late_phase, + "late_prepass_indirect_parameters_building", + ) + } +} + +impl Node for MainBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.main_phase, + "main_indirect_parameters_building", + ) + } +} + +fn run_build_indirect_parameters_node( + render_context: &mut RenderContext, + world: &World, + preprocess_phase_pipelines: &PreprocessPhasePipelines, + label: &'static str, +) -> Result<(), NodeRunError> { + let Some(build_indirect_params_bind_groups) = + world.get_resource::() + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let pipeline_cache = world.resource::(); + let indirect_parameters_buffers = world.resource::(); + + let mut compute_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + let pass_span = diagnostics.pass_span(&mut compute_pass, label); + + // Fetch the pipeline. 
+ let ( + Some(reset_indirect_batch_sets_pipeline_id), + Some(build_indexed_indirect_params_pipeline_id), + Some(build_non_indexed_indirect_params_pipeline_id), + ) = ( + preprocess_phase_pipelines + .reset_indirect_batch_sets + .pipeline_id, + preprocess_phase_pipelines + .gpu_occlusion_culling_build_indexed_indirect_params + .pipeline_id, + preprocess_phase_pipelines + .gpu_occlusion_culling_build_non_indexed_indirect_params + .pipeline_id, + ) + else { + warn!("The build indirect parameters pipelines weren't ready"); + pass_span.end(&mut compute_pass); + return Ok(()); + }; + + let ( + Some(reset_indirect_batch_sets_pipeline), + Some(build_indexed_indirect_params_pipeline), + Some(build_non_indexed_indirect_params_pipeline), + ) = ( + pipeline_cache.get_compute_pipeline(reset_indirect_batch_sets_pipeline_id), + pipeline_cache.get_compute_pipeline(build_indexed_indirect_params_pipeline_id), + pipeline_cache.get_compute_pipeline(build_non_indexed_indirect_params_pipeline_id), + ) + else { + // This will happen while the pipeline is being compiled and is fine. + pass_span.end(&mut compute_pass); + return Ok(()); + }; + + // Loop over each phase. As each has as separate set of buffers, we need to + // build indirect parameters individually for each phase. + for (phase_type_id, phase_build_indirect_params_bind_groups) in + build_indirect_params_bind_groups.iter() + { + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; + + // Build indexed indirect parameters. 
+ if let ( + Some(reset_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(true) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .indexed + .batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + // Build non-indexed indirect parameters. 
+ if let ( + Some(reset_non_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_non_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_non_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(false) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .non_indexed + .batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + } + + pass_span.end(&mut compute_pass); + + Ok(()) +} + +impl PreprocessPipelines { + /// Returns true if the preprocessing and indirect parameters pipelines have + /// been loaded or false otherwise. 
+ pub(crate) fn pipelines_are_loaded( + &self, + pipeline_cache: &PipelineCache, + preprocessing_support: &GpuPreprocessingSupport, + ) -> bool { + match preprocessing_support.max_supported_mode { + GpuPreprocessingMode::None => false, + GpuPreprocessingMode::PreprocessingOnly => { + self.direct_preprocess.is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_preprocess + .is_loaded(pipeline_cache) + } + GpuPreprocessingMode::Culling => { + self.direct_preprocess.is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_preprocess + .is_loaded(pipeline_cache) + && self + .early_gpu_occlusion_culling_preprocess + .is_loaded(pipeline_cache) + && self + .late_gpu_occlusion_culling_preprocess + .is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_build_indexed_indirect_params + .is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_build_non_indexed_indirect_params + .is_loaded(pipeline_cache) + && self.early_phase.is_loaded(pipeline_cache) + && self.late_phase.is_loaded(pipeline_cache) + && self.main_phase.is_loaded(pipeline_cache) + } + } + } +} + +impl PreprocessPhasePipelines { + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.reset_indirect_batch_sets.is_loaded(pipeline_cache) + && self + .gpu_occlusion_culling_build_indexed_indirect_params + .is_loaded(pipeline_cache) + && self + .gpu_occlusion_culling_build_non_indexed_indirect_params + .is_loaded(pipeline_cache) + } +} + +impl PreprocessPipeline { + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.pipeline_id + .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some()) + } +} + +impl ResetIndirectBatchSetsPipeline { + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.pipeline_id + .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some()) + } +} + +impl BuildIndirectParametersPipeline { + /// Returns true if this pipeline has been loaded into the pipeline cache or + /// false 
otherwise. + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.pipeline_id + .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some()) + } +} + +impl SpecializedComputePipeline for PreprocessPipeline { + type Key = PreprocessPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let mut shader_defs = vec!["WRITE_INDIRECT_PARAMETERS_METADATA".into()]; + if key.contains(PreprocessPipelineKey::FRUSTUM_CULLING) { + shader_defs.push("INDIRECT".into()); + shader_defs.push("FRUSTUM_CULLING".into()); + } + if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + shader_defs.push("OCCLUSION_CULLING".into()); + if key.contains(PreprocessPipelineKey::EARLY_PHASE) { + shader_defs.push("EARLY_PHASE".into()); + } else { + shader_defs.push("LATE_PHASE".into()); + } + } + + ComputePipelineDescriptor { + label: Some( + format!( + "mesh preprocessing ({})", + if key.contains( + PreprocessPipelineKey::OCCLUSION_CULLING + | PreprocessPipelineKey::EARLY_PHASE + ) { + "early GPU occlusion culling" + } else if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + "late GPU occlusion culling" + } else if key.contains(PreprocessPipelineKey::FRUSTUM_CULLING) { + "GPU frustum culling" + } else { + "direct" + } + ) + .into(), + ), + layout: vec![self.bind_group_layout.clone()], + push_constant_ranges: if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }] + } else { + vec![] + }, + shader: self.shader.clone(), + shader_defs, + ..default() + } + } +} + +impl FromWorld for PreprocessPipelines { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource::(); + + // GPU culling bind group parameters are a superset of those in the CPU + // culling (direct) shader. 
+ let direct_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries(); + let gpu_frustum_culling_bind_group_layout_entries = gpu_culling_bind_group_layout_entries(); + let gpu_early_occlusion_culling_bind_group_layout_entries = + gpu_occlusion_culling_bind_group_layout_entries().extend_with_indices((( + 11, + storage_buffer::(/*has_dynamic_offset=*/ false), + ),)); + let gpu_late_occlusion_culling_bind_group_layout_entries = + gpu_occlusion_culling_bind_group_layout_entries(); + + let reset_indirect_batch_sets_bind_group_layout_entries = + DynamicBindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + (storage_buffer::(false),), + ); + + // Indexed and non-indexed bind group parameters share all the bind + // group layout entries except the final one. + let build_indexed_indirect_params_bind_group_layout_entries = + build_indirect_params_bind_group_layout_entries() + .extend_sequential((storage_buffer::(false),)); + let build_non_indexed_indirect_params_bind_group_layout_entries = + build_indirect_params_bind_group_layout_entries() + .extend_sequential((storage_buffer::(false),)); + + // Create the bind group layouts. 
+ let direct_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms direct bind group layout", + &direct_bind_group_layout_entries, + ); + let gpu_frustum_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU frustum culling bind group layout", + &gpu_frustum_culling_bind_group_layout_entries, + ); + let gpu_early_occlusion_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU early occlusion culling bind group layout", + &gpu_early_occlusion_culling_bind_group_layout_entries, + ); + let gpu_late_occlusion_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU late occlusion culling bind group layout", + &gpu_late_occlusion_culling_bind_group_layout_entries, + ); + let reset_indirect_batch_sets_bind_group_layout = render_device.create_bind_group_layout( + "reset indirect batch sets bind group layout", + &reset_indirect_batch_sets_bind_group_layout_entries, + ); + let build_indexed_indirect_params_bind_group_layout = render_device + .create_bind_group_layout( + "build indexed indirect parameters bind group layout", + &build_indexed_indirect_params_bind_group_layout_entries, + ); + let build_non_indexed_indirect_params_bind_group_layout = render_device + .create_bind_group_layout( + "build non-indexed indirect parameters bind group layout", + &build_non_indexed_indirect_params_bind_group_layout_entries, + ); + + let preprocess_shader = load_embedded_asset!(world, "mesh_preprocess.wgsl"); + let reset_indirect_batch_sets_shader = + load_embedded_asset!(world, "reset_indirect_batch_sets.wgsl"); + let build_indirect_params_shader = + load_embedded_asset!(world, "build_indirect_params.wgsl"); + + let preprocess_phase_pipelines = PreprocessPhasePipelines { + reset_indirect_batch_sets: ResetIndirectBatchSetsPipeline { + bind_group_layout: reset_indirect_batch_sets_bind_group_layout.clone(), + shader: 
reset_indirect_batch_sets_shader, + pipeline_id: None, + }, + gpu_occlusion_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_indexed_indirect_params_bind_group_layout.clone(), + shader: build_indirect_params_shader.clone(), + pipeline_id: None, + }, + gpu_occlusion_culling_build_non_indexed_indirect_params: + BuildIndirectParametersPipeline { + bind_group_layout: build_non_indexed_indirect_params_bind_group_layout.clone(), + shader: build_indirect_params_shader.clone(), + pipeline_id: None, + }, + }; + + PreprocessPipelines { + direct_preprocess: PreprocessPipeline { + bind_group_layout: direct_bind_group_layout, + shader: preprocess_shader.clone(), + pipeline_id: None, + }, + gpu_frustum_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_frustum_culling_bind_group_layout, + shader: preprocess_shader.clone(), + pipeline_id: None, + }, + early_gpu_occlusion_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_early_occlusion_culling_bind_group_layout, + shader: preprocess_shader.clone(), + pipeline_id: None, + }, + late_gpu_occlusion_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_late_occlusion_culling_bind_group_layout, + shader: preprocess_shader, + pipeline_id: None, + }, + gpu_frustum_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_indexed_indirect_params_bind_group_layout.clone(), + shader: build_indirect_params_shader.clone(), + pipeline_id: None, + }, + gpu_frustum_culling_build_non_indexed_indirect_params: + BuildIndirectParametersPipeline { + bind_group_layout: build_non_indexed_indirect_params_bind_group_layout.clone(), + shader: build_indirect_params_shader, + pipeline_id: None, + }, + early_phase: preprocess_phase_pipelines.clone(), + late_phase: preprocess_phase_pipelines.clone(), + main_phase: preprocess_phase_pipelines.clone(), + } + } +} + +fn preprocess_direct_bind_group_layout_entries() -> 
DynamicBindGroupLayoutEntries { + DynamicBindGroupLayoutEntries::new_with_indices( + ShaderStages::COMPUTE, + ( + // `view` + ( + 0, + uniform_buffer::(/* has_dynamic_offset= */ true), + ), + // `current_input` + (3, storage_buffer_read_only::(false)), + // `previous_input` + (4, storage_buffer_read_only::(false)), + // `indices` + (5, storage_buffer_read_only::(false)), + // `output` + (6, storage_buffer::(false)), + ), + ) +} + +// Returns the first 4 bind group layout entries shared between all invocations +// of the indirect parameters building shader. +fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + DynamicBindGroupLayoutEntries::new_with_indices( + ShaderStages::COMPUTE, + ( + (0, storage_buffer_read_only::(false)), + ( + 1, + storage_buffer_read_only::(false), + ), + ( + 2, + storage_buffer_read_only::(false), + ), + (3, storage_buffer::(false)), + ), + ) +} + +/// A system that specializes the `mesh_preprocess.wgsl` and +/// `build_indirect_params.wgsl` pipelines if necessary. +fn gpu_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + // GPU culling bind group parameters are a superset of those in the CPU + // culling (direct) shader. 
+ preprocess_direct_bind_group_layout_entries().extend_with_indices(( + // `indirect_parameters_cpu_metadata` + ( + 7, + storage_buffer_read_only::( + /* has_dynamic_offset= */ false, + ), + ), + // `indirect_parameters_gpu_metadata` + ( + 8, + storage_buffer::(/* has_dynamic_offset= */ false), + ), + // `mesh_culling_data` + ( + 9, + storage_buffer_read_only::(/* has_dynamic_offset= */ false), + ), + )) +} + +fn gpu_occlusion_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + gpu_culling_bind_group_layout_entries().extend_with_indices(( + ( + 2, + uniform_buffer::(/*has_dynamic_offset=*/ false), + ), + ( + 10, + texture_2d(TextureSampleType::Float { filterable: true }), + ), + ( + 12, + storage_buffer::( + /*has_dynamic_offset=*/ false, + ), + ), + )) +} + +/// A system that specializes the `mesh_preprocess.wgsl` pipelines if necessary. +pub fn prepare_preprocess_pipelines( + pipeline_cache: Res, + render_device: Res, + mut specialized_preprocess_pipelines: ResMut>, + mut specialized_reset_indirect_batch_sets_pipelines: ResMut< + SpecializedComputePipelines, + >, + mut specialized_build_indirect_parameters_pipelines: ResMut< + SpecializedComputePipelines, + >, + preprocess_pipelines: ResMut, + gpu_preprocessing_support: Res, +) { + let preprocess_pipelines = preprocess_pipelines.into_inner(); + + preprocess_pipelines.direct_preprocess.prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::empty(), + ); + preprocess_pipelines.gpu_frustum_culling_preprocess.prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::FRUSTUM_CULLING, + ); + + if gpu_preprocessing_support.is_culling_supported() { + preprocess_pipelines + .early_gpu_occlusion_culling_preprocess + .prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::FRUSTUM_CULLING + | PreprocessPipelineKey::OCCLUSION_CULLING + | PreprocessPipelineKey::EARLY_PHASE, + ); + 
preprocess_pipelines + .late_gpu_occlusion_culling_preprocess + .prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::FRUSTUM_CULLING | PreprocessPipelineKey::OCCLUSION_CULLING, + ); + } + + let mut build_indirect_parameters_pipeline_key = BuildIndirectParametersPipelineKey::empty(); + + // If the GPU and driver support `multi_draw_indirect_count`, tell the + // shader that. + if render_device + .wgpu_device() + .features() + .contains(WgpuFeatures::MULTI_DRAW_INDIRECT_COUNT) + { + build_indirect_parameters_pipeline_key + .insert(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED); + } + + preprocess_pipelines + .gpu_frustum_culling_build_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key | BuildIndirectParametersPipelineKey::INDEXED, + ); + preprocess_pipelines + .gpu_frustum_culling_build_non_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key, + ); + + if !gpu_preprocessing_support.is_culling_supported() { + return; + } + + for (preprocess_phase_pipelines, build_indirect_parameters_phase_pipeline_key) in [ + ( + &mut preprocess_pipelines.early_phase, + BuildIndirectParametersPipelineKey::EARLY_PHASE, + ), + ( + &mut preprocess_pipelines.late_phase, + BuildIndirectParametersPipelineKey::LATE_PHASE, + ), + ( + &mut preprocess_pipelines.main_phase, + BuildIndirectParametersPipelineKey::MAIN_PHASE, + ), + ] { + preprocess_phase_pipelines + .reset_indirect_batch_sets + .prepare( + &pipeline_cache, + &mut specialized_reset_indirect_batch_sets_pipelines, + ); + preprocess_phase_pipelines + .gpu_occlusion_culling_build_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key + | 
build_indirect_parameters_phase_pipeline_key + | BuildIndirectParametersPipelineKey::INDEXED + | BuildIndirectParametersPipelineKey::OCCLUSION_CULLING, + ); + preprocess_phase_pipelines + .gpu_occlusion_culling_build_non_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key + | build_indirect_parameters_phase_pipeline_key + | BuildIndirectParametersPipelineKey::OCCLUSION_CULLING, + ); + } +} + +impl PreprocessPipeline { + fn prepare( + &mut self, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedComputePipelines, + key: PreprocessPipelineKey, + ) { + if self.pipeline_id.is_some() { + return; + } + + let preprocess_pipeline_id = pipelines.specialize(pipeline_cache, self, key); + self.pipeline_id = Some(preprocess_pipeline_id); + } +} + +impl SpecializedComputePipeline for ResetIndirectBatchSetsPipeline { + type Key = (); + + fn specialize(&self, _: Self::Key) -> ComputePipelineDescriptor { + ComputePipelineDescriptor { + label: Some("reset indirect batch sets".into()), + layout: vec![self.bind_group_layout.clone()], + shader: self.shader.clone(), + ..default() + } + } +} + +impl SpecializedComputePipeline for BuildIndirectParametersPipeline { + type Key = BuildIndirectParametersPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let mut shader_defs = vec![]; + if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { + shader_defs.push("INDEXED".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED) { + shader_defs.push("MULTI_DRAW_INDIRECT_COUNT_SUPPORTED".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::OCCLUSION_CULLING) { + shader_defs.push("OCCLUSION_CULLING".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::EARLY_PHASE) { + shader_defs.push("EARLY_PHASE".into()); + } + if 
key.contains(BuildIndirectParametersPipelineKey::LATE_PHASE) { + shader_defs.push("LATE_PHASE".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::MAIN_PHASE) { + shader_defs.push("MAIN_PHASE".into()); + } + + let label = format!( + "{} build {}indexed indirect parameters", + if !key.contains(BuildIndirectParametersPipelineKey::OCCLUSION_CULLING) { + "frustum culling" + } else if key.contains(BuildIndirectParametersPipelineKey::EARLY_PHASE) { + "early occlusion culling" + } else if key.contains(BuildIndirectParametersPipelineKey::LATE_PHASE) { + "late occlusion culling" + } else { + "main occlusion culling" + }, + if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { + "" + } else { + "non-" + } + ); + + ComputePipelineDescriptor { + label: Some(label.into()), + layout: vec![self.bind_group_layout.clone()], + shader: self.shader.clone(), + shader_defs, + ..default() + } + } +} + +impl ResetIndirectBatchSetsPipeline { + fn prepare( + &mut self, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedComputePipelines, + ) { + if self.pipeline_id.is_some() { + return; + } + + let reset_indirect_batch_sets_pipeline_id = pipelines.specialize(pipeline_cache, self, ()); + self.pipeline_id = Some(reset_indirect_batch_sets_pipeline_id); + } +} + +impl BuildIndirectParametersPipeline { + fn prepare( + &mut self, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedComputePipelines, + key: BuildIndirectParametersPipelineKey, + ) { + if self.pipeline_id.is_some() { + return; + } + + let build_indirect_parameters_pipeline_id = pipelines.specialize(pipeline_cache, self, key); + self.pipeline_id = Some(build_indirect_parameters_pipeline_id); + } +} + +/// A system that attaches the mesh uniform buffers to the bind groups for the +/// variants of the mesh preprocessing compute shader. 
+#[expect( + clippy::too_many_arguments, + reason = "it's a system that needs a lot of arguments" +)] +pub fn prepare_preprocess_bind_groups( + mut commands: Commands, + views: Query<(Entity, &ExtractedView)>, + view_depth_pyramids: Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>, + render_device: Res, + batched_instance_buffers: Res>, + indirect_parameters_buffers: Res, + mesh_culling_data_buffer: Res, + view_uniforms: Res, + previous_view_uniforms: Res, + pipelines: Res, +) { + // Grab the `BatchedInstanceBuffers`. + let BatchedInstanceBuffers { + current_input_buffer: current_input_buffer_vec, + previous_input_buffer: previous_input_buffer_vec, + phase_instance_buffers, + } = batched_instance_buffers.into_inner(); + + let (Some(current_input_buffer), Some(previous_input_buffer)) = ( + current_input_buffer_vec.buffer().buffer(), + previous_input_buffer_vec.buffer().buffer(), + ) else { + return; + }; + + // Record whether we have any meshes that are to be drawn indirectly. If we + // don't, then we can skip building indirect parameters. + let mut any_indirect = false; + + // Loop over each view. + for (view_entity, view) in &views { + let mut bind_groups = TypeIdMap::default(); + + // Loop over each phase. + for (phase_type_id, phase_instance_buffers) in phase_instance_buffers { + let UntypedPhaseBatchedInstanceBuffers { + data_buffer: ref data_buffer_vec, + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + } = *phase_instance_buffers; + + let Some(data_buffer) = data_buffer_vec.buffer() else { + continue; + }; + + // Grab the indirect parameters buffers for this phase. + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; + + let Some(work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) else { + continue; + }; + + // Create the `PreprocessBindGroupBuilder`. 
+ let preprocess_bind_group_builder = PreprocessBindGroupBuilder { + view: view_entity, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + render_device: &render_device, + phase_indirect_parameters_buffers, + mesh_culling_data_buffer: &mesh_culling_data_buffer, + view_uniforms: &view_uniforms, + previous_view_uniforms: &previous_view_uniforms, + pipelines: &pipelines, + current_input_buffer, + previous_input_buffer, + data_buffer, + }; + + // Depending on the type of work items we have, construct the + // appropriate bind groups. + let (was_indirect, bind_group) = match *work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref work_item_buffer) => ( + false, + preprocess_bind_group_builder + .create_direct_preprocess_bind_groups(work_item_buffer), + ), + + PreprocessWorkItemBuffers::Indirect { + indexed: ref indexed_work_item_buffer, + non_indexed: ref non_indexed_work_item_buffer, + gpu_occlusion_culling: Some(ref gpu_occlusion_culling_work_item_buffers), + } => ( + true, + preprocess_bind_group_builder + .create_indirect_occlusion_culling_preprocess_bind_groups( + &view_depth_pyramids, + indexed_work_item_buffer, + non_indexed_work_item_buffer, + gpu_occlusion_culling_work_item_buffers, + ), + ), + + PreprocessWorkItemBuffers::Indirect { + indexed: ref indexed_work_item_buffer, + non_indexed: ref non_indexed_work_item_buffer, + gpu_occlusion_culling: None, + } => ( + true, + preprocess_bind_group_builder + .create_indirect_frustum_culling_preprocess_bind_groups( + indexed_work_item_buffer, + non_indexed_work_item_buffer, + ), + ), + }; + + // Write that bind group in. + if let Some(bind_group) = bind_group { + any_indirect = any_indirect || was_indirect; + bind_groups.insert(*phase_type_id, bind_group); + } + } + + // Save the bind groups. 
+ commands + .entity(view_entity) + .insert(PreprocessBindGroups(bind_groups)); + } + + // Now, if there were any indirect draw commands, create the bind groups for + // the indirect parameters building shader. + if any_indirect { + create_build_indirect_parameters_bind_groups( + &mut commands, + &render_device, + &pipelines, + current_input_buffer, + &indirect_parameters_buffers, + ); + } +} + +/// A temporary structure that stores all the information needed to construct +/// bind groups for the mesh preprocessing shader. +struct PreprocessBindGroupBuilder<'a> { + /// The render-world entity corresponding to the current view. + view: Entity, + /// The indirect compute dispatch parameters buffer for indexed meshes in + /// the late prepass. + late_indexed_indirect_parameters_buffer: + &'a RawBufferVec, + /// The indirect compute dispatch parameters buffer for non-indexed meshes + /// in the late prepass. + late_non_indexed_indirect_parameters_buffer: + &'a RawBufferVec, + /// The device. + render_device: &'a RenderDevice, + /// The buffers that store indirect draw parameters. + phase_indirect_parameters_buffers: &'a UntypedPhaseIndirectParametersBuffers, + /// The GPU buffer that stores the information needed to cull each mesh. + mesh_culling_data_buffer: &'a MeshCullingDataBuffer, + /// The GPU buffer that stores information about the view. + view_uniforms: &'a ViewUniforms, + /// The GPU buffer that stores information about the view from last frame. + previous_view_uniforms: &'a PreviousViewUniforms, + /// The pipelines for the mesh preprocessing shader. + pipelines: &'a PreprocessPipelines, + /// The GPU buffer containing the list of [`MeshInputUniform`]s for the + /// current frame. + current_input_buffer: &'a Buffer, + /// The GPU buffer containing the list of [`MeshInputUniform`]s for the + /// previous frame. + previous_input_buffer: &'a Buffer, + /// The GPU buffer containing the list of [`MeshUniform`]s for the current + /// frame. 
+ /// + /// This is the buffer containing the mesh's final transforms that the + /// shaders will write to. + data_buffer: &'a Buffer, +} + +impl<'a> PreprocessBindGroupBuilder<'a> { + /// Creates the bind groups for mesh preprocessing when GPU frustum culling + /// and GPU occlusion culling are both disabled. + fn create_direct_preprocess_bind_groups( + &self, + work_item_buffer: &RawBufferVec, + ) -> Option { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let work_item_buffer_size = NonZero::::try_from( + work_item_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some(PhasePreprocessBindGroups::Direct( + self.render_device.create_bind_group( + "preprocess_direct_bind_group", + &self.pipelines.direct_preprocess.bind_group_layout, + &BindGroupEntries::with_indices(( + (0, self.view_uniforms.uniforms.binding()?), + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: work_item_buffer.buffer()?, + offset: 0, + size: work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + )), + ), + )) + } + + /// Creates the bind groups for mesh preprocessing when GPU occlusion + /// culling is enabled. + fn create_indirect_occlusion_culling_preprocess_bind_groups( + &self, + view_depth_pyramids: &Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>, + indexed_work_item_buffer: &RawBufferVec, + non_indexed_work_item_buffer: &RawBufferVec, + gpu_occlusion_culling_work_item_buffers: &GpuOcclusionCullingWorkItemBuffers, + ) -> Option { + let GpuOcclusionCullingWorkItemBuffers { + late_indexed: ref late_indexed_work_item_buffer, + late_non_indexed: ref late_non_indexed_work_item_buffer, + .. 
+ } = *gpu_occlusion_culling_work_item_buffers; + + let (view_depth_pyramid, previous_view_uniform_offset) = + view_depth_pyramids.get(self.view).ok()?; + + Some(PhasePreprocessBindGroups::IndirectOcclusionCulling { + early_indexed: self.create_indirect_occlusion_culling_early_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + indexed_work_item_buffer, + late_indexed_work_item_buffer, + ), + + early_non_indexed: self.create_indirect_occlusion_culling_early_non_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + non_indexed_work_item_buffer, + late_non_indexed_work_item_buffer, + ), + + late_indexed: self.create_indirect_occlusion_culling_late_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + late_indexed_work_item_buffer, + ), + + late_non_indexed: self.create_indirect_occlusion_culling_late_non_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + late_non_indexed_work_item_buffer, + ), + }) + } + + /// Creates the bind group for the first phase of mesh preprocessing of + /// indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_early_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + indexed_work_item_buffer: &RawBufferVec, + late_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.phase_indirect_parameters_buffers + .indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed + .gpu_metadata_buffer(), + indexed_work_item_buffer.buffer(), + late_indexed_work_item_buffer.buffer(), + self.late_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(indexed_cpu_metadata_buffer), + Some(indexed_gpu_metadata_buffer), + Some(indexed_work_item_gpu_buffer), + Some(late_indexed_work_item_gpu_buffer), + Some(late_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let indexed_work_item_buffer_size = NonZero::::try_from( + indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_early_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .early_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_cpu_metadata_buffer.as_entire_binding()), + (8, indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 11, + BufferBinding { + buffer: late_indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }, + ), + ( + 12, + BufferBinding { + buffer: late_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind group for the first phase of mesh preprocessing of + /// non-indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_early_non_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + non_indexed_work_item_buffer: &RawBufferVec, + late_non_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.phase_indirect_parameters_buffers + .non_indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata_buffer(), + non_indexed_work_item_buffer.buffer(), + late_non_indexed_work_item_buffer.buffer(), + self.late_non_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(non_indexed_cpu_metadata_buffer), + Some(non_indexed_gpu_metadata_buffer), + Some(non_indexed_work_item_gpu_buffer), + Some(late_non_indexed_work_item_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let non_indexed_work_item_buffer_size = NonZero::::try_from( + non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_early_non_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .early_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_cpu_metadata_buffer.as_entire_binding()), + (8, non_indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 11, + BufferBinding { + buffer: late_non_indexed_work_item_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }, + ), + ( + 12, + BufferBinding { + buffer: late_non_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_non_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind group for the second phase of mesh preprocessing of + /// indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_late_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + late_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.phase_indirect_parameters_buffers + .indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed + .gpu_metadata_buffer(), + late_indexed_work_item_buffer.buffer(), + self.late_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(indexed_cpu_metadata_buffer), + Some(indexed_gpu_metadata_buffer), + Some(late_indexed_work_item_gpu_buffer), + Some(late_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let late_indexed_work_item_buffer_size = NonZero::::try_from( + late_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_late_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .late_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: late_indexed_work_item_gpu_buffer, + offset: 0, + size: late_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_cpu_metadata_buffer.as_entire_binding()), + (8, indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 12, + BufferBinding { + buffer: late_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind group for the second phase of mesh preprocessing of + /// non-indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_late_non_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + late_non_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.phase_indirect_parameters_buffers + .non_indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata_buffer(), + late_non_indexed_work_item_buffer.buffer(), + self.late_non_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(non_indexed_cpu_metadata_buffer), + Some(non_indexed_gpu_metadata_buffer), + Some(non_indexed_work_item_gpu_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let non_indexed_work_item_buffer_size = NonZero::::try_from( + late_non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_late_non_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .late_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_cpu_metadata_buffer.as_entire_binding()), + (8, non_indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 12, + BufferBinding { + buffer: late_non_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_non_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind groups for mesh preprocessing when GPU frustum culling + /// is enabled, but GPU occlusion culling is disabled. 
+ fn create_indirect_frustum_culling_preprocess_bind_groups( + &self, + indexed_work_item_buffer: &RawBufferVec, + non_indexed_work_item_buffer: &RawBufferVec, + ) -> Option { + Some(PhasePreprocessBindGroups::IndirectFrustumCulling { + indexed: self + .create_indirect_frustum_culling_indexed_bind_group(indexed_work_item_buffer), + non_indexed: self.create_indirect_frustum_culling_non_indexed_bind_group( + non_indexed_work_item_buffer, + ), + }) + } + + /// Creates the bind group for mesh preprocessing of indexed meshes when GPU + /// frustum culling is enabled, but GPU occlusion culling is disabled. + fn create_indirect_frustum_culling_indexed_bind_group( + &self, + indexed_work_item_buffer: &RawBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + + match ( + self.phase_indirect_parameters_buffers + .indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed + .gpu_metadata_buffer(), + indexed_work_item_buffer.buffer(), + ) { + ( + Some(indexed_cpu_metadata_buffer), + Some(indexed_gpu_metadata_buffer), + Some(indexed_work_item_gpu_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let indexed_work_item_buffer_size = NonZero::::try_from( + indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_gpu_indexed_frustum_culling_bind_group", + &self + .pipelines + .gpu_frustum_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_cpu_metadata_buffer.as_entire_binding()), + (8, indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind group for mesh preprocessing of non-indexed meshes when + /// GPU frustum culling is enabled, but GPU occlusion culling is disabled. + fn create_indirect_frustum_culling_non_indexed_bind_group( + &self, + non_indexed_work_item_buffer: &RawBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + + match ( + self.phase_indirect_parameters_buffers + .non_indexed + .cpu_metadata_buffer(), + self.phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata_buffer(), + non_indexed_work_item_buffer.buffer(), + ) { + ( + Some(non_indexed_cpu_metadata_buffer), + Some(non_indexed_gpu_metadata_buffer), + Some(non_indexed_work_item_gpu_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let non_indexed_work_item_buffer_size = NonZero::::try_from( + non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_gpu_non_indexed_frustum_culling_bind_group", + &self + .pipelines + .gpu_frustum_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_cpu_metadata_buffer.as_entire_binding()), + (8, non_indexed_gpu_metadata_buffer.as_entire_binding()), + (9, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + )), + ), + ) + } + _ => None, + } + } +} + +/// A system that creates bind groups from the indirect parameters metadata and +/// data buffers for the indirect batch set reset shader and the indirect +/// parameter building shader. 
+fn create_build_indirect_parameters_bind_groups( + commands: &mut Commands, + render_device: &RenderDevice, + pipelines: &PreprocessPipelines, + current_input_buffer: &Buffer, + indirect_parameters_buffers: &IndirectParametersBuffers, +) { + let mut build_indirect_parameters_bind_groups = BuildIndirectParametersBindGroups::new(); + + for (phase_type_id, phase_indirect_parameters_buffer) in indirect_parameters_buffers.iter() { + build_indirect_parameters_bind_groups.insert( + *phase_type_id, + PhaseBuildIndirectParametersBindGroups { + reset_indexed_indirect_batch_sets: match (phase_indirect_parameters_buffer + .indexed + .batch_sets_buffer(),) + { + (Some(indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + + reset_non_indexed_indirect_batch_sets: match (phase_indirect_parameters_buffer + .non_indexed + .batch_sets_buffer(),) + { + (Some(non_indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_non_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. 
+ &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + non_indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + + build_indexed_indirect: match ( + phase_indirect_parameters_buffer + .indexed + .cpu_metadata_buffer(), + phase_indirect_parameters_buffer + .indexed + .gpu_metadata_buffer(), + phase_indirect_parameters_buffer.indexed.data_buffer(), + phase_indirect_parameters_buffer.indexed.batch_sets_buffer(), + ) { + ( + Some(indexed_indirect_parameters_cpu_metadata_buffer), + Some(indexed_indirect_parameters_gpu_metadata_buffer), + Some(indexed_indirect_parameters_data_buffer), + Some(indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. + &pipelines + .gpu_frustum_culling_build_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. 
+ BufferBinding { + buffer: indexed_indirect_parameters_cpu_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.indexed.batch_count() + as u64 + * size_of::() as u64, + ), + }, + BufferBinding { + buffer: indexed_indirect_parameters_gpu_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.indexed.batch_count() + as u64 + * size_of::() as u64, + ), + }, + indexed_batch_sets_buffer.as_entire_binding(), + indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + + build_non_indexed_indirect: match ( + phase_indirect_parameters_buffer + .non_indexed + .cpu_metadata_buffer(), + phase_indirect_parameters_buffer + .non_indexed + .gpu_metadata_buffer(), + phase_indirect_parameters_buffer.non_indexed.data_buffer(), + phase_indirect_parameters_buffer + .non_indexed + .batch_sets_buffer(), + ) { + ( + Some(non_indexed_indirect_parameters_cpu_metadata_buffer), + Some(non_indexed_indirect_parameters_gpu_metadata_buffer), + Some(non_indexed_indirect_parameters_data_buffer), + Some(non_indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_non_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. + &pipelines + .gpu_frustum_culling_build_non_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. 
+ BufferBinding { + buffer: non_indexed_indirect_parameters_cpu_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.non_indexed.batch_count() + as u64 + * size_of::() as u64, + ), + }, + BufferBinding { + buffer: non_indexed_indirect_parameters_gpu_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.non_indexed.batch_count() + as u64 + * size_of::() as u64, + ), + }, + non_indexed_batch_sets_buffer.as_entire_binding(), + non_indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + }, + ); + } + + commands.insert_resource(build_indirect_parameters_bind_groups); +} + +/// Writes the information needed to do GPU mesh culling to the GPU. +pub fn write_mesh_culling_data_buffer( + render_device: Res, + render_queue: Res, + mut mesh_culling_data_buffer: ResMut, +) { + mesh_culling_data_buffer.write_buffer(&render_device, &render_queue); +} diff --git a/crates/libmarathon/src/render/pbr/render/light.rs b/crates/libmarathon/src/render/pbr/render/light.rs new file mode 100644 index 0000000..e61b402 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/light.rs @@ -0,0 +1,2357 @@ +use crate::render::pbr::*; +use bevy_asset::UntypedAssetId; +use bevy_camera::primitives::{ + face_index_to_name, CascadesFrusta, CubeMapFace, CubemapFrusta, Frustum, HalfSpace, + CUBE_MAP_FACES, +}; +use bevy_camera::visibility::{ + CascadesVisibleEntities, CubemapVisibleEntities, RenderLayers, ViewVisibility, + VisibleMeshEntities, +}; +use bevy_camera::Camera3d; +use bevy_color::ColorToComponents; +use crate::render::core_3d::CORE_3D_DEPTH_FORMAT; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::component::Tick; +use bevy_ecs::system::SystemChangeTick; +use bevy_ecs::{ + entity::{EntityHashMap, EntityHashSet}, + prelude::*, + system::lifetimeless::Read, +}; +use bevy_light::cascade::Cascade; +use bevy_light::cluster::assign::{calculate_cluster_factors, 
ClusterableObjectType}; +use bevy_light::cluster::GlobalVisibleClusterableObjects; +use bevy_light::SunDisk; +use bevy_light::{ + spot_light_clip_from_view, spot_light_world_from_view, AmbientLight, CascadeShadowConfig, + Cascades, DirectionalLight, DirectionalLightShadowMap, NotShadowCaster, PointLight, + PointLightShadowMap, ShadowFilteringMethod, SpotLight, VolumetricLight, +}; +use bevy_math::{ops, Mat4, UVec4, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles}; +use bevy_platform::collections::{HashMap, HashSet}; +use bevy_platform::hash::FixedHasher; +use crate::render::erased_render_asset::ErasedRenderAssets; +use crate::render::experimental::occlusion_culling::{ + OcclusionCulling, OcclusionCullingSubview, OcclusionCullingSubviewEntities, +}; +use crate::render::sync_world::MainEntityHashMap; +use crate::render::{ + batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, + camera::SortedCameras, + mesh::allocator::MeshAllocator, + view::{NoIndirectDrawing, RetainedViewEntity}, +}; +use crate::render::{ + diagnostic::RecordDiagnostics, + mesh::RenderMesh, + render_asset::RenderAssets, + render_graph::{Node, NodeRunError, RenderGraphContext}, + render_phase::*, + render_resource::*, + renderer::{RenderContext, RenderDevice, RenderQueue}, + texture::*, + view::ExtractedView, + Extract, +}; +use crate::render::{ + mesh::allocator::SlabId, + sync_world::{MainEntity, RenderEntity}, +}; +use bevy_transform::{components::GlobalTransform, prelude::Transform}; +use bevy_utils::default; +use core::{hash::Hash, ops::Range}; +use decal::clustered::RenderClusteredDecals; +#[cfg(feature = "trace")] +use tracing::info_span; +use tracing::{error, warn}; + +#[derive(Component)] +pub struct ExtractedPointLight { + pub color: LinearRgba, + /// luminous intensity in lumens per steradian + pub intensity: f32, + pub range: f32, + pub radius: f32, + pub transform: GlobalTransform, + pub shadows_enabled: bool, + pub shadow_depth_bias: f32, + pub shadow_normal_bias: f32, 
+ pub shadow_map_near_z: f32, + pub spot_light_angles: Option<(f32, f32)>, + pub volumetric: bool, + pub soft_shadows_enabled: bool, + /// whether this point light contributes diffuse light to lightmapped meshes + pub affects_lightmapped_mesh_diffuse: bool, +} + +#[derive(Component, Debug)] +pub struct ExtractedDirectionalLight { + pub color: LinearRgba, + pub illuminance: f32, + pub transform: GlobalTransform, + pub shadows_enabled: bool, + pub volumetric: bool, + /// whether this directional light contributes diffuse light to lightmapped + /// meshes + pub affects_lightmapped_mesh_diffuse: bool, + pub shadow_depth_bias: f32, + pub shadow_normal_bias: f32, + pub cascade_shadow_config: CascadeShadowConfig, + pub cascades: EntityHashMap>, + pub frusta: EntityHashMap>, + pub render_layers: RenderLayers, + pub soft_shadow_size: Option, + /// True if this light is using two-phase occlusion culling. + pub occlusion_culling: bool, + pub sun_disk_angular_size: f32, + pub sun_disk_intensity: f32, +} + +// NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_view_types.wgsl! +bitflags::bitflags! 
{ + #[repr(transparent)] + struct PointLightFlags: u32 { + const SHADOWS_ENABLED = 1 << 0; + const SPOT_LIGHT_Y_NEGATIVE = 1 << 1; + const VOLUMETRIC = 1 << 2; + const AFFECTS_LIGHTMAPPED_MESH_DIFFUSE = 1 << 3; + const NONE = 0; + const UNINITIALIZED = 0xFFFF; + } +} + +#[derive(Copy, Clone, ShaderType, Default, Debug)] +pub struct GpuDirectionalCascade { + clip_from_world: Mat4, + texel_size: f32, + far_bound: f32, +} + +#[derive(Copy, Clone, ShaderType, Default, Debug)] +pub struct GpuDirectionalLight { + cascades: [GpuDirectionalCascade; MAX_CASCADES_PER_LIGHT], + color: Vec4, + dir_to_light: Vec3, + flags: u32, + soft_shadow_size: f32, + shadow_depth_bias: f32, + shadow_normal_bias: f32, + num_cascades: u32, + cascades_overlap_proportion: f32, + depth_texture_base_index: u32, + decal_index: u32, + sun_disk_angular_size: f32, + sun_disk_intensity: f32, +} + +// NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_view_types.wgsl! +bitflags::bitflags! { + #[repr(transparent)] + struct DirectionalLightFlags: u32 { + const SHADOWS_ENABLED = 1 << 0; + const VOLUMETRIC = 1 << 1; + const AFFECTS_LIGHTMAPPED_MESH_DIFFUSE = 1 << 2; + const NONE = 0; + const UNINITIALIZED = 0xFFFF; + } +} + +#[derive(Copy, Clone, Debug, ShaderType)] +pub struct GpuLights { + directional_lights: [GpuDirectionalLight; MAX_DIRECTIONAL_LIGHTS], + ambient_color: Vec4, + // xyz are x/y/z cluster dimensions and w is the number of clusters + cluster_dimensions: UVec4, + // xy are vec2(cluster_dimensions.xy) / vec2(view.width, view.height) + // z is cluster_dimensions.z / log(far / near) + // w is cluster_dimensions.z * log(near) / log(far / near) + cluster_factors: Vec4, + n_directional_lights: u32, + // offset from spot light's light index to spot light's shadow map index + spot_light_shadowmap_offset: i32, + ambient_light_affects_lightmapped_meshes: u32, +} + +// NOTE: When running bevy on Adreno GPU chipsets in WebGL, any value above 1 will result in a crash +// when loading the 
wgsl "pbr_functions.wgsl" in the function apply_fog. +#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] +pub const MAX_DIRECTIONAL_LIGHTS: usize = 1; +#[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" +))] +pub const MAX_DIRECTIONAL_LIGHTS: usize = 10; +#[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" +))] +pub const MAX_CASCADES_PER_LIGHT: usize = 4; +#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] +pub const MAX_CASCADES_PER_LIGHT: usize = 1; + +#[derive(Resource, Clone)] +pub struct ShadowSamplers { + pub point_light_comparison_sampler: Sampler, + #[cfg(feature = "experimental_pbr_pcss")] + pub point_light_linear_sampler: Sampler, + pub directional_light_comparison_sampler: Sampler, + #[cfg(feature = "experimental_pbr_pcss")] + pub directional_light_linear_sampler: Sampler, +} + +pub fn init_shadow_samplers(mut commands: Commands, render_device: Res) { + let base_sampler_descriptor = SamplerDescriptor { + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + address_mode_w: AddressMode::ClampToEdge, + mag_filter: FilterMode::Linear, + min_filter: FilterMode::Linear, + mipmap_filter: FilterMode::Nearest, + ..default() + }; + + commands.insert_resource(ShadowSamplers { + point_light_comparison_sampler: render_device.create_sampler(&SamplerDescriptor { + compare: Some(CompareFunction::GreaterEqual), + ..base_sampler_descriptor + }), + #[cfg(feature = "experimental_pbr_pcss")] + point_light_linear_sampler: render_device.create_sampler(&base_sampler_descriptor), + directional_light_comparison_sampler: render_device.create_sampler(&SamplerDescriptor { + compare: Some(CompareFunction::GreaterEqual), + ..base_sampler_descriptor + }), + #[cfg(feature = "experimental_pbr_pcss")] + directional_light_linear_sampler: render_device.create_sampler(&base_sampler_descriptor), + }); +} + +// This is needed 
because of the orphan rule not allowing implementing +// foreign trait ExtractComponent on foreign type ShadowFilteringMethod +pub fn extract_shadow_filtering_method( + mut commands: Commands, + mut previous_len: Local, + query: Extract>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, query_item) in &query { + values.push((entity, *query_item)); + } + *previous_len = values.len(); + commands.try_insert_batch(values); +} + +// This is needed because of the orphan rule not allowing implementing +// foreign trait ExtractResource on foreign type AmbientLight +pub fn extract_ambient_light_resource( + mut commands: Commands, + main_resource: Extract>>, + target_resource: Option>, +) { + if let Some(main_resource) = main_resource.as_ref() { + if let Some(mut target_resource) = target_resource { + if main_resource.is_changed() { + *target_resource = (*main_resource).clone(); + } + } else { + commands.insert_resource((*main_resource).clone()); + } + } +} + +// This is needed because of the orphan rule not allowing implementing +// foreign trait ExtractComponent on foreign type AmbientLight +pub fn extract_ambient_light( + mut commands: Commands, + mut previous_len: Local, + query: Extract>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, query_item) in &query { + values.push((entity, query_item.clone())); + } + *previous_len = values.len(); + commands.try_insert_batch(values); +} + +pub fn extract_lights( + mut commands: Commands, + point_light_shadow_map: Extract>, + directional_light_shadow_map: Extract>, + global_visible_clusterable: Extract>, + previous_point_lights: Query< + Entity, + ( + With, + With, + ), + >, + previous_spot_lights: Query< + Entity, + (With, With), + >, + point_lights: Extract< + Query<( + Entity, + RenderEntity, + &PointLight, + &CubemapVisibleEntities, + &GlobalTransform, + &ViewVisibility, + &CubemapFrusta, + Option<&VolumetricLight>, + )>, + >, + spot_lights: Extract< + Query<( + Entity, + 
RenderEntity, + &SpotLight, + &VisibleMeshEntities, + &GlobalTransform, + &ViewVisibility, + &Frustum, + Option<&VolumetricLight>, + )>, + >, + directional_lights: Extract< + Query< + ( + Entity, + RenderEntity, + &DirectionalLight, + &CascadesVisibleEntities, + &Cascades, + &CascadeShadowConfig, + &CascadesFrusta, + &GlobalTransform, + &ViewVisibility, + Option<&RenderLayers>, + Option<&VolumetricLight>, + Has, + Option<&SunDisk>, + ), + Without, + >, + >, + mapper: Extract>, + mut previous_point_lights_len: Local, + mut previous_spot_lights_len: Local, +) { + // NOTE: These shadow map resources are extracted here as they are used here too so this avoids + // races between scheduling of ExtractResourceSystems and this system. + if point_light_shadow_map.is_changed() { + commands.insert_resource(point_light_shadow_map.clone()); + } + if directional_light_shadow_map.is_changed() { + commands.insert_resource(directional_light_shadow_map.clone()); + } + + // Clear previous visible entities for all point/spot lights as they might not be in the + // `global_visible_clusterable` list anymore. + commands.try_insert_batch( + previous_point_lights + .iter() + .map(|render_entity| (render_entity, RenderCubemapVisibleEntities::default())) + .collect::>(), + ); + commands.try_insert_batch( + previous_spot_lights + .iter() + .map(|render_entity| (render_entity, RenderVisibleMeshEntities::default())) + .collect::>(), + ); + + // This is the point light shadow map texel size for one face of the cube as a distance of 1.0 + // world unit from the light. 
+ // point_light_texel_size = 2.0 * 1.0 * tan(PI / 4.0) / cube face width in texels + // PI / 4.0 is half the cube face fov, tan(PI / 4.0) = 1.0, so this simplifies to: + // point_light_texel_size = 2.0 / cube face width in texels + // NOTE: When using various PCF kernel sizes, this will need to be adjusted, according to: + // https://catlikecoding.com/unity/tutorials/custom-srp/point-and-spot-shadows/ + let point_light_texel_size = 2.0 / point_light_shadow_map.size as f32; + + let mut point_lights_values = Vec::with_capacity(*previous_point_lights_len); + for entity in global_visible_clusterable.iter().copied() { + let Ok(( + main_entity, + render_entity, + point_light, + cubemap_visible_entities, + transform, + view_visibility, + frusta, + volumetric_light, + )) = point_lights.get(entity) + else { + continue; + }; + if !view_visibility.get() { + continue; + } + let render_cubemap_visible_entities = RenderCubemapVisibleEntities { + data: cubemap_visible_entities + .iter() + .map(|v| create_render_visible_mesh_entities(&mapper, v)) + .collect::>() + .try_into() + .unwrap(), + }; + + let extracted_point_light = ExtractedPointLight { + color: point_light.color.into(), + // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian + // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower + // for details. 
+ intensity: point_light.intensity / (4.0 * core::f32::consts::PI), + range: point_light.range, + radius: point_light.radius, + transform: *transform, + shadows_enabled: point_light.shadows_enabled, + shadow_depth_bias: point_light.shadow_depth_bias, + // The factor of SQRT_2 is for the worst-case diagonal offset + shadow_normal_bias: point_light.shadow_normal_bias + * point_light_texel_size + * core::f32::consts::SQRT_2, + shadow_map_near_z: point_light.shadow_map_near_z, + spot_light_angles: None, + volumetric: volumetric_light.is_some(), + affects_lightmapped_mesh_diffuse: point_light.affects_lightmapped_mesh_diffuse, + #[cfg(feature = "experimental_pbr_pcss")] + soft_shadows_enabled: point_light.soft_shadows_enabled, + #[cfg(not(feature = "experimental_pbr_pcss"))] + soft_shadows_enabled: false, + }; + point_lights_values.push(( + render_entity, + ( + extracted_point_light, + render_cubemap_visible_entities, + (*frusta).clone(), + MainEntity::from(main_entity), + ), + )); + } + *previous_point_lights_len = point_lights_values.len(); + commands.try_insert_batch(point_lights_values); + + let mut spot_lights_values = Vec::with_capacity(*previous_spot_lights_len); + for entity in global_visible_clusterable.iter().copied() { + if let Ok(( + main_entity, + render_entity, + spot_light, + visible_entities, + transform, + view_visibility, + frustum, + volumetric_light, + )) = spot_lights.get(entity) + { + if !view_visibility.get() { + continue; + } + let render_visible_entities = + create_render_visible_mesh_entities(&mapper, visible_entities); + + let texel_size = + 2.0 * ops::tan(spot_light.outer_angle) / directional_light_shadow_map.size as f32; + + spot_lights_values.push(( + render_entity, + ( + ExtractedPointLight { + color: spot_light.color.into(), + // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian + // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower + // for details. 
+ // Note: Filament uses a divisor of PI for spot lights. We choose to use the same 4*PI divisor + // in both cases so that toggling between point light and spot light keeps lit areas lit equally, + // which seems least surprising for users + intensity: spot_light.intensity / (4.0 * core::f32::consts::PI), + range: spot_light.range, + radius: spot_light.radius, + transform: *transform, + shadows_enabled: spot_light.shadows_enabled, + shadow_depth_bias: spot_light.shadow_depth_bias, + // The factor of SQRT_2 is for the worst-case diagonal offset + shadow_normal_bias: spot_light.shadow_normal_bias + * texel_size + * core::f32::consts::SQRT_2, + shadow_map_near_z: spot_light.shadow_map_near_z, + spot_light_angles: Some((spot_light.inner_angle, spot_light.outer_angle)), + volumetric: volumetric_light.is_some(), + affects_lightmapped_mesh_diffuse: spot_light + .affects_lightmapped_mesh_diffuse, + #[cfg(feature = "experimental_pbr_pcss")] + soft_shadows_enabled: spot_light.soft_shadows_enabled, + #[cfg(not(feature = "experimental_pbr_pcss"))] + soft_shadows_enabled: false, + }, + render_visible_entities, + *frustum, + MainEntity::from(main_entity), + ), + )); + } + } + *previous_spot_lights_len = spot_lights_values.len(); + commands.try_insert_batch(spot_lights_values); + + for ( + main_entity, + entity, + directional_light, + visible_entities, + cascades, + cascade_config, + frusta, + transform, + view_visibility, + maybe_layers, + volumetric_light, + occlusion_culling, + sun_disk, + ) in &directional_lights + { + if !view_visibility.get() { + commands + .get_entity(entity) + .expect("Light entity wasn't synced.") + .remove::<(ExtractedDirectionalLight, RenderCascadesVisibleEntities)>(); + continue; + } + + // TODO: update in place instead of reinserting. 
+ let mut extracted_cascades = EntityHashMap::default(); + let mut extracted_frusta = EntityHashMap::default(); + let mut cascade_visible_entities = EntityHashMap::default(); + for (e, v) in cascades.cascades.iter() { + if let Ok(entity) = mapper.get(*e) { + extracted_cascades.insert(entity, v.clone()); + } else { + break; + } + } + for (e, v) in frusta.frusta.iter() { + if let Ok(entity) = mapper.get(*e) { + extracted_frusta.insert(entity, v.clone()); + } else { + break; + } + } + for (e, v) in visible_entities.entities.iter() { + if let Ok(entity) = mapper.get(*e) { + cascade_visible_entities.insert( + entity, + v.iter() + .map(|v| create_render_visible_mesh_entities(&mapper, v)) + .collect(), + ); + } else { + break; + } + } + + commands + .get_entity(entity) + .expect("Light entity wasn't synced.") + .insert(( + ExtractedDirectionalLight { + color: directional_light.color.into(), + illuminance: directional_light.illuminance, + transform: *transform, + volumetric: volumetric_light.is_some(), + affects_lightmapped_mesh_diffuse: directional_light + .affects_lightmapped_mesh_diffuse, + #[cfg(feature = "experimental_pbr_pcss")] + soft_shadow_size: directional_light.soft_shadow_size, + #[cfg(not(feature = "experimental_pbr_pcss"))] + soft_shadow_size: None, + shadows_enabled: directional_light.shadows_enabled, + shadow_depth_bias: directional_light.shadow_depth_bias, + // The factor of SQRT_2 is for the worst-case diagonal offset + shadow_normal_bias: directional_light.shadow_normal_bias + * core::f32::consts::SQRT_2, + cascade_shadow_config: cascade_config.clone(), + cascades: extracted_cascades, + frusta: extracted_frusta, + render_layers: maybe_layers.unwrap_or_default().clone(), + occlusion_culling, + sun_disk_angular_size: sun_disk.unwrap_or_default().angular_size, + sun_disk_intensity: sun_disk.unwrap_or_default().intensity, + }, + RenderCascadesVisibleEntities { + entities: cascade_visible_entities, + }, + MainEntity::from(main_entity), + )); + } +} + +fn 
create_render_visible_mesh_entities( + mapper: &Extract>, + visible_entities: &VisibleMeshEntities, +) -> RenderVisibleMeshEntities { + RenderVisibleMeshEntities { + entities: visible_entities + .iter() + .map(|e| { + let render_entity = mapper.get(*e).unwrap_or(Entity::PLACEHOLDER); + (render_entity, MainEntity::from(*e)) + }) + .collect(), + } +} + +#[derive(Component, Default, Deref, DerefMut)] +/// Component automatically attached to a light entity to track light-view entities +/// for each view. +pub struct LightViewEntities(EntityHashMap>); + +// TODO: using required component +pub(crate) fn add_light_view_entities( + add: On, + mut commands: Commands, +) { + if let Ok(mut v) = commands.get_entity(add.entity) { + v.insert(LightViewEntities::default()); + } +} + +/// Removes [`LightViewEntities`] when light is removed. See [`add_light_view_entities`]. +pub(crate) fn extracted_light_removed( + remove: On, + mut commands: Commands, +) { + if let Ok(mut v) = commands.get_entity(remove.entity) { + v.try_remove::(); + } +} + +pub(crate) fn remove_light_view_entities( + remove: On, + query: Query<&LightViewEntities>, + mut commands: Commands, +) { + if let Ok(entities) = query.get(remove.entity) { + for v in entities.0.values() { + for e in v.iter().copied() { + if let Ok(mut v) = commands.get_entity(e) { + v.despawn(); + } + } + } + } +} + +#[derive(Component)] +pub struct ShadowView { + pub depth_attachment: DepthAttachment, + pub pass_name: String, +} + +#[derive(Component)] +pub struct ViewShadowBindings { + pub point_light_depth_texture: Texture, + pub point_light_depth_texture_view: TextureView, + pub directional_light_depth_texture: Texture, + pub directional_light_depth_texture_view: TextureView, +} + +/// A component that holds the shadow cascade views for all shadow cascades +/// associated with a camera. +/// +/// Note: Despite the name, this component actually holds the shadow cascade +/// views, not the lights themselves. 
+#[derive(Component)] +pub struct ViewLightEntities { + /// The shadow cascade views for all shadow cascades associated with a + /// camera. + /// + /// Note: Despite the name, this component actually holds the shadow cascade + /// views, not the lights themselves. + pub lights: Vec, +} + +#[derive(Component)] +pub struct ViewLightsUniformOffset { + pub offset: u32, +} + +#[derive(Resource, Default)] +pub struct LightMeta { + pub view_gpu_lights: DynamicUniformBuffer, +} + +#[derive(Component)] +pub enum LightEntity { + Directional { + light_entity: Entity, + cascade_index: usize, + }, + Point { + light_entity: Entity, + face_index: usize, + }, + Spot { + light_entity: Entity, + }, +} + +pub fn prepare_lights( + mut commands: Commands, + mut texture_cache: ResMut, + (render_device, render_queue): (Res, Res), + mut global_light_meta: ResMut, + mut light_meta: ResMut, + views: Query< + ( + Entity, + MainEntity, + &ExtractedView, + &ExtractedClusterConfig, + Option<&RenderLayers>, + Has, + Option<&AmbientLight>, + ), + With, + >, + ambient_light: Res, + point_light_shadow_map: Res, + directional_light_shadow_map: Res, + mut shadow_render_phases: ResMut>, + ( + mut max_directional_lights_warning_emitted, + mut max_cascades_per_light_warning_emitted, + mut live_shadow_mapping_lights, + ): (Local, Local, Local>), + point_lights: Query<( + Entity, + &MainEntity, + &ExtractedPointLight, + AnyOf<(&CubemapFrusta, &Frustum)>, + )>, + directional_lights: Query<(Entity, &MainEntity, &ExtractedDirectionalLight)>, + mut light_view_entities: Query<&mut LightViewEntities>, + sorted_cameras: Res, + (gpu_preprocessing_support, decals): ( + Res, + Option>, + ), +) { + let views_iter = views.iter(); + let views_count = views_iter.len(); + let Some(mut view_gpu_lights_writer) = + light_meta + .view_gpu_lights + .get_writer(views_count, &render_device, &render_queue) + else { + return; + }; + + // Pre-calculate for PointLights + let cube_face_rotations = CUBE_MAP_FACES + .iter() + 
.map(|CubeMapFace { target, up }| Transform::IDENTITY.looking_at(*target, *up)) + .collect::>(); + + global_light_meta.entity_to_index.clear(); + + let mut point_lights: Vec<_> = point_lights.iter().collect::>(); + let mut directional_lights: Vec<_> = directional_lights.iter().collect::>(); + + #[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ))] + let max_texture_array_layers = render_device.limits().max_texture_array_layers as usize; + #[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ))] + let max_texture_cubes = max_texture_array_layers / 6; + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + let max_texture_array_layers = 1; + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + let max_texture_cubes = 1; + + if !*max_directional_lights_warning_emitted && directional_lights.len() > MAX_DIRECTIONAL_LIGHTS + { + warn!( + "The amount of directional lights of {} is exceeding the supported limit of {}.", + directional_lights.len(), + MAX_DIRECTIONAL_LIGHTS + ); + *max_directional_lights_warning_emitted = true; + } + + if !*max_cascades_per_light_warning_emitted + && directional_lights + .iter() + .any(|(_, _, light)| light.cascade_shadow_config.bounds.len() > MAX_CASCADES_PER_LIGHT) + { + warn!( + "The number of cascades configured for a directional light exceeds the supported limit of {}.", + MAX_CASCADES_PER_LIGHT + ); + *max_cascades_per_light_warning_emitted = true; + } + + let point_light_count = point_lights + .iter() + .filter(|light| light.2.spot_light_angles.is_none()) + .count(); + + let point_light_volumetric_enabled_count = point_lights + .iter() + .filter(|(_, _, light, _)| light.volumetric && light.spot_light_angles.is_none()) + .count() + .min(max_texture_cubes); + + let point_light_shadow_maps_count = point_lights + .iter() + .filter(|light| light.2.shadows_enabled && light.2.spot_light_angles.is_none()) + 
.count() + .min(max_texture_cubes); + + let directional_volumetric_enabled_count = directional_lights + .iter() + .take(MAX_DIRECTIONAL_LIGHTS) + .filter(|(_, _, light)| light.volumetric) + .count() + .min(max_texture_array_layers / MAX_CASCADES_PER_LIGHT); + + let directional_shadow_enabled_count = directional_lights + .iter() + .take(MAX_DIRECTIONAL_LIGHTS) + .filter(|(_, _, light)| light.shadows_enabled) + .count() + .min(max_texture_array_layers / MAX_CASCADES_PER_LIGHT); + + let spot_light_count = point_lights + .iter() + .filter(|(_, _, light, _)| light.spot_light_angles.is_some()) + .count() + .min(max_texture_array_layers - directional_shadow_enabled_count * MAX_CASCADES_PER_LIGHT); + + let spot_light_volumetric_enabled_count = point_lights + .iter() + .filter(|(_, _, light, _)| light.volumetric && light.spot_light_angles.is_some()) + .count() + .min(max_texture_array_layers - directional_shadow_enabled_count * MAX_CASCADES_PER_LIGHT); + + let spot_light_shadow_maps_count = point_lights + .iter() + .filter(|(_, _, light, _)| light.shadows_enabled && light.spot_light_angles.is_some()) + .count() + .min(max_texture_array_layers - directional_shadow_enabled_count * MAX_CASCADES_PER_LIGHT); + + // Sort lights by + // - point-light vs spot-light, so that we can iterate point lights and spot lights in contiguous blocks in the fragment shader, + // - then those with shadows enabled first, so that the index can be used to render at most `point_light_shadow_maps_count` + // point light shadows and `spot_light_shadow_maps_count` spot light shadow maps, + // - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded. 
+ point_lights.sort_by_cached_key(|(entity, _, light, _)| { + ( + point_or_spot_light_to_clusterable(light).ordering(), + *entity, + ) + }); + + // Sort lights by + // - those with volumetric (and shadows) enabled first, so that the + // volumetric lighting pass can quickly find the volumetric lights; + // - then those with shadows enabled second, so that the index can be used + // to render at most `directional_light_shadow_maps_count` directional light + // shadows + // - then by entity as a stable key to ensure that a consistent set of + // lights are chosen if the light count limit is exceeded. + // - because entities are unique, we can use `sort_unstable_by_key` + // and still end up with a stable order. + directional_lights.sort_unstable_by_key(|(entity, _, light)| { + (light.volumetric, light.shadows_enabled, *entity) + }); + + if global_light_meta.entity_to_index.capacity() < point_lights.len() { + global_light_meta + .entity_to_index + .reserve(point_lights.len()); + } + + let mut gpu_point_lights = Vec::new(); + for (index, &(entity, _, light, _)) in point_lights.iter().enumerate() { + let mut flags = PointLightFlags::NONE; + + // Lights are sorted, shadow enabled lights are first + if light.shadows_enabled + && (index < point_light_shadow_maps_count + || (light.spot_light_angles.is_some() + && index - point_light_count < spot_light_shadow_maps_count)) + { + flags |= PointLightFlags::SHADOWS_ENABLED; + } + + let cube_face_projection = Mat4::perspective_infinite_reverse_rh( + core::f32::consts::FRAC_PI_2, + 1.0, + light.shadow_map_near_z, + ); + if light.shadows_enabled + && light.volumetric + && (index < point_light_volumetric_enabled_count + || (light.spot_light_angles.is_some() + && index - point_light_count < spot_light_volumetric_enabled_count)) + { + flags |= PointLightFlags::VOLUMETRIC; + } + + if light.affects_lightmapped_mesh_diffuse { + flags |= PointLightFlags::AFFECTS_LIGHTMAPPED_MESH_DIFFUSE; + } + + let (light_custom_data, 
spot_light_tan_angle) = match light.spot_light_angles { + Some((inner, outer)) => { + let light_direction = light.transform.forward(); + if light_direction.y.is_sign_negative() { + flags |= PointLightFlags::SPOT_LIGHT_Y_NEGATIVE; + } + + let cos_outer = ops::cos(outer); + let spot_scale = 1.0 / f32::max(ops::cos(inner) - cos_outer, 1e-4); + let spot_offset = -cos_outer * spot_scale; + + ( + // For spot lights: the direction (x,z), spot_scale and spot_offset + light_direction.xz().extend(spot_scale).extend(spot_offset), + ops::tan(outer), + ) + } + None => { + ( + // For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3] + Vec4::new( + cube_face_projection.z_axis.z, + cube_face_projection.z_axis.w, + cube_face_projection.w_axis.z, + cube_face_projection.w_axis.w, + ), + // unused + 0.0, + ) + } + }; + + gpu_point_lights.push(GpuClusterableObject { + light_custom_data, + // premultiply color by intensity + // we don't use the alpha at all, so no reason to multiply only [0..3] + color_inverse_square_range: (Vec4::from_slice(&light.color.to_f32_array()) + * light.intensity) + .xyz() + .extend(1.0 / (light.range * light.range)), + position_radius: light.transform.translation().extend(light.radius), + flags: flags.bits(), + shadow_depth_bias: light.shadow_depth_bias, + shadow_normal_bias: light.shadow_normal_bias, + shadow_map_near_z: light.shadow_map_near_z, + spot_light_tan_angle, + decal_index: decals + .as_ref() + .and_then(|decals| decals.get(entity)) + .and_then(|index| index.try_into().ok()) + .unwrap_or(u32::MAX), + pad: 0.0, + soft_shadow_size: if light.soft_shadows_enabled { + light.radius + } else { + 0.0 + }, + }); + global_light_meta.entity_to_index.insert(entity, index); + } + + // iterate the views once to find the maximum number of cascade shadowmaps we will need + let mut num_directional_cascades_enabled = 0usize; + for ( + _entity, + _camera_main_entity, + _extracted_view, + _clusters, + maybe_layers, + 
_no_indirect_drawing, + _maybe_ambient_override, + ) in sorted_cameras + .0 + .iter() + .filter_map(|sorted_camera| views.get(sorted_camera.entity).ok()) + { + let mut num_directional_cascades_for_this_view = 0usize; + let render_layers = maybe_layers.unwrap_or_default(); + + for (_light_entity, _, light) in directional_lights.iter() { + if light.shadows_enabled && light.render_layers.intersects(render_layers) { + num_directional_cascades_for_this_view += light + .cascade_shadow_config + .bounds + .len() + .min(MAX_CASCADES_PER_LIGHT); + } + } + + num_directional_cascades_enabled = num_directional_cascades_enabled + .max(num_directional_cascades_for_this_view) + .min(max_texture_array_layers); + } + + global_light_meta + .gpu_clusterable_objects + .set(gpu_point_lights); + global_light_meta + .gpu_clusterable_objects + .write_buffer(&render_device, &render_queue); + + live_shadow_mapping_lights.clear(); + + let mut point_light_depth_attachments = HashMap::::default(); + let mut directional_light_depth_attachments = HashMap::::default(); + + let point_light_depth_texture = texture_cache.get( + &render_device, + TextureDescriptor { + size: Extent3d { + width: point_light_shadow_map.size as u32, + height: point_light_shadow_map.size as u32, + depth_or_array_layers: point_light_shadow_maps_count.max(1) as u32 * 6, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: CORE_3D_DEPTH_FORMAT, + label: Some("point_light_shadow_map_texture"), + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let point_light_depth_texture_view = + point_light_depth_texture + .texture + .create_view(&TextureViewDescriptor { + label: Some("point_light_shadow_map_array_texture_view"), + format: None, + // NOTE: iOS Simulator is missing CubeArray support so we use Cube instead. + // See https://github.com/bevyengine/bevy/pull/12052 - remove if support is added. 
+ #[cfg(all( + not(target_abi = "sim"), + any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ) + ))] + dimension: Some(TextureViewDimension::CubeArray), + #[cfg(any( + target_abi = "sim", + all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")) + ))] + dimension: Some(TextureViewDimension::Cube), + usage: None, + aspect: TextureAspect::DepthOnly, + base_mip_level: 0, + mip_level_count: None, + base_array_layer: 0, + array_layer_count: None, + }); + + let directional_light_depth_texture = texture_cache.get( + &render_device, + TextureDescriptor { + size: Extent3d { + width: (directional_light_shadow_map.size as u32) + .min(render_device.limits().max_texture_dimension_2d), + height: (directional_light_shadow_map.size as u32) + .min(render_device.limits().max_texture_dimension_2d), + depth_or_array_layers: (num_directional_cascades_enabled + + spot_light_shadow_maps_count) + .max(1) as u32, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: CORE_3D_DEPTH_FORMAT, + label: Some("directional_light_shadow_map_texture"), + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let directional_light_depth_texture_view = + directional_light_depth_texture + .texture + .create_view(&TextureViewDescriptor { + label: Some("directional_light_shadow_map_array_texture_view"), + format: None, + #[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ))] + dimension: Some(TextureViewDimension::D2Array), + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::DepthOnly, + base_mip_level: 0, + mip_level_count: None, + base_array_layer: 0, + array_layer_count: None, + }); + + let mut live_views = EntityHashSet::with_capacity(views_count); + + // set up light data for each view + for ( + entity, + 
camera_main_entity, + extracted_view, + clusters, + maybe_layers, + no_indirect_drawing, + maybe_ambient_override, + ) in sorted_cameras + .0 + .iter() + .filter_map(|sorted_camera| views.get(sorted_camera.entity).ok()) + { + live_views.insert(entity); + + let view_layers = maybe_layers.unwrap_or_default(); + let mut view_lights = Vec::new(); + let mut view_occlusion_culling_lights = Vec::new(); + + let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing { + GpuPreprocessingMode::Culling + } else { + GpuPreprocessingMode::PreprocessingOnly + }); + + let is_orthographic = extracted_view.clip_from_view.w_axis.w == 1.0; + let cluster_factors_zw = calculate_cluster_factors( + clusters.near, + clusters.far, + clusters.dimensions.z as f32, + is_orthographic, + ); + + let n_clusters = clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z; + let ambient_light = maybe_ambient_override.unwrap_or(&ambient_light); + + let mut gpu_directional_lights = [GpuDirectionalLight::default(); MAX_DIRECTIONAL_LIGHTS]; + let mut num_directional_cascades_enabled_for_this_view = 0usize; + let mut num_directional_lights_for_this_view = 0usize; + for (index, (light_entity, _, light)) in directional_lights + .iter() + .filter(|(_light_entity, _, light)| light.render_layers.intersects(view_layers)) + .enumerate() + .take(MAX_DIRECTIONAL_LIGHTS) + { + num_directional_lights_for_this_view += 1; + + let mut flags = DirectionalLightFlags::NONE; + + // Lights are sorted, volumetric and shadow enabled lights are first + if light.volumetric + && light.shadows_enabled + && (index < directional_volumetric_enabled_count) + { + flags |= DirectionalLightFlags::VOLUMETRIC; + } + + // Shadow enabled lights are second + let mut num_cascades = 0; + if light.shadows_enabled { + let cascades = light + .cascade_shadow_config + .bounds + .len() + .min(MAX_CASCADES_PER_LIGHT); + + if num_directional_cascades_enabled_for_this_view + cascades + <= max_texture_array_layers + 
{ + flags |= DirectionalLightFlags::SHADOWS_ENABLED; + num_cascades += cascades; + } + } + + if light.affects_lightmapped_mesh_diffuse { + flags |= DirectionalLightFlags::AFFECTS_LIGHTMAPPED_MESH_DIFFUSE; + } + + gpu_directional_lights[index] = GpuDirectionalLight { + // Filled in later. + cascades: [GpuDirectionalCascade::default(); MAX_CASCADES_PER_LIGHT], + // premultiply color by illuminance + // we don't use the alpha at all, so no reason to multiply only [0..3] + color: Vec4::from_slice(&light.color.to_f32_array()) * light.illuminance, + // direction is negated to be ready for N.L + dir_to_light: light.transform.back().into(), + flags: flags.bits(), + soft_shadow_size: light.soft_shadow_size.unwrap_or_default(), + shadow_depth_bias: light.shadow_depth_bias, + shadow_normal_bias: light.shadow_normal_bias, + num_cascades: num_cascades as u32, + cascades_overlap_proportion: light.cascade_shadow_config.overlap_proportion, + depth_texture_base_index: num_directional_cascades_enabled_for_this_view as u32, + sun_disk_angular_size: light.sun_disk_angular_size, + sun_disk_intensity: light.sun_disk_intensity, + decal_index: decals + .as_ref() + .and_then(|decals| decals.get(*light_entity)) + .and_then(|index| index.try_into().ok()) + .unwrap_or(u32::MAX), + }; + num_directional_cascades_enabled_for_this_view += num_cascades; + } + + let mut gpu_lights = GpuLights { + directional_lights: gpu_directional_lights, + ambient_color: Vec4::from_slice(&LinearRgba::from(ambient_light.color).to_f32_array()) + * ambient_light.brightness, + cluster_factors: Vec4::new( + clusters.dimensions.x as f32 / extracted_view.viewport.z as f32, + clusters.dimensions.y as f32 / extracted_view.viewport.w as f32, + cluster_factors_zw.x, + cluster_factors_zw.y, + ), + cluster_dimensions: clusters.dimensions.extend(n_clusters), + n_directional_lights: num_directional_lights_for_this_view as u32, + // spotlight shadow maps are stored in the directional light array, starting at 
num_directional_cascades_enabled. + // the spot lights themselves start in the light array at point_light_count. so to go from light + // index to shadow map index, we need to subtract point light count and add directional shadowmap count. + spot_light_shadowmap_offset: num_directional_cascades_enabled as i32 + - point_light_count as i32, + ambient_light_affects_lightmapped_meshes: ambient_light.affects_lightmapped_meshes + as u32, + }; + + // TODO: this should select lights based on relevance to the view instead of the first ones that show up in a query + for &(light_entity, light_main_entity, light, (point_light_frusta, _)) in point_lights + .iter() + // Lights are sorted, shadow enabled lights are first + .take(point_light_count.min(max_texture_cubes)) + { + let Ok(mut light_view_entities) = light_view_entities.get_mut(light_entity) else { + continue; + }; + + if !light.shadows_enabled { + if let Some(entities) = light_view_entities.remove(&entity) { + despawn_entities(&mut commands, entities); + } + continue; + } + + let light_index = *global_light_meta + .entity_to_index + .get(&light_entity) + .unwrap(); + // ignore scale because we don't want to effectively scale light radius and range + // by applying those as a view transform to shadow map rendering of objects + // and ignore rotation because we want the shadow map projections to align with the axes + let view_translation = GlobalTransform::from_translation(light.transform.translation()); + + // for each face of a cube and each view we spawn a light entity + let light_view_entities = light_view_entities + .entry(entity) + .or_insert_with(|| (0..6).map(|_| commands.spawn_empty().id()).collect()); + + let cube_face_projection = Mat4::perspective_infinite_reverse_rh( + core::f32::consts::FRAC_PI_2, + 1.0, + light.shadow_map_near_z, + ); + + for (face_index, ((view_rotation, frustum), view_light_entity)) in cube_face_rotations + .iter() + .zip(&point_light_frusta.unwrap().frusta) + 
.zip(light_view_entities.iter().copied()) + .enumerate() + { + let mut first = false; + let base_array_layer = (light_index * 6 + face_index) as u32; + + let depth_attachment = point_light_depth_attachments + .entry(base_array_layer) + .or_insert_with(|| { + first = true; + + let depth_texture_view = + point_light_depth_texture + .texture + .create_view(&TextureViewDescriptor { + label: Some("point_light_shadow_map_texture_view"), + format: None, + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: None, + base_array_layer, + array_layer_count: Some(1u32), + }); + + DepthAttachment::new(depth_texture_view, Some(0.0)) + }) + .clone(); + + let retained_view_entity = RetainedViewEntity::new( + *light_main_entity, + Some(camera_main_entity.into()), + face_index as u32, + ); + + commands.entity(view_light_entity).insert(( + ShadowView { + depth_attachment, + pass_name: format!( + "shadow_point_light_{}_{}", + light_index, + face_index_to_name(face_index) + ), + }, + ExtractedView { + retained_view_entity, + viewport: UVec4::new( + 0, + 0, + point_light_shadow_map.size as u32, + point_light_shadow_map.size as u32, + ), + world_from_view: view_translation * *view_rotation, + clip_from_world: None, + clip_from_view: cube_face_projection, + hdr: false, + color_grading: Default::default(), + }, + *frustum, + LightEntity::Point { + light_entity, + face_index, + }, + )); + + if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) { + commands.entity(view_light_entity).insert(NoIndirectDrawing); + } + + view_lights.push(view_light_entity); + + if first { + // Subsequent views with the same light entity will reuse the same shadow map + shadow_render_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + live_shadow_mapping_lights.insert(retained_view_entity); + } + } + } + + // spot lights + for (light_index, &(light_entity, light_main_entity, light, (_, 
spot_light_frustum))) in + point_lights + .iter() + .skip(point_light_count) + .take(spot_light_count) + .enumerate() + { + let Ok(mut light_view_entities) = light_view_entities.get_mut(light_entity) else { + continue; + }; + + if !light.shadows_enabled { + if let Some(entities) = light_view_entities.remove(&entity) { + despawn_entities(&mut commands, entities); + } + continue; + } + + let spot_world_from_view = spot_light_world_from_view(&light.transform); + let spot_world_from_view = spot_world_from_view.into(); + + let angle = light.spot_light_angles.expect("lights should be sorted so that \ + [point_light_count..point_light_count + spot_light_shadow_maps_count] are spot lights").1; + let spot_projection = spot_light_clip_from_view(angle, light.shadow_map_near_z); + + let mut first = false; + let base_array_layer = (num_directional_cascades_enabled + light_index) as u32; + + let depth_attachment = directional_light_depth_attachments + .entry(base_array_layer) + .or_insert_with(|| { + first = true; + + let depth_texture_view = directional_light_depth_texture.texture.create_view( + &TextureViewDescriptor { + label: Some("spot_light_shadow_map_texture_view"), + format: None, + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: None, + base_array_layer, + array_layer_count: Some(1u32), + }, + ); + + DepthAttachment::new(depth_texture_view, Some(0.0)) + }) + .clone(); + + let light_view_entities = light_view_entities + .entry(entity) + .or_insert_with(|| vec![commands.spawn_empty().id()]); + + let view_light_entity = light_view_entities[0]; + + let retained_view_entity = + RetainedViewEntity::new(*light_main_entity, Some(camera_main_entity.into()), 0); + + commands.entity(view_light_entity).insert(( + ShadowView { + depth_attachment, + pass_name: format!("shadow_spot_light_{light_index}"), + }, + ExtractedView { + retained_view_entity, + viewport: UVec4::new( + 0, + 0, + 
directional_light_shadow_map.size as u32, + directional_light_shadow_map.size as u32, + ), + world_from_view: spot_world_from_view, + clip_from_view: spot_projection, + clip_from_world: None, + hdr: false, + color_grading: Default::default(), + }, + *spot_light_frustum.unwrap(), + LightEntity::Spot { light_entity }, + )); + + if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) { + commands.entity(view_light_entity).insert(NoIndirectDrawing); + } + + view_lights.push(view_light_entity); + + if first { + // Subsequent views with the same light entity will reuse the same shadow map + shadow_render_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + live_shadow_mapping_lights.insert(retained_view_entity); + } + } + + // directional lights + // clear entities for lights that don't intersect the layer + for &(light_entity, _, _) in directional_lights + .iter() + .filter(|(_, _, light)| !light.render_layers.intersects(view_layers)) + { + let Ok(mut light_view_entities) = light_view_entities.get_mut(light_entity) else { + continue; + }; + if let Some(entities) = light_view_entities.remove(&entity) { + despawn_entities(&mut commands, entities); + } + } + + let mut directional_depth_texture_array_index = 0u32; + for (light_index, &(light_entity, light_main_entity, light)) in directional_lights + .iter() + .filter(|(_, _, light)| light.render_layers.intersects(view_layers)) + .enumerate() + .take(MAX_DIRECTIONAL_LIGHTS) + { + let Ok(mut light_view_entities) = light_view_entities.get_mut(light_entity) else { + continue; + }; + + let gpu_light = &mut gpu_lights.directional_lights[light_index]; + + // Only deal with cascades when shadows are enabled. 
+ if (gpu_light.flags & DirectionalLightFlags::SHADOWS_ENABLED.bits()) == 0u32 { + if let Some(entities) = light_view_entities.remove(&entity) { + despawn_entities(&mut commands, entities); + } + continue; + } + + let cascades = light + .cascades + .get(&entity) + .unwrap() + .iter() + .take(MAX_CASCADES_PER_LIGHT); + let frusta = light + .frusta + .get(&entity) + .unwrap() + .iter() + .take(MAX_CASCADES_PER_LIGHT); + + let iter = cascades + .zip(frusta) + .zip(&light.cascade_shadow_config.bounds); + + let light_view_entities = light_view_entities.entry(entity).or_insert_with(|| { + (0..iter.len()) + .map(|_| commands.spawn_empty().id()) + .collect() + }); + if light_view_entities.len() != iter.len() { + let entities = core::mem::take(light_view_entities); + despawn_entities(&mut commands, entities); + light_view_entities.extend((0..iter.len()).map(|_| commands.spawn_empty().id())); + } + + for (cascade_index, (((cascade, frustum), bound), view_light_entity)) in + iter.zip(light_view_entities.iter().copied()).enumerate() + { + gpu_lights.directional_lights[light_index].cascades[cascade_index] = + GpuDirectionalCascade { + clip_from_world: cascade.clip_from_world, + texel_size: cascade.texel_size, + far_bound: *bound, + }; + + let depth_texture_view = + directional_light_depth_texture + .texture + .create_view(&TextureViewDescriptor { + label: Some("directional_light_shadow_map_array_texture_view"), + format: None, + dimension: Some(TextureViewDimension::D2), + usage: None, + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: None, + base_array_layer: directional_depth_texture_array_index, + array_layer_count: Some(1u32), + }); + + // NOTE: For point and spotlights, we reuse the same depth attachment for all views. + // However, for directional lights, we want a new depth attachment for each view, + // so that the view is cleared for each view. 
+ let depth_attachment = DepthAttachment::new(depth_texture_view.clone(), Some(0.0)); + + directional_depth_texture_array_index += 1; + + let mut frustum = *frustum; + // Push the near clip plane out to infinity for directional lights + frustum.half_spaces[4] = + HalfSpace::new(frustum.half_spaces[4].normal().extend(f32::INFINITY)); + + let retained_view_entity = RetainedViewEntity::new( + *light_main_entity, + Some(camera_main_entity.into()), + cascade_index as u32, + ); + + commands.entity(view_light_entity).insert(( + ShadowView { + depth_attachment, + pass_name: format!( + "shadow_directional_light_{light_index}_cascade_{cascade_index}" + ), + }, + ExtractedView { + retained_view_entity, + viewport: UVec4::new( + 0, + 0, + directional_light_shadow_map.size as u32, + directional_light_shadow_map.size as u32, + ), + world_from_view: GlobalTransform::from(cascade.world_from_cascade), + clip_from_view: cascade.clip_from_cascade, + clip_from_world: Some(cascade.clip_from_world), + hdr: false, + color_grading: Default::default(), + }, + frustum, + LightEntity::Directional { + light_entity, + cascade_index, + }, + )); + + if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) { + commands.entity(view_light_entity).insert(NoIndirectDrawing); + } + + view_lights.push(view_light_entity); + + // If this light is using occlusion culling, add the appropriate components. + if light.occlusion_culling { + commands.entity(view_light_entity).insert(( + OcclusionCulling, + OcclusionCullingSubview { + depth_texture_view, + depth_texture_size: directional_light_shadow_map.size as u32, + }, + )); + view_occlusion_culling_lights.push(view_light_entity); + } + + // Subsequent views with the same light entity will **NOT** reuse the same shadow map + // (Because the cascades are unique to each view) + // TODO: Implement GPU culling for shadow passes. 
+ shadow_render_phases + .prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + live_shadow_mapping_lights.insert(retained_view_entity); + } + } + + commands.entity(entity).insert(( + ViewShadowBindings { + point_light_depth_texture: point_light_depth_texture.texture.clone(), + point_light_depth_texture_view: point_light_depth_texture_view.clone(), + directional_light_depth_texture: directional_light_depth_texture.texture.clone(), + directional_light_depth_texture_view: directional_light_depth_texture_view.clone(), + }, + ViewLightEntities { + lights: view_lights, + }, + ViewLightsUniformOffset { + offset: view_gpu_lights_writer.write(&gpu_lights), + }, + )); + + // Make a link from the camera to all shadow cascades with occlusion + // culling enabled. + if !view_occlusion_culling_lights.is_empty() { + commands + .entity(entity) + .insert(OcclusionCullingSubviewEntities( + view_occlusion_culling_lights, + )); + } + } + + // Despawn light-view entities for views that no longer exist + for mut entities in &mut light_view_entities { + for (_, light_view_entities) in + entities.extract_if(|entity, _| !live_views.contains(entity)) + { + despawn_entities(&mut commands, light_view_entities); + } + } + + shadow_render_phases.retain(|entity, _| live_shadow_mapping_lights.contains(entity)); +} + +fn despawn_entities(commands: &mut Commands, entities: Vec) { + if entities.is_empty() { + return; + } + commands.queue(move |world: &mut World| { + for entity in entities { + world.despawn(entity); + } + }); +} + +// These will be extracted in the material extraction, which will also clear the needs_specialization +// collection. 
+pub fn check_light_entities_needing_specialization( + needs_specialization: Query>, Changed)>, + mut entities_needing_specialization: ResMut>, + mut removed_components: RemovedComponents, +) { + for entity in &needs_specialization { + entities_needing_specialization.push(entity); + } + + for removed in removed_components.read() { + entities_needing_specialization.entities.push(removed); + } +} + +#[derive(Resource, Deref, DerefMut, Default, Debug, Clone)] +pub struct LightKeyCache(HashMap); + +#[derive(Resource, Deref, DerefMut, Default, Debug, Clone)] +pub struct LightSpecializationTicks(HashMap); + +#[derive(Resource, Deref, DerefMut, Default)] +pub struct SpecializedShadowMaterialPipelineCache { + // view light entity -> view pipeline cache + #[deref] + map: HashMap, +} + +#[derive(Deref, DerefMut, Default)] +pub struct SpecializedShadowMaterialViewPipelineCache { + #[deref] + map: MainEntityHashMap<(Tick, CachedRenderPipelineId)>, +} + +pub fn check_views_lights_need_specialization( + view_lights: Query<&ViewLightEntities, With>, + view_light_entities: Query<(&LightEntity, &ExtractedView)>, + shadow_render_phases: Res>, + mut light_key_cache: ResMut, + mut light_specialization_ticks: ResMut, + ticks: SystemChangeTick, +) { + for view_lights in &view_lights { + for view_light_entity in view_lights.lights.iter().copied() { + let Ok((light_entity, extracted_view_light)) = + view_light_entities.get(view_light_entity) + else { + continue; + }; + if !shadow_render_phases.contains_key(&extracted_view_light.retained_view_entity) { + continue; + } + + let is_directional_light = matches!(light_entity, LightEntity::Directional { .. 
}); + let mut light_key = MeshPipelineKey::DEPTH_PREPASS; + light_key.set(MeshPipelineKey::UNCLIPPED_DEPTH_ORTHO, is_directional_light); + if let Some(current_key) = + light_key_cache.get_mut(&extracted_view_light.retained_view_entity) + { + if *current_key != light_key { + light_key_cache.insert(extracted_view_light.retained_view_entity, light_key); + light_specialization_ticks + .insert(extracted_view_light.retained_view_entity, ticks.this_run()); + } + } else { + light_key_cache.insert(extracted_view_light.retained_view_entity, light_key); + light_specialization_ticks + .insert(extracted_view_light.retained_view_entity, ticks.this_run()); + } + } + } +} + +pub fn specialize_shadows( + prepass_pipeline: Res, + (render_meshes, render_mesh_instances, render_materials, render_material_instances): ( + Res>, + Res, + Res>, + Res, + ), + shadow_render_phases: Res>, + mut pipelines: ResMut>, + pipeline_cache: Res, + render_lightmaps: Res, + view_lights: Query<(Entity, &ViewLightEntities), With>, + view_light_entities: Query<(&LightEntity, &ExtractedView)>, + point_light_entities: Query<&RenderCubemapVisibleEntities, With>, + directional_light_entities: Query< + &RenderCascadesVisibleEntities, + With, + >, + spot_light_entities: Query<&RenderVisibleMeshEntities, With>, + light_key_cache: Res, + mut specialized_material_pipeline_cache: ResMut, + light_specialization_ticks: Res, + entity_specialization_ticks: Res, + ticks: SystemChangeTick, +) { + // Record the retained IDs of all shadow views so that we can expire old + // pipeline IDs. 
+ let mut all_shadow_views: HashSet = HashSet::default(); + + for (entity, view_lights) in &view_lights { + for view_light_entity in view_lights.lights.iter().copied() { + let Ok((light_entity, extracted_view_light)) = + view_light_entities.get(view_light_entity) + else { + continue; + }; + + all_shadow_views.insert(extracted_view_light.retained_view_entity); + + if !shadow_render_phases.contains_key(&extracted_view_light.retained_view_entity) { + continue; + } + let Some(light_key) = light_key_cache.get(&extracted_view_light.retained_view_entity) + else { + continue; + }; + + let visible_entities = match light_entity { + LightEntity::Directional { + light_entity, + cascade_index, + } => directional_light_entities + .get(*light_entity) + .expect("Failed to get directional light visible entities") + .entities + .get(&entity) + .expect("Failed to get directional light visible entities for view") + .get(*cascade_index) + .expect("Failed to get directional light visible entities for cascade"), + LightEntity::Point { + light_entity, + face_index, + } => point_light_entities + .get(*light_entity) + .expect("Failed to get point light visible entities") + .get(*face_index), + LightEntity::Spot { light_entity } => spot_light_entities + .get(*light_entity) + .expect("Failed to get spot light visible entities"), + }; + + // NOTE: Lights with shadow mapping disabled will have no visible entities + // so no meshes will be queued + + let view_tick = light_specialization_ticks + .get(&extracted_view_light.retained_view_entity) + .unwrap(); + let view_specialized_material_pipeline_cache = specialized_material_pipeline_cache + .entry(extracted_view_light.retained_view_entity) + .or_default(); + + for (_, visible_entity) in visible_entities.iter().copied() { + let Some(material_instance) = + render_material_instances.instances.get(&visible_entity) + else { + continue; + }; + + let Some(mesh_instance) = + render_mesh_instances.render_mesh_queue_data(visible_entity) + else { + 
continue; + }; + let entity_tick = entity_specialization_ticks.get(&visible_entity).unwrap(); + let last_specialized_tick = view_specialized_material_pipeline_cache + .get(&visible_entity) + .map(|(tick, _)| *tick); + let needs_specialization = last_specialized_tick.is_none_or(|tick| { + view_tick.is_newer_than(tick, ticks.this_run()) + || entity_tick.is_newer_than(tick, ticks.this_run()) + }); + if !needs_specialization { + continue; + } + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + if !material.properties.shadows_enabled { + // If the material is not a shadow caster, we don't need to specialize it. + continue; + } + if !mesh_instance + .flags + .contains(RenderMeshInstanceFlags::SHADOW_CASTER) + { + continue; + } + let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else { + continue; + }; + + let mut mesh_key = + *light_key | MeshPipelineKey::from_bits_retain(mesh.key_bits.bits()); + + // Even though we don't use the lightmap in the shadow map, the + // `SetMeshBindGroup` render command will bind the data for it. So + // we need to include the appropriate flag in the mesh pipeline key + // to ensure that the necessary bind group layout entries are + // present. 
+ if render_lightmaps + .render_lightmaps + .contains_key(&visible_entity) + { + mesh_key |= MeshPipelineKey::LIGHTMAPPED; + } + + mesh_key |= match material.properties.alpha_mode { + AlphaMode::Mask(_) + | AlphaMode::Blend + | AlphaMode::Premultiplied + | AlphaMode::Add + | AlphaMode::AlphaToCoverage => MeshPipelineKey::MAY_DISCARD, + _ => MeshPipelineKey::NONE, + }; + let erased_key = ErasedMaterialPipelineKey { + mesh_key, + material_key: material.properties.material_key.clone(), + type_id: material_instance.asset_id.type_id(), + }; + let material_pipeline_specializer = PrepassPipelineSpecializer { + pipeline: prepass_pipeline.clone(), + properties: material.properties.clone(), + }; + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &material_pipeline_specializer, + erased_key, + &mesh.layout, + ); + let pipeline_id = match pipeline_id { + Ok(id) => id, + Err(err) => { + error!("{}", err); + continue; + } + }; + + view_specialized_material_pipeline_cache + .insert(visible_entity, (ticks.this_run(), pipeline_id)); + } + } + } + + // Delete specialized pipelines belonging to views that have expired. + specialized_material_pipeline_cache.retain(|view, _| all_shadow_views.contains(view)); +} + +/// For each shadow cascade, iterates over all the meshes "visible" from it and +/// adds them to [`BinnedRenderPhase`]s or [`SortedRenderPhase`]s as +/// appropriate. 
+pub fn queue_shadows( + render_mesh_instances: Res, + render_materials: Res>, + render_material_instances: Res, + mut shadow_render_phases: ResMut>, + gpu_preprocessing_support: Res, + mesh_allocator: Res, + view_lights: Query<(Entity, &ViewLightEntities, Option<&RenderLayers>), With>, + view_light_entities: Query<(&LightEntity, &ExtractedView)>, + point_light_entities: Query<&RenderCubemapVisibleEntities, With>, + directional_light_entities: Query< + &RenderCascadesVisibleEntities, + With, + >, + spot_light_entities: Query<&RenderVisibleMeshEntities, With>, + specialized_material_pipeline_cache: Res, +) { + for (entity, view_lights, camera_layers) in &view_lights { + for view_light_entity in view_lights.lights.iter().copied() { + let Ok((light_entity, extracted_view_light)) = + view_light_entities.get(view_light_entity) + else { + continue; + }; + let Some(shadow_phase) = + shadow_render_phases.get_mut(&extracted_view_light.retained_view_entity) + else { + continue; + }; + + let Some(view_specialized_material_pipeline_cache) = + specialized_material_pipeline_cache.get(&extracted_view_light.retained_view_entity) + else { + continue; + }; + + let visible_entities = match light_entity { + LightEntity::Directional { + light_entity, + cascade_index, + } => directional_light_entities + .get(*light_entity) + .expect("Failed to get directional light visible entities") + .entities + .get(&entity) + .expect("Failed to get directional light visible entities for view") + .get(*cascade_index) + .expect("Failed to get directional light visible entities for cascade"), + LightEntity::Point { + light_entity, + face_index, + } => point_light_entities + .get(*light_entity) + .expect("Failed to get point light visible entities") + .get(*face_index), + LightEntity::Spot { light_entity } => spot_light_entities + .get(*light_entity) + .expect("Failed to get spot light visible entities"), + }; + + for (entity, main_entity) in visible_entities.iter().copied() { + let 
Some((current_change_tick, pipeline_id)) = + view_specialized_material_pipeline_cache.get(&main_entity) + else { + continue; + }; + + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(main_entity) + else { + continue; + }; + if !mesh_instance + .flags + .contains(RenderMeshInstanceFlags::SHADOW_CASTER) + { + continue; + } + + let mesh_layers = mesh_instance + .shared + .render_layers + .as_ref() + .unwrap_or_default(); + + let camera_layers = camera_layers.unwrap_or_default(); + + if !camera_layers.intersects(mesh_layers) { + continue; + } + + // Skip the entity if it's cached in a bin and up to date. + if shadow_phase.validate_cached_entity(main_entity, *current_change_tick) { + continue; + } + + let Some(material_instance) = render_material_instances.instances.get(&main_entity) + else { + continue; + }; + let Some(material) = render_materials.get(material_instance.asset_id) else { + continue; + }; + let Some(draw_function) = + material.properties.get_draw_function(ShadowsDrawFunction) + else { + continue; + }; + + let (vertex_slab, index_slab) = + mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + + let batch_set_key = ShadowBatchSetKey { + pipeline: *pipeline_id, + draw_function, + material_bind_group_index: Some(material.binding.group.0), + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }; + + shadow_phase.add( + batch_set_key, + ShadowBinKey { + asset_id: mesh_instance.mesh_asset_id.into(), + }, + (entity, main_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + *current_change_tick, + ); + } + } + } +} + +pub struct Shadow { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: ShadowBatchSetKey, + /// Information that separates items into bins. 
+ pub bin_key: ShadowBinKey, + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +/// Information that must be identical in order to place opaque meshes in the +/// same *batch set*. +/// +/// A batch set is a set of batches that can be multi-drawn together, if +/// multi-draw is in use. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ShadowBatchSetKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + + /// The function used to draw. + pub draw_function: DrawFunctionId, + + /// The ID of a bind group specific to the material. + /// + /// In the case of PBR, this is the `MaterialBindGroupIndex`. + pub material_bind_group_index: Option, + + /// The ID of the slab of GPU memory that contains vertex data. + /// + /// For non-mesh items, you can fill this with 0 if your items can be + /// multi-drawn, or with a unique value if they can't. + pub vertex_slab: SlabId, + + /// The ID of the slab of GPU memory that contains index data, if present. + /// + /// For non-mesh items, you can safely fill this with `None`. + pub index_slab: Option, +} + +impl PhaseItemBatchSetKey for ShadowBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + +/// Data used to bin each object in the shadow map phase. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ShadowBinKey { + /// The object. 
+ pub asset_id: UntypedAssetId, +} + +impl PhaseItem for Shadow { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for Shadow { + type BatchSetKey = ShadowBatchSetKey; + type BinKey = ShadowBinKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Shadow { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for Shadow { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +/// The rendering node that renders meshes that were "visible" (so to speak) +/// from a light last frame. +/// +/// If occlusion culling for a light is disabled, then this node simply renders +/// all meshes in range of the light. +#[derive(Deref, DerefMut)] +pub struct EarlyShadowPassNode(ShadowPassNode); + +/// The rendering node that renders meshes that became newly "visible" (so to +/// speak) from a light this frame. +/// +/// If occlusion culling for a light is disabled, then this node does nothing. +#[derive(Deref, DerefMut)] +pub struct LateShadowPassNode(ShadowPassNode); + +/// Encapsulates rendering logic shared between the early and late shadow pass +/// nodes. 
+pub struct ShadowPassNode { + /// The query that finds cameras in which shadows are visible. + main_view_query: QueryState>, + /// The query that finds shadow cascades. + view_light_query: QueryState<(Read, Read, Has)>, +} + +impl FromWorld for EarlyShadowPassNode { + fn from_world(world: &mut World) -> Self { + Self(ShadowPassNode::from_world(world)) + } +} + +impl FromWorld for LateShadowPassNode { + fn from_world(world: &mut World) -> Self { + Self(ShadowPassNode::from_world(world)) + } +} + +impl FromWorld for ShadowPassNode { + fn from_world(world: &mut World) -> Self { + Self { + main_view_query: QueryState::new(world), + view_light_query: QueryState::new(world), + } + } +} + +impl Node for EarlyShadowPassNode { + fn update(&mut self, world: &mut World) { + self.0.update(world); + } + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + self.0.run(graph, render_context, world, false) + } +} + +impl Node for LateShadowPassNode { + fn update(&mut self, world: &mut World) { + self.0.update(world); + } + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + self.0.run(graph, render_context, world, true) + } +} + +impl ShadowPassNode { + fn update(&mut self, world: &mut World) { + self.main_view_query.update_archetypes(world); + self.view_light_query.update_archetypes(world); + } + + /// Runs the node logic. + /// + /// `is_late` is true if this is the late shadow pass or false if this is + /// the early shadow pass. 
+ fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + is_late: bool, + ) -> Result<(), NodeRunError> { + let Some(shadow_render_phases) = world.get_resource::>() + else { + return Ok(()); + }; + + if let Ok(view_lights) = self.main_view_query.get_manual(world, graph.view_entity()) { + for view_light_entity in view_lights.lights.iter().copied() { + let Ok((view_light, extracted_light_view, occlusion_culling)) = + self.view_light_query.get_manual(world, view_light_entity) + else { + continue; + }; + + // There's no need for a late shadow pass if the light isn't + // using occlusion culling. + if is_late && !occlusion_culling { + continue; + } + + let Some(shadow_phase) = + shadow_render_phases.get(&extracted_light_view.retained_view_entity) + else { + continue; + }; + + let depth_stencil_attachment = + Some(view_light.depth_attachment.get_attachment(StoreOp::Store)); + + let diagnostics = render_context.diagnostic_recorder(); + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _shadow_pass_span = info_span!("", "{}", view_light.pass_name).entered(); + let mut command_encoder = + render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("shadow_pass_command_encoder"), + }); + + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some(&view_light.pass_name), + color_attachments: &[], + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = + diagnostics.pass_span(&mut render_pass, view_light.pass_name.clone()); + + if let Err(err) = + shadow_phase.render(&mut render_pass, world, view_light_entity) + { + error!("Error encountered while rendering the shadow phase {err:?}"); + } + + pass_span.end(&mut render_pass); + drop(render_pass); + command_encoder.finish() + }); + 
} + } + + Ok(()) + } +} + +/// Creates the [`ClusterableObjectType`] data for a point or spot light. +fn point_or_spot_light_to_clusterable(point_light: &ExtractedPointLight) -> ClusterableObjectType { + match point_light.spot_light_angles { + Some((_, outer_angle)) => ClusterableObjectType::SpotLight { + outer_angle, + shadows_enabled: point_light.shadows_enabled, + volumetric: point_light.volumetric, + }, + None => ClusterableObjectType::PointLight { + shadows_enabled: point_light.shadows_enabled, + volumetric: point_light.volumetric, + }, + } +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh.rs b/crates/libmarathon/src/render/pbr/render/mesh.rs new file mode 100644 index 0000000..6bb451d --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh.rs @@ -0,0 +1,3312 @@ +use crate::render::pbr::material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetId}; +use bevy_camera::{ + primitives::Aabb, + visibility::{NoFrustumCulling, RenderLayers, ViewVisibility, VisibilityRange}, + Camera, Camera3d, Projection, +}; +use crate::render::{ + core_3d::{AlphaMask3d, Opaque3d, Transmissive3d, Transparent3d, CORE_3D_DEPTH_FORMAT}, + deferred::{AlphaMask3dDeferred, Opaque3dDeferred}, + oit::{prepare_oit_buffers, OrderIndependentTransparencySettingsOffset}, + prepass::MotionVectorPrepass, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_diagnostic::FrameCount; +use bevy_ecs::{ + prelude::*, + query::{QueryData, ROQueryItem}, + system::{lifetimeless::*, SystemParamItem, SystemState}, +}; +use bevy_image::{BevyDefault, ImageSampler, TextureFormatPixelInfo}; +use bevy_light::{ + EnvironmentMapLight, IrradianceVolume, NotShadowCaster, NotShadowReceiver, + ShadowFilteringMethod, TransmittedShadowReceiver, +}; +use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4}; +use bevy_mesh::{ + skinning::SkinnedMesh, BaseMeshPipelineKey, Mesh, Mesh3d, MeshTag, MeshVertexBufferLayoutRef, + 
VertexAttributeDescriptor, +}; +use bevy_platform::collections::{hash_map::Entry, HashMap}; +use crate::render::{ + batching::{ + gpu_preprocessing::{ + self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, + IndirectParametersCpuMetadata, IndirectParametersIndexed, IndirectParametersNonIndexed, + InstanceInputUniformBuffer, UntypedPhaseIndirectParametersBuffers, + }, + no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, + }, + mesh::{allocator::MeshAllocator, RenderMesh, RenderMeshBufferInfo}, + render_asset::RenderAssets, + render_phase::{ + BinnedRenderPhasePlugin, InputUniformIndex, PhaseItem, PhaseItemExtraIndex, RenderCommand, + RenderCommandResult, SortedRenderPhasePlugin, TrackedRenderPass, + }, + render_resource::*, + renderer::{RenderAdapter, RenderDevice, RenderQueue}, + sync_world::MainEntityHashSet, + texture::{DefaultImageSampler, GpuImage}, + view::{ + self, NoIndirectDrawing, RenderVisibilityRanges, RetainedViewEntity, ViewTarget, + ViewUniformOffset, + }, + Extract, +}; +use bevy_shader::{load_shader_library, Shader, ShaderDefVal, ShaderSettings}; +use bevy_transform::components::GlobalTransform; +use bevy_utils::{default, Parallel, TypeIdMap}; +use core::any::TypeId; +use core::mem::size_of; +use material_bind_groups::MaterialBindingId; +use tracing::{error, warn}; + +use self::irradiance_volume::IRRADIANCE_VOLUMES_ARE_USABLE; +use crate::render::pbr::{ + render::{ + morph::{ + extract_morphs, no_automatic_morph_batching, prepare_morphs, MorphIndices, + MorphUniforms, + }, + skin::no_automatic_skin_batching, + }, + *, +}; +use crate::render::oit::OrderIndependentTransparencySettings; +use crate::render::prepass::{DeferredPrepass, DepthPrepass, NormalPrepass}; +use crate::render::tonemapping::{DebandDither, Tonemapping}; +use bevy_ecs::component::Tick; +use bevy_ecs::system::SystemChangeTick; +use crate::render::camera::TemporalJitter; +use crate::render::prelude::Msaa; +use 
crate::render::sync_world::{MainEntity, MainEntityHashMap}; +use crate::render::view::ExtractedView; +use crate::render::RenderSystems::PrepareAssets; + +use bytemuck::{Pod, Zeroable}; +use nonmax::{NonMaxU16, NonMaxU32}; +use smallvec::{smallvec, SmallVec}; +use static_assertions::const_assert_eq; + +/// Provides support for rendering 3D meshes. +pub struct MeshRenderPlugin { + /// Whether we're building [`MeshUniform`]s on GPU. + /// + /// This requires compute shader support and so will be forcibly disabled if + /// the platform doesn't support those. + pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +impl MeshRenderPlugin { + /// Creates a new [`MeshRenderPlugin`] with the given debug flags. + pub fn new(debug_flags: RenderDebugFlags) -> MeshRenderPlugin { + MeshRenderPlugin { + use_gpu_instance_buffer_builder: false, + debug_flags, + } + } +} + +/// How many textures are allowed in the view bind group layout (`@group(0)`) before +/// broader compatibility with WebGL and WebGPU is at risk, due to the minimum guaranteed +/// values for `MAX_TEXTURE_IMAGE_UNITS` (in WebGL) and `maxSampledTexturesPerShaderStage` (in WebGPU), +/// currently both at 16. +/// +/// We use 10 here because it still leaves us, in a worst case scenario, with 6 textures for the other bind groups. 
+/// +/// See: +#[cfg(debug_assertions)] +pub const MESH_PIPELINE_VIEW_LAYOUT_SAFE_MAX_TEXTURES: usize = 10; + +impl Plugin for MeshRenderPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "forward_io.wgsl"); + load_shader_library!(app, "mesh_view_types.wgsl", |settings| *settings = + ShaderSettings { + shader_defs: vec![ + ShaderDefVal::UInt( + "MAX_DIRECTIONAL_LIGHTS".into(), + MAX_DIRECTIONAL_LIGHTS as u32 + ), + ShaderDefVal::UInt( + "MAX_CASCADES_PER_LIGHT".into(), + MAX_CASCADES_PER_LIGHT as u32, + ) + ] + }); + load_shader_library!(app, "mesh_view_bindings.wgsl"); + load_shader_library!(app, "mesh_types.wgsl"); + load_shader_library!(app, "mesh_functions.wgsl"); + load_shader_library!(app, "skinning.wgsl"); + load_shader_library!(app, "morph.wgsl"); + load_shader_library!(app, "occlusion_culling.wgsl"); + + embedded_asset!(app, "mesh.wgsl"); + + if app.get_sub_app(RenderApp).is_none() { + return; + } + + app.add_systems( + PostUpdate, + (no_automatic_skin_batching, no_automatic_morph_batching), + ) + .add_plugins(( + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), + )); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::() + .init_resource::() + .init_resource::() + .configure_sets( + ExtractSchedule, + MeshExtractionSystems + .after(view::extract_visibility_ranges) + .after(late_sweep_material_instances), + ) + .add_systems( + ExtractSchedule, + ( + extract_skins, + extract_morphs, + gpu_preprocessing::clear_batched_gpu_instance_buffers:: + .before(MeshExtractionSystems), + ), + ) + .add_systems( + Render, + ( + 
set_mesh_motion_vector_flags.in_set(RenderSystems::PrepareMeshes), + prepare_skins.in_set(RenderSystems::PrepareResources), + prepare_morphs.in_set(RenderSystems::PrepareResources), + prepare_mesh_bind_groups.in_set(RenderSystems::PrepareBindGroups), + prepare_mesh_view_bind_groups + .in_set(RenderSystems::PrepareBindGroups) + .after(prepare_oit_buffers), + no_gpu_preprocessing::clear_batched_cpu_instance_buffers:: + .in_set(RenderSystems::Cleanup) + .after(RenderSystems::Render), + ), + ); + } + } + + fn finish(&self, app: &mut App) { + let mut mesh_bindings_shader_defs = Vec::with_capacity(1); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::() + .init_resource::() + .init_resource::() + .add_systems( + Render, + check_views_need_specialization.in_set(PrepareAssets), + ); + + let gpu_preprocessing_support = + render_app.world().resource::(); + let use_gpu_instance_buffer_builder = + self.use_gpu_instance_buffer_builder && gpu_preprocessing_support.is_available(); + + let render_mesh_instances = RenderMeshInstances::new(use_gpu_instance_buffer_builder); + render_app.insert_resource(render_mesh_instances); + + if use_gpu_instance_buffer_builder { + render_app + .init_resource::>() + .init_resource::() + .init_resource::() + .add_systems( + ExtractSchedule, + extract_meshes_for_gpu_building.in_set(MeshExtractionSystems), + ) + .add_systems( + Render, + ( + gpu_preprocessing::write_batched_instance_buffers:: + .in_set(RenderSystems::PrepareResourcesFlush), + gpu_preprocessing::delete_old_work_item_buffers:: + .in_set(RenderSystems::PrepareResources), + collect_meshes_for_gpu_building + .in_set(RenderSystems::PrepareMeshes) + // This must be before + // `set_mesh_motion_vector_flags` so it doesn't + // overwrite those flags. 
+ .before(set_mesh_motion_vector_flags), + ), + ); + } else { + let render_device = render_app.world().resource::(); + let cpu_batched_instance_buffer = + no_gpu_preprocessing::BatchedInstanceBuffer::::new(render_device); + render_app + .insert_resource(cpu_batched_instance_buffer) + .add_systems( + ExtractSchedule, + extract_meshes_for_cpu_building.in_set(MeshExtractionSystems), + ) + .add_systems( + Render, + no_gpu_preprocessing::write_batched_instance_buffer:: + .in_set(RenderSystems::PrepareResourcesFlush), + ); + }; + + let render_device = render_app.world().resource::(); + if let Some(per_object_buffer_batch_size) = + GpuArrayBuffer::::batch_size(render_device) + { + mesh_bindings_shader_defs.push(ShaderDefVal::UInt( + "PER_OBJECT_BUFFER_BATCH_SIZE".into(), + per_object_buffer_batch_size, + )); + } + + render_app + .init_resource::() + .init_resource::(); + } + + // Load the mesh_bindings shader module here as it depends on runtime information about + // whether storage buffers are supported, or the maximum uniform buffer binding size. 
+ load_shader_library!(app, "mesh_bindings.wgsl", move |settings| *settings = + ShaderSettings { + shader_defs: mesh_bindings_shader_defs.clone(), + }); + } +} + +#[derive(Resource, Deref, DerefMut, Default, Debug, Clone)] +pub struct ViewKeyCache(HashMap); + +#[derive(Resource, Deref, DerefMut, Default, Debug, Clone)] +pub struct ViewSpecializationTicks(HashMap); + +pub fn check_views_need_specialization( + mut view_key_cache: ResMut, + mut view_specialization_ticks: ResMut, + mut views: Query<( + &ExtractedView, + &Msaa, + Option<&Tonemapping>, + Option<&DebandDither>, + Option<&ShadowFilteringMethod>, + Has, + ( + Has, + Has, + Has, + Has, + ), + Option<&Camera3d>, + Has, + Option<&Projection>, + Has, + ( + Has>, + Has>, + ), + Has, + )>, + ticks: SystemChangeTick, +) { + for ( + view, + msaa, + tonemapping, + dither, + shadow_filter_method, + ssao, + (normal_prepass, depth_prepass, motion_vector_prepass, deferred_prepass), + camera_3d, + temporal_jitter, + projection, + distance_fog, + (has_environment_maps, has_irradiance_volumes), + has_oit, + ) in views.iter_mut() + { + let mut view_key = MeshPipelineKey::from_msaa_samples(msaa.samples()) + | MeshPipelineKey::from_hdr(view.hdr); + + if normal_prepass { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + + if depth_prepass { + view_key |= MeshPipelineKey::DEPTH_PREPASS; + } + + if motion_vector_prepass { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + + if deferred_prepass { + view_key |= MeshPipelineKey::DEFERRED_PREPASS; + } + + if temporal_jitter { + view_key |= MeshPipelineKey::TEMPORAL_JITTER; + } + + if has_environment_maps { + view_key |= MeshPipelineKey::ENVIRONMENT_MAP; + } + + if has_irradiance_volumes { + view_key |= MeshPipelineKey::IRRADIANCE_VOLUME; + } + + if has_oit { + view_key |= MeshPipelineKey::OIT_ENABLED; + } + + if let Some(projection) = projection { + view_key |= match projection { + Projection::Perspective(_) => MeshPipelineKey::VIEW_PROJECTION_PERSPECTIVE, + 
Projection::Orthographic(_) => MeshPipelineKey::VIEW_PROJECTION_ORTHOGRAPHIC, + Projection::Custom(_) => MeshPipelineKey::VIEW_PROJECTION_NONSTANDARD, + }; + } + + match shadow_filter_method.unwrap_or(&ShadowFilteringMethod::default()) { + ShadowFilteringMethod::Hardware2x2 => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2; + } + ShadowFilteringMethod::Gaussian => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_GAUSSIAN; + } + ShadowFilteringMethod::Temporal => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_TEMPORAL; + } + } + + if !view.hdr { + if let Some(tonemapping) = tonemapping { + view_key |= MeshPipelineKey::TONEMAP_IN_SHADER; + view_key |= tonemapping_pipeline_key(*tonemapping); + } + if let Some(DebandDither::Enabled) = dither { + view_key |= MeshPipelineKey::DEBAND_DITHER; + } + } + if ssao { + view_key |= MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION; + } + if distance_fog { + view_key |= MeshPipelineKey::DISTANCE_FOG; + } + if let Some(camera_3d) = camera_3d { + view_key |= screen_space_specular_transmission_pipeline_key( + camera_3d.screen_space_specular_transmission_quality, + ); + } + if !view_key_cache + .get_mut(&view.retained_view_entity) + .is_some_and(|current_key| *current_key == view_key) + { + view_key_cache.insert(view.retained_view_entity, view_key); + view_specialization_ticks.insert(view.retained_view_entity, ticks.this_run()); + } + } +} + +#[derive(Component)] +pub struct MeshTransforms { + pub world_from_local: Affine3, + pub previous_world_from_local: Affine3, + pub flags: u32, +} + +#[derive(ShaderType, Clone)] +pub struct MeshUniform { + // Affine 4x3 matrices transposed to 3x4 + pub world_from_local: [Vec4; 3], + pub previous_world_from_local: [Vec4; 3], + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + pub local_from_world_transpose_a: [Vec4; 2], + pub local_from_world_transpose_b: f32, + pub flags: u32, + // Four 16-bit unsigned normalized UV 
values packed into a `UVec2`: + // + // <--- MSB LSB ---> + // +---- min v ----+ +---- min u ----+ + // lightmap_uv_rect.x: vvvvvvvv vvvvvvvv uuuuuuuu uuuuuuuu, + // +---- max v ----+ +---- max u ----+ + // lightmap_uv_rect.y: VVVVVVVV VVVVVVVV UUUUUUUU UUUUUUUU, + // + // (MSB: most significant bit; LSB: least significant bit.) + pub lightmap_uv_rect: UVec2, + /// The index of this mesh's first vertex in the vertex buffer. + /// + /// Multiple meshes can be packed into a single vertex buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first vertex in + /// this mesh in that buffer. + pub first_vertex_index: u32, + /// The current skin index, or `u32::MAX` if there's no skin. + pub current_skin_index: u32, + /// The material and lightmap indices, packed into 32 bits. + /// + /// Low 16 bits: index of the material inside the bind group data. + /// High 16 bits: index of the lightmap in the binding array. + pub material_and_lightmap_bind_group_slot: u32, + /// User supplied tag to identify this mesh instance. + pub tag: u32, + /// Padding. + pub pad: u32, +} + +/// Information that has to be transferred from CPU to GPU in order to produce +/// the full [`MeshUniform`]. +/// +/// This is essentially a subset of the fields in [`MeshUniform`] above. +#[derive(ShaderType, Pod, Zeroable, Clone, Copy, Default, Debug)] +#[repr(C)] +pub struct MeshInputUniform { + /// Affine 4x3 matrix transposed to 3x4. + pub world_from_local: [Vec4; 3], + /// Four 16-bit unsigned normalized UV values packed into a `UVec2`: + /// + /// ```text + /// <--- MSB LSB ---> + /// +---- min v ----+ +---- min u ----+ + /// lightmap_uv_rect.x: vvvvvvvv vvvvvvvv uuuuuuuu uuuuuuuu, + /// +---- max v ----+ +---- max u ----+ + /// lightmap_uv_rect.y: VVVVVVVV VVVVVVVV UUUUUUUU UUUUUUUU, + /// + /// (MSB: most significant bit; LSB: least significant bit.) + /// ``` + pub lightmap_uv_rect: UVec2, + /// Various [`MeshFlags`]. 
+ pub flags: u32, + /// The index of this mesh's [`MeshInputUniform`] in the previous frame's + /// buffer, if applicable. + /// + /// This is used for TAA. If not present, this will be `u32::MAX`. + pub previous_input_index: u32, + /// The index of this mesh's first vertex in the vertex buffer. + /// + /// Multiple meshes can be packed into a single vertex buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first vertex in + /// this mesh in that buffer. + pub first_vertex_index: u32, + /// The index of this mesh's first index in the index buffer, if any. + /// + /// Multiple meshes can be packed into a single index buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first index in + /// this mesh in that buffer. + /// + /// If this mesh isn't indexed, this value is ignored. + pub first_index_index: u32, + /// For an indexed mesh, the number of indices that make it up; for a + /// non-indexed mesh, the number of vertices in it. + pub index_count: u32, + /// The current skin index, or `u32::MAX` if there's no skin. + pub current_skin_index: u32, + /// The material and lightmap indices, packed into 32 bits. + /// + /// Low 16 bits: index of the material inside the bind group data. + /// High 16 bits: index of the lightmap in the binding array. + pub material_and_lightmap_bind_group_slot: u32, + /// The number of the frame on which this [`MeshInputUniform`] was built. + /// + /// This is used to validate the previous transform and skin. If this + /// [`MeshInputUniform`] wasn't updated on this frame, then we know that + /// neither this mesh's transform nor that of its joints have been updated + /// on this frame, and therefore the transforms of both this mesh and its + /// joints must be identical to those for the previous frame. + pub timestamp: u32, + /// User supplied tag to identify this mesh instance. + pub tag: u32, + /// Padding. 
+ pub pad: u32, +} + +/// Information about each mesh instance needed to cull it on GPU. +/// +/// This consists of its axis-aligned bounding box (AABB). +#[derive(ShaderType, Pod, Zeroable, Clone, Copy, Default)] +#[repr(C)] +pub struct MeshCullingData { + /// The 3D center of the AABB in model space, padded with an extra unused + /// float value. + pub aabb_center: Vec4, + /// The 3D extents of the AABB in model space, divided by two, padded with + /// an extra unused float value. + pub aabb_half_extents: Vec4, +} + +/// A GPU buffer that holds the information needed to cull meshes on GPU. +/// +/// At the moment, this simply holds each mesh's AABB. +/// +/// To avoid wasting CPU time in the CPU culling case, this buffer will be empty +/// if GPU culling isn't in use. +#[derive(Resource, Deref, DerefMut)] +pub struct MeshCullingDataBuffer(RawBufferVec); + +impl MeshUniform { + pub fn new( + mesh_transforms: &MeshTransforms, + first_vertex_index: u32, + material_bind_group_slot: MaterialBindGroupSlot, + maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, + current_skin_index: Option, + tag: Option, + ) -> Self { + let (local_from_world_transpose_a, local_from_world_transpose_b) = + mesh_transforms.world_from_local.inverse_transpose_3x3(); + let lightmap_bind_group_slot = match maybe_lightmap { + None => u16::MAX, + Some((slot_index, _)) => slot_index.into(), + }; + + Self { + world_from_local: mesh_transforms.world_from_local.to_transpose(), + previous_world_from_local: mesh_transforms.previous_world_from_local.to_transpose(), + lightmap_uv_rect: pack_lightmap_uv_rect(maybe_lightmap.map(|(_, uv_rect)| uv_rect)), + local_from_world_transpose_a, + local_from_world_transpose_b, + flags: mesh_transforms.flags, + first_vertex_index, + current_skin_index: current_skin_index.unwrap_or(u32::MAX), + material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) + | ((lightmap_bind_group_slot as u32) << 16), + tag: tag.unwrap_or(0), + pad: 0, + } + } +} + +// 
NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_types.wgsl! +bitflags::bitflags! { + /// Various flags and tightly-packed values on a mesh. + /// + /// Flags grow from the top bit down; other values grow from the bottom bit + /// up. + #[repr(transparent)] + pub struct MeshFlags: u32 { + /// Bitmask for the 16-bit index into the LOD array. + /// + /// This will be `u16::MAX` if this mesh has no LOD. + const LOD_INDEX_MASK = (1 << 16) - 1; + /// Disables frustum culling for this mesh. + /// + /// This corresponds to the + /// [`bevy_render::view::visibility::NoFrustumCulling`] component. + const NO_FRUSTUM_CULLING = 1 << 28; + const SHADOW_RECEIVER = 1 << 29; + const TRANSMITTED_SHADOW_RECEIVER = 1 << 30; + // Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive, + // then the flag should be set, else it should not be set. + const SIGN_DETERMINANT_MODEL_3X3 = 1 << 31; + const NONE = 0; + const UNINITIALIZED = 0xFFFFFFFF; + } +} + +impl MeshFlags { + fn from_components( + transform: &GlobalTransform, + lod_index: Option, + no_frustum_culling: bool, + not_shadow_receiver: bool, + transmitted_receiver: bool, + ) -> MeshFlags { + let mut mesh_flags = if not_shadow_receiver { + MeshFlags::empty() + } else { + MeshFlags::SHADOW_RECEIVER + }; + if no_frustum_culling { + mesh_flags |= MeshFlags::NO_FRUSTUM_CULLING; + } + if transmitted_receiver { + mesh_flags |= MeshFlags::TRANSMITTED_SHADOW_RECEIVER; + } + if transform.affine().matrix3.determinant().is_sign_positive() { + mesh_flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; + } + + let lod_index_bits = match lod_index { + None => u16::MAX, + Some(lod_index) => u16::from(lod_index), + }; + mesh_flags |= + MeshFlags::from_bits_retain((lod_index_bits as u32) << MeshFlags::LOD_INDEX_SHIFT); + + mesh_flags + } + + /// The first bit of the LOD index. + pub const LOD_INDEX_SHIFT: u32 = 0; +} + +bitflags::bitflags! { + /// Various useful flags for [`RenderMeshInstance`]s. 
+ #[derive(Clone, Copy)] + pub struct RenderMeshInstanceFlags: u8 { + /// The mesh casts shadows. + const SHADOW_CASTER = 1 << 0; + /// The mesh can participate in automatic batching. + const AUTOMATIC_BATCHING = 1 << 1; + /// The mesh had a transform last frame and so is eligible for motion + /// vector computation. + const HAS_PREVIOUS_TRANSFORM = 1 << 2; + /// The mesh had a skin last frame and so that skin should be taken into + /// account for motion vector computation. + const HAS_PREVIOUS_SKIN = 1 << 3; + /// The mesh had morph targets last frame and so they should be taken + /// into account for motion vector computation. + const HAS_PREVIOUS_MORPH = 1 << 4; + } +} + +/// CPU data that the render world keeps for each entity, when *not* using GPU +/// mesh uniform building. +#[derive(Deref, DerefMut)] +pub struct RenderMeshInstanceCpu { + /// Data shared between both the CPU mesh uniform building and the GPU mesh + /// uniform building paths. + #[deref] + pub shared: RenderMeshInstanceShared, + /// The transform of the mesh. + /// + /// This will be written into the [`MeshUniform`] at the appropriate time. + pub transforms: MeshTransforms, +} + +/// CPU data that the render world needs to keep for each entity that contains a +/// mesh when using GPU mesh uniform building. +#[derive(Deref, DerefMut)] +pub struct RenderMeshInstanceGpu { + /// Data shared between both the CPU mesh uniform building and the GPU mesh + /// uniform building paths. + #[deref] + pub shared: RenderMeshInstanceShared, + /// The translation of the mesh. + /// + /// This is the only part of the transform that we have to keep on CPU (for + /// distance sorting). + pub translation: Vec3, + /// The index of the [`MeshInputUniform`] in the buffer. + pub current_uniform_index: NonMaxU32, +} + +/// CPU data that the render world needs to keep about each entity that contains +/// a mesh. +pub struct RenderMeshInstanceShared { + /// The [`AssetId`] of the mesh. 
+ pub mesh_asset_id: AssetId, + /// A slot for the material bind group index. + pub material_bindings_index: MaterialBindingId, + /// Various flags. + pub flags: RenderMeshInstanceFlags, + /// Index of the slab that the lightmap resides in, if a lightmap is + /// present. + pub lightmap_slab_index: Option, + /// User supplied tag to identify this mesh instance. + pub tag: u32, + /// Render layers that this mesh instance belongs to. + pub render_layers: Option, +} + +/// Information that is gathered during the parallel portion of mesh extraction +/// when GPU mesh uniform building is enabled. +/// +/// From this, the [`MeshInputUniform`] and [`RenderMeshInstanceGpu`] are +/// prepared. +pub struct RenderMeshInstanceGpuBuilder { + /// Data that will be placed on the [`RenderMeshInstanceGpu`]. + pub shared: RenderMeshInstanceShared, + /// The current transform. + pub world_from_local: Affine3, + /// Four 16-bit unsigned normalized UV values packed into a [`UVec2`]: + /// + /// ```text + /// <--- MSB LSB ---> + /// +---- min v ----+ +---- min u ----+ + /// lightmap_uv_rect.x: vvvvvvvv vvvvvvvv uuuuuuuu uuuuuuuu, + /// +---- max v ----+ +---- max u ----+ + /// lightmap_uv_rect.y: VVVVVVVV VVVVVVVV UUUUUUUU UUUUUUUU, + /// + /// (MSB: most significant bit; LSB: least significant bit.) + /// ``` + pub lightmap_uv_rect: UVec2, + /// The index of the previous mesh input. + pub previous_input_index: Option, + /// Various flags. + pub mesh_flags: MeshFlags, +} + +/// The per-thread queues used during [`extract_meshes_for_gpu_building`]. +/// +/// There are two varieties of these: one for when culling happens on CPU and +/// one for when culling happens on GPU. Having the two varieties avoids wasting +/// space if GPU culling is disabled. +#[derive(Default)] +pub enum RenderMeshInstanceGpuQueue { + /// The default value. + /// + /// This becomes [`RenderMeshInstanceGpuQueue::CpuCulling`] or + /// [`RenderMeshInstanceGpuQueue::GpuCulling`] once extraction starts. 
+ #[default] + None, + /// The version of [`RenderMeshInstanceGpuQueue`] that omits the + /// [`MeshCullingData`], so that we don't waste space when GPU + /// culling is disabled. + CpuCulling { + /// Stores GPU data for each entity that became visible or changed in + /// such a way that necessitates updating the [`MeshInputUniform`] (e.g. + /// changed transform). + changed: Vec<(MainEntity, RenderMeshInstanceGpuBuilder)>, + /// Stores the IDs of entities that became invisible this frame. + removed: Vec, + }, + /// The version of [`RenderMeshInstanceGpuQueue`] that contains the + /// [`MeshCullingData`], used when any view has GPU culling + /// enabled. + GpuCulling { + /// Stores GPU data for each entity that became visible or changed in + /// such a way that necessitates updating the [`MeshInputUniform`] (e.g. + /// changed transform). + changed: Vec<(MainEntity, RenderMeshInstanceGpuBuilder, MeshCullingData)>, + /// Stores the IDs of entities that became invisible this frame. + removed: Vec, + }, +} + +/// The per-thread queues containing mesh instances, populated during the +/// extract phase. +/// +/// These are filled in [`extract_meshes_for_gpu_building`] and consumed in +/// [`collect_meshes_for_gpu_building`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct RenderMeshInstanceGpuQueues(Parallel); + +/// Holds a list of meshes that couldn't be extracted this frame because their +/// materials weren't prepared yet. +/// +/// On subsequent frames, we try to reextract those meshes. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct MeshesToReextractNextFrame(MainEntityHashSet); + +impl RenderMeshInstanceShared { + /// A gpu builder will provide the mesh instance id + /// during [`RenderMeshInstanceGpuBuilder::update`]. 
+ fn for_gpu_building( + previous_transform: Option<&PreviousGlobalTransform>, + mesh: &Mesh3d, + tag: Option<&MeshTag>, + not_shadow_caster: bool, + no_automatic_batching: bool, + render_layers: Option<&RenderLayers>, + ) -> Self { + Self::for_cpu_building( + previous_transform, + mesh, + tag, + default(), + not_shadow_caster, + no_automatic_batching, + render_layers, + ) + } + + /// The cpu builder does not have an equivalent [`RenderMeshInstanceGpuBuilder::update`]. + fn for_cpu_building( + previous_transform: Option<&PreviousGlobalTransform>, + mesh: &Mesh3d, + tag: Option<&MeshTag>, + material_bindings_index: MaterialBindingId, + not_shadow_caster: bool, + no_automatic_batching: bool, + render_layers: Option<&RenderLayers>, + ) -> Self { + let mut mesh_instance_flags = RenderMeshInstanceFlags::empty(); + mesh_instance_flags.set(RenderMeshInstanceFlags::SHADOW_CASTER, !not_shadow_caster); + mesh_instance_flags.set( + RenderMeshInstanceFlags::AUTOMATIC_BATCHING, + !no_automatic_batching, + ); + mesh_instance_flags.set( + RenderMeshInstanceFlags::HAS_PREVIOUS_TRANSFORM, + previous_transform.is_some(), + ); + + RenderMeshInstanceShared { + mesh_asset_id: mesh.id(), + flags: mesh_instance_flags, + material_bindings_index, + lightmap_slab_index: None, + tag: tag.map_or(0, |i| **i), + render_layers: render_layers.cloned(), + } + } + + /// Returns true if this entity is eligible to participate in automatic + /// batching. + #[inline] + pub fn should_batch(&self) -> bool { + self.flags + .contains(RenderMeshInstanceFlags::AUTOMATIC_BATCHING) + } +} + +/// Information that the render world keeps about each entity that contains a +/// mesh. +/// +/// The set of information needed is different depending on whether CPU or GPU +/// [`MeshUniform`] building is in use. +#[derive(Resource)] +pub enum RenderMeshInstances { + /// Information needed when using CPU mesh instance data building. 
+ CpuBuilding(RenderMeshInstancesCpu), + /// Information needed when using GPU mesh instance data building. + GpuBuilding(RenderMeshInstancesGpu), +} + +/// Information that the render world keeps about each entity that contains a +/// mesh, when using CPU mesh instance data building. +#[derive(Default, Deref, DerefMut)] +pub struct RenderMeshInstancesCpu(MainEntityHashMap); + +/// Information that the render world keeps about each entity that contains a +/// mesh, when using GPU mesh instance data building. +#[derive(Default, Deref, DerefMut)] +pub struct RenderMeshInstancesGpu(MainEntityHashMap); + +impl RenderMeshInstances { + /// Creates a new [`RenderMeshInstances`] instance. + fn new(use_gpu_instance_buffer_builder: bool) -> RenderMeshInstances { + if use_gpu_instance_buffer_builder { + RenderMeshInstances::GpuBuilding(RenderMeshInstancesGpu::default()) + } else { + RenderMeshInstances::CpuBuilding(RenderMeshInstancesCpu::default()) + } + } + + /// Returns the ID of the mesh asset attached to the given entity, if any. + pub fn mesh_asset_id(&self, entity: MainEntity) -> Option> { + match *self { + RenderMeshInstances::CpuBuilding(ref instances) => instances.mesh_asset_id(entity), + RenderMeshInstances::GpuBuilding(ref instances) => instances.mesh_asset_id(entity), + } + } + + /// Constructs [`RenderMeshQueueData`] for the given entity, if it has a + /// mesh attached. + pub fn render_mesh_queue_data(&self, entity: MainEntity) -> Option> { + match *self { + RenderMeshInstances::CpuBuilding(ref instances) => { + instances.render_mesh_queue_data(entity) + } + RenderMeshInstances::GpuBuilding(ref instances) => { + instances.render_mesh_queue_data(entity) + } + } + } + + /// Inserts the given flags into the CPU or GPU render mesh instance data + /// for the given mesh as appropriate. 
+ fn insert_mesh_instance_flags(&mut self, entity: MainEntity, flags: RenderMeshInstanceFlags) { + match *self { + RenderMeshInstances::CpuBuilding(ref mut instances) => { + instances.insert_mesh_instance_flags(entity, flags); + } + RenderMeshInstances::GpuBuilding(ref mut instances) => { + instances.insert_mesh_instance_flags(entity, flags); + } + } + } +} + +impl RenderMeshInstancesCpu { + fn mesh_asset_id(&self, entity: MainEntity) -> Option> { + self.get(&entity) + .map(|render_mesh_instance| render_mesh_instance.mesh_asset_id) + } + + fn render_mesh_queue_data(&self, entity: MainEntity) -> Option> { + self.get(&entity) + .map(|render_mesh_instance| RenderMeshQueueData { + shared: &render_mesh_instance.shared, + translation: render_mesh_instance.transforms.world_from_local.translation, + current_uniform_index: InputUniformIndex::default(), + }) + } + + /// Inserts the given flags into the render mesh instance data for the given + /// mesh. + fn insert_mesh_instance_flags(&mut self, entity: MainEntity, flags: RenderMeshInstanceFlags) { + if let Some(instance) = self.get_mut(&entity) { + instance.flags.insert(flags); + } + } +} + +impl RenderMeshInstancesGpu { + fn mesh_asset_id(&self, entity: MainEntity) -> Option> { + self.get(&entity) + .map(|render_mesh_instance| render_mesh_instance.mesh_asset_id) + } + + fn render_mesh_queue_data(&self, entity: MainEntity) -> Option> { + self.get(&entity) + .map(|render_mesh_instance| RenderMeshQueueData { + shared: &render_mesh_instance.shared, + translation: render_mesh_instance.translation, + current_uniform_index: InputUniformIndex( + render_mesh_instance.current_uniform_index.into(), + ), + }) + } + + /// Inserts the given flags into the render mesh instance data for the given + /// mesh. 
+ fn insert_mesh_instance_flags(&mut self, entity: MainEntity, flags: RenderMeshInstanceFlags) { + if let Some(instance) = self.get_mut(&entity) { + instance.flags.insert(flags); + } + } +} + +impl RenderMeshInstanceGpuQueue { + /// Clears out a [`RenderMeshInstanceGpuQueue`], creating or recreating it + /// as necessary. + /// + /// `any_gpu_culling` should be set to true if any view has GPU culling + /// enabled. + fn init(&mut self, any_gpu_culling: bool) { + match (any_gpu_culling, &mut *self) { + (true, RenderMeshInstanceGpuQueue::GpuCulling { changed, removed }) => { + changed.clear(); + removed.clear(); + } + (true, _) => { + *self = RenderMeshInstanceGpuQueue::GpuCulling { + changed: vec![], + removed: vec![], + } + } + (false, RenderMeshInstanceGpuQueue::CpuCulling { changed, removed }) => { + changed.clear(); + removed.clear(); + } + (false, _) => { + *self = RenderMeshInstanceGpuQueue::CpuCulling { + changed: vec![], + removed: vec![], + } + } + } + } + + /// Adds a new mesh to this queue. + fn push( + &mut self, + entity: MainEntity, + instance_builder: RenderMeshInstanceGpuBuilder, + culling_data_builder: Option, + ) { + match (&mut *self, culling_data_builder) { + ( + &mut RenderMeshInstanceGpuQueue::CpuCulling { + changed: ref mut queue, + .. + }, + None, + ) => { + queue.push((entity, instance_builder)); + } + ( + &mut RenderMeshInstanceGpuQueue::GpuCulling { + changed: ref mut queue, + .. + }, + Some(culling_data_builder), + ) => { + queue.push((entity, instance_builder, culling_data_builder)); + } + (_, None) => { + *self = RenderMeshInstanceGpuQueue::CpuCulling { + changed: vec![(entity, instance_builder)], + removed: vec![], + }; + } + (_, Some(culling_data_builder)) => { + *self = RenderMeshInstanceGpuQueue::GpuCulling { + changed: vec![(entity, instance_builder, culling_data_builder)], + removed: vec![], + }; + } + } + } + + /// Adds the given entity to the `removed` list, queuing it for removal. 
+ /// + /// The `gpu_culling` parameter specifies whether GPU culling is enabled. + fn remove(&mut self, entity: MainEntity, gpu_culling: bool) { + match (&mut *self, gpu_culling) { + (RenderMeshInstanceGpuQueue::None, false) => { + *self = RenderMeshInstanceGpuQueue::CpuCulling { + changed: vec![], + removed: vec![entity], + } + } + (RenderMeshInstanceGpuQueue::None, true) => { + *self = RenderMeshInstanceGpuQueue::GpuCulling { + changed: vec![], + removed: vec![entity], + } + } + (RenderMeshInstanceGpuQueue::CpuCulling { removed, .. }, _) + | (RenderMeshInstanceGpuQueue::GpuCulling { removed, .. }, _) => { + removed.push(entity); + } + } + } +} + +impl RenderMeshInstanceGpuBuilder { + /// Flushes this mesh instance to the [`RenderMeshInstanceGpu`] and + /// [`MeshInputUniform`] tables, replacing the existing entry if applicable. + fn update( + mut self, + entity: MainEntity, + render_mesh_instances: &mut MainEntityHashMap, + current_input_buffer: &mut InstanceInputUniformBuffer, + previous_input_buffer: &mut InstanceInputUniformBuffer, + mesh_allocator: &MeshAllocator, + mesh_material_ids: &RenderMaterialInstances, + render_material_bindings: &RenderMaterialBindings, + render_lightmaps: &RenderLightmaps, + skin_uniforms: &SkinUniforms, + timestamp: FrameCount, + meshes_to_reextract_next_frame: &mut MeshesToReextractNextFrame, + ) -> Option { + let (first_vertex_index, vertex_count) = + match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { + Some(mesh_vertex_slice) => ( + mesh_vertex_slice.range.start, + mesh_vertex_slice.range.end - mesh_vertex_slice.range.start, + ), + None => (0, 0), + }; + let (mesh_is_indexed, first_index_index, index_count) = + match mesh_allocator.mesh_index_slice(&self.shared.mesh_asset_id) { + Some(mesh_index_slice) => ( + true, + mesh_index_slice.range.start, + mesh_index_slice.range.end - mesh_index_slice.range.start, + ), + None => (false, 0, 0), + }; + let current_skin_index = match 
skin_uniforms.skin_byte_offset(entity) { + Some(skin_index) => skin_index.index(), + None => u32::MAX, + }; + + // Look up the material index. If we couldn't fetch the material index, + // then the material hasn't been prepared yet, perhaps because it hasn't + // yet loaded. In that case, add the mesh to + // `meshes_to_reextract_next_frame` and bail. + let mesh_material = mesh_material_ids.mesh_material(entity); + let mesh_material_binding_id = if mesh_material != DUMMY_MESH_MATERIAL.untyped() { + match render_material_bindings.get(&mesh_material) { + Some(binding_id) => *binding_id, + None => { + meshes_to_reextract_next_frame.insert(entity); + return None; + } + } + } else { + // Use a dummy material binding ID. + MaterialBindingId::default() + }; + self.shared.material_bindings_index = mesh_material_binding_id; + + let lightmap_slot = match render_lightmaps.render_lightmaps.get(&entity) { + Some(render_lightmap) => u16::from(*render_lightmap.slot_index), + None => u16::MAX, + }; + let lightmap_slab_index = render_lightmaps + .render_lightmaps + .get(&entity) + .map(|lightmap| lightmap.slab_index); + self.shared.lightmap_slab_index = lightmap_slab_index; + + // Create the mesh input uniform. + let mut mesh_input_uniform = MeshInputUniform { + world_from_local: self.world_from_local.to_transpose(), + lightmap_uv_rect: self.lightmap_uv_rect, + flags: self.mesh_flags.bits(), + previous_input_index: u32::MAX, + timestamp: timestamp.0, + first_vertex_index, + first_index_index, + index_count: if mesh_is_indexed { + index_count + } else { + vertex_count + }, + current_skin_index, + material_and_lightmap_bind_group_slot: u32::from( + self.shared.material_bindings_index.slot, + ) | ((lightmap_slot as u32) << 16), + tag: self.shared.tag, + pad: 0, + }; + + // Did the last frame contain this entity as well? + let current_uniform_index; + match render_mesh_instances.entry(entity) { + Entry::Occupied(mut occupied_entry) => { + // Yes, it did. 
Replace its entry with the new one. + + // Reserve a slot. + current_uniform_index = u32::from(occupied_entry.get_mut().current_uniform_index); + + // Save the old mesh input uniform. The mesh preprocessing + // shader will need it to compute motion vectors. + let previous_mesh_input_uniform = + current_input_buffer.get_unchecked(current_uniform_index); + let previous_input_index = previous_input_buffer.add(previous_mesh_input_uniform); + mesh_input_uniform.previous_input_index = previous_input_index; + + // Write in the new mesh input uniform. + current_input_buffer.set(current_uniform_index, mesh_input_uniform); + + occupied_entry.replace_entry_with(|_, _| { + Some(RenderMeshInstanceGpu { + translation: self.world_from_local.translation, + shared: self.shared, + current_uniform_index: NonMaxU32::new(current_uniform_index) + .unwrap_or_default(), + }) + }); + } + + Entry::Vacant(vacant_entry) => { + // No, this is a new entity. Push its data on to the buffer. + current_uniform_index = current_input_buffer.add(mesh_input_uniform); + + vacant_entry.insert(RenderMeshInstanceGpu { + translation: self.world_from_local.translation, + shared: self.shared, + current_uniform_index: NonMaxU32::new(current_uniform_index) + .unwrap_or_default(), + }); + } + } + + Some(current_uniform_index) + } +} + +/// Removes a [`MeshInputUniform`] corresponding to an entity that became +/// invisible from the buffer. +fn remove_mesh_input_uniform( + entity: MainEntity, + render_mesh_instances: &mut MainEntityHashMap, + current_input_buffer: &mut InstanceInputUniformBuffer, +) -> Option { + // Remove the uniform data. + let removed_render_mesh_instance = render_mesh_instances.remove(&entity)?; + + let removed_uniform_index = removed_render_mesh_instance.current_uniform_index.get(); + current_input_buffer.remove(removed_uniform_index); + Some(removed_uniform_index) +} + +impl MeshCullingData { + /// Returns a new [`MeshCullingData`] initialized with the given AABB. 
+ /// + /// If no AABB is provided, an infinitely-large one is conservatively + /// chosen. + fn new(aabb: Option<&Aabb>) -> Self { + match aabb { + Some(aabb) => MeshCullingData { + aabb_center: aabb.center.extend(0.0), + aabb_half_extents: aabb.half_extents.extend(0.0), + }, + None => MeshCullingData { + aabb_center: Vec3::ZERO.extend(0.0), + aabb_half_extents: Vec3::INFINITY.extend(0.0), + }, + } + } + + /// Flushes this mesh instance culling data to the + /// [`MeshCullingDataBuffer`], replacing the existing entry if applicable. + fn update( + &self, + mesh_culling_data_buffer: &mut MeshCullingDataBuffer, + instance_data_index: usize, + ) { + while mesh_culling_data_buffer.len() < instance_data_index + 1 { + mesh_culling_data_buffer.push(MeshCullingData::default()); + } + mesh_culling_data_buffer.values_mut()[instance_data_index] = *self; + } +} + +impl Default for MeshCullingDataBuffer { + #[inline] + fn default() -> Self { + Self(RawBufferVec::new(BufferUsages::STORAGE)) + } +} + +/// Data that [`crate::material::queue_material_meshes`] and similar systems +/// need in order to place entities that contain meshes in the right batch. +#[derive(Deref)] +pub struct RenderMeshQueueData<'a> { + /// General information about the mesh instance. + #[deref] + pub shared: &'a RenderMeshInstanceShared, + /// The translation of the mesh instance. + pub translation: Vec3, + /// The index of the [`MeshInputUniform`] in the GPU buffer for this mesh + /// instance. + pub current_uniform_index: InputUniformIndex, +} + +/// A [`SystemSet`] that encompasses both [`extract_meshes_for_cpu_building`] +/// and [`extract_meshes_for_gpu_building`]. +#[derive(SystemSet, Clone, PartialEq, Eq, Debug, Hash)] +pub struct MeshExtractionSystems; + +/// Deprecated alias for [`MeshExtractionSystems`]. 
+#[deprecated(since = "0.17.0", note = "Renamed to `MeshExtractionSystems`.")] +pub type ExtractMeshesSet = MeshExtractionSystems; + +/// Extracts meshes from the main world into the render world, populating the +/// [`RenderMeshInstances`]. +/// +/// This is the variant of the system that runs when we're *not* using GPU +/// [`MeshUniform`] building. +pub fn extract_meshes_for_cpu_building( + mut render_mesh_instances: ResMut, + mesh_material_ids: Res, + render_material_bindings: Res, + render_visibility_ranges: Res, + mut render_mesh_instance_queues: Local>>, + meshes_query: Extract< + Query<( + Entity, + &ViewVisibility, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + &Mesh3d, + Option<&MeshTag>, + Has, + Has, + Has, + Has, + Has, + Has, + Option<&RenderLayers>, + )>, + >, +) { + meshes_query.par_iter().for_each_init( + || render_mesh_instance_queues.borrow_local_mut(), + |queue, + ( + entity, + view_visibility, + transform, + previous_transform, + mesh, + tag, + no_frustum_culling, + not_shadow_receiver, + transmitted_receiver, + not_shadow_caster, + no_automatic_batching, + visibility_range, + render_layers, + )| { + if !view_visibility.get() { + return; + } + + let mut lod_index = None; + if visibility_range { + lod_index = render_visibility_ranges.lod_index_for_entity(entity.into()); + } + + let mesh_flags = MeshFlags::from_components( + transform, + lod_index, + no_frustum_culling, + not_shadow_receiver, + transmitted_receiver, + ); + + let mesh_material = mesh_material_ids.mesh_material(MainEntity::from(entity)); + + let material_bindings_index = render_material_bindings + .get(&mesh_material) + .copied() + .unwrap_or_default(); + + let shared = RenderMeshInstanceShared::for_cpu_building( + previous_transform, + mesh, + tag, + material_bindings_index, + not_shadow_caster, + no_automatic_batching, + render_layers, + ); + + let world_from_local = transform.affine(); + queue.push(( + entity, + RenderMeshInstanceCpu { + transforms: MeshTransforms { + 
world_from_local: (&world_from_local).into(), + previous_world_from_local: (&previous_transform + .map(|t| t.0) + .unwrap_or(world_from_local)) + .into(), + flags: mesh_flags.bits(), + }, + shared, + }, + )); + }, + ); + + // Collect the render mesh instances. + let RenderMeshInstances::CpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances + else { + panic!( + "`extract_meshes_for_cpu_building` should only be called if we're using CPU \ + `MeshUniform` building" + ); + }; + + render_mesh_instances.clear(); + for queue in render_mesh_instance_queues.iter_mut() { + for (entity, render_mesh_instance) in queue.drain(..) { + render_mesh_instances.insert(entity.into(), render_mesh_instance); + } + } +} + +/// All the data that we need from a mesh in the main world. +type GpuMeshExtractionQuery = ( + Entity, + Read, + Read, + Option>, + Option>, + Option>, + Read, + Option>, + Has, + Has, + Has, + Has, + Has, + Has, + Option>, +); + +/// Extracts meshes from the main world into the render world and queues +/// [`MeshInputUniform`]s to be uploaded to the GPU. +/// +/// This is optimized to only look at entities that have changed since the last +/// frame. +/// +/// This is the variant of the system that runs when we're using GPU +/// [`MeshUniform`] building. 
+pub fn extract_meshes_for_gpu_building( + mut render_mesh_instances: ResMut, + render_visibility_ranges: Res, + mut render_mesh_instance_queues: ResMut, + changed_meshes_query: Extract< + Query< + GpuMeshExtractionQuery, + Or<( + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + Changed, + )>, + >, + >, + all_meshes_query: Extract>, + mut removed_meshes_query: Extract>, + gpu_culling_query: Extract, Without)>>, + meshes_to_reextract_next_frame: ResMut, +) { + let any_gpu_culling = !gpu_culling_query.is_empty(); + + for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() { + render_mesh_instance_queue.init(any_gpu_culling); + } + + // Collect render mesh instances. Build up the uniform buffer. + + let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances + else { + panic!( + "`extract_meshes_for_gpu_building` should only be called if we're \ + using GPU `MeshUniform` building" + ); + }; + + // Find all meshes that have changed, and record information needed to + // construct the `MeshInputUniform` for them. + changed_meshes_query.par_iter().for_each_init( + || render_mesh_instance_queues.borrow_local_mut(), + |queue, query_row| { + extract_mesh_for_gpu_building( + query_row, + &render_visibility_ranges, + render_mesh_instances, + queue, + any_gpu_culling, + ); + }, + ); + + // Process materials that `collect_meshes_for_gpu_building` marked as + // needing to be reextracted. This will happen when we extracted a mesh on + // some previous frame, but its material hadn't been prepared yet, perhaps + // because the material hadn't yet been loaded. We reextract such materials + // on subsequent frames so that `collect_meshes_for_gpu_building` will check + // to see if their materials have been prepared. 
+ let mut queue = render_mesh_instance_queues.borrow_local_mut(); + for &mesh_entity in &**meshes_to_reextract_next_frame { + if let Ok(query_row) = all_meshes_query.get(*mesh_entity) { + extract_mesh_for_gpu_building( + query_row, + &render_visibility_ranges, + render_mesh_instances, + &mut queue, + any_gpu_culling, + ); + } + } + + // Also record info about each mesh that became invisible. + for entity in removed_meshes_query.read() { + // Only queue a mesh for removal if we didn't pick it up above. + // It's possible that a necessary component was removed and re-added in + // the same frame. + let entity = MainEntity::from(entity); + if !changed_meshes_query.contains(*entity) + && !meshes_to_reextract_next_frame.contains(&entity) + { + queue.remove(entity, any_gpu_culling); + } + } +} + +fn extract_mesh_for_gpu_building( + ( + entity, + view_visibility, + transform, + previous_transform, + lightmap, + aabb, + mesh, + tag, + no_frustum_culling, + not_shadow_receiver, + transmitted_receiver, + not_shadow_caster, + no_automatic_batching, + visibility_range, + render_layers, + ): ::Item<'_, '_>, + render_visibility_ranges: &RenderVisibilityRanges, + render_mesh_instances: &RenderMeshInstancesGpu, + queue: &mut RenderMeshInstanceGpuQueue, + any_gpu_culling: bool, +) { + if !view_visibility.get() { + queue.remove(entity.into(), any_gpu_culling); + return; + } + + let mut lod_index = None; + if visibility_range { + lod_index = render_visibility_ranges.lod_index_for_entity(entity.into()); + } + + let mesh_flags = MeshFlags::from_components( + transform, + lod_index, + no_frustum_culling, + not_shadow_receiver, + transmitted_receiver, + ); + + let shared = RenderMeshInstanceShared::for_gpu_building( + previous_transform, + mesh, + tag, + not_shadow_caster, + no_automatic_batching, + render_layers, + ); + + let lightmap_uv_rect = pack_lightmap_uv_rect(lightmap.map(|lightmap| lightmap.uv_rect)); + + let gpu_mesh_culling_data = any_gpu_culling.then(|| 
MeshCullingData::new(aabb)); + + let previous_input_index = if shared + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_TRANSFORM) + { + render_mesh_instances + .get(&MainEntity::from(entity)) + .map(|render_mesh_instance| render_mesh_instance.current_uniform_index) + } else { + None + }; + + let gpu_mesh_instance_builder = RenderMeshInstanceGpuBuilder { + shared, + world_from_local: (&transform.affine()).into(), + lightmap_uv_rect, + mesh_flags, + previous_input_index, + }; + + queue.push( + entity.into(), + gpu_mesh_instance_builder, + gpu_mesh_culling_data, + ); +} + +/// A system that sets the [`RenderMeshInstanceFlags`] for each mesh based on +/// whether the previous frame had skins and/or morph targets. +/// +/// Ordinarily, [`RenderMeshInstanceFlags`] are set during the extraction phase. +/// However, we can't do that for the flags related to skins and morph targets +/// because the previous frame's skin and morph targets are the responsibility +/// of [`extract_skins`] and [`extract_morphs`] respectively. We want to run +/// those systems in parallel with mesh extraction for performance, so we need +/// to defer setting of these mesh instance flags to after extraction, which +/// this system does. An alternative to having skin- and morph-target-related +/// data in [`RenderMeshInstanceFlags`] would be to have +/// [`crate::material::queue_material_meshes`] check the skin and morph target +/// tables for each mesh, but that would be too slow in the hot mesh queuing +/// loop. 
+pub(crate) fn set_mesh_motion_vector_flags( + mut render_mesh_instances: ResMut, + skin_uniforms: Res, + morph_indices: Res, +) { + for &entity in skin_uniforms.all_skins() { + render_mesh_instances + .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); + } + for &entity in morph_indices.prev.keys() { + render_mesh_instances + .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_MORPH); + } +} + +/// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU +/// mesh uniforms are built. +pub fn collect_meshes_for_gpu_building( + render_mesh_instances: ResMut, + batched_instance_buffers: ResMut< + gpu_preprocessing::BatchedInstanceBuffers, + >, + mut mesh_culling_data_buffer: ResMut, + mut render_mesh_instance_queues: ResMut, + mesh_allocator: Res, + mesh_material_ids: Res, + render_material_bindings: Res, + render_lightmaps: Res, + skin_uniforms: Res, + frame_count: Res, + mut meshes_to_reextract_next_frame: ResMut, +) { + let RenderMeshInstances::GpuBuilding(render_mesh_instances) = + render_mesh_instances.into_inner() + else { + return; + }; + + // We're going to rebuild `meshes_to_reextract_next_frame`. + meshes_to_reextract_next_frame.clear(); + + // Collect render mesh instances. Build up the uniform buffer. + let gpu_preprocessing::BatchedInstanceBuffers { + current_input_buffer, + previous_input_buffer, + .. + } = batched_instance_buffers.into_inner(); + + previous_input_buffer.clear(); + + // Build the [`RenderMeshInstance`]s and [`MeshInputUniform`]s. + + for queue in render_mesh_instance_queues.iter_mut() { + match *queue { + RenderMeshInstanceGpuQueue::None => { + // This can only happen if the queue is empty. + } + + RenderMeshInstanceGpuQueue::CpuCulling { + ref mut changed, + ref mut removed, + } => { + for (entity, mesh_instance_builder) in changed.drain(..) 
{ + mesh_instance_builder.update( + entity, + &mut *render_mesh_instances, + current_input_buffer, + previous_input_buffer, + &mesh_allocator, + &mesh_material_ids, + &render_material_bindings, + &render_lightmaps, + &skin_uniforms, + *frame_count, + &mut meshes_to_reextract_next_frame, + ); + } + + for entity in removed.drain(..) { + remove_mesh_input_uniform( + entity, + &mut *render_mesh_instances, + current_input_buffer, + ); + } + } + + RenderMeshInstanceGpuQueue::GpuCulling { + ref mut changed, + ref mut removed, + } => { + for (entity, mesh_instance_builder, mesh_culling_builder) in changed.drain(..) { + let Some(instance_data_index) = mesh_instance_builder.update( + entity, + &mut *render_mesh_instances, + current_input_buffer, + previous_input_buffer, + &mesh_allocator, + &mesh_material_ids, + &render_material_bindings, + &render_lightmaps, + &skin_uniforms, + *frame_count, + &mut meshes_to_reextract_next_frame, + ) else { + continue; + }; + mesh_culling_builder + .update(&mut mesh_culling_data_buffer, instance_data_index as usize); + } + + for entity in removed.drain(..) { + remove_mesh_input_uniform( + entity, + &mut *render_mesh_instances, + current_input_buffer, + ); + } + } + } + } + + // Buffers can't be empty. Make sure there's something in the previous input buffer. + previous_input_buffer.ensure_nonempty(); +} + +/// All data needed to construct a pipeline for rendering 3D meshes. +#[derive(Resource, Clone)] +pub struct MeshPipeline { + /// A reference to all the mesh pipeline view layouts. + pub view_layouts: MeshPipelineViewLayouts, + // This dummy white texture is to be used in place of optional StandardMaterial textures + pub dummy_white_gpu_image: GpuImage, + pub clustered_forward_buffer_binding_type: BufferBindingType, + pub mesh_layouts: MeshLayouts, + /// The shader asset handle. + pub shader: Handle, + /// `MeshUniform`s are stored in arrays in buffers. 
If storage buffers are available, they + /// are used and this will be `None`, otherwise uniform buffers will be used with batches + /// of this many `MeshUniform`s, stored at dynamic offsets within the uniform buffer. + /// Use code like this in custom shaders: + /// ```wgsl + /// ##ifdef PER_OBJECT_BUFFER_BATCH_SIZE + /// @group(1) @binding(0) var mesh: array; + /// ##else + /// @group(1) @binding(0) var mesh: array; + /// ##endif // PER_OBJECT_BUFFER_BATCH_SIZE + /// ``` + pub per_object_buffer_batch_size: Option, + + /// Whether binding arrays (a.k.a. bindless textures) are usable on the + /// current render device. + /// + /// This affects whether reflection probes can be used. + pub binding_arrays_are_usable: bool, + + /// Whether clustered decals are usable on the current render device. + pub clustered_decals_are_usable: bool, + + /// Whether skins will use uniform buffers on account of storage buffers + /// being unavailable on this platform. + pub skins_use_uniform_buffers: bool, +} + +impl FromWorld for MeshPipeline { + fn from_world(world: &mut World) -> Self { + let shader = load_embedded_asset!(world, "mesh.wgsl"); + let mut system_state: SystemState<( + Res, + Res, + Res, + Res, + Res, + )> = SystemState::new(world); + let (render_device, render_adapter, default_sampler, render_queue, view_layouts) = + system_state.get_mut(world); + + let clustered_forward_buffer_binding_type = render_device + .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); + + // A 1x1x1 'all 1.0' texture to use as a dummy texture to use in place of optional StandardMaterial textures + let dummy_white_gpu_image = { + let image = Image::default(); + let texture = render_device.create_texture(&image.texture_descriptor); + let sampler = match image.sampler { + ImageSampler::Default => (**default_sampler).clone(), + ImageSampler::Descriptor(ref descriptor) => { + render_device.create_sampler(&descriptor.as_wgpu()) + } + }; + + if let Ok(format_size) = 
image.texture_descriptor.format.pixel_size() { + render_queue.write_texture( + texture.as_image_copy(), + image.data.as_ref().expect("Image was created without data"), + TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(image.width() * format_size as u32), + rows_per_image: None, + }, + image.texture_descriptor.size, + ); + } + + let texture_view = texture.create_view(&TextureViewDescriptor::default()); + GpuImage { + texture, + texture_view, + texture_format: image.texture_descriptor.format, + sampler, + size: image.texture_descriptor.size, + mip_level_count: image.texture_descriptor.mip_level_count, + } + }; + + MeshPipeline { + view_layouts: view_layouts.clone(), + clustered_forward_buffer_binding_type, + dummy_white_gpu_image, + mesh_layouts: MeshLayouts::new(&render_device, &render_adapter), + shader, + per_object_buffer_batch_size: GpuArrayBuffer::::batch_size(&render_device), + binding_arrays_are_usable: binding_arrays_are_usable(&render_device, &render_adapter), + clustered_decals_are_usable: decal::clustered::clustered_decals_are_usable( + &render_device, + &render_adapter, + ), + skins_use_uniform_buffers: skins_use_uniform_buffers(&render_device), + } + } +} + +impl MeshPipeline { + pub fn get_image_texture<'a>( + &'a self, + gpu_images: &'a RenderAssets, + handle_option: &Option>, + ) -> Option<(&'a TextureView, &'a Sampler)> { + if let Some(handle) = handle_option { + let gpu_image = gpu_images.get(handle)?; + Some((&gpu_image.texture_view, &gpu_image.sampler)) + } else { + Some(( + &self.dummy_white_gpu_image.texture_view, + &self.dummy_white_gpu_image.sampler, + )) + } + } + + pub fn get_view_layout( + &self, + layout_key: MeshPipelineViewLayoutKey, + ) -> &MeshPipelineViewLayout { + self.view_layouts.get_view_layout(layout_key) + } +} + +impl GetBatchData for MeshPipeline { + type Param = ( + SRes, + SRes, + SRes>, + SRes, + SRes, + ); + // The material bind group ID, the mesh ID, and the lightmap ID, + // respectively. 
+ type CompareData = ( + MaterialBindGroupIndex, + AssetId, + Option, + ); + + type BufferData = MeshUniform; + + fn get_batch_data( + (mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem< + Self::Param, + >, + (_entity, main_entity): (Entity, MainEntity), + ) -> Option<(Self::BufferData, Option)> { + let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { + error!( + "`get_batch_data` should never be called in GPU mesh uniform \ + building mode" + ); + return None; + }; + let mesh_instance = mesh_instances.get(&main_entity)?; + let first_vertex_index = + match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) { + Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, + None => 0, + }; + let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); + + let current_skin_index = skin_uniforms.skin_index(main_entity); + let material_bind_group_index = mesh_instance.material_bindings_index; + + Some(( + MeshUniform::new( + &mesh_instance.transforms, + first_vertex_index, + material_bind_group_index.slot, + maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), + current_skin_index, + Some(mesh_instance.tag), + ), + mesh_instance.should_batch().then_some(( + material_bind_group_index.group, + mesh_instance.mesh_asset_id, + maybe_lightmap.map(|lightmap| lightmap.slab_index), + )), + )) + } +} + +impl GetFullBatchData for MeshPipeline { + type BufferInputData = MeshInputUniform; + + fn get_index_and_compare_data( + (mesh_instances, lightmaps, _, _, _): &SystemParamItem, + main_entity: MainEntity, + ) -> Option<(NonMaxU32, Option)> { + // This should only be called during GPU building. 
+ let RenderMeshInstances::GpuBuilding(ref mesh_instances) = **mesh_instances else { + error!( + "`get_index_and_compare_data` should never be called in CPU mesh uniform building \ + mode" + ); + return None; + }; + + let mesh_instance = mesh_instances.get(&main_entity)?; + let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); + + Some(( + mesh_instance.current_uniform_index, + mesh_instance.should_batch().then_some(( + mesh_instance.material_bindings_index.group, + mesh_instance.mesh_asset_id, + maybe_lightmap.map(|lightmap| lightmap.slab_index), + )), + )) + } + + fn get_binned_batch_data( + (mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem< + Self::Param, + >, + main_entity: MainEntity, + ) -> Option { + let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { + error!( + "`get_binned_batch_data` should never be called in GPU mesh uniform building mode" + ); + return None; + }; + let mesh_instance = mesh_instances.get(&main_entity)?; + let first_vertex_index = + match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) { + Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, + None => 0, + }; + let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); + + let current_skin_index = skin_uniforms.skin_index(main_entity); + + Some(MeshUniform::new( + &mesh_instance.transforms, + first_vertex_index, + mesh_instance.material_bindings_index.slot, + maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), + current_skin_index, + Some(mesh_instance.tag), + )) + } + + fn get_binned_index( + (mesh_instances, _, _, _, _): &SystemParamItem, + main_entity: MainEntity, + ) -> Option { + // This should only be called during GPU building. 
+ let RenderMeshInstances::GpuBuilding(ref mesh_instances) = **mesh_instances else { + error!( + "`get_binned_index` should never be called in CPU mesh uniform \ + building mode" + ); + return None; + }; + + mesh_instances + .get(&main_entity) + .map(|entity| entity.current_uniform_index) + } + + fn write_batch_indirect_parameters_metadata( + indexed: bool, + base_output_index: u32, + batch_set_index: Option, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, + indirect_parameters_offset: u32, + ) { + let indirect_parameters = IndirectParametersCpuMetadata { + base_output_index, + batch_set_index: match batch_set_index { + Some(batch_set_index) => u32::from(batch_set_index), + None => !0, + }, + }; + + if indexed { + phase_indirect_parameters_buffers + .indexed + .set(indirect_parameters_offset, indirect_parameters); + } else { + phase_indirect_parameters_buffers + .non_indexed + .set(indirect_parameters_offset, indirect_parameters); + } + } +} + +bitflags::bitflags! { + #[derive(Default, Clone, Copy, Debug, PartialEq, Eq, Hash)] + #[repr(transparent)] + // NOTE: Apparently quadro drivers support up to 64x MSAA. + /// MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA. 
+ pub struct MeshPipelineKey: u64 { + // Nothing + const NONE = 0; + + // Inherited bits + const MORPH_TARGETS = BaseMeshPipelineKey::MORPH_TARGETS.bits(); + + // Flag bits + const HDR = 1 << 0; + const TONEMAP_IN_SHADER = 1 << 1; + const DEBAND_DITHER = 1 << 2; + const DEPTH_PREPASS = 1 << 3; + const NORMAL_PREPASS = 1 << 4; + const DEFERRED_PREPASS = 1 << 5; + const MOTION_VECTOR_PREPASS = 1 << 6; + const MAY_DISCARD = 1 << 7; // Guards shader codepaths that may discard, allowing early depth tests in most cases + // See: https://www.khronos.org/opengl/wiki/Early_Fragment_Test + const ENVIRONMENT_MAP = 1 << 8; + const SCREEN_SPACE_AMBIENT_OCCLUSION = 1 << 9; + const UNCLIPPED_DEPTH_ORTHO = 1 << 10; // Disables depth clipping for use with directional light shadow views + // Emulated via fragment shader depth on hardware that doesn't support it natively + // See: https://www.w3.org/TR/webgpu/#depth-clipping and https://therealmjp.github.io/posts/shadow-maps/#disabling-z-clipping + const TEMPORAL_JITTER = 1 << 11; + const READS_VIEW_TRANSMISSION_TEXTURE = 1 << 12; + const LIGHTMAPPED = 1 << 13; + const LIGHTMAP_BICUBIC_SAMPLING = 1 << 14; + const IRRADIANCE_VOLUME = 1 << 15; + const VISIBILITY_RANGE_DITHER = 1 << 16; + const SCREEN_SPACE_REFLECTIONS = 1 << 17; + const HAS_PREVIOUS_SKIN = 1 << 18; + const HAS_PREVIOUS_MORPH = 1 << 19; + const OIT_ENABLED = 1 << 20; + const DISTANCE_FOG = 1 << 21; + const LAST_FLAG = Self::DISTANCE_FOG.bits(); + + // Bitfields + const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS; + const BLEND_RESERVED_BITS = Self::BLEND_MASK_BITS << Self::BLEND_SHIFT_BITS; // ← Bitmask reserving bits for the blend state + const BLEND_OPAQUE = 0 << Self::BLEND_SHIFT_BITS; // ← Values are just sequential within the mask + const BLEND_PREMULTIPLIED_ALPHA = 1 << Self::BLEND_SHIFT_BITS; // ← As blend states is on 3 bits, it can range from 0 to 7 + const BLEND_MULTIPLY = 2 << Self::BLEND_SHIFT_BITS; // ← See `BLEND_MASK_BITS` for the 
number of bits available + const BLEND_ALPHA = 3 << Self::BLEND_SHIFT_BITS; // + const BLEND_ALPHA_TO_COVERAGE = 4 << Self::BLEND_SHIFT_BITS; // ← We still have room for three more values without adding more bits + const TONEMAP_METHOD_RESERVED_BITS = Self::TONEMAP_METHOD_MASK_BITS << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_NONE = 0 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_REINHARD = 1 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_REINHARD_LUMINANCE = 2 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_ACES_FITTED = 3 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_AGX = 4 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM = 5 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_TONY_MC_MAPFACE = 6 << Self::TONEMAP_METHOD_SHIFT_BITS; + const TONEMAP_METHOD_BLENDER_FILMIC = 7 << Self::TONEMAP_METHOD_SHIFT_BITS; + const SHADOW_FILTER_METHOD_RESERVED_BITS = Self::SHADOW_FILTER_METHOD_MASK_BITS << Self::SHADOW_FILTER_METHOD_SHIFT_BITS; + const SHADOW_FILTER_METHOD_HARDWARE_2X2 = 0 << Self::SHADOW_FILTER_METHOD_SHIFT_BITS; + const SHADOW_FILTER_METHOD_GAUSSIAN = 1 << Self::SHADOW_FILTER_METHOD_SHIFT_BITS; + const SHADOW_FILTER_METHOD_TEMPORAL = 2 << Self::SHADOW_FILTER_METHOD_SHIFT_BITS; + const VIEW_PROJECTION_RESERVED_BITS = Self::VIEW_PROJECTION_MASK_BITS << Self::VIEW_PROJECTION_SHIFT_BITS; + const VIEW_PROJECTION_NONSTANDARD = 0 << Self::VIEW_PROJECTION_SHIFT_BITS; + const VIEW_PROJECTION_PERSPECTIVE = 1 << Self::VIEW_PROJECTION_SHIFT_BITS; + const VIEW_PROJECTION_ORTHOGRAPHIC = 2 << Self::VIEW_PROJECTION_SHIFT_BITS; + const VIEW_PROJECTION_RESERVED = 3 << Self::VIEW_PROJECTION_SHIFT_BITS; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_RESERVED_BITS = Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_MASK_BITS << Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_LOW = 0 << 
Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_MEDIUM = 1 << Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_HIGH = 2 << Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_ULTRA = 3 << Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS; + const ALL_RESERVED_BITS = + Self::BLEND_RESERVED_BITS.bits() | + Self::MSAA_RESERVED_BITS.bits() | + Self::TONEMAP_METHOD_RESERVED_BITS.bits() | + Self::SHADOW_FILTER_METHOD_RESERVED_BITS.bits() | + Self::VIEW_PROJECTION_RESERVED_BITS.bits() | + Self::SCREEN_SPACE_SPECULAR_TRANSMISSION_RESERVED_BITS.bits(); + } +} + +impl MeshPipelineKey { + const MSAA_MASK_BITS: u64 = 0b111; + const MSAA_SHIFT_BITS: u64 = Self::LAST_FLAG.bits().trailing_zeros() as u64 + 1; + + const BLEND_MASK_BITS: u64 = 0b111; + const BLEND_SHIFT_BITS: u64 = Self::MSAA_MASK_BITS.count_ones() as u64 + Self::MSAA_SHIFT_BITS; + + const TONEMAP_METHOD_MASK_BITS: u64 = 0b111; + const TONEMAP_METHOD_SHIFT_BITS: u64 = + Self::BLEND_MASK_BITS.count_ones() as u64 + Self::BLEND_SHIFT_BITS; + + const SHADOW_FILTER_METHOD_MASK_BITS: u64 = 0b11; + const SHADOW_FILTER_METHOD_SHIFT_BITS: u64 = + Self::TONEMAP_METHOD_MASK_BITS.count_ones() as u64 + Self::TONEMAP_METHOD_SHIFT_BITS; + + const VIEW_PROJECTION_MASK_BITS: u64 = 0b11; + const VIEW_PROJECTION_SHIFT_BITS: u64 = Self::SHADOW_FILTER_METHOD_MASK_BITS.count_ones() + as u64 + + Self::SHADOW_FILTER_METHOD_SHIFT_BITS; + + const SCREEN_SPACE_SPECULAR_TRANSMISSION_MASK_BITS: u64 = 0b11; + const SCREEN_SPACE_SPECULAR_TRANSMISSION_SHIFT_BITS: u64 = + Self::VIEW_PROJECTION_MASK_BITS.count_ones() as u64 + Self::VIEW_PROJECTION_SHIFT_BITS; + + pub fn from_msaa_samples(msaa_samples: u32) -> Self { + let msaa_bits = + (msaa_samples.trailing_zeros() as u64 & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; + Self::from_bits_retain(msaa_bits) + } + + pub fn from_hdr(hdr: bool) -> Self { + if hdr 
{ + MeshPipelineKey::HDR + } else { + MeshPipelineKey::NONE + } + } + + pub fn msaa_samples(&self) -> u32 { + 1 << ((self.bits() >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + } + + pub fn from_primitive_topology(primitive_topology: PrimitiveTopology) -> Self { + let primitive_topology_bits = ((primitive_topology as u64) + & BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS) + << BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS; + Self::from_bits_retain(primitive_topology_bits) + } + + pub fn primitive_topology(&self) -> PrimitiveTopology { + let primitive_topology_bits = (self.bits() + >> BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS) + & BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS; + match primitive_topology_bits { + x if x == PrimitiveTopology::PointList as u64 => PrimitiveTopology::PointList, + x if x == PrimitiveTopology::LineList as u64 => PrimitiveTopology::LineList, + x if x == PrimitiveTopology::LineStrip as u64 => PrimitiveTopology::LineStrip, + x if x == PrimitiveTopology::TriangleList as u64 => PrimitiveTopology::TriangleList, + x if x == PrimitiveTopology::TriangleStrip as u64 => PrimitiveTopology::TriangleStrip, + _ => PrimitiveTopology::default(), + } + } +} + +// Ensure that we didn't overflow the number of bits available in `MeshPipelineKey`. 
+const_assert_eq!( + (((MeshPipelineKey::LAST_FLAG.bits() << 1) - 1) | MeshPipelineKey::ALL_RESERVED_BITS.bits()) + & BaseMeshPipelineKey::all().bits(), + 0 +); + +// Ensure that the reserved bits don't overlap with the topology bits +const_assert_eq!( + (BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS + << BaseMeshPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS) + & MeshPipelineKey::ALL_RESERVED_BITS.bits(), + 0 +); + +fn is_skinned(layout: &MeshVertexBufferLayoutRef) -> bool { + layout.0.contains(Mesh::ATTRIBUTE_JOINT_INDEX) + && layout.0.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT) +} +pub fn setup_morph_and_skinning_defs( + mesh_layouts: &MeshLayouts, + layout: &MeshVertexBufferLayoutRef, + offset: u32, + key: &MeshPipelineKey, + shader_defs: &mut Vec, + vertex_attributes: &mut Vec, + skins_use_uniform_buffers: bool, +) -> BindGroupLayout { + let is_morphed = key.intersects(MeshPipelineKey::MORPH_TARGETS); + let is_lightmapped = key.intersects(MeshPipelineKey::LIGHTMAPPED); + let motion_vector_prepass = key.intersects(MeshPipelineKey::MOTION_VECTOR_PREPASS); + + if skins_use_uniform_buffers { + shader_defs.push("SKINS_USE_UNIFORM_BUFFERS".into()); + } + + let mut add_skin_data = || { + shader_defs.push("SKINNED".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_JOINT_INDEX.at_shader_location(offset)); + vertex_attributes.push(Mesh::ATTRIBUTE_JOINT_WEIGHT.at_shader_location(offset + 1)); + }; + + match ( + is_skinned(layout), + is_morphed, + is_lightmapped, + motion_vector_prepass, + ) { + (true, false, _, true) => { + add_skin_data(); + mesh_layouts.skinned_motion.clone() + } + (true, false, _, false) => { + add_skin_data(); + mesh_layouts.skinned.clone() + } + (true, true, _, true) => { + add_skin_data(); + shader_defs.push("MORPH_TARGETS".into()); + mesh_layouts.morphed_skinned_motion.clone() + } + (true, true, _, false) => { + add_skin_data(); + shader_defs.push("MORPH_TARGETS".into()); + mesh_layouts.morphed_skinned.clone() + } + (false, true, _, true) => { + 
shader_defs.push("MORPH_TARGETS".into()); + mesh_layouts.morphed_motion.clone() + } + (false, true, _, false) => { + shader_defs.push("MORPH_TARGETS".into()); + mesh_layouts.morphed.clone() + } + (false, false, true, _) => mesh_layouts.lightmapped.clone(), + (false, false, false, _) => mesh_layouts.model_only.clone(), + } +} + +impl SpecializedMeshPipeline for MeshPipeline { + type Key = MeshPipelineKey; + + fn specialize( + &self, + key: Self::Key, + layout: &MeshVertexBufferLayoutRef, + ) -> Result { + let mut shader_defs = Vec::new(); + let mut vertex_attributes = Vec::new(); + + // Let the shader code know that it's running in a mesh pipeline. + shader_defs.push("MESH_PIPELINE".into()); + + shader_defs.push("VERTEX_OUTPUT_INSTANCE_INDEX".into()); + + if layout.0.contains(Mesh::ATTRIBUTE_POSITION) { + shader_defs.push("VERTEX_POSITIONS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_POSITION.at_shader_location(0)); + } + + if layout.0.contains(Mesh::ATTRIBUTE_NORMAL) { + shader_defs.push("VERTEX_NORMALS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_NORMAL.at_shader_location(1)); + } + + if layout.0.contains(Mesh::ATTRIBUTE_UV_0) { + shader_defs.push("VERTEX_UVS".into()); + shader_defs.push("VERTEX_UVS_A".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_UV_0.at_shader_location(2)); + } + + if layout.0.contains(Mesh::ATTRIBUTE_UV_1) { + shader_defs.push("VERTEX_UVS".into()); + shader_defs.push("VERTEX_UVS_B".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_UV_1.at_shader_location(3)); + } + + if layout.0.contains(Mesh::ATTRIBUTE_TANGENT) { + shader_defs.push("VERTEX_TANGENTS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_TANGENT.at_shader_location(4)); + } + + if layout.0.contains(Mesh::ATTRIBUTE_COLOR) { + shader_defs.push("VERTEX_COLORS".into()); + vertex_attributes.push(Mesh::ATTRIBUTE_COLOR.at_shader_location(5)); + } + + if cfg!(feature = "pbr_transmission_textures") { + shader_defs.push("PBR_TRANSMISSION_TEXTURES_SUPPORTED".into()); + } + if 
cfg!(feature = "pbr_multi_layer_material_textures") { + shader_defs.push("PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED".into()); + } + if cfg!(feature = "pbr_anisotropy_texture") { + shader_defs.push("PBR_ANISOTROPY_TEXTURE_SUPPORTED".into()); + } + if cfg!(feature = "pbr_specular_textures") { + shader_defs.push("PBR_SPECULAR_TEXTURES_SUPPORTED".into()); + } + + let bind_group_layout = self.get_view_layout(key.into()); + let mut bind_group_layout = vec![ + bind_group_layout.main_layout.clone(), + bind_group_layout.binding_array_layout.clone(), + ]; + + if key.msaa_samples() > 1 { + shader_defs.push("MULTISAMPLED".into()); + }; + + bind_group_layout.push(setup_morph_and_skinning_defs( + &self.mesh_layouts, + layout, + 6, + &key, + &mut shader_defs, + &mut vertex_attributes, + self.skins_use_uniform_buffers, + )); + + if key.contains(MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION) { + shader_defs.push("SCREEN_SPACE_AMBIENT_OCCLUSION".into()); + } + + let vertex_buffer_layout = layout.0.get_layout(&vertex_attributes)?; + + let (label, blend, depth_write_enabled); + let pass = key.intersection(MeshPipelineKey::BLEND_RESERVED_BITS); + let (mut is_opaque, mut alpha_to_coverage_enabled) = (false, false); + if key.contains(MeshPipelineKey::OIT_ENABLED) && pass == MeshPipelineKey::BLEND_ALPHA { + label = "oit_mesh_pipeline".into(); + // TODO tail blending would need alpha blending + blend = None; + shader_defs.push("OIT_ENABLED".into()); + // TODO it should be possible to use this to combine MSAA and OIT + // alpha_to_coverage_enabled = true; + depth_write_enabled = false; + } else if pass == MeshPipelineKey::BLEND_ALPHA { + label = "alpha_blend_mesh_pipeline".into(); + blend = Some(BlendState::ALPHA_BLENDING); + // For the transparent pass, fragments that are closer will be alpha blended + // but their depth is not written to the depth buffer + depth_write_enabled = false; + } else if pass == MeshPipelineKey::BLEND_PREMULTIPLIED_ALPHA { + label = 
"premultiplied_alpha_mesh_pipeline".into(); + blend = Some(BlendState::PREMULTIPLIED_ALPHA_BLENDING); + shader_defs.push("PREMULTIPLY_ALPHA".into()); + shader_defs.push("BLEND_PREMULTIPLIED_ALPHA".into()); + // For the transparent pass, fragments that are closer will be alpha blended + // but their depth is not written to the depth buffer + depth_write_enabled = false; + } else if pass == MeshPipelineKey::BLEND_MULTIPLY { + label = "multiply_mesh_pipeline".into(); + blend = Some(BlendState { + color: BlendComponent { + src_factor: BlendFactor::Dst, + dst_factor: BlendFactor::OneMinusSrcAlpha, + operation: BlendOperation::Add, + }, + alpha: BlendComponent::OVER, + }); + shader_defs.push("PREMULTIPLY_ALPHA".into()); + shader_defs.push("BLEND_MULTIPLY".into()); + // For the multiply pass, fragments that are closer will be alpha blended + // but their depth is not written to the depth buffer + depth_write_enabled = false; + } else if pass == MeshPipelineKey::BLEND_ALPHA_TO_COVERAGE { + label = "alpha_to_coverage_mesh_pipeline".into(); + // BlendState::REPLACE is not needed here, and None will be potentially much faster in some cases + blend = None; + // For the opaque and alpha mask passes, fragments that are closer will replace + // the current fragment value in the output and the depth is written to the + // depth buffer + depth_write_enabled = true; + is_opaque = !key.contains(MeshPipelineKey::READS_VIEW_TRANSMISSION_TEXTURE); + alpha_to_coverage_enabled = true; + shader_defs.push("ALPHA_TO_COVERAGE".into()); + } else { + label = "opaque_mesh_pipeline".into(); + // BlendState::REPLACE is not needed here, and None will be potentially much faster in some cases + blend = None; + // For the opaque and alpha mask passes, fragments that are closer will replace + // the current fragment value in the output and the depth is written to the + // depth buffer + depth_write_enabled = true; + is_opaque = !key.contains(MeshPipelineKey::READS_VIEW_TRANSMISSION_TEXTURE); + } + + if 
key.contains(MeshPipelineKey::NORMAL_PREPASS) { + shader_defs.push("NORMAL_PREPASS".into()); + } + + if key.contains(MeshPipelineKey::DEPTH_PREPASS) { + shader_defs.push("DEPTH_PREPASS".into()); + } + + if key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + shader_defs.push("MOTION_VECTOR_PREPASS".into()); + } + + if key.contains(MeshPipelineKey::HAS_PREVIOUS_SKIN) { + shader_defs.push("HAS_PREVIOUS_SKIN".into()); + } + + if key.contains(MeshPipelineKey::HAS_PREVIOUS_MORPH) { + shader_defs.push("HAS_PREVIOUS_MORPH".into()); + } + + if key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + shader_defs.push("DEFERRED_PREPASS".into()); + } + + if key.contains(MeshPipelineKey::NORMAL_PREPASS) && key.msaa_samples() == 1 && is_opaque { + shader_defs.push("LOAD_PREPASS_NORMALS".into()); + } + + let view_projection = key.intersection(MeshPipelineKey::VIEW_PROJECTION_RESERVED_BITS); + if view_projection == MeshPipelineKey::VIEW_PROJECTION_NONSTANDARD { + shader_defs.push("VIEW_PROJECTION_NONSTANDARD".into()); + } else if view_projection == MeshPipelineKey::VIEW_PROJECTION_PERSPECTIVE { + shader_defs.push("VIEW_PROJECTION_PERSPECTIVE".into()); + } else if view_projection == MeshPipelineKey::VIEW_PROJECTION_ORTHOGRAPHIC { + shader_defs.push("VIEW_PROJECTION_ORTHOGRAPHIC".into()); + } + + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + shader_defs.push("WEBGL2".into()); + + #[cfg(feature = "experimental_pbr_pcss")] + shader_defs.push("PCSS_SAMPLERS_AVAILABLE".into()); + + if key.contains(MeshPipelineKey::TONEMAP_IN_SHADER) { + shader_defs.push("TONEMAP_IN_SHADER".into()); + shader_defs.push(ShaderDefVal::UInt( + "TONEMAPPING_LUT_TEXTURE_BINDING_INDEX".into(), + TONEMAPPING_LUT_TEXTURE_BINDING_INDEX, + )); + shader_defs.push(ShaderDefVal::UInt( + "TONEMAPPING_LUT_SAMPLER_BINDING_INDEX".into(), + TONEMAPPING_LUT_SAMPLER_BINDING_INDEX, + )); + + let method = key.intersection(MeshPipelineKey::TONEMAP_METHOD_RESERVED_BITS); + + if method == 
MeshPipelineKey::TONEMAP_METHOD_NONE { + shader_defs.push("TONEMAP_METHOD_NONE".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_REINHARD { + shader_defs.push("TONEMAP_METHOD_REINHARD".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_REINHARD_LUMINANCE { + shader_defs.push("TONEMAP_METHOD_REINHARD_LUMINANCE".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_ACES_FITTED { + shader_defs.push("TONEMAP_METHOD_ACES_FITTED".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_AGX { + shader_defs.push("TONEMAP_METHOD_AGX".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM { + shader_defs.push("TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_BLENDER_FILMIC { + shader_defs.push("TONEMAP_METHOD_BLENDER_FILMIC".into()); + } else if method == MeshPipelineKey::TONEMAP_METHOD_TONY_MC_MAPFACE { + shader_defs.push("TONEMAP_METHOD_TONY_MC_MAPFACE".into()); + } + + // Debanding is tied to tonemapping in the shader, cannot run without it. 
+ if key.contains(MeshPipelineKey::DEBAND_DITHER) { + shader_defs.push("DEBAND_DITHER".into()); + } + } + + if key.contains(MeshPipelineKey::MAY_DISCARD) { + shader_defs.push("MAY_DISCARD".into()); + } + + if key.contains(MeshPipelineKey::ENVIRONMENT_MAP) { + shader_defs.push("ENVIRONMENT_MAP".into()); + } + + if key.contains(MeshPipelineKey::IRRADIANCE_VOLUME) && IRRADIANCE_VOLUMES_ARE_USABLE { + shader_defs.push("IRRADIANCE_VOLUME".into()); + } + + if key.contains(MeshPipelineKey::LIGHTMAPPED) { + shader_defs.push("LIGHTMAP".into()); + } + if key.contains(MeshPipelineKey::LIGHTMAP_BICUBIC_SAMPLING) { + shader_defs.push("LIGHTMAP_BICUBIC_SAMPLING".into()); + } + + if key.contains(MeshPipelineKey::TEMPORAL_JITTER) { + shader_defs.push("TEMPORAL_JITTER".into()); + } + + let shadow_filter_method = + key.intersection(MeshPipelineKey::SHADOW_FILTER_METHOD_RESERVED_BITS); + if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2 { + shader_defs.push("SHADOW_FILTER_METHOD_HARDWARE_2X2".into()); + } else if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_GAUSSIAN { + shader_defs.push("SHADOW_FILTER_METHOD_GAUSSIAN".into()); + } else if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_TEMPORAL { + shader_defs.push("SHADOW_FILTER_METHOD_TEMPORAL".into()); + } + + let blur_quality = + key.intersection(MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_RESERVED_BITS); + + shader_defs.push(ShaderDefVal::Int( + "SCREEN_SPACE_SPECULAR_TRANSMISSION_BLUR_TAPS".into(), + match blur_quality { + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_LOW => 4, + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_MEDIUM => 8, + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_HIGH => 16, + MeshPipelineKey::SCREEN_SPACE_SPECULAR_TRANSMISSION_ULTRA => 32, + _ => unreachable!(), // Not possible, since the mask is 2 bits, and we've covered all 4 cases + }, + )); + + if key.contains(MeshPipelineKey::VISIBILITY_RANGE_DITHER) { + 
shader_defs.push("VISIBILITY_RANGE_DITHER".into()); + } + + if key.contains(MeshPipelineKey::DISTANCE_FOG) { + shader_defs.push("DISTANCE_FOG".into()); + } + + if self.binding_arrays_are_usable { + shader_defs.push("MULTIPLE_LIGHT_PROBES_IN_ARRAY".into()); + shader_defs.push("MULTIPLE_LIGHTMAPS_IN_ARRAY".into()); + } + + if IRRADIANCE_VOLUMES_ARE_USABLE { + shader_defs.push("IRRADIANCE_VOLUMES_ARE_USABLE".into()); + } + + if self.clustered_decals_are_usable { + shader_defs.push("CLUSTERED_DECALS_ARE_USABLE".into()); + if cfg!(feature = "pbr_light_textures") { + shader_defs.push("LIGHT_TEXTURES".into()); + } + } + + let format = if key.contains(MeshPipelineKey::HDR) { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }; + + // This is defined here so that custom shaders that use something other than + // the mesh binding from bevy_pbr::mesh_bindings can easily make use of this + // in their own shaders. + if let Some(per_object_buffer_batch_size) = self.per_object_buffer_batch_size { + shader_defs.push(ShaderDefVal::UInt( + "PER_OBJECT_BUFFER_BATCH_SIZE".into(), + per_object_buffer_batch_size, + )); + } + + Ok(RenderPipelineDescriptor { + vertex: VertexState { + shader: self.shader.clone(), + shader_defs: shader_defs.clone(), + buffers: vec![vertex_buffer_layout], + ..default() + }, + fragment: Some(FragmentState { + shader: self.shader.clone(), + shader_defs, + targets: vec![Some(ColorTargetState { + format, + blend, + write_mask: ColorWrites::ALL, + })], + ..default() + }), + layout: bind_group_layout, + primitive: PrimitiveState { + cull_mode: Some(Face::Back), + unclipped_depth: false, + topology: key.primitive_topology(), + ..default() + }, + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState { + front: StencilFaceState::IGNORE, + back: StencilFaceState::IGNORE, + read_mask: 0, + write_mask: 0, + }, + bias: 
DepthBiasState { + constant: 0, + slope_scale: 0.0, + clamp: 0.0, + }, + }), + multisample: MultisampleState { + count: key.msaa_samples(), + mask: !0, + alpha_to_coverage_enabled, + }, + label: Some(label), + ..default() + }) + } +} + +/// The bind groups for meshes currently loaded. +/// +/// If GPU mesh preprocessing isn't in use, these are global to the scene. If +/// GPU mesh preprocessing is in use, these are specific to a single phase. +#[derive(Default)] +pub struct MeshPhaseBindGroups { + model_only: Option, + skinned: Option, + morph_targets: HashMap, MeshBindGroupPair>, + lightmaps: HashMap, +} + +pub struct MeshBindGroupPair { + motion_vectors: BindGroup, + no_motion_vectors: BindGroup, +} + +/// All bind groups for meshes currently loaded. +#[derive(Resource)] +pub enum MeshBindGroups { + /// The bind groups for the meshes for the entire scene, if GPU mesh + /// preprocessing isn't in use. + CpuPreprocessing(MeshPhaseBindGroups), + /// A mapping from the type ID of a phase (e.g. [`Opaque3d`]) to the mesh + /// bind groups for that phase. + GpuPreprocessing(TypeIdMap), +} + +impl MeshPhaseBindGroups { + pub fn reset(&mut self) { + self.model_only = None; + self.skinned = None; + self.morph_targets.clear(); + self.lightmaps.clear(); + } + /// Get the `BindGroup` for `RenderMesh` with given `handle_id` and lightmap + /// key `lightmap`. 
+ pub fn get( + &self, + asset_id: AssetId, + lightmap: Option, + is_skinned: bool, + morph: bool, + motion_vectors: bool, + ) -> Option<&BindGroup> { + match (is_skinned, morph, lightmap) { + (_, true, _) => self + .morph_targets + .get(&asset_id) + .map(|bind_group_pair| bind_group_pair.get(motion_vectors)), + (true, false, _) => self + .skinned + .as_ref() + .map(|bind_group_pair| bind_group_pair.get(motion_vectors)), + (false, false, Some(lightmap_slab)) => self.lightmaps.get(&lightmap_slab), + (false, false, None) => self.model_only.as_ref(), + } + } +} + +impl MeshBindGroupPair { + fn get(&self, motion_vectors: bool) -> &BindGroup { + if motion_vectors { + &self.motion_vectors + } else { + &self.no_motion_vectors + } + } +} + +/// Creates the per-mesh bind groups for each type of mesh and each phase. +pub fn prepare_mesh_bind_groups( + mut commands: Commands, + meshes: Res>, + mesh_pipeline: Res, + render_device: Res, + cpu_batched_instance_buffer: Option< + Res>, + >, + gpu_batched_instance_buffers: Option< + Res>, + >, + skins_uniform: Res, + weights_uniform: Res, + mut render_lightmaps: ResMut, +) { + // CPU mesh preprocessing path. + if let Some(cpu_batched_instance_buffer) = cpu_batched_instance_buffer + && let Some(instance_data_binding) = cpu_batched_instance_buffer + .into_inner() + .instance_data_binding() + { + // In this path, we only have a single set of bind groups for all phases. + let cpu_preprocessing_mesh_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + commands.insert_resource(MeshBindGroups::CpuPreprocessing( + cpu_preprocessing_mesh_bind_groups, + )); + return; + } + + // GPU mesh preprocessing path. + if let Some(gpu_batched_instance_buffers) = gpu_batched_instance_buffers { + let mut gpu_preprocessing_mesh_bind_groups = TypeIdMap::default(); + + // Loop over each phase. 
+ for (phase_type_id, batched_phase_instance_buffers) in + &gpu_batched_instance_buffers.phase_instance_buffers + { + let Some(instance_data_binding) = + batched_phase_instance_buffers.instance_data_binding() + else { + continue; + }; + + let mesh_phase_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + gpu_preprocessing_mesh_bind_groups.insert(*phase_type_id, mesh_phase_bind_groups); + } + + commands.insert_resource(MeshBindGroups::GpuPreprocessing( + gpu_preprocessing_mesh_bind_groups, + )); + } +} + +/// Creates the per-mesh bind groups for each type of mesh, for a single phase. +fn prepare_mesh_bind_groups_for_phase( + model: BindingResource, + meshes: &RenderAssets, + mesh_pipeline: &MeshPipeline, + render_device: &RenderDevice, + skins_uniform: &SkinUniforms, + weights_uniform: &MorphUniforms, + render_lightmaps: &mut RenderLightmaps, +) -> MeshPhaseBindGroups { + let layouts = &mesh_pipeline.mesh_layouts; + + // TODO: Reuse allocations. + let mut groups = MeshPhaseBindGroups { + model_only: Some(layouts.model_only(render_device, &model)), + ..default() + }; + + // Create the skinned mesh bind group with the current and previous buffers + // (the latter being for motion vector computation). + let (skin, prev_skin) = (&skins_uniform.current_buffer, &skins_uniform.prev_buffer); + groups.skinned = Some(MeshBindGroupPair { + motion_vectors: layouts.skinned_motion(render_device, &model, skin, prev_skin), + no_motion_vectors: layouts.skinned(render_device, &model, skin), + }); + + // Create the morphed bind groups just like we did for the skinned bind + // group. 
+ if let Some(weights) = weights_uniform.current_buffer.buffer() { + let prev_weights = weights_uniform.prev_buffer.buffer().unwrap_or(weights); + for (id, gpu_mesh) in meshes.iter() { + if let Some(targets) = gpu_mesh.morph_targets.as_ref() { + let bind_group_pair = if is_skinned(&gpu_mesh.layout) { + let prev_skin = &skins_uniform.prev_buffer; + MeshBindGroupPair { + motion_vectors: layouts.morphed_skinned_motion( + render_device, + &model, + skin, + weights, + targets, + prev_skin, + prev_weights, + ), + no_motion_vectors: layouts.morphed_skinned( + render_device, + &model, + skin, + weights, + targets, + ), + } + } else { + MeshBindGroupPair { + motion_vectors: layouts.morphed_motion( + render_device, + &model, + weights, + targets, + prev_weights, + ), + no_motion_vectors: layouts.morphed(render_device, &model, weights, targets), + } + }; + groups.morph_targets.insert(id, bind_group_pair); + } + } + } + + // Create lightmap bindgroups. There will be one bindgroup for each slab. + let bindless_supported = render_lightmaps.bindless_supported; + for (lightmap_slab_id, lightmap_slab) in render_lightmaps.slabs.iter_mut().enumerate() { + groups.lightmaps.insert( + LightmapSlabIndex(NonMaxU32::new(lightmap_slab_id as u32).unwrap()), + layouts.lightmapped(render_device, &model, lightmap_slab, bindless_supported), + ); + } + + groups +} + +pub struct SetMeshViewBindGroup; +impl RenderCommand

for SetMeshViewBindGroup { + type Param = (); + type ViewQuery = ( + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Option>, + ); + type ItemQuery = (); + + #[inline] + fn render<'w>( + _item: &P, + ( + view_uniform, + view_lights, + view_fog, + view_light_probes, + view_ssr, + view_environment_map, + mesh_view_bind_group, + maybe_oit_layers_count_offset, + ): ROQueryItem<'w, '_, Self::ViewQuery>, + _entity: Option<()>, + _: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let mut offsets: SmallVec<[u32; 8]> = smallvec![ + view_uniform.offset, + view_lights.offset, + view_fog.offset, + **view_light_probes, + **view_ssr, + **view_environment_map, + ]; + if let Some(layers_count_offset) = maybe_oit_layers_count_offset { + offsets.push(layers_count_offset.offset); + } + pass.set_bind_group(I, &mesh_view_bind_group.main, &offsets); + + RenderCommandResult::Success + } +} + +pub struct SetMeshViewBindingArrayBindGroup; +impl RenderCommand

for SetMeshViewBindingArrayBindGroup { + type Param = (); + type ViewQuery = (Read,); + type ItemQuery = (); + + #[inline] + fn render<'w>( + _item: &P, + (mesh_view_bind_group,): ROQueryItem<'w, '_, Self::ViewQuery>, + _entity: Option<()>, + _: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + pass.set_bind_group(I, &mesh_view_bind_group.binding_array, &[]); + + RenderCommandResult::Success + } +} + +pub struct SetMeshViewEmptyBindGroup; +impl RenderCommand

for SetMeshViewEmptyBindGroup { + type Param = (); + type ViewQuery = (Read,); + type ItemQuery = (); + + #[inline] + fn render<'w>( + _item: &P, + (mesh_view_bind_group,): ROQueryItem<'w, '_, Self::ViewQuery>, + _entity: Option<()>, + _: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + pass.set_bind_group(I, &mesh_view_bind_group.empty, &[]); + + RenderCommandResult::Success + } +} + +pub struct SetMeshBindGroup; +impl RenderCommand

for SetMeshBindGroup { + type Param = ( + SRes, + SRes, + SRes, + SRes, + SRes, + SRes, + ); + type ViewQuery = Has; + type ItemQuery = (); + + #[inline] + fn render<'w>( + item: &P, + has_motion_vector_prepass: bool, + _item_query: Option<()>, + ( + render_device, + bind_groups, + mesh_instances, + skin_uniforms, + morph_indices, + lightmaps, + ): SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let bind_groups = bind_groups.into_inner(); + let mesh_instances = mesh_instances.into_inner(); + let skin_uniforms = skin_uniforms.into_inner(); + let morph_indices = morph_indices.into_inner(); + + let entity = &item.main_entity(); + + let Some(mesh_asset_id) = mesh_instances.mesh_asset_id(*entity) else { + return RenderCommandResult::Success; + }; + + let current_skin_byte_offset = skin_uniforms.skin_byte_offset(*entity); + let current_morph_index = morph_indices.current.get(entity); + let prev_morph_index = morph_indices.prev.get(entity); + + let is_skinned = current_skin_byte_offset.is_some(); + let is_morphed = current_morph_index.is_some(); + + let lightmap_slab_index = lightmaps + .render_lightmaps + .get(entity) + .map(|render_lightmap| render_lightmap.slab_index); + + let Some(mesh_phase_bind_groups) = (match *bind_groups { + MeshBindGroups::CpuPreprocessing(ref mesh_phase_bind_groups) => { + Some(mesh_phase_bind_groups) + } + MeshBindGroups::GpuPreprocessing(ref mesh_phase_bind_groups) => { + mesh_phase_bind_groups.get(&TypeId::of::

()) + } + }) else { + // This is harmless if e.g. we're rendering the `Shadow` phase and + // there weren't any shadows. + return RenderCommandResult::Success; + }; + + let Some(bind_group) = mesh_phase_bind_groups.get( + mesh_asset_id, + lightmap_slab_index, + is_skinned, + is_morphed, + has_motion_vector_prepass, + ) else { + return RenderCommandResult::Failure( + "The MeshBindGroups resource wasn't set in the render phase. \ + It should be set by the prepare_mesh_bind_group system.\n\ + This is a bevy bug! Please open an issue.", + ); + }; + + let mut dynamic_offsets: [u32; 5] = Default::default(); + let mut offset_count = 0; + if let PhaseItemExtraIndex::DynamicOffset(dynamic_offset) = item.extra_index() { + dynamic_offsets[offset_count] = dynamic_offset; + offset_count += 1; + } + if let Some(current_skin_index) = current_skin_byte_offset + && skins_use_uniform_buffers(&render_device) + { + dynamic_offsets[offset_count] = current_skin_index.byte_offset; + offset_count += 1; + } + if let Some(current_morph_index) = current_morph_index { + dynamic_offsets[offset_count] = current_morph_index.index; + offset_count += 1; + } + + // Attach motion vectors if needed. + if has_motion_vector_prepass { + // Attach the previous skin index for motion vector computation. + if skins_use_uniform_buffers(&render_device) + && let Some(current_skin_byte_offset) = current_skin_byte_offset + { + dynamic_offsets[offset_count] = current_skin_byte_offset.byte_offset; + offset_count += 1; + } + + // Attach the previous morph index for motion vector computation. If + // there isn't one, just use zero as the shader will ignore it. 
+ if current_morph_index.is_some() { + match prev_morph_index { + Some(prev_morph_index) => { + dynamic_offsets[offset_count] = prev_morph_index.index; + } + None => dynamic_offsets[offset_count] = 0, + } + offset_count += 1; + } + } + + pass.set_bind_group(I, bind_group, &dynamic_offsets[0..offset_count]); + + RenderCommandResult::Success + } +} + +pub struct DrawMesh; +impl RenderCommand

for DrawMesh { + type Param = ( + SRes>, + SRes, + SRes, + SRes, + SRes, + Option>, + SRes, + ); + type ViewQuery = Has; + type ItemQuery = (); + #[inline] + fn render<'w>( + item: &P, + has_preprocess_bind_group: ROQueryItem, + _item_query: Option<()>, + ( + meshes, + mesh_instances, + indirect_parameters_buffer, + pipeline_cache, + mesh_allocator, + preprocess_pipelines, + preprocessing_support, + ): SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + // If we're using GPU preprocessing, then we're dependent on that + // compute shader having been run, which of course can only happen if + // it's compiled. Otherwise, our mesh instance data won't be present. + if let Some(preprocess_pipelines) = preprocess_pipelines + && (!has_preprocess_bind_group + || !preprocess_pipelines + .pipelines_are_loaded(&pipeline_cache, &preprocessing_support)) + { + return RenderCommandResult::Skip; + } + + let meshes = meshes.into_inner(); + let mesh_instances = mesh_instances.into_inner(); + let indirect_parameters_buffer = indirect_parameters_buffer.into_inner(); + let mesh_allocator = mesh_allocator.into_inner(); + + let Some(mesh_asset_id) = mesh_instances.mesh_asset_id(item.main_entity()) else { + return RenderCommandResult::Skip; + }; + let Some(gpu_mesh) = meshes.get(mesh_asset_id) else { + return RenderCommandResult::Skip; + }; + let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_asset_id) else { + return RenderCommandResult::Skip; + }; + + pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..)); + + let batch_range = item.batch_range(); + + // Draw either directly or indirectly, as appropriate. If we're in + // indirect mode, we can additionally multi-draw. (We can't multi-draw + // in direct mode because `wgpu` doesn't expose that functionality.) 
+ match &gpu_mesh.buffer_info { + RenderMeshBufferInfo::Indexed { + index_format, + count, + } => { + let Some(index_buffer_slice) = mesh_allocator.mesh_index_slice(&mesh_asset_id) + else { + return RenderCommandResult::Skip; + }; + + pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format); + + match item.extra_index() { + PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => { + pass.draw_indexed( + index_buffer_slice.range.start + ..(index_buffer_slice.range.start + *count), + vertex_buffer_slice.range.start as i32, + batch_range.clone(), + ); + } + PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_range, + batch_set_index, + } => { + // Look up the indirect parameters buffer, as well as + // the buffer we're going to use for + // `multi_draw_indexed_indirect_count` (if available). + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::

()) + else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; + let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( + phase_indirect_parameters_buffers.indexed.data_buffer(), + phase_indirect_parameters_buffers + .indexed + .batch_sets_buffer(), + ) else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present", + ); + return RenderCommandResult::Skip; + }; + + // Calculate the location of the indirect parameters + // within the buffer. + let indirect_parameters_offset = indirect_parameters_range.start as u64 + * size_of::() as u64; + let indirect_parameters_count = + indirect_parameters_range.end - indirect_parameters_range.start; + + // If we're using `multi_draw_indirect_count`, take the + // number of batches from the appropriate position in + // the batch sets buffer. Otherwise, supply the size of + // the batch set. + match batch_set_index { + Some(batch_set_index) => { + let count_offset = u32::from(batch_set_index) + * (size_of::() as u32); + pass.multi_draw_indexed_indirect_count( + indirect_parameters_buffer, + indirect_parameters_offset, + batch_sets_buffer, + count_offset as u64, + indirect_parameters_count, + ); + } + None => { + pass.multi_draw_indexed_indirect( + indirect_parameters_buffer, + indirect_parameters_offset, + indirect_parameters_count, + ); + } + } + } + } + } + + RenderMeshBufferInfo::NonIndexed => match item.extra_index() { + PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => { + pass.draw(vertex_buffer_slice.range, batch_range.clone()); + } + PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_range, + batch_set_index, + } => { + // Look up the indirect parameters buffer, as well as the + // buffer we're going to use for + // `multi_draw_indirect_count` (if available). 
+ let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::

()) + else { + warn!( + "Not rendering mesh because non-indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; + let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( + phase_indirect_parameters_buffers.non_indexed.data_buffer(), + phase_indirect_parameters_buffers + .non_indexed + .batch_sets_buffer(), + ) else { + warn!( + "Not rendering mesh because non-indexed indirect parameters buffer \ + wasn't present" + ); + return RenderCommandResult::Skip; + }; + + // Calculate the location of the indirect parameters within + // the buffer. + let indirect_parameters_offset = indirect_parameters_range.start as u64 + * size_of::() as u64; + let indirect_parameters_count = + indirect_parameters_range.end - indirect_parameters_range.start; + + // If we're using `multi_draw_indirect_count`, take the + // number of batches from the appropriate position in the + // batch sets buffer. Otherwise, supply the size of the + // batch set. 
+ match batch_set_index { + Some(batch_set_index) => { + let count_offset = + u32::from(batch_set_index) * (size_of::() as u32); + pass.multi_draw_indirect_count( + indirect_parameters_buffer, + indirect_parameters_offset, + batch_sets_buffer, + count_offset as u64, + indirect_parameters_count, + ); + } + None => { + pass.multi_draw_indirect( + indirect_parameters_buffer, + indirect_parameters_offset, + indirect_parameters_count, + ); + } + } + } + }, + } + RenderCommandResult::Success + } +} + +#[cfg(test)] +mod tests { + use super::MeshPipelineKey; + #[test] + fn mesh_key_msaa_samples() { + for i in [1, 2, 4, 8, 16, 32, 64, 128] { + assert_eq!(MeshPipelineKey::from_msaa_samples(i).msaa_samples(), i); + } + } +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh.wgsl b/crates/libmarathon/src/render/pbr/render/mesh.wgsl new file mode 100644 index 0000000..9568468 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh.wgsl @@ -0,0 +1,120 @@ +#import bevy_pbr::{ + mesh_bindings::mesh, + mesh_functions, + skinning, + morph::morph, + forward_io::{Vertex, VertexOutput}, + view_transformations::position_world_to_clip, +} + +#ifdef MORPH_TARGETS +fn morph_vertex(vertex_in: Vertex) -> Vertex { + var vertex = vertex_in; + let first_vertex = mesh[vertex.instance_index].first_vertex_index; + let vertex_index = vertex.index - first_vertex; + + let weight_count = bevy_pbr::morph::layer_count(); + for (var i: u32 = 0u; i < weight_count; i ++) { + let weight = bevy_pbr::morph::weight_at(i); + if weight == 0.0 { + continue; + } + vertex.position += weight * morph(vertex_index, bevy_pbr::morph::position_offset, i); +#ifdef VERTEX_NORMALS + vertex.normal += weight * morph(vertex_index, bevy_pbr::morph::normal_offset, i); +#endif +#ifdef VERTEX_TANGENTS + vertex.tangent += vec4(weight * morph(vertex_index, bevy_pbr::morph::tangent_offset, i), 0.0); +#endif + } + return vertex; +} +#endif + +@vertex +fn vertex(vertex_no_morph: Vertex) -> VertexOutput { + var out: 
VertexOutput; + +#ifdef MORPH_TARGETS + var vertex = morph_vertex(vertex_no_morph); +#else + var vertex = vertex_no_morph; +#endif + + let mesh_world_from_local = mesh_functions::get_world_from_local(vertex_no_morph.instance_index); + +#ifdef SKINNED + var world_from_local = skinning::skin_model( + vertex.joint_indices, + vertex.joint_weights, + vertex_no_morph.instance_index + ); +#else + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. + // See https://github.com/gfx-rs/naga/issues/2416 . + var world_from_local = mesh_world_from_local; +#endif + +#ifdef VERTEX_NORMALS +#ifdef SKINNED + out.world_normal = skinning::skin_normals(world_from_local, vertex.normal); +#else + out.world_normal = mesh_functions::mesh_normal_local_to_world( + vertex.normal, + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. + // See https://github.com/gfx-rs/naga/issues/2416 + vertex_no_morph.instance_index + ); +#endif +#endif + +#ifdef VERTEX_POSITIONS + out.world_position = mesh_functions::mesh_position_local_to_world(world_from_local, vec4(vertex.position, 1.0)); + out.position = position_world_to_clip(out.world_position.xyz); +#endif + +#ifdef VERTEX_UVS_A + out.uv = vertex.uv; +#endif +#ifdef VERTEX_UVS_B + out.uv_b = vertex.uv_b; +#endif + +#ifdef VERTEX_TANGENTS + out.world_tangent = mesh_functions::mesh_tangent_local_to_world( + world_from_local, + vertex.tangent, + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. + // See https://github.com/gfx-rs/naga/issues/2416 + vertex_no_morph.instance_index + ); +#endif + +#ifdef VERTEX_COLORS + out.color = vertex.color; +#endif + +#ifdef VERTEX_OUTPUT_INSTANCE_INDEX + // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
+ // See https://github.com/gfx-rs/naga/issues/2416 + out.instance_index = vertex_no_morph.instance_index; +#endif + +#ifdef VISIBILITY_RANGE_DITHER + out.visibility_range_dither = mesh_functions::get_visibility_range_dither_level( + vertex_no_morph.instance_index, mesh_world_from_local[3]); +#endif + + return out; +} + +@fragment +fn fragment( + mesh: VertexOutput, +) -> @location(0) vec4 { +#ifdef VERTEX_COLORS + return mesh.color; +#else + return vec4(1.0, 0.0, 1.0, 1.0); +#endif +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh_bindings.rs b/crates/libmarathon/src/render/pbr/render/mesh_bindings.rs new file mode 100644 index 0000000..0acbb55 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_bindings.rs @@ -0,0 +1,551 @@ +//! Bind group layout related definitions for the mesh pipeline. + +use bevy_math::Mat4; +use bevy_mesh::morph::MAX_MORPH_WEIGHTS; +use crate::render::{ + render_resource::*, + renderer::{RenderAdapter, RenderDevice}, +}; + +use crate::render::pbr::{binding_arrays_are_usable, render::skin::MAX_JOINTS, LightmapSlab}; + +const MORPH_WEIGHT_SIZE: usize = size_of::(); + +/// This is used to allocate buffers. +/// The correctness of the value depends on the GPU/platform. +/// The current value is chosen because it is guaranteed to work everywhere. +/// To allow for bigger values, a check must be made for the limits +/// of the GPU at runtime, which would mean not using consts anymore. +pub const MORPH_BUFFER_SIZE: usize = MAX_MORPH_WEIGHTS * MORPH_WEIGHT_SIZE; + +const JOINT_SIZE: usize = size_of::(); +pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE; + +/// Individual layout entries. 
+mod layout_entry { + use core::num::NonZeroU32; + + use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; + use crate::render::pbr::{render::skin, MeshUniform, LIGHTMAPS_PER_SLAB}; + use crate::render::{ + render_resource::{ + binding_types::{ + sampler, storage_buffer_read_only_sized, texture_2d, texture_3d, + uniform_buffer_sized, + }, + BindGroupLayoutEntryBuilder, BufferSize, GpuArrayBuffer, SamplerBindingType, + ShaderStages, TextureSampleType, + }, + renderer::RenderDevice, + }; + + pub(super) fn model(render_device: &RenderDevice) -> BindGroupLayoutEntryBuilder { + GpuArrayBuffer::::binding_layout(render_device) + .visibility(ShaderStages::VERTEX_FRAGMENT) + } + pub(super) fn skinning(render_device: &RenderDevice) -> BindGroupLayoutEntryBuilder { + // If we can use storage buffers, do so. Otherwise, fall back to uniform + // buffers. + let size = BufferSize::new(JOINT_BUFFER_SIZE as u64); + if skin::skins_use_uniform_buffers(render_device) { + uniform_buffer_sized(true, size) + } else { + storage_buffer_read_only_sized(false, size) + } + } + pub(super) fn weights() -> BindGroupLayoutEntryBuilder { + uniform_buffer_sized(true, BufferSize::new(MORPH_BUFFER_SIZE as u64)) + } + pub(super) fn targets() -> BindGroupLayoutEntryBuilder { + texture_3d(TextureSampleType::Float { filterable: false }) + } + pub(super) fn lightmaps_texture_view() -> BindGroupLayoutEntryBuilder { + texture_2d(TextureSampleType::Float { filterable: true }).visibility(ShaderStages::FRAGMENT) + } + pub(super) fn lightmaps_sampler() -> BindGroupLayoutEntryBuilder { + sampler(SamplerBindingType::Filtering).visibility(ShaderStages::FRAGMENT) + } + pub(super) fn lightmaps_texture_view_array() -> BindGroupLayoutEntryBuilder { + texture_2d(TextureSampleType::Float { filterable: true }) + .visibility(ShaderStages::FRAGMENT) + .count(NonZeroU32::new(LIGHTMAPS_PER_SLAB as u32).unwrap()) + } + pub(super) fn lightmaps_sampler_array() -> BindGroupLayoutEntryBuilder { + 
sampler(SamplerBindingType::Filtering) + .visibility(ShaderStages::FRAGMENT) + .count(NonZeroU32::new(LIGHTMAPS_PER_SLAB as u32).unwrap()) + } +} + +/// Individual [`BindGroupEntry`] +/// for bind groups. +mod entry { + use crate::render::pbr::render::skin; + + use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; + use crate::render::{ + render_resource::{ + BindGroupEntry, BindingResource, Buffer, BufferBinding, BufferSize, Sampler, + TextureView, WgpuSampler, WgpuTextureView, + }, + renderer::RenderDevice, + }; + + fn entry(binding: u32, size: Option, buffer: &Buffer) -> BindGroupEntry<'_> { + BindGroupEntry { + binding, + resource: BindingResource::Buffer(BufferBinding { + buffer, + offset: 0, + size: size.map(|size| BufferSize::new(size).unwrap()), + }), + } + } + pub(super) fn model(binding: u32, resource: BindingResource) -> BindGroupEntry { + BindGroupEntry { binding, resource } + } + pub(super) fn skinning<'a>( + render_device: &RenderDevice, + binding: u32, + buffer: &'a Buffer, + ) -> BindGroupEntry<'a> { + let size = if skin::skins_use_uniform_buffers(render_device) { + Some(JOINT_BUFFER_SIZE as u64) + } else { + None + }; + entry(binding, size, buffer) + } + pub(super) fn weights(binding: u32, buffer: &Buffer) -> BindGroupEntry<'_> { + entry(binding, Some(MORPH_BUFFER_SIZE as u64), buffer) + } + pub(super) fn targets(binding: u32, texture: &TextureView) -> BindGroupEntry<'_> { + BindGroupEntry { + binding, + resource: BindingResource::TextureView(texture), + } + } + pub(super) fn lightmaps_texture_view( + binding: u32, + texture: &TextureView, + ) -> BindGroupEntry<'_> { + BindGroupEntry { + binding, + resource: BindingResource::TextureView(texture), + } + } + pub(super) fn lightmaps_sampler(binding: u32, sampler: &Sampler) -> BindGroupEntry<'_> { + BindGroupEntry { + binding, + resource: BindingResource::Sampler(sampler), + } + } + pub(super) fn lightmaps_texture_view_array<'a>( + binding: u32, + textures: &'a [&'a WgpuTextureView], + ) -> 
BindGroupEntry<'a> { + BindGroupEntry { + binding, + resource: BindingResource::TextureViewArray(textures), + } + } + pub(super) fn lightmaps_sampler_array<'a>( + binding: u32, + samplers: &'a [&'a WgpuSampler], + ) -> BindGroupEntry<'a> { + BindGroupEntry { + binding, + resource: BindingResource::SamplerArray(samplers), + } + } +} + +/// All possible [`BindGroupLayout`]s in bevy's default mesh shader (`mesh.wgsl`). +#[derive(Clone)] +pub struct MeshLayouts { + /// The mesh model uniform (transform) and nothing else. + pub model_only: BindGroupLayout, + + /// Includes the lightmap texture and uniform. + pub lightmapped: BindGroupLayout, + + /// Also includes the uniform for skinning + pub skinned: BindGroupLayout, + + /// Like [`MeshLayouts::skinned`], but includes slots for the previous + /// frame's joint matrices, so that we can compute motion vectors. + pub skinned_motion: BindGroupLayout, + + /// Also includes the uniform and [`MorphAttributes`] for morph targets. + /// + /// [`MorphAttributes`]: bevy_mesh::morph::MorphAttributes + pub morphed: BindGroupLayout, + + /// Like [`MeshLayouts::morphed`], but includes a slot for the previous + /// frame's morph weights, so that we can compute motion vectors. + pub morphed_motion: BindGroupLayout, + + /// Also includes both uniforms for skinning and morph targets, also the + /// morph target [`MorphAttributes`] binding. + /// + /// [`MorphAttributes`]: bevy_mesh::morph::MorphAttributes + pub morphed_skinned: BindGroupLayout, + + /// Like [`MeshLayouts::morphed_skinned`], but includes slots for the + /// previous frame's joint matrices and morph weights, so that we can + /// compute motion vectors. + pub morphed_skinned_motion: BindGroupLayout, +} + +impl MeshLayouts { + /// Prepare the layouts used by the default bevy [`Mesh`]. 
+ /// + /// [`Mesh`]: bevy_mesh::Mesh + pub fn new(render_device: &RenderDevice, render_adapter: &RenderAdapter) -> Self { + MeshLayouts { + model_only: Self::model_only_layout(render_device), + lightmapped: Self::lightmapped_layout(render_device, render_adapter), + skinned: Self::skinned_layout(render_device), + skinned_motion: Self::skinned_motion_layout(render_device), + morphed: Self::morphed_layout(render_device), + morphed_motion: Self::morphed_motion_layout(render_device), + morphed_skinned: Self::morphed_skinned_layout(render_device), + morphed_skinned_motion: Self::morphed_skinned_motion_layout(render_device), + } + } + + // ---------- create individual BindGroupLayouts ---------- + + fn model_only_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "mesh_layout", + &BindGroupLayoutEntries::single( + ShaderStages::empty(), + layout_entry::model(render_device), + ), + ) + } + + /// Creates the layout for skinned meshes. + fn skinned_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "skinned_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's joint matrix buffer. + (1, layout_entry::skinning(render_device)), + ), + ), + ) + } + + /// Creates the layout for skinned meshes with the infrastructure to compute + /// motion vectors. + fn skinned_motion_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "skinned_motion_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's joint matrix buffer. + (1, layout_entry::skinning(render_device)), + // The previous frame's joint matrix buffer. + (6, layout_entry::skinning(render_device)), + ), + ), + ) + } + + /// Creates the layout for meshes with morph targets. 
+ fn morphed_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "morphed_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's morph weight buffer. + (2, layout_entry::weights()), + (3, layout_entry::targets()), + ), + ), + ) + } + + /// Creates the layout for meshes with morph targets and the infrastructure + /// to compute motion vectors. + fn morphed_motion_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "morphed_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's morph weight buffer. + (2, layout_entry::weights()), + (3, layout_entry::targets()), + // The previous frame's morph weight buffer. + (7, layout_entry::weights()), + ), + ), + ) + } + + /// Creates the bind group layout for meshes with both skins and morph + /// targets. + fn morphed_skinned_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "morphed_skinned_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's joint matrix buffer. + (1, layout_entry::skinning(render_device)), + // The current frame's morph weight buffer. + (2, layout_entry::weights()), + (3, layout_entry::targets()), + ), + ), + ) + } + + /// Creates the bind group layout for meshes with both skins and morph + /// targets, in addition to the infrastructure to compute motion vectors. 
+ fn morphed_skinned_motion_layout(render_device: &RenderDevice) -> BindGroupLayout { + render_device.create_bind_group_layout( + "morphed_skinned_motion_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + // The current frame's joint matrix buffer. + (1, layout_entry::skinning(render_device)), + // The current frame's morph weight buffer. + (2, layout_entry::weights()), + (3, layout_entry::targets()), + // The previous frame's joint matrix buffer. + (6, layout_entry::skinning(render_device)), + // The previous frame's morph weight buffer. + (7, layout_entry::weights()), + ), + ), + ) + } + + fn lightmapped_layout( + render_device: &RenderDevice, + render_adapter: &RenderAdapter, + ) -> BindGroupLayout { + if binding_arrays_are_usable(render_device, render_adapter) { + render_device.create_bind_group_layout( + "lightmapped_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + (4, layout_entry::lightmaps_texture_view_array()), + (5, layout_entry::lightmaps_sampler_array()), + ), + ), + ) + } else { + render_device.create_bind_group_layout( + "lightmapped_mesh_layout", + &BindGroupLayoutEntries::with_indices( + ShaderStages::VERTEX, + ( + (0, layout_entry::model(render_device)), + (4, layout_entry::lightmaps_texture_view()), + (5, layout_entry::lightmaps_sampler()), + ), + ), + ) + } + } + + // ---------- BindGroup methods ---------- + + pub fn model_only(&self, render_device: &RenderDevice, model: &BindingResource) -> BindGroup { + render_device.create_bind_group( + "model_only_mesh_bind_group", + &self.model_only, + &[entry::model(0, model.clone())], + ) + } + + pub fn lightmapped( + &self, + render_device: &RenderDevice, + model: &BindingResource, + lightmap_slab: &LightmapSlab, + bindless_lightmaps: bool, + ) -> BindGroup { + if bindless_lightmaps { + let (texture_views, samplers) = 
lightmap_slab.build_binding_arrays(); + render_device.create_bind_group( + "lightmapped_mesh_bind_group", + &self.lightmapped, + &[ + entry::model(0, model.clone()), + entry::lightmaps_texture_view_array(4, &texture_views), + entry::lightmaps_sampler_array(5, &samplers), + ], + ) + } else { + let (texture_view, sampler) = lightmap_slab.bindings_for_first_lightmap(); + render_device.create_bind_group( + "lightmapped_mesh_bind_group", + &self.lightmapped, + &[ + entry::model(0, model.clone()), + entry::lightmaps_texture_view(4, texture_view), + entry::lightmaps_sampler(5, sampler), + ], + ) + } + } + + /// Creates the bind group for skinned meshes with no morph targets. + pub fn skinned( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_skin: &Buffer, + ) -> BindGroup { + render_device.create_bind_group( + "skinned_mesh_bind_group", + &self.skinned, + &[ + entry::model(0, model.clone()), + entry::skinning(render_device, 1, current_skin), + ], + ) + } + + /// Creates the bind group for skinned meshes with no morph targets, with + /// the infrastructure to compute motion vectors. + /// + /// `current_skin` is the buffer of joint matrices for this frame; + /// `prev_skin` is the buffer for the previous frame. The latter is used for + /// motion vector computation. If there is no such applicable buffer, + /// `current_skin` and `prev_skin` will reference the same buffer. + pub fn skinned_motion( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_skin: &Buffer, + prev_skin: &Buffer, + ) -> BindGroup { + render_device.create_bind_group( + "skinned_motion_mesh_bind_group", + &self.skinned_motion, + &[ + entry::model(0, model.clone()), + entry::skinning(render_device, 1, current_skin), + entry::skinning(render_device, 6, prev_skin), + ], + ) + } + + /// Creates the bind group for meshes with no skins but morph targets. 
+ pub fn morphed( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_weights: &Buffer, + targets: &TextureView, + ) -> BindGroup { + render_device.create_bind_group( + "morphed_mesh_bind_group", + &self.morphed, + &[ + entry::model(0, model.clone()), + entry::weights(2, current_weights), + entry::targets(3, targets), + ], + ) + } + + /// Creates the bind group for meshes with no skins but morph targets, in + /// addition to the infrastructure to compute motion vectors. + /// + /// `current_weights` is the buffer of morph weights for this frame; + /// `prev_weights` is the buffer for the previous frame. The latter is used + /// for motion vector computation. If there is no such applicable buffer, + /// `current_weights` and `prev_weights` will reference the same buffer. + pub fn morphed_motion( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_weights: &Buffer, + targets: &TextureView, + prev_weights: &Buffer, + ) -> BindGroup { + render_device.create_bind_group( + "morphed_motion_mesh_bind_group", + &self.morphed_motion, + &[ + entry::model(0, model.clone()), + entry::weights(2, current_weights), + entry::targets(3, targets), + entry::weights(7, prev_weights), + ], + ) + } + + /// Creates the bind group for meshes with skins and morph targets. + pub fn morphed_skinned( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_skin: &Buffer, + current_weights: &Buffer, + targets: &TextureView, + ) -> BindGroup { + render_device.create_bind_group( + "morphed_skinned_mesh_bind_group", + &self.morphed_skinned, + &[ + entry::model(0, model.clone()), + entry::skinning(render_device, 1, current_skin), + entry::weights(2, current_weights), + entry::targets(3, targets), + ], + ) + } + + /// Creates the bind group for meshes with skins and morph targets, in + /// addition to the infrastructure to compute motion vectors. 
+ /// + /// See the documentation for [`MeshLayouts::skinned_motion`] and + /// [`MeshLayouts::morphed_motion`] above for more information about the + /// `current_skin`, `prev_skin`, `current_weights`, and `prev_weights` + /// buffers. + pub fn morphed_skinned_motion( + &self, + render_device: &RenderDevice, + model: &BindingResource, + current_skin: &Buffer, + current_weights: &Buffer, + targets: &TextureView, + prev_skin: &Buffer, + prev_weights: &Buffer, + ) -> BindGroup { + render_device.create_bind_group( + "morphed_skinned_motion_mesh_bind_group", + &self.morphed_skinned_motion, + &[ + entry::model(0, model.clone()), + entry::skinning(render_device, 1, current_skin), + entry::weights(2, current_weights), + entry::targets(3, targets), + entry::skinning(render_device, 6, prev_skin), + entry::weights(7, prev_weights), + ], + ) + } +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh_bindings.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_bindings.wgsl new file mode 100644 index 0000000..6e78dc4 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_bindings.wgsl @@ -0,0 +1,11 @@ +#define_import_path bevy_pbr::mesh_bindings + +#import bevy_pbr::mesh_types::Mesh + +#ifndef MESHLET_MESH_MATERIAL_PASS +#ifdef PER_OBJECT_BUFFER_BATCH_SIZE +@group(2) @binding(0) var mesh: array; +#else +@group(2) @binding(0) var mesh: array; +#endif // PER_OBJECT_BUFFER_BATCH_SIZE +#endif // MESHLET_MESH_MATERIAL_PASS diff --git a/crates/libmarathon/src/render/pbr/render/mesh_functions.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_functions.wgsl new file mode 100644 index 0000000..6d4c53a --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_functions.wgsl @@ -0,0 +1,168 @@ +#define_import_path bevy_pbr::mesh_functions + +#import bevy_pbr::{ + mesh_view_bindings::{ + view, + visibility_ranges, + VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE + }, + mesh_bindings::mesh, + mesh_types::MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT, + 
view_transformations::position_world_to_clip, +} +#import bevy_render::maths::{affine3_to_square, mat2x4_f32_to_mat3x3_unpack} + +#ifndef MESHLET_MESH_MATERIAL_PASS + +fn get_world_from_local(instance_index: u32) -> mat4x4 { + return affine3_to_square(mesh[instance_index].world_from_local); +} + +fn get_previous_world_from_local(instance_index: u32) -> mat4x4 { + return affine3_to_square(mesh[instance_index].previous_world_from_local); +} + +fn get_local_from_world(instance_index: u32) -> mat4x4 { + // the model matrix is translation * rotation * scale + // the inverse is then scale^-1 * rotation ^-1 * translation^-1 + // the 3x3 matrix only contains the information for the rotation and scale + let inverse_model_3x3 = transpose(mat2x4_f32_to_mat3x3_unpack( + mesh[instance_index].local_from_world_transpose_a, + mesh[instance_index].local_from_world_transpose_b, + )); + // construct scale^-1 * rotation^-1 from the 3x3 + let inverse_model_4x4_no_trans = mat4x4( + vec4(inverse_model_3x3[0], 0.0), + vec4(inverse_model_3x3[1], 0.0), + vec4(inverse_model_3x3[2], 0.0), + vec4(0.0,0.0,0.0,1.0) + ); + // we can get translation^-1 by negating the translation of the model + let model = get_world_from_local(instance_index); + let inverse_model_4x4_only_trans = mat4x4( + vec4(1.0,0.0,0.0,0.0), + vec4(0.0,1.0,0.0,0.0), + vec4(0.0,0.0,1.0,0.0), + vec4(-model[3].xyz, 1.0) + ); + + return inverse_model_4x4_no_trans * inverse_model_4x4_only_trans; +} + +#endif // MESHLET_MESH_MATERIAL_PASS + +fn mesh_position_local_to_world(world_from_local: mat4x4, vertex_position: vec4) -> vec4 { + return world_from_local * vertex_position; +} + +// NOTE: The intermediate world_position assignment is important +// for precision purposes when using the 'equals' depth comparison +// function. 
+fn mesh_position_local_to_clip(world_from_local: mat4x4, vertex_position: vec4) -> vec4 { + let world_position = mesh_position_local_to_world(world_from_local, vertex_position); + return position_world_to_clip(world_position.xyz); +} + +#ifndef MESHLET_MESH_MATERIAL_PASS + +fn mesh_normal_local_to_world(vertex_normal: vec3, instance_index: u32) -> vec3 { + // NOTE: The mikktspace method of normal mapping requires that the world normal is + // re-normalized in the vertex shader to match the way mikktspace bakes vertex tangents + // and normal maps so that the exact inverse process is applied when shading. Blender, Unity, + // Unreal Engine, Godot, and more all use the mikktspace method. + // We only skip normalization for invalid normals so that they don't become NaN. + // Do not change this code unless you really know what you are doing. + // http://www.mikktspace.com/ + if any(vertex_normal != vec3(0.0)) { + return normalize( + mat2x4_f32_to_mat3x3_unpack( + mesh[instance_index].local_from_world_transpose_a, + mesh[instance_index].local_from_world_transpose_b, + ) * vertex_normal + ); + } else { + return vertex_normal; + } +} + +#endif // MESHLET_MESH_MATERIAL_PASS + +// Calculates the sign of the determinant of the 3x3 model matrix based on a +// mesh flag +fn sign_determinant_model_3x3m(mesh_flags: u32) -> f32 { + // bool(u32) is false if 0u else true + // f32(bool) is 1.0 if true else 0.0 + // * 2.0 - 1.0 remaps 0.0 or 1.0 to -1.0 or 1.0 respectively + return f32(bool(mesh_flags & MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT)) * 2.0 - 1.0; +} + +#ifndef MESHLET_MESH_MATERIAL_PASS + +fn mesh_tangent_local_to_world(world_from_local: mat4x4, vertex_tangent: vec4, instance_index: u32) -> vec4 { + // NOTE: The mikktspace method of normal mapping requires that the world tangent is + // re-normalized in the vertex shader to match the way mikktspace bakes vertex tangents + // and normal maps so that the exact inverse process is applied when shading. 
Blender, Unity, + // Unreal Engine, Godot, and more all use the mikktspace method. + // We only skip normalization for invalid tangents so that they don't become NaN. + // Do not change this code unless you really know what you are doing. + // http://www.mikktspace.com/ + if any(vertex_tangent != vec4(0.0)) { + return vec4( + normalize( + mat3x3( + world_from_local[0].xyz, + world_from_local[1].xyz, + world_from_local[2].xyz, + ) * vertex_tangent.xyz + ), + // NOTE: Multiplying by the sign of the determinant of the 3x3 model matrix accounts for + // situations such as negative scaling. + vertex_tangent.w * sign_determinant_model_3x3m(mesh[instance_index].flags) + ); + } else { + return vertex_tangent; + } +} + +#endif // MESHLET_MESH_MATERIAL_PASS + +// Returns an appropriate dither level for the current mesh instance. +// +// This looks up the LOD range in the `visibility_ranges` table and compares the +// camera distance to determine the dithering level. +#ifdef VISIBILITY_RANGE_DITHER +fn get_visibility_range_dither_level(instance_index: u32, world_position: vec4) -> i32 { +#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 6 + // If we're using a storage buffer, then the length is variable. 
+ let visibility_buffer_array_len = arrayLength(&visibility_ranges); +#else // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 6 + // If we're using a uniform buffer, then the length is constant + let visibility_buffer_array_len = VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE; +#endif // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 6 + + let visibility_buffer_index = mesh[instance_index].flags & 0xffffu; + if (visibility_buffer_index > visibility_buffer_array_len) { + return -16; + } + + let lod_range = visibility_ranges[visibility_buffer_index]; + let camera_distance = length(view.world_position.xyz - world_position.xyz); + + // This encodes the following mapping: + // + // `lod_range.` x y z w camera distance + // ←───────┼────────┼────────┼────────┼────────→ + // LOD level -16 -16 0 0 16 16 LOD level + let offset = select(-16, 0, camera_distance >= lod_range.z); + let bounds = select(lod_range.xy, lod_range.zw, camera_distance >= lod_range.z); + let level = i32(round((camera_distance - bounds.x) / (bounds.y - bounds.x) * 16.0)); + return offset + clamp(level, 0, 16); +} +#endif + + +#ifndef MESHLET_MESH_MATERIAL_PASS +fn get_tag(instance_index: u32) -> u32 { + return mesh[instance_index].tag; +} +#endif diff --git a/crates/libmarathon/src/render/pbr/render/mesh_preprocess.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_preprocess.wgsl new file mode 100644 index 0000000..543b328 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_preprocess.wgsl @@ -0,0 +1,373 @@ +// GPU mesh transforming and culling. +// +// This is a compute shader that expands each `MeshInputUniform` out to a full +// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform` and +// `MeshUniform` are in a 1:N relationship.) It runs in parallel for all meshes +// for all views. As part of this process, the shader gathers each mesh's +// transform on the previous frame and writes it into the `MeshUniform` so that +// TAA works. 
It also performs frustum culling and occlusion culling, if +// requested. +// +// If occlusion culling is on, this shader runs twice: once to prepare the +// meshes that were visible last frame, and once to prepare the meshes that +// weren't visible last frame but became visible this frame. The two invocations +// are known as *early mesh preprocessing* and *late mesh preprocessing* +// respectively. + +#import bevy_pbr::mesh_preprocess_types::{ + IndirectParametersCpuMetadata, IndirectParametersGpuMetadata, MeshInput +} +#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT} +#import bevy_pbr::mesh_view_bindings::view +#import bevy_pbr::occlusion_culling +#import bevy_pbr::prepass_bindings::previous_view_uniforms +#import bevy_pbr::view_transformations::{ + position_world_to_ndc, position_world_to_view, ndc_to_uv, view_z_to_depth_ndc, + position_world_to_prev_ndc, position_world_to_prev_view, prev_view_z_to_depth_ndc +} +#import bevy_render::maths +#import bevy_render::view::View + +// Information about each mesh instance needed to cull it on GPU. +// +// At the moment, this just consists of its axis-aligned bounding box (AABB). +struct MeshCullingData { + // The 3D center of the AABB in model space, padded with an extra unused + // float value. + aabb_center: vec4, + // The 3D extents of the AABB in model space, divided by two, padded with + // an extra unused float value. + aabb_half_extents: vec4, +} + +// One invocation of this compute shader: i.e. one mesh instance in a view. +struct PreprocessWorkItem { + // The index of the `MeshInput` in the `current_input` buffer that we read + // from. + input_index: u32, + // In direct mode, the index of the `Mesh` in `output` that we write to. In + // indirect mode, the index of the `IndirectParameters` in + // `indirect_parameters` that we write to. + output_or_indirect_parameters_index: u32, +} + +// The parameters for the indirect compute dispatch for the late mesh +// preprocessing phase. 
+struct LatePreprocessWorkItemIndirectParameters { + // The number of workgroups we're going to dispatch. + // + // This value should always be equal to `ceil(work_item_count / 64)`. + dispatch_x: atomic, + // The number of workgroups in the Y direction; always 1. + dispatch_y: u32, + // The number of workgroups in the Z direction; always 1. + dispatch_z: u32, + // The precise number of work items. + work_item_count: atomic, + // Padding. + // + // This isn't the usual structure padding; it's needed because some hardware + // requires indirect compute dispatch parameters to be aligned on 64-byte + // boundaries. + pad: vec4, +} + +// These have to be in a structure because of Naga limitations on DX12. +struct PushConstants { + // The offset into the `late_preprocess_work_item_indirect_parameters` + // buffer. + late_preprocess_work_item_indirect_offset: u32, +} + +// The current frame's `MeshInput`. +@group(0) @binding(3) var current_input: array; +// The `MeshInput` values from the previous frame. +@group(0) @binding(4) var previous_input: array; +// Indices into the `MeshInput` buffer. +// +// There may be many indices that map to the same `MeshInput`. +@group(0) @binding(5) var work_items: array; +// The output array of `Mesh`es. +@group(0) @binding(6) var output: array; + +#ifdef INDIRECT +// The array of indirect parameters for drawcalls. +@group(0) @binding(7) var indirect_parameters_cpu_metadata: + array; + +@group(0) @binding(8) var indirect_parameters_gpu_metadata: + array; +#endif + +#ifdef FRUSTUM_CULLING +// Data needed to cull the meshes. +// +// At the moment, this consists only of AABBs. 
+@group(0) @binding(9) var mesh_culling_data: array; +#endif // FRUSTUM_CULLING + +#ifdef OCCLUSION_CULLING +@group(0) @binding(10) var depth_pyramid: texture_2d; + +#ifdef EARLY_PHASE +@group(0) @binding(11) var late_preprocess_work_items: + array; +#endif // EARLY_PHASE + +@group(0) @binding(12) var late_preprocess_work_item_indirect_parameters: + array; + +var push_constants: PushConstants; +#endif // OCCLUSION_CULLING + +#ifdef FRUSTUM_CULLING +// Returns true if the view frustum intersects an oriented bounding box (OBB). +// +// `aabb_center.w` should be 1.0. +fn view_frustum_intersects_obb( + world_from_local: mat4x4, + aabb_center: vec4, + aabb_half_extents: vec3, +) -> bool { + + for (var i = 0; i < 5; i += 1) { + // Calculate relative radius of the sphere associated with this plane. + let plane_normal = view.frustum[i]; + let relative_radius = dot( + abs( + vec3( + dot(plane_normal.xyz, world_from_local[0].xyz), + dot(plane_normal.xyz, world_from_local[1].xyz), + dot(plane_normal.xyz, world_from_local[2].xyz), + ) + ), + aabb_half_extents + ); + + // Check the frustum plane. + if (!maths::sphere_intersects_plane_half_space( + plane_normal, aabb_center, relative_radius)) { + return false; + } + } + + return true; +} +#endif + +@compute +@workgroup_size(64) +fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Figure out our instance index. If this thread doesn't correspond to any + // index, bail. + let instance_index = global_invocation_id.x; + +#ifdef LATE_PHASE + if (instance_index >= atomicLoad(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].work_item_count)) { + return; + } +#else // LATE_PHASE + if (instance_index >= arrayLength(&work_items)) { + return; + } +#endif + + // Unpack the work item. 
+ let input_index = work_items[instance_index].input_index; +#ifdef INDIRECT + let indirect_parameters_index = work_items[instance_index].output_or_indirect_parameters_index; + + // If we're the first mesh instance in this batch, write the index of our + // `MeshInput` into the appropriate slot so that the indirect parameters + // building shader can access it. +#ifndef LATE_PHASE + if (instance_index == 0u || work_items[instance_index - 1].output_or_indirect_parameters_index != indirect_parameters_index) { + indirect_parameters_gpu_metadata[indirect_parameters_index].mesh_index = input_index; + } +#endif // LATE_PHASE + +#else // INDIRECT + let mesh_output_index = work_items[instance_index].output_or_indirect_parameters_index; +#endif // INDIRECT + + // Unpack the input matrix. + let world_from_local_affine_transpose = current_input[input_index].world_from_local; + let world_from_local = maths::affine3_to_square(world_from_local_affine_transpose); + + // Frustum cull if necessary. +#ifdef FRUSTUM_CULLING + if ((current_input[input_index].flags & MESH_FLAGS_NO_FRUSTUM_CULLING_BIT) == 0u) { + let aabb_center = mesh_culling_data[input_index].aabb_center.xyz; + let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz; + + // Do an OBB-based frustum cull. + let model_center = world_from_local * vec4(aabb_center, 1.0); + if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) { + return; + } + } +#endif + + // See whether the `MeshInputUniform` was updated on this frame. If it + // wasn't, then we know the transforms of this mesh must be identical to + // those on the previous frame, and therefore we don't need to access the + // `previous_input_index` (in fact, we can't; that index are only valid for + // one frame and will be invalid). + let timestamp = current_input[input_index].timestamp; + let mesh_changed_this_frame = timestamp == view.frame_count; + + // Look up the previous model matrix, if it could have been. 
+ let previous_input_index = current_input[input_index].previous_input_index; + var previous_world_from_local_affine_transpose: mat3x4; + if (mesh_changed_this_frame && previous_input_index != 0xffffffffu) { + previous_world_from_local_affine_transpose = + previous_input[previous_input_index].world_from_local; + } else { + previous_world_from_local_affine_transpose = world_from_local_affine_transpose; + } + let previous_world_from_local = + maths::affine3_to_square(previous_world_from_local_affine_transpose); + + // Occlusion cull if necessary. This is done by calculating the screen-space + // axis-aligned bounding box (AABB) of the mesh and testing it against the + // appropriate level of the depth pyramid (a.k.a. hierarchical Z-buffer). If + // no part of the AABB is in front of the corresponding pixel quad in the + // hierarchical Z-buffer, then this mesh must be occluded, and we can skip + // rendering it. +#ifdef OCCLUSION_CULLING + let aabb_center = mesh_culling_data[input_index].aabb_center.xyz; + let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz; + + // Initialize the AABB and the maximum depth. + let infinity = bitcast(0x7f800000u); + let neg_infinity = bitcast(0xff800000u); + var aabb = vec4(infinity, infinity, neg_infinity, neg_infinity); + var max_depth_view = neg_infinity; + + // Build up the AABB by taking each corner of this mesh's OBB, transforming + // it, and updating the AABB and depth accordingly. + for (var i = 0u; i < 8u; i += 1u) { + let local_pos = aabb_center + select( + vec3(-1.0), + vec3(1.0), + vec3((i & 1) != 0, (i & 2) != 0, (i & 4) != 0) + ) * aabb_half_extents; + +#ifdef EARLY_PHASE + // If we're in the early phase, we're testing against the last frame's + // depth buffer, so we need to use the previous frame's transform. 
+ let prev_world_pos = (previous_world_from_local * vec4(local_pos, 1.0)).xyz; + let view_pos = position_world_to_prev_view(prev_world_pos); + let ndc_pos = position_world_to_prev_ndc(prev_world_pos); +#else // EARLY_PHASE + // Otherwise, if this is the late phase, we use the current frame's + // transform. + let world_pos = (world_from_local * vec4(local_pos, 1.0)).xyz; + let view_pos = position_world_to_view(world_pos); + let ndc_pos = position_world_to_ndc(world_pos); +#endif // EARLY_PHASE + + let uv_pos = ndc_to_uv(ndc_pos.xy); + + // Update the AABB and maximum view-space depth. + aabb = vec4(min(aabb.xy, uv_pos), max(aabb.zw, uv_pos)); + max_depth_view = max(max_depth_view, view_pos.z); + } + + // Clip to the near plane to avoid the NDC depth becoming negative. +#ifdef EARLY_PHASE + max_depth_view = min(-previous_view_uniforms.clip_from_view[3][2], max_depth_view); +#else // EARLY_PHASE + max_depth_view = min(-view.clip_from_view[3][2], max_depth_view); +#endif // EARLY_PHASE + + // Figure out the depth of the occluder, and compare it to our own depth. + + let aabb_pixel_size = occlusion_culling::get_aabb_size_in_pixels(aabb, depth_pyramid); + let occluder_depth_ndc = + occlusion_culling::get_occluder_depth(aabb, aabb_pixel_size, depth_pyramid); + +#ifdef EARLY_PHASE + let max_depth_ndc = prev_view_z_to_depth_ndc(max_depth_view); +#else // EARLY_PHASE + let max_depth_ndc = view_z_to_depth_ndc(max_depth_view); +#endif + + // Are we culled out? + if (max_depth_ndc < occluder_depth_ndc) { +#ifdef EARLY_PHASE + // If this is the early phase, we need to make a note of this mesh so + // that we examine it again in the late phase, so that we handle the + // case in which a mesh that was invisible last frame became visible in + // this frame. 
+ let output_work_item_index = atomicAdd(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].work_item_count, 1u); + if (output_work_item_index % 64u == 0u) { + // Our workgroup size is 64, and the indirect parameters for the + // late mesh preprocessing phase are counted in workgroups, so if + // we're the first thread in this workgroup, bump the workgroup + // count. + atomicAdd(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].dispatch_x, 1u); + } + + // Enqueue a work item for the late prepass phase. + late_preprocess_work_items[output_work_item_index].input_index = input_index; + late_preprocess_work_items[output_work_item_index].output_or_indirect_parameters_index = + indirect_parameters_index; +#endif // EARLY_PHASE + // This mesh is culled. Skip it. + return; + } +#endif // OCCLUSION_CULLING + + // Calculate inverse transpose. + let local_from_world_transpose = transpose(maths::inverse_affine3(transpose( + world_from_local_affine_transpose))); + + // Pack inverse transpose. + let local_from_world_transpose_a = mat2x4( + vec4(local_from_world_transpose[0].xyz, local_from_world_transpose[1].x), + vec4(local_from_world_transpose[1].yz, local_from_world_transpose[2].xy)); + let local_from_world_transpose_b = local_from_world_transpose[2].z; + + // Figure out the output index. In indirect mode, this involves bumping the + // instance index in the indirect parameters metadata, which + // `build_indirect_params.wgsl` will use to generate the actual indirect + // parameters. Otherwise, this index was directly supplied to us. 
+#ifdef INDIRECT +#ifdef LATE_PHASE + let batch_output_index = atomicLoad( + &indirect_parameters_gpu_metadata[indirect_parameters_index].early_instance_count + ) + atomicAdd( + &indirect_parameters_gpu_metadata[indirect_parameters_index].late_instance_count, + 1u + ); +#else // LATE_PHASE + let batch_output_index = atomicAdd( + &indirect_parameters_gpu_metadata[indirect_parameters_index].early_instance_count, + 1u + ); +#endif // LATE_PHASE + + let mesh_output_index = + indirect_parameters_cpu_metadata[indirect_parameters_index].base_output_index + + batch_output_index; + +#endif // INDIRECT + + // Write the output. + output[mesh_output_index].world_from_local = world_from_local_affine_transpose; + output[mesh_output_index].previous_world_from_local = + previous_world_from_local_affine_transpose; + output[mesh_output_index].local_from_world_transpose_a = local_from_world_transpose_a; + output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b; + output[mesh_output_index].flags = current_input[input_index].flags; + output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; + output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; + output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; + output[mesh_output_index].material_and_lightmap_bind_group_slot = + current_input[input_index].material_and_lightmap_bind_group_slot; + output[mesh_output_index].tag = current_input[input_index].tag; +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh_types.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_types.wgsl new file mode 100644 index 0000000..4c85192 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_types.wgsl @@ -0,0 +1,47 @@ +#define_import_path bevy_pbr::mesh_types + +struct Mesh { + // Affine 4x3 matrices transposed to 3x4 + // Use bevy_render::maths::affine3_to_square to unpack + world_from_local: mat3x4, 
+ previous_world_from_local: mat3x4, + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + // Use bevy_pbr::mesh_functions::mat2x4_f32_to_mat3x3_unpack to unpack + local_from_world_transpose_a: mat2x4, + local_from_world_transpose_b: f32, + // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. + flags: u32, + lightmap_uv_rect: vec2, + // The index of the mesh's first vertex in the vertex buffer. + first_vertex_index: u32, + current_skin_index: u32, + // Low 16 bits: index of the material inside the bind group data. + // High 16 bits: index of the lightmap in the binding array. + material_and_lightmap_bind_group_slot: u32, + // User supplied index to identify the mesh instance + tag: u32, + pad: u32, +}; + +#ifdef SKINNED +struct SkinnedMesh { + data: array, 256u>, +}; +#endif + +#ifdef MORPH_TARGETS +struct MorphWeights { + weights: array, 16u>, // 16 = 64 / 4 (64 = MAX_MORPH_WEIGHTS) +}; +#endif + +// [2^0, 2^16) +const MESH_FLAGS_VISIBILITY_RANGE_INDEX_BITS: u32 = (1u << 16u) - 1u; +const MESH_FLAGS_NO_FRUSTUM_CULLING_BIT: u32 = 1u << 28u; +const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 1u << 29u; +const MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT: u32 = 1u << 30u; +// if the flag is set, the sign is positive, else it is negative +const MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT: u32 = 1u << 31u; diff --git a/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.rs b/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.rs new file mode 100644 index 0000000..e9e990f --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.rs @@ -0,0 +1,818 @@ +use std::sync::Arc; +use crate::render::{ + core_3d::ViewTransmissionTexture, + oit::{resolve::is_oit_supported, OitBuffers, OrderIndependentTransparencySettings}, + prepass::ViewPrepassTextures, + tonemapping::{ + get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts, + }, +}; 
+use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::Has, + resource::Resource, + system::{Commands, Query, Res}, + world::{FromWorld, World}, +}; +use bevy_image::BevyDefault as _; +use bevy_light::{EnvironmentMapLight, IrradianceVolume}; +use bevy_math::Vec4; +use crate::render::{ + globals::{GlobalsBuffer, GlobalsUniform}, + render_asset::RenderAssets, + render_resource::{binding_types::*, *}, + renderer::{RenderAdapter, RenderDevice}, + texture::{FallbackImage, FallbackImageMsaa, FallbackImageZero, GpuImage}, + view::{ + Msaa, RenderVisibilityRanges, ViewUniform, ViewUniforms, + VISIBILITY_RANGES_STORAGE_BUFFER_COUNT, + }, +}; +use core::{array, num::NonZero}; + +use crate::render::pbr::{ + decal::{ + self, + clustered::{ + DecalsBuffer, RenderClusteredDecals, RenderViewClusteredDecalBindGroupEntries, + }, + }, + environment_map::{self, RenderViewEnvironmentMapBindGroupEntries}, + irradiance_volume::{ + self, RenderViewIrradianceVolumeBindGroupEntries, IRRADIANCE_VOLUMES_ARE_USABLE, + }, + prepass, EnvironmentMapUniformBuffer, FogMeta, GlobalClusterableObjectMeta, + GpuClusterableObjects, GpuFog, GpuLights, LightMeta, LightProbesBuffer, LightProbesUniform, + MeshPipeline, MeshPipelineKey, RenderViewLightProbes, ScreenSpaceAmbientOcclusionResources, + ScreenSpaceReflectionsBuffer, ScreenSpaceReflectionsUniform, ShadowSamplers, + ViewClusterBindings, ViewShadowBindings, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, +}; + +#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] +use crate::render::render_resource::binding_types::texture_cube; + +#[cfg(debug_assertions)] +use {crate::render::pbr::MESH_PIPELINE_VIEW_LAYOUT_SAFE_MAX_TEXTURES, bevy_utils::once, tracing::warn}; + +#[derive(Clone)] +pub struct MeshPipelineViewLayout { + pub main_layout: BindGroupLayout, + pub binding_array_layout: BindGroupLayout, + pub empty_layout: BindGroupLayout, + + #[cfg(debug_assertions)] + pub texture_count: 
usize, +} + +bitflags::bitflags! { + /// A key that uniquely identifies a [`MeshPipelineViewLayout`]. + /// + /// Used to generate all possible layouts for the mesh pipeline in [`generate_view_layouts`], + /// so special care must be taken to not add too many flags, as the number of possible layouts + /// will grow exponentially. + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + #[repr(transparent)] + pub struct MeshPipelineViewLayoutKey: u32 { + const MULTISAMPLED = 1 << 0; + const DEPTH_PREPASS = 1 << 1; + const NORMAL_PREPASS = 1 << 2; + const MOTION_VECTOR_PREPASS = 1 << 3; + const DEFERRED_PREPASS = 1 << 4; + const OIT_ENABLED = 1 << 5; + } +} + +impl MeshPipelineViewLayoutKey { + // The number of possible layouts + pub const COUNT: usize = Self::all().bits() as usize + 1; + + /// Builds a unique label for each layout based on the flags + pub fn label(&self) -> String { + use MeshPipelineViewLayoutKey as Key; + + format!( + "mesh_view_layout{}{}{}{}{}{}", + if self.contains(Key::MULTISAMPLED) { + "_multisampled" + } else { + Default::default() + }, + if self.contains(Key::DEPTH_PREPASS) { + "_depth" + } else { + Default::default() + }, + if self.contains(Key::NORMAL_PREPASS) { + "_normal" + } else { + Default::default() + }, + if self.contains(Key::MOTION_VECTOR_PREPASS) { + "_motion" + } else { + Default::default() + }, + if self.contains(Key::DEFERRED_PREPASS) { + "_deferred" + } else { + Default::default() + }, + if self.contains(Key::OIT_ENABLED) { + "_oit" + } else { + Default::default() + }, + ) + } +} + +impl From for MeshPipelineViewLayoutKey { + fn from(value: MeshPipelineKey) -> Self { + let mut result = MeshPipelineViewLayoutKey::empty(); + + if value.msaa_samples() > 1 { + result |= MeshPipelineViewLayoutKey::MULTISAMPLED; + } + if value.contains(MeshPipelineKey::DEPTH_PREPASS) { + result |= MeshPipelineViewLayoutKey::DEPTH_PREPASS; + } + if value.contains(MeshPipelineKey::NORMAL_PREPASS) { + result |= 
MeshPipelineViewLayoutKey::NORMAL_PREPASS; + } + if value.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + result |= MeshPipelineViewLayoutKey::MOTION_VECTOR_PREPASS; + } + if value.contains(MeshPipelineKey::DEFERRED_PREPASS) { + result |= MeshPipelineViewLayoutKey::DEFERRED_PREPASS; + } + if value.contains(MeshPipelineKey::OIT_ENABLED) { + result |= MeshPipelineViewLayoutKey::OIT_ENABLED; + } + + result + } +} + +impl From for MeshPipelineViewLayoutKey { + fn from(value: Msaa) -> Self { + let mut result = MeshPipelineViewLayoutKey::empty(); + + if value.samples() > 1 { + result |= MeshPipelineViewLayoutKey::MULTISAMPLED; + } + + result + } +} + +impl From> for MeshPipelineViewLayoutKey { + fn from(value: Option<&ViewPrepassTextures>) -> Self { + let mut result = MeshPipelineViewLayoutKey::empty(); + + if let Some(prepass_textures) = value { + if prepass_textures.depth.is_some() { + result |= MeshPipelineViewLayoutKey::DEPTH_PREPASS; + } + if prepass_textures.normal.is_some() { + result |= MeshPipelineViewLayoutKey::NORMAL_PREPASS; + } + if prepass_textures.motion_vectors.is_some() { + result |= MeshPipelineViewLayoutKey::MOTION_VECTOR_PREPASS; + } + if prepass_textures.deferred.is_some() { + result |= MeshPipelineViewLayoutKey::DEFERRED_PREPASS; + } + } + + result + } +} + +pub(crate) fn buffer_layout( + buffer_binding_type: BufferBindingType, + has_dynamic_offset: bool, + min_binding_size: Option>, +) -> BindGroupLayoutEntryBuilder { + match buffer_binding_type { + BufferBindingType::Uniform => uniform_buffer_sized(has_dynamic_offset, min_binding_size), + BufferBindingType::Storage { read_only } => { + if read_only { + storage_buffer_read_only_sized(has_dynamic_offset, min_binding_size) + } else { + storage_buffer_sized(has_dynamic_offset, min_binding_size) + } + } + } +} + +/// Returns the appropriate bind group layout vec based on the parameters +fn layout_entries( + clustered_forward_buffer_binding_type: BufferBindingType, + 
visibility_ranges_buffer_binding_type: BufferBindingType, + layout_key: MeshPipelineViewLayoutKey, + render_device: &RenderDevice, + render_adapter: &RenderAdapter, +) -> [Vec; 2] { + // EnvironmentMapLight + let environment_map_entries = + environment_map::get_bind_group_layout_entries(render_device, render_adapter); + + let mut entries = DynamicBindGroupLayoutEntries::new_with_indices( + ShaderStages::FRAGMENT, + ( + // View + ( + 0, + uniform_buffer::(true).visibility(ShaderStages::VERTEX_FRAGMENT), + ), + // Lights + (1, uniform_buffer::(true)), + // Point Shadow Texture Cube Array + ( + 2, + #[cfg(all( + not(target_abi = "sim"), + any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ) + ))] + texture_cube_array(TextureSampleType::Depth), + #[cfg(any( + target_abi = "sim", + all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")) + ))] + texture_cube(TextureSampleType::Depth), + ), + // Point Shadow Texture Array Comparison Sampler + (3, sampler(SamplerBindingType::Comparison)), + // Point Shadow Texture Array Linear Sampler + #[cfg(feature = "experimental_pbr_pcss")] + (4, sampler(SamplerBindingType::Filtering)), + // Directional Shadow Texture Array + ( + 5, + #[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" + ))] + texture_2d_array(TextureSampleType::Depth), + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + texture_2d(TextureSampleType::Depth), + ), + // Directional Shadow Texture Array Comparison Sampler + (6, sampler(SamplerBindingType::Comparison)), + // Directional Shadow Texture Array Linear Sampler + #[cfg(feature = "experimental_pbr_pcss")] + (7, sampler(SamplerBindingType::Filtering)), + // PointLights + ( + 8, + buffer_layout( + clustered_forward_buffer_binding_type, + false, + Some(GpuClusterableObjects::min_size( + clustered_forward_buffer_binding_type, + )), + ), + ), + // ClusteredLightIndexLists + ( + 9, + buffer_layout( + 
clustered_forward_buffer_binding_type, + false, + Some( + ViewClusterBindings::min_size_clusterable_object_index_lists( + clustered_forward_buffer_binding_type, + ), + ), + ), + ), + // ClusterOffsetsAndCounts + ( + 10, + buffer_layout( + clustered_forward_buffer_binding_type, + false, + Some(ViewClusterBindings::min_size_cluster_offsets_and_counts( + clustered_forward_buffer_binding_type, + )), + ), + ), + // Globals + ( + 11, + uniform_buffer::(false).visibility(ShaderStages::VERTEX_FRAGMENT), + ), + // Fog + (12, uniform_buffer::(true)), + // Light probes + (13, uniform_buffer::(true)), + // Visibility ranges + ( + 14, + buffer_layout( + visibility_ranges_buffer_binding_type, + false, + Some(Vec4::min_size()), + ) + .visibility(ShaderStages::VERTEX), + ), + // Screen space reflection settings + (15, uniform_buffer::(true)), + // Screen space ambient occlusion texture + ( + 16, + texture_2d(TextureSampleType::Float { filterable: false }), + ), + (17, environment_map_entries[3]), + ), + ); + + // Tonemapping + let tonemapping_lut_entries = get_lut_bind_group_layout_entries(); + entries = entries.extend_with_indices(( + (18, tonemapping_lut_entries[0]), + (19, tonemapping_lut_entries[1]), + )); + + // Prepass + if cfg!(any(not(feature = "webgl"), not(target_arch = "wasm32"))) + || (cfg!(all(feature = "webgl", target_arch = "wasm32")) + && !layout_key.contains(MeshPipelineViewLayoutKey::MULTISAMPLED)) + { + for (entry, binding) in prepass::get_bind_group_layout_entries(layout_key) + .iter() + .zip([20, 21, 22, 23]) + { + if let Some(entry) = entry { + entries = entries.extend_with_indices(((binding as u32, *entry),)); + } + } + } + + // View Transmission Texture + entries = entries.extend_with_indices(( + ( + 24, + texture_2d(TextureSampleType::Float { filterable: true }), + ), + (25, sampler(SamplerBindingType::Filtering)), + )); + + // OIT + if layout_key.contains(MeshPipelineViewLayoutKey::OIT_ENABLED) { + // Check if we can use OIT. 
This is a hack to avoid errors on webgl -- + // the OIT plugin will warn the user that OIT is not supported on their + // platform, so we don't need to do it here. + if is_oit_supported(render_adapter, render_device, false) { + entries = entries.extend_with_indices(( + // oit_layers + (26, storage_buffer_sized(false, None)), + // oit_layer_ids, + (27, storage_buffer_sized(false, None)), + // oit_layer_count + ( + 28, + uniform_buffer::(true), + ), + )); + } + } + + let mut binding_array_entries = DynamicBindGroupLayoutEntries::new(ShaderStages::FRAGMENT); + binding_array_entries = binding_array_entries.extend_with_indices(( + (0, environment_map_entries[0]), + (1, environment_map_entries[1]), + (2, environment_map_entries[2]), + )); + + // Irradiance volumes + if IRRADIANCE_VOLUMES_ARE_USABLE { + let irradiance_volume_entries = + irradiance_volume::get_bind_group_layout_entries(render_device, render_adapter); + binding_array_entries = binding_array_entries.extend_with_indices(( + (3, irradiance_volume_entries[0]), + (4, irradiance_volume_entries[1]), + )); + } + + // Clustered decals + if let Some(clustered_decal_entries) = + decal::clustered::get_bind_group_layout_entries(render_device, render_adapter) + { + binding_array_entries = binding_array_entries.extend_with_indices(( + (5, clustered_decal_entries[0]), + (6, clustered_decal_entries[1]), + (7, clustered_decal_entries[2]), + )); + } + + [entries.to_vec(), binding_array_entries.to_vec()] +} + +/// Stores the view layouts for every combination of pipeline keys. +/// +/// This is wrapped in an [`Arc`] so that it can be efficiently cloned and +/// placed inside specializable pipeline types. 
+#[derive(Resource, Clone, Deref, DerefMut)] +pub struct MeshPipelineViewLayouts( + pub Arc<[MeshPipelineViewLayout; MeshPipelineViewLayoutKey::COUNT]>, +); + +impl FromWorld for MeshPipelineViewLayouts { + fn from_world(world: &mut World) -> Self { + // Generates all possible view layouts for the mesh pipeline, based on all combinations of + // [`MeshPipelineViewLayoutKey`] flags. + + let render_device = world.resource::(); + let render_adapter = world.resource::(); + + let clustered_forward_buffer_binding_type = render_device + .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); + let visibility_ranges_buffer_binding_type = render_device + .get_supported_read_only_binding_type(VISIBILITY_RANGES_STORAGE_BUFFER_COUNT); + + Self(Arc::new(array::from_fn(|i| { + let key = MeshPipelineViewLayoutKey::from_bits_truncate(i as u32); + let entries = layout_entries( + clustered_forward_buffer_binding_type, + visibility_ranges_buffer_binding_type, + key, + render_device, + render_adapter, + ); + #[cfg(debug_assertions)] + let texture_count: usize = entries + .iter() + .flat_map(|e| { + e.iter() + .filter(|entry| matches!(entry.ty, BindingType::Texture { .. 
})) + }) + .count(); + + MeshPipelineViewLayout { + main_layout: render_device + .create_bind_group_layout(key.label().as_str(), &entries[0]), + binding_array_layout: render_device.create_bind_group_layout( + format!("{}_binding_array", key.label()).as_str(), + &entries[1], + ), + empty_layout: render_device + .create_bind_group_layout(format!("{}_empty", key.label()).as_str(), &[]), + #[cfg(debug_assertions)] + texture_count, + } + }))) + } +} + +impl MeshPipelineViewLayouts { + pub fn get_view_layout( + &self, + layout_key: MeshPipelineViewLayoutKey, + ) -> &MeshPipelineViewLayout { + let index = layout_key.bits() as usize; + let layout = &self[index]; + + #[cfg(debug_assertions)] + if layout.texture_count > MESH_PIPELINE_VIEW_LAYOUT_SAFE_MAX_TEXTURES { + // Issue our own warning here because Naga's error message is a bit cryptic in this situation + once!(warn!("Too many textures in mesh pipeline view layout, this might cause us to hit `wgpu::Limits::max_sampled_textures_per_shader_stage` in some environments.")); + } + + layout + } +} + +/// Generates all possible view layouts for the mesh pipeline, based on all combinations of +/// [`MeshPipelineViewLayoutKey`] flags. +pub fn generate_view_layouts( + render_device: &RenderDevice, + render_adapter: &RenderAdapter, + clustered_forward_buffer_binding_type: BufferBindingType, + visibility_ranges_buffer_binding_type: BufferBindingType, +) -> [MeshPipelineViewLayout; MeshPipelineViewLayoutKey::COUNT] { + array::from_fn(|i| { + let key = MeshPipelineViewLayoutKey::from_bits_truncate(i as u32); + let entries = layout_entries( + clustered_forward_buffer_binding_type, + visibility_ranges_buffer_binding_type, + key, + render_device, + render_adapter, + ); + + #[cfg(debug_assertions)] + let texture_count: usize = entries + .iter() + .flat_map(|e| { + e.iter() + .filter(|entry| matches!(entry.ty, BindingType::Texture { .. 
})) + }) + .count(); + + MeshPipelineViewLayout { + main_layout: render_device.create_bind_group_layout(key.label().as_str(), &entries[0]), + binding_array_layout: render_device.create_bind_group_layout( + format!("{}_binding_array", key.label()).as_str(), + &entries[1], + ), + empty_layout: render_device + .create_bind_group_layout(format!("{}_empty", key.label()).as_str(), &[]), + #[cfg(debug_assertions)] + texture_count, + } + }) +} + +#[derive(Component)] +pub struct MeshViewBindGroup { + pub main: BindGroup, + pub binding_array: BindGroup, + pub empty: BindGroup, +} + +pub fn prepare_mesh_view_bind_groups( + mut commands: Commands, + (render_device, render_adapter): (Res, Res), + mesh_pipeline: Res, + shadow_samplers: Res, + (light_meta, global_light_meta): (Res, Res), + fog_meta: Res, + (view_uniforms, environment_map_uniform): (Res, Res), + views: Query<( + Entity, + &ViewShadowBindings, + &ViewClusterBindings, + &Msaa, + Option<&ScreenSpaceAmbientOcclusionResources>, + Option<&ViewPrepassTextures>, + Option<&ViewTransmissionTexture>, + &Tonemapping, + Option<&RenderViewLightProbes>, + Option<&RenderViewLightProbes>, + Has, + )>, + (images, mut fallback_images, fallback_image, fallback_image_zero): ( + Res>, + FallbackImageMsaa, + Res, + Res, + ), + globals_buffer: Res, + tonemapping_luts: Res, + light_probes_buffer: Res, + visibility_ranges: Res, + ssr_buffer: Res, + oit_buffers: Res, + (decals_buffer, render_decals): (Res, Res), +) { + if let ( + Some(view_binding), + Some(light_binding), + Some(clusterable_objects_binding), + Some(globals), + Some(fog_binding), + Some(light_probes_binding), + Some(visibility_ranges_buffer), + Some(ssr_binding), + Some(environment_map_binding), + ) = ( + view_uniforms.uniforms.binding(), + light_meta.view_gpu_lights.binding(), + global_light_meta.gpu_clusterable_objects.binding(), + globals_buffer.buffer.binding(), + fog_meta.gpu_fogs.binding(), + light_probes_buffer.binding(), + visibility_ranges.buffer().buffer(), + 
ssr_buffer.binding(), + environment_map_uniform.binding(), + ) { + for ( + entity, + shadow_bindings, + cluster_bindings, + msaa, + ssao_resources, + prepass_textures, + transmission_texture, + tonemapping, + render_view_environment_maps, + render_view_irradiance_volumes, + has_oit, + ) in &views + { + let fallback_ssao = fallback_images + .image_for_samplecount(1, TextureFormat::bevy_default()) + .texture_view + .clone(); + let ssao_view = ssao_resources + .map(|t| &t.screen_space_ambient_occlusion_texture.default_view) + .unwrap_or(&fallback_ssao); + + let mut layout_key = MeshPipelineViewLayoutKey::from(*msaa) + | MeshPipelineViewLayoutKey::from(prepass_textures); + if has_oit { + layout_key |= MeshPipelineViewLayoutKey::OIT_ENABLED; + } + + let layout = mesh_pipeline.get_view_layout(layout_key); + + let mut entries = DynamicBindGroupEntries::new_with_indices(( + (0, view_binding.clone()), + (1, light_binding.clone()), + (2, &shadow_bindings.point_light_depth_texture_view), + (3, &shadow_samplers.point_light_comparison_sampler), + #[cfg(feature = "experimental_pbr_pcss")] + (4, &shadow_samplers.point_light_linear_sampler), + (5, &shadow_bindings.directional_light_depth_texture_view), + (6, &shadow_samplers.directional_light_comparison_sampler), + #[cfg(feature = "experimental_pbr_pcss")] + (7, &shadow_samplers.directional_light_linear_sampler), + (8, clusterable_objects_binding.clone()), + ( + 9, + cluster_bindings + .clusterable_object_index_lists_binding() + .unwrap(), + ), + (10, cluster_bindings.offsets_and_counts_binding().unwrap()), + (11, globals.clone()), + (12, fog_binding.clone()), + (13, light_probes_binding.clone()), + (14, visibility_ranges_buffer.as_entire_binding()), + (15, ssr_binding.clone()), + (16, ssao_view), + )); + + entries = entries.extend_with_indices(((17, environment_map_binding.clone()),)); + + let lut_bindings = + get_lut_bindings(&images, &tonemapping_luts, tonemapping, &fallback_image); + entries = entries.extend_with_indices(((18, 
lut_bindings.0), (19, lut_bindings.1))); + + // When using WebGL, we can't have a depth texture with multisampling + let prepass_bindings; + if cfg!(any(not(feature = "webgl"), not(target_arch = "wasm32"))) || msaa.samples() == 1 + { + prepass_bindings = prepass::get_bindings(prepass_textures); + for (binding, index) in prepass_bindings + .iter() + .map(Option::as_ref) + .zip([20, 21, 22, 23]) + .flat_map(|(b, i)| b.map(|b| (b, i))) + { + entries = entries.extend_with_indices(((index, binding),)); + } + }; + + let transmission_view = transmission_texture + .map(|transmission| &transmission.view) + .unwrap_or(&fallback_image_zero.texture_view); + + let transmission_sampler = transmission_texture + .map(|transmission| &transmission.sampler) + .unwrap_or(&fallback_image_zero.sampler); + + entries = + entries.extend_with_indices(((24, transmission_view), (25, transmission_sampler))); + + if has_oit + && let ( + Some(oit_layers_binding), + Some(oit_layer_ids_binding), + Some(oit_settings_binding), + ) = ( + oit_buffers.layers.binding(), + oit_buffers.layer_ids.binding(), + oit_buffers.settings.binding(), + ) + { + entries = entries.extend_with_indices(( + (26, oit_layers_binding.clone()), + (27, oit_layer_ids_binding.clone()), + (28, oit_settings_binding.clone()), + )); + } + + let mut entries_binding_array = DynamicBindGroupEntries::new(); + + let environment_map_bind_group_entries = RenderViewEnvironmentMapBindGroupEntries::get( + render_view_environment_maps, + &images, + &fallback_image, + &render_device, + &render_adapter, + ); + match environment_map_bind_group_entries { + RenderViewEnvironmentMapBindGroupEntries::Single { + diffuse_texture_view, + specular_texture_view, + sampler, + } => { + entries_binding_array = entries_binding_array.extend_with_indices(( + (0, diffuse_texture_view), + (1, specular_texture_view), + (2, sampler), + )); + } + RenderViewEnvironmentMapBindGroupEntries::Multiple { + ref diffuse_texture_views, + ref specular_texture_views, + 
sampler, + } => { + entries_binding_array = entries_binding_array.extend_with_indices(( + (0, diffuse_texture_views.as_slice()), + (1, specular_texture_views.as_slice()), + (2, sampler), + )); + } + } + + let irradiance_volume_bind_group_entries = if IRRADIANCE_VOLUMES_ARE_USABLE { + Some(RenderViewIrradianceVolumeBindGroupEntries::get( + render_view_irradiance_volumes, + &images, + &fallback_image, + &render_device, + &render_adapter, + )) + } else { + None + }; + + match irradiance_volume_bind_group_entries { + Some(RenderViewIrradianceVolumeBindGroupEntries::Single { + texture_view, + sampler, + }) => { + entries_binding_array = entries_binding_array + .extend_with_indices(((3, texture_view), (4, sampler))); + } + Some(RenderViewIrradianceVolumeBindGroupEntries::Multiple { + ref texture_views, + sampler, + }) => { + entries_binding_array = entries_binding_array + .extend_with_indices(((3, texture_views.as_slice()), (4, sampler))); + } + None => {} + } + + let decal_bind_group_entries = RenderViewClusteredDecalBindGroupEntries::get( + &render_decals, + &decals_buffer, + &images, + &fallback_image, + &render_device, + &render_adapter, + ); + + // Add the decal bind group entries. 
+ if let Some(ref render_view_decal_bind_group_entries) = decal_bind_group_entries { + entries_binding_array = entries_binding_array.extend_with_indices(( + // `clustered_decals` + ( + 5, + render_view_decal_bind_group_entries + .decals + .as_entire_binding(), + ), + // `clustered_decal_textures` + ( + 6, + render_view_decal_bind_group_entries + .texture_views + .as_slice(), + ), + // `clustered_decal_sampler` + (7, render_view_decal_bind_group_entries.sampler), + )); + } + + commands.entity(entity).insert(MeshViewBindGroup { + main: render_device.create_bind_group( + "mesh_view_bind_group", + &layout.main_layout, + &entries, + ), + binding_array: render_device.create_bind_group( + "mesh_view_bind_group_binding_array", + &layout.binding_array_layout, + &entries_binding_array, + ), + empty: render_device.create_bind_group( + "mesh_view_bind_group_empty", + &layout.empty_layout, + &[], + ), + }); + } + } +} diff --git a/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.wgsl new file mode 100644 index 0000000..0f650e6 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_view_bindings.wgsl @@ -0,0 +1,119 @@ +#define_import_path bevy_pbr::mesh_view_bindings + +#import bevy_pbr::mesh_view_types as types +#import bevy_render::{ + view::View, + globals::Globals, +} + +@group(0) @binding(0) var view: View; +@group(0) @binding(1) var lights: types::Lights; +#ifdef NO_CUBE_ARRAY_TEXTURES_SUPPORT +@group(0) @binding(2) var point_shadow_textures: texture_depth_cube; +#else +@group(0) @binding(2) var point_shadow_textures: texture_depth_cube_array; +#endif +@group(0) @binding(3) var point_shadow_textures_comparison_sampler: sampler_comparison; +#ifdef PCSS_SAMPLERS_AVAILABLE +@group(0) @binding(4) var point_shadow_textures_linear_sampler: sampler; +#endif // PCSS_SAMPLERS_AVAILABLE +#ifdef NO_ARRAY_TEXTURES_SUPPORT +@group(0) @binding(5) var directional_shadow_textures: texture_depth_2d; 
+#else +@group(0) @binding(5) var directional_shadow_textures: texture_depth_2d_array; +#endif +@group(0) @binding(6) var directional_shadow_textures_comparison_sampler: sampler_comparison; +#ifdef PCSS_SAMPLERS_AVAILABLE +@group(0) @binding(7) var directional_shadow_textures_linear_sampler: sampler; +#endif // PCSS_SAMPLERS_AVAILABLE + +#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3 +@group(0) @binding(8) var clusterable_objects: types::ClusterableObjects; +@group(0) @binding(9) var clusterable_object_index_lists: types::ClusterLightIndexLists; +@group(0) @binding(10) var cluster_offsets_and_counts: types::ClusterOffsetsAndCounts; +#else +@group(0) @binding(8) var clusterable_objects: types::ClusterableObjects; +@group(0) @binding(9) var clusterable_object_index_lists: types::ClusterLightIndexLists; +@group(0) @binding(10) var cluster_offsets_and_counts: types::ClusterOffsetsAndCounts; +#endif + +@group(0) @binding(11) var globals: Globals; +@group(0) @binding(12) var fog: types::Fog; +@group(0) @binding(13) var light_probes: types::LightProbes; + +const VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE: u32 = 64u; +#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 6 +@group(0) @binding(14) var visibility_ranges: array>; +#else +@group(0) @binding(14) var visibility_ranges: array, VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE>; +#endif + +@group(0) @binding(15) var ssr_settings: types::ScreenSpaceReflectionsSettings; +@group(0) @binding(16) var screen_space_ambient_occlusion_texture: texture_2d; +@group(0) @binding(17) var environment_map_uniform: types::EnvironmentMapUniform; + +// NB: If you change these, make sure to update `tonemapping_shared.wgsl` too. 
+@group(0) @binding(18) var dt_lut_texture: texture_3d; +@group(0) @binding(19) var dt_lut_sampler: sampler; + +#ifdef MULTISAMPLED +#ifdef DEPTH_PREPASS +@group(0) @binding(20) var depth_prepass_texture: texture_depth_multisampled_2d; +#endif // DEPTH_PREPASS +#ifdef NORMAL_PREPASS +@group(0) @binding(21) var normal_prepass_texture: texture_multisampled_2d; +#endif // NORMAL_PREPASS +#ifdef MOTION_VECTOR_PREPASS +@group(0) @binding(22) var motion_vector_prepass_texture: texture_multisampled_2d; +#endif // MOTION_VECTOR_PREPASS + +#else // MULTISAMPLED + +#ifdef DEPTH_PREPASS +@group(0) @binding(20) var depth_prepass_texture: texture_depth_2d; +#endif // DEPTH_PREPASS +#ifdef NORMAL_PREPASS +@group(0) @binding(21) var normal_prepass_texture: texture_2d; +#endif // NORMAL_PREPASS +#ifdef MOTION_VECTOR_PREPASS +@group(0) @binding(22) var motion_vector_prepass_texture: texture_2d; +#endif // MOTION_VECTOR_PREPASS + +#endif // MULTISAMPLED + +#ifdef DEFERRED_PREPASS +@group(0) @binding(23) var deferred_prepass_texture: texture_2d; +#endif // DEFERRED_PREPASS + +@group(0) @binding(24) var view_transmission_texture: texture_2d; +@group(0) @binding(25) var view_transmission_sampler: sampler; + +#ifdef OIT_ENABLED +@group(0) @binding(26) var oit_layers: array>; +@group(0) @binding(27) var oit_layer_ids: array>; +@group(0) @binding(28) var oit_settings: types::OrderIndependentTransparencySettings; +#endif // OIT_ENABLED + +#ifdef MULTIPLE_LIGHT_PROBES_IN_ARRAY +@group(1) @binding(0) var diffuse_environment_maps: binding_array, 8u>; +@group(1) @binding(1) var specular_environment_maps: binding_array, 8u>; +#else +@group(1) @binding(0) var diffuse_environment_map: texture_cube; +@group(1) @binding(1) var specular_environment_map: texture_cube; +#endif +@group(1) @binding(2) var environment_map_sampler: sampler; + +#ifdef IRRADIANCE_VOLUMES_ARE_USABLE +#ifdef MULTIPLE_LIGHT_PROBES_IN_ARRAY +@group(1) @binding(3) var irradiance_volumes: binding_array, 8u>; +#else +@group(1) 
@binding(3) var irradiance_volume: texture_3d; +#endif +@group(1) @binding(4) var irradiance_volume_sampler: sampler; +#endif + +#ifdef CLUSTERED_DECALS_ARE_USABLE +@group(1) @binding(5) var clustered_decals: types::ClusteredDecals; +@group(1) @binding(6) var clustered_decal_textures: binding_array, 8u>; +@group(1) @binding(7) var clustered_decal_sampler: sampler; +#endif // CLUSTERED_DECALS_ARE_USABLE diff --git a/crates/libmarathon/src/render/pbr/render/mesh_view_types.wgsl b/crates/libmarathon/src/render/pbr/render/mesh_view_types.wgsl new file mode 100644 index 0000000..19f87b3 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mesh_view_types.wgsl @@ -0,0 +1,187 @@ +#define_import_path bevy_pbr::mesh_view_types + +struct ClusterableObject { + // For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3] + // For spot lights: the direction (x,z), spot_scale and spot_offset + light_custom_data: vec4, + color_inverse_square_range: vec4, + position_radius: vec4, + // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. + flags: u32, + shadow_depth_bias: f32, + shadow_normal_bias: f32, + spot_light_tan_angle: f32, + soft_shadow_size: f32, + shadow_map_near_z: f32, + decal_index: u32, + pad: f32, +}; + +const POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT: u32 = 1u << 0u; +const POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE: u32 = 1u << 1u; +const POINT_LIGHT_FLAGS_VOLUMETRIC_BIT: u32 = 1u << 2u; +const POINT_LIGHT_FLAGS_AFFECTS_LIGHTMAPPED_MESH_DIFFUSE_BIT: u32 = 1u << 3u; + +struct DirectionalCascade { + clip_from_world: mat4x4, + texel_size: f32, + far_bound: f32, +} + +struct DirectionalLight { + cascades: array, + color: vec4, + direction_to_light: vec3, + // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. 
+ flags: u32, + soft_shadow_size: f32, + shadow_depth_bias: f32, + shadow_normal_bias: f32, + num_cascades: u32, + cascades_overlap_proportion: f32, + depth_texture_base_index: u32, + decal_index: u32, + sun_disk_angular_size: f32, + sun_disk_intensity: f32, +}; + +const DIRECTIONAL_LIGHT_FLAGS_SHADOWS_ENABLED_BIT: u32 = 1u << 0u; +const DIRECTIONAL_LIGHT_FLAGS_VOLUMETRIC_BIT: u32 = 1u << 1u; +const DIRECTIONAL_LIGHT_FLAGS_AFFECTS_LIGHTMAPPED_MESH_DIFFUSE_BIT: u32 = 1u << 2u; + +struct Lights { + // NOTE: this array size must be kept in sync with the constants defined in bevy_pbr/src/render/light.rs + directional_lights: array, + ambient_color: vec4, + // x/y/z dimensions and n_clusters in w + cluster_dimensions: vec4, + // xy are vec2(cluster_dimensions.xy) / vec2(view.width, view.height) + // + // For perspective projections: + // z is cluster_dimensions.z / log(far / near) + // w is cluster_dimensions.z * log(near) / log(far / near) + // + // For orthographic projections: + // NOTE: near and far are +ve but -z is infront of the camera + // z is -near + // w is cluster_dimensions.z / (-far - -near) + cluster_factors: vec4, + n_directional_lights: u32, + spot_light_shadowmap_offset: i32, + ambient_light_affects_lightmapped_meshes: u32 +}; + +struct Fog { + base_color: vec4, + directional_light_color: vec4, + // `be` and `bi` are allocated differently depending on the fog mode + // + // For Linear Fog: + // be.x = start, be.y = end + // For Exponential and ExponentialSquared Fog: + // be.x = density + // For Atmospheric Fog: + // be = per-channel extinction density + // bi = per-channel inscattering density + be: vec3, + directional_light_exponent: f32, + bi: vec3, + mode: u32, +} + +// Important: These must be kept in sync with `fog.rs` +const FOG_MODE_OFF: u32 = 0u; +const FOG_MODE_LINEAR: u32 = 1u; +const FOG_MODE_EXPONENTIAL: u32 = 2u; +const FOG_MODE_EXPONENTIAL_SQUARED: u32 = 3u; +const FOG_MODE_ATMOSPHERIC: u32 = 4u; + +#if AVAILABLE_STORAGE_BUFFER_BINDINGS 
>= 3 +struct ClusterableObjects { + data: array, +}; +struct ClusterLightIndexLists { + data: array, +}; +struct ClusterOffsetsAndCounts { + data: array, 2>>, +}; +#else +struct ClusterableObjects { + data: array, +}; +struct ClusterLightIndexLists { + // each u32 contains 4 u8 indices into the ClusterableObjects array + data: array, 1024u>, +}; +struct ClusterOffsetsAndCounts { + // each u32 contains a 24-bit index into ClusterLightIndexLists in the high 24 bits + // and an 8-bit count of the number of lights in the low 8 bits + data: array, 1024u>, +}; +#endif + +struct LightProbe { + // This is stored as the transpose in order to save space in this structure. + // It'll be transposed in the `environment_map_light` function. + light_from_world_transposed: mat3x4, + cubemap_index: i32, + intensity: f32, + // Whether this light probe contributes diffuse light to lightmapped meshes. + affects_lightmapped_mesh_diffuse: u32, +}; + +struct LightProbes { + // This must match `MAX_VIEW_REFLECTION_PROBES` on the Rust side. + reflection_probes: array, + irradiance_volumes: array, + reflection_probe_count: i32, + irradiance_volume_count: i32, + // The index of the view environment map cubemap binding, or -1 if there's + // no such cubemap. + view_cubemap_index: i32, + // The smallest valid mipmap level for the specular environment cubemap + // associated with the view. + smallest_specular_mip_level_for_view: u32, + // The intensity of the environment map associated with the view. + intensity_for_view: f32, + // Whether the environment map attached to the view affects the diffuse + // lighting for lightmapped meshes. + view_environment_map_affects_lightmapped_mesh_diffuse: u32, +}; + +// Settings for screen space reflections. +// +// For more information on these settings, see the documentation for +// `bevy_pbr::ssr::ScreenSpaceReflections`. 
+struct ScreenSpaceReflectionsSettings { + perceptual_roughness_threshold: f32, + thickness: f32, + linear_steps: u32, + linear_march_exponent: f32, + bisection_steps: u32, + use_secant: u32, +}; + +struct EnvironmentMapUniform { + // Transformation matrix for the environment cubemaps in world space. + transform: mat4x4, +}; + +// Shader version of the order independent transparency settings component. +struct OrderIndependentTransparencySettings { + layers_count: i32, + alpha_threshold: f32, +}; + +struct ClusteredDecal { + local_from_world: mat4x4, + image_index: i32, + tag: u32, + pad_a: u32, + pad_b: u32, +} + +struct ClusteredDecals { + decals: array, +} diff --git a/crates/libmarathon/src/render/pbr/render/mod.rs b/crates/libmarathon/src/render/pbr/render/mod.rs new file mode 100644 index 0000000..6a29823 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/mod.rs @@ -0,0 +1,17 @@ +mod fog; +mod gpu_preprocess; +mod light; +pub(crate) mod mesh; +mod mesh_bindings; +mod mesh_view_bindings; +mod morph; +pub(crate) mod skin; + +pub use fog::*; +pub use gpu_preprocess::*; +pub use light::*; +pub use mesh::*; +pub use mesh_bindings::MeshLayouts; +pub use mesh_view_bindings::*; +pub use morph::*; +pub use skin::{extract_skins, prepare_skins, skins_use_uniform_buffers, SkinUniforms, MAX_JOINTS}; diff --git a/crates/libmarathon/src/render/pbr/render/morph.rs b/crates/libmarathon/src/render/pbr/render/morph.rs new file mode 100644 index 0000000..e33af5a --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/morph.rs @@ -0,0 +1,150 @@ +use core::{iter, mem}; + +use bevy_camera::visibility::ViewVisibility; +use bevy_ecs::prelude::*; +use bevy_mesh::morph::{MeshMorphWeights, MAX_MORPH_WEIGHTS}; +use crate::render::sync_world::MainEntityHashMap; +use crate::render::{ + batching::NoAutomaticBatching, + render_resource::{BufferUsages, RawBufferVec}, + renderer::{RenderDevice, RenderQueue}, + Extract, +}; +use bytemuck::NoUninit; + +#[derive(Component)] +pub 
struct MorphIndex { + pub index: u32, +} + +/// Maps each mesh affected by morph targets to the applicable offset within the +/// [`MorphUniforms`] buffer. +/// +/// We store both the current frame's mapping and the previous frame's mapping +/// for the purposes of motion vector calculation. +#[derive(Default, Resource)] +pub struct MorphIndices { + /// Maps each entity with a morphed mesh to the appropriate offset within + /// [`MorphUniforms::current_buffer`]. + pub current: MainEntityHashMap, + + /// Maps each entity with a morphed mesh to the appropriate offset within + /// [`MorphUniforms::prev_buffer`]. + pub prev: MainEntityHashMap, +} + +/// The GPU buffers containing morph weights for all meshes with morph targets. +/// +/// This is double-buffered: we store the weights of the previous frame in +/// addition to those of the current frame. This is for motion vector +/// calculation. Every frame, we swap buffers and reuse the morph target weight +/// buffer from two frames ago for the current frame. +#[derive(Resource)] +pub struct MorphUniforms { + /// The morph weights for the current frame. + pub current_buffer: RawBufferVec, + /// The morph weights for the previous frame. + pub prev_buffer: RawBufferVec, +} + +impl Default for MorphUniforms { + fn default() -> Self { + Self { + current_buffer: RawBufferVec::new(BufferUsages::UNIFORM), + prev_buffer: RawBufferVec::new(BufferUsages::UNIFORM), + } + } +} + +pub fn prepare_morphs( + render_device: Res, + render_queue: Res, + mut uniform: ResMut, +) { + if uniform.current_buffer.is_empty() { + return; + } + let len = uniform.current_buffer.len(); + uniform.current_buffer.reserve(len, &render_device); + uniform + .current_buffer + .write_buffer(&render_device, &render_queue); + + // We don't need to write `uniform.prev_buffer` because we already wrote it + // last frame, and the data should still be on the GPU. 
+} + +const fn can_align(step: usize, target: usize) -> bool { + step.is_multiple_of(target) || target.is_multiple_of(step) +} + +const WGPU_MIN_ALIGN: usize = 256; + +/// Align a [`RawBufferVec`] to `N` bytes by padding the end with `T::default()` values. +fn add_to_alignment(buffer: &mut RawBufferVec) { + let n = WGPU_MIN_ALIGN; + let t_size = size_of::(); + if !can_align(n, t_size) { + // This panic is stripped at compile time, due to n, t_size and can_align being const + panic!( + "RawBufferVec should contain only types with a size multiple or divisible by {n}, \ + {} has a size of {t_size}, which is neither multiple or divisible by {n}", + core::any::type_name::() + ); + } + + let buffer_size = buffer.len(); + let byte_size = t_size * buffer_size; + let bytes_over_n = byte_size % n; + if bytes_over_n == 0 { + return; + } + let bytes_to_add = n - bytes_over_n; + let ts_to_add = bytes_to_add / t_size; + buffer.extend(iter::repeat_with(T::default).take(ts_to_add)); +} + +// Notes on implementation: see comment on top of the extract_skins system in skin module. +// This works similarly, but for `f32` instead of `Mat4` +pub fn extract_morphs( + morph_indices: ResMut, + uniform: ResMut, + query: Extract>, +) { + // Borrow check workaround. + let (morph_indices, uniform) = (morph_indices.into_inner(), uniform.into_inner()); + + // Swap buffers. We need to keep the previous frame's buffer around for the + // purposes of motion vector computation. 
+ mem::swap(&mut morph_indices.current, &mut morph_indices.prev); + mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); + morph_indices.current.clear(); + uniform.current_buffer.clear(); + + for (entity, view_visibility, morph_weights) in &query { + if !view_visibility.get() { + continue; + } + let start = uniform.current_buffer.len(); + let weights = morph_weights.weights(); + let legal_weights = weights.iter().take(MAX_MORPH_WEIGHTS).copied(); + uniform.current_buffer.extend(legal_weights); + add_to_alignment::(&mut uniform.current_buffer); + + let index = (start * size_of::()) as u32; + morph_indices + .current + .insert(entity.into(), MorphIndex { index }); + } +} + +// NOTE: Because morph targets require per-morph target texture bindings, they cannot +// currently be batched. +pub fn no_automatic_morph_batching( + mut commands: Commands, + query: Query, Without)>, +) { + for entity in &query { + commands.entity(entity).try_insert(NoAutomaticBatching); + } +} diff --git a/crates/libmarathon/src/render/pbr/render/morph.wgsl b/crates/libmarathon/src/render/pbr/render/morph.wgsl new file mode 100644 index 0000000..6689d68 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/morph.wgsl @@ -0,0 +1,52 @@ +#define_import_path bevy_pbr::morph + +#ifdef MORPH_TARGETS + +#import bevy_pbr::mesh_types::MorphWeights; + +@group(2) @binding(2) var morph_weights: MorphWeights; +@group(2) @binding(3) var morph_targets: texture_3d; +@group(2) @binding(7) var prev_morph_weights: MorphWeights; + +// NOTE: Those are the "hardcoded" values found in `MorphAttributes` struct +// in crates/bevy_render/src/mesh/morph/visitors.rs +// In an ideal world, the offsets are established dynamically and passed as #defines +// to the shader, but it's out of scope for the initial implementation of morph targets. 
+const position_offset: u32 = 0u; +const normal_offset: u32 = 3u; +const tangent_offset: u32 = 6u; +const total_component_count: u32 = 9u; + +fn layer_count() -> u32 { + let dimensions = textureDimensions(morph_targets); + return u32(dimensions.z); +} +fn component_texture_coord(vertex_index: u32, component_offset: u32) -> vec2 { + let width = u32(textureDimensions(morph_targets).x); + let component_index = total_component_count * vertex_index + component_offset; + return vec2(component_index % width, component_index / width); +} +fn weight_at(weight_index: u32) -> f32 { + let i = weight_index; + return morph_weights.weights[i / 4u][i % 4u]; +} +fn prev_weight_at(weight_index: u32) -> f32 { + let i = weight_index; + return prev_morph_weights.weights[i / 4u][i % 4u]; +} +fn morph_pixel(vertex: u32, component: u32, weight: u32) -> f32 { + let coord = component_texture_coord(vertex, component); + // Due to https://gpuweb.github.io/gpuweb/wgsl/#texel-formats + // While the texture stores a f32, the textureLoad returns a vec4<>, where + // only the first component is set. + return textureLoad(morph_targets, vec3(coord, weight), 0).r; +} +fn morph(vertex_index: u32, component_offset: u32, weight_index: u32) -> vec3 { + return vec3( + morph_pixel(vertex_index, component_offset, weight_index), + morph_pixel(vertex_index, component_offset + 1u, weight_index), + morph_pixel(vertex_index, component_offset + 2u, weight_index), + ); +} + +#endif // MORPH_TARGETS diff --git a/crates/libmarathon/src/render/pbr/render/occlusion_culling.wgsl b/crates/libmarathon/src/render/pbr/render/occlusion_culling.wgsl new file mode 100644 index 0000000..1be999c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/occlusion_culling.wgsl @@ -0,0 +1,30 @@ +// Occlusion culling utility functions. 
+ +#define_import_path bevy_pbr::occlusion_culling + +fn get_aabb_size_in_pixels(aabb: vec4, depth_pyramid: texture_2d) -> vec2 { + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); + let aabb_width_pixels = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + let aabb_height_pixels = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + return vec2(aabb_width_pixels, aabb_height_pixels); +} + +fn get_occluder_depth( + aabb: vec4, + aabb_pixel_size: vec2, + depth_pyramid: texture_2d +) -> f32 { + let aabb_width_pixels = aabb_pixel_size.x; + let aabb_height_pixels = aabb_pixel_size.y; + + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); + let depth_level = max(0, i32(ceil(log2(max(aabb_width_pixels, aabb_height_pixels))))); // TODO: Naga doesn't like this being a u32 + let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); + let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); + + let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; + let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; + let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; + let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; + return min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); +} diff --git a/crates/libmarathon/src/render/pbr/render/parallax_mapping.wgsl b/crates/libmarathon/src/render/pbr/render/parallax_mapping.wgsl new file mode 100644 index 0000000..9005734 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/parallax_mapping.wgsl @@ -0,0 +1,139 @@ +#define_import_path bevy_pbr::parallax_mapping + +#import bevy_render::bindless::{bindless_samplers_filtering, bindless_textures_2d} + +#import bevy_pbr::{ + pbr_bindings::{depth_map_texture, depth_map_sampler}, + mesh_bindings::mesh +} + +#ifdef BINDLESS +#import bevy_pbr::pbr_bindings::material_indices 
+#endif // BINDLESS + +fn sample_depth_map(uv: vec2, material_bind_group_slot: u32) -> f32 { + // We use `textureSampleLevel` over `textureSample` because the wgpu DX12 + // backend (Fxc) panics when using "gradient instructions" inside a loop. + // It results in the whole loop being unrolled by the shader compiler, + // which it can't do because the upper limit of the loop in steep parallax + // mapping is a variable set by the user. + // The "gradient instructions" comes from `textureSample` computing MIP level + // based on UV derivative. With `textureSampleLevel`, we provide ourselves + // the MIP level, so no gradient instructions are used, and we can use + // sample_depth_map in our loop. + // See https://stackoverflow.com/questions/56581141/direct3d11-gradient-instruction-used-in-a-loop-with-varying-iteration-forcing + return textureSampleLevel( +#ifdef BINDLESS + bindless_textures_2d[material_indices[material_bind_group_slot].depth_map_texture], + bindless_samplers_filtering[material_indices[material_bind_group_slot].depth_map_sampler], +#else // BINDLESS + depth_map_texture, + depth_map_sampler, +#endif // BINDLESS + uv, + 0.0 + ).r; +} + +// An implementation of parallax mapping, see https://en.wikipedia.org/wiki/Parallax_mapping +// Code derived from: https://web.archive.org/web/20150419215321/http://sunandblackcat.com/tipFullView.php?l=eng&topicid=28 +fn parallaxed_uv( + depth_scale: f32, + max_layer_count: f32, + max_steps: u32, + // The original interpolated uv + original_uv: vec2, + // The vector from the camera to the fragment at the surface in tangent space + Vt: vec3, + material_bind_group_slot: u32, +) -> vec2 { + if max_layer_count < 1.0 { + return original_uv; + } + var uv = original_uv; + + // Steep Parallax Mapping + // ====================== + // Split the depth map into `layer_count` layers. 
+ // When Vt hits the surface of the mesh (excluding depth displacement), + // if the depth is not below or on surface including depth displacement (textureSample), then + // look forward (+= delta_uv) on depth texture according to + // Vt and distance between hit surface and depth map surface, + // repeat until below the surface. + // + // Where `layer_count` is interpolated between `1.0` and + // `max_layer_count` according to the steepness of Vt. + + let view_steepness = abs(Vt.z); + // We mix with minimum value 1.0 because otherwise, + // with 0.0, we get a division by zero in surfaces parallel to viewport, + // resulting in a singularity. + let layer_count = mix(max_layer_count, 1.0, view_steepness); + let layer_depth = 1.0 / layer_count; + var delta_uv = depth_scale * layer_depth * Vt.xy * vec2(1.0, -1.0) / view_steepness; + + var current_layer_depth = 0.0; + var texture_depth = sample_depth_map(uv, material_bind_group_slot); + + // texture_depth > current_layer_depth means the depth map depth is deeper + // than the depth the ray would be at this UV offset so the ray has not + // intersected the surface + for (var i: i32 = 0; texture_depth > current_layer_depth && i <= i32(layer_count); i++) { + current_layer_depth += layer_depth; + uv += delta_uv; + texture_depth = sample_depth_map(uv, material_bind_group_slot); + } + +#ifdef RELIEF_MAPPING + // Relief Mapping + // ============== + // "Refine" the rough result from Steep Parallax Mapping + // with a **binary search** between the layer selected by steep parallax + // and the next one to find a point closer to the depth map surface. + // This reduces the jaggy step artifacts from steep parallax mapping. 
+ + delta_uv *= 0.5; + var delta_depth = 0.5 * layer_depth; + + uv -= delta_uv; + current_layer_depth -= delta_depth; + + for (var i: u32 = 0u; i < max_steps; i++) { + texture_depth = sample_depth_map(uv, material_bind_group_slot); + + // Halve the deltas for the next step + delta_uv *= 0.5; + delta_depth *= 0.5; + + // Step based on whether the current depth is above or below the depth map + if (texture_depth > current_layer_depth) { + uv += delta_uv; + current_layer_depth += delta_depth; + } else { + uv -= delta_uv; + current_layer_depth -= delta_depth; + } + } +#else + // Parallax Occlusion mapping + // ========================== + // "Refine" Steep Parallax Mapping by interpolating between the + // previous layer's depth and the computed layer depth. + // Only requires a single lookup, unlike Relief Mapping, but + // may skip small details and result in writhing material artifacts. + let previous_uv = uv - delta_uv; + let next_depth = texture_depth - current_layer_depth; + let previous_depth = sample_depth_map(previous_uv, material_bind_group_slot) - + current_layer_depth + layer_depth; + + let weight = next_depth / (next_depth - previous_depth); + + uv = mix(uv, previous_uv, weight); + + current_layer_depth += mix(next_depth, previous_depth, weight); +#endif + + // Note: `current_layer_depth` is not returned, but may be useful + // for light computation later on in future improvements of the pbr shader. 
+ return uv; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr.wgsl b/crates/libmarathon/src/render/pbr/render/pbr.wgsl new file mode 100644 index 0000000..1722ab9 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr.wgsl @@ -0,0 +1,107 @@ +#import bevy_pbr::{ + pbr_types, + pbr_functions::alpha_discard, + pbr_fragment::pbr_input_from_standard_material, + decal::clustered::apply_decal_base_color, +} + +#ifdef PREPASS_PIPELINE +#import bevy_pbr::{ + prepass_io::{VertexOutput, FragmentOutput}, + pbr_deferred_functions::deferred_output, +} +#else +#import bevy_pbr::{ + forward_io::{VertexOutput, FragmentOutput}, + pbr_functions, + pbr_functions::{apply_pbr_lighting, main_pass_post_lighting_processing}, + pbr_types::STANDARD_MATERIAL_FLAGS_UNLIT_BIT, +} +#endif + +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::resolve_vertex_output +#endif + +#ifdef OIT_ENABLED +#import bevy_core_pipeline::oit::oit_draw +#endif // OIT_ENABLED + +#ifdef FORWARD_DECAL +#import bevy_pbr::decal::forward::get_forward_decal_info +#endif + +@fragment +fn fragment( +#ifdef MESHLET_MESH_MATERIAL_PASS + @builtin(position) frag_coord: vec4, +#else + vertex_output: VertexOutput, + @builtin(front_facing) is_front: bool, +#endif +) -> FragmentOutput { +#ifdef MESHLET_MESH_MATERIAL_PASS + let vertex_output = resolve_vertex_output(frag_coord); + let is_front = true; +#endif + + var in = vertex_output; + + // If we're in the crossfade section of a visibility range, conditionally + // discard the fragment according to the visibility pattern. 
+#ifdef VISIBILITY_RANGE_DITHER + pbr_functions::visibility_range_dither(in.position, in.visibility_range_dither); +#endif + +#ifdef FORWARD_DECAL + let forward_decal_info = get_forward_decal_info(in); + in.world_position = forward_decal_info.world_position; + in.uv = forward_decal_info.uv; +#endif + + // generate a PbrInput struct from the StandardMaterial bindings + var pbr_input = pbr_input_from_standard_material(in, is_front); + + // alpha discard + pbr_input.material.base_color = alpha_discard(pbr_input.material, pbr_input.material.base_color); + + // clustered decals + pbr_input.material.base_color = apply_decal_base_color( + in.world_position.xyz, + in.position.xy, + pbr_input.material.base_color + ); + +#ifdef PREPASS_PIPELINE + // write the gbuffer, lighting pass id, and optionally normal and motion_vector textures + let out = deferred_output(in, pbr_input); +#else + // in forward mode, we calculate the lit color immediately, and then apply some post-lighting effects here. + // in deferred mode the lit color and these effects will be calculated in the deferred lighting shader + var out: FragmentOutput; + if (pbr_input.material.flags & STANDARD_MATERIAL_FLAGS_UNLIT_BIT) == 0u { + out.color = apply_pbr_lighting(pbr_input); + } else { + out.color = pbr_input.material.base_color; + } + + // apply in-shader post processing (fog, alpha-premultiply, and also tonemapping, debanding if the camera is non-hdr) + // note this does not include fullscreen postprocessing effects like bloom. + out.color = main_pass_post_lighting_processing(pbr_input, out.color); +#endif + +#ifdef OIT_ENABLED + let alpha_mode = pbr_input.material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS; + if alpha_mode != pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_OPAQUE { + // The fragments will only be drawn during the oit resolve pass. 
+ oit_draw(in.position, out.color); + discard; + } +#endif // OIT_ENABLED + +#ifdef FORWARD_DECAL + out.color.a = min(forward_decal_info.alpha, out.color.a); +#endif + + return out; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr_ambient.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_ambient.wgsl new file mode 100644 index 0000000..7b174da --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_ambient.wgsl @@ -0,0 +1,29 @@ +#define_import_path bevy_pbr::ambient + +#import bevy_pbr::{ + lighting::{EnvBRDFApprox, F_AB}, + mesh_view_bindings::lights, +} + +// A precomputed `NdotV` is provided because it is computed regardless, +// but `world_normal` and the view vector `V` are provided separately for more advanced uses. +fn ambient_light( + world_position: vec4, + world_normal: vec3, + V: vec3, + NdotV: f32, + diffuse_color: vec3, + specular_color: vec3, + perceptual_roughness: f32, + occlusion: vec3, +) -> vec3 { + let diffuse_ambient = EnvBRDFApprox(diffuse_color, F_AB(1.0, NdotV)); + let specular_ambient = EnvBRDFApprox(specular_color, F_AB(perceptual_roughness, NdotV)); + + // No real world material has specular values under 0.02, so we use this range as a + // "pre-baked specular occlusion" that extinguishes the fresnel term, for artistic control. 
+ // See: https://google.github.io/filament/Filament.html#specularocclusion + let specular_occlusion = saturate(dot(specular_color, vec3(50.0 * 0.33))); + + return (diffuse_ambient + specular_ambient * specular_occlusion) * lights.ambient_color.rgb * occlusion; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr_bindings.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_bindings.wgsl new file mode 100644 index 0000000..6d21c81 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_bindings.wgsl @@ -0,0 +1,89 @@ +#define_import_path bevy_pbr::pbr_bindings + +#import bevy_pbr::pbr_types::StandardMaterial + +#ifdef BINDLESS +struct StandardMaterialBindings { + material: u32, // 0 + base_color_texture: u32, // 1 + base_color_sampler: u32, // 2 + emissive_texture: u32, // 3 + emissive_sampler: u32, // 4 + metallic_roughness_texture: u32, // 5 + metallic_roughness_sampler: u32, // 6 + occlusion_texture: u32, // 7 + occlusion_sampler: u32, // 8 + normal_map_texture: u32, // 9 + normal_map_sampler: u32, // 10 + depth_map_texture: u32, // 11 + depth_map_sampler: u32, // 12 + anisotropy_texture: u32, // 13 + anisotropy_sampler: u32, // 14 + specular_transmission_texture: u32, // 15 + specular_transmission_sampler: u32, // 16 + thickness_texture: u32, // 17 + thickness_sampler: u32, // 18 + diffuse_transmission_texture: u32, // 19 + diffuse_transmission_sampler: u32, // 20 + clearcoat_texture: u32, // 21 + clearcoat_sampler: u32, // 22 + clearcoat_roughness_texture: u32, // 23 + clearcoat_roughness_sampler: u32, // 24 + clearcoat_normal_texture: u32, // 25 + clearcoat_normal_sampler: u32, // 26 + specular_texture: u32, // 27 + specular_sampler: u32, // 28 + specular_tint_texture: u32, // 29 + specular_tint_sampler: u32, // 30 +} + +@group(#{MATERIAL_BIND_GROUP}) @binding(0) var material_indices: array; +@group(#{MATERIAL_BIND_GROUP}) @binding(10) var material_array: array; + +#else // BINDLESS + +@group(#{MATERIAL_BIND_GROUP}) @binding(0) var material: 
StandardMaterial; +@group(#{MATERIAL_BIND_GROUP}) @binding(1) var base_color_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(2) var base_color_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(3) var emissive_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(4) var emissive_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(5) var metallic_roughness_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(6) var metallic_roughness_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(7) var occlusion_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(8) var occlusion_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(9) var normal_map_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(10) var normal_map_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(11) var depth_map_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(12) var depth_map_sampler: sampler; + +#ifdef PBR_ANISOTROPY_TEXTURE_SUPPORTED +@group(#{MATERIAL_BIND_GROUP}) @binding(13) var anisotropy_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(14) var anisotropy_sampler: sampler; +#endif // PBR_ANISOTROPY_TEXTURE_SUPPORTED + +#ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED +@group(#{MATERIAL_BIND_GROUP}) @binding(15) var specular_transmission_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(16) var specular_transmission_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(17) var thickness_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(18) var thickness_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(19) var diffuse_transmission_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(20) var diffuse_transmission_sampler: sampler; +#endif // PBR_TRANSMISSION_TEXTURES_SUPPORTED + +#ifdef PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED +@group(#{MATERIAL_BIND_GROUP}) @binding(21) var clearcoat_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) 
@binding(22) var clearcoat_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(23) var clearcoat_roughness_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(24) var clearcoat_roughness_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(25) var clearcoat_normal_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(26) var clearcoat_normal_sampler: sampler; +#endif // PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED + +#ifdef PBR_SPECULAR_TEXTURES_SUPPORTED +@group(#{MATERIAL_BIND_GROUP}) @binding(27) var specular_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(28) var specular_sampler: sampler; +@group(#{MATERIAL_BIND_GROUP}) @binding(29) var specular_tint_texture: texture_2d; +@group(#{MATERIAL_BIND_GROUP}) @binding(30) var specular_tint_sampler: sampler; +#endif // PBR_SPECULAR_TEXTURES_SUPPORTED + +#endif // BINDLESS diff --git a/crates/libmarathon/src/render/pbr/render/pbr_fragment.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_fragment.wgsl new file mode 100644 index 0000000..a78abcb --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_fragment.wgsl @@ -0,0 +1,844 @@ +#define_import_path bevy_pbr::pbr_fragment + +#import bevy_render::bindless::{bindless_samplers_filtering, bindless_textures_2d} + +#import bevy_pbr::{ + pbr_functions, + pbr_functions::SampleBias, + pbr_bindings, + pbr_types, + prepass_utils, + lighting, + mesh_bindings::mesh, + mesh_view_bindings::view, + parallax_mapping::parallaxed_uv, + lightmap::lightmap, +} + +#ifdef SCREEN_SPACE_AMBIENT_OCCLUSION +#import bevy_pbr::mesh_view_bindings::screen_space_ambient_occlusion_texture +#import bevy_pbr::ssao_utils::ssao_multibounce +#endif + +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::VertexOutput +#else ifdef PREPASS_PIPELINE +#import bevy_pbr::prepass_io::VertexOutput +#else +#import bevy_pbr::forward_io::VertexOutput +#endif + +#ifdef BINDLESS +#import bevy_pbr::pbr_bindings::material_indices 
+#endif // BINDLESS + +// prepare a basic PbrInput from the vertex stage output, mesh binding and view binding +fn pbr_input_from_vertex_output( + in: VertexOutput, + is_front: bool, + double_sided: bool, +) -> pbr_types::PbrInput { + var pbr_input: pbr_types::PbrInput = pbr_types::pbr_input_new(); + +#ifdef MESHLET_MESH_MATERIAL_PASS + pbr_input.flags = in.mesh_flags; +#else + pbr_input.flags = mesh[in.instance_index].flags; +#endif + + pbr_input.is_orthographic = view.clip_from_view[3].w == 1.0; + pbr_input.V = pbr_functions::calculate_view(in.world_position, pbr_input.is_orthographic); + pbr_input.frag_coord = in.position; + pbr_input.world_position = in.world_position; + +#ifdef VERTEX_COLORS + pbr_input.material.base_color = in.color; +#endif + + pbr_input.world_normal = pbr_functions::prepare_world_normal( + in.world_normal, + double_sided, + is_front, + ); + +#ifdef LOAD_PREPASS_NORMALS + pbr_input.N = prepass_utils::prepass_normal(in.position, 0u); +#else + pbr_input.N = normalize(pbr_input.world_normal); +#endif + + return pbr_input; +} + +// Prepare a full PbrInput by sampling all textures to resolve +// the material members +fn pbr_input_from_standard_material( + in: VertexOutput, + is_front: bool, +) -> pbr_types::PbrInput { +#ifdef MESHLET_MESH_MATERIAL_PASS + let slot = in.material_bind_group_slot; +#else // MESHLET_MESH_MATERIAL_PASS + let slot = mesh[in.instance_index].material_and_lightmap_bind_group_slot & 0xffffu; +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + let flags = pbr_bindings::material_array[material_indices[slot].material].flags; + let base_color = pbr_bindings::material_array[material_indices[slot].material].base_color; + let deferred_lighting_pass_id = + pbr_bindings::material_array[material_indices[slot].material].deferred_lighting_pass_id; +#else // BINDLESS + let flags = pbr_bindings::material.flags; + let base_color = pbr_bindings::material.base_color; + let deferred_lighting_pass_id = 
pbr_bindings::material.deferred_lighting_pass_id; +#endif + + let double_sided = (flags & pbr_types::STANDARD_MATERIAL_FLAGS_DOUBLE_SIDED_BIT) != 0u; + + var pbr_input: pbr_types::PbrInput = pbr_input_from_vertex_output(in, is_front, double_sided); + pbr_input.material.flags = flags; + pbr_input.material.base_color *= base_color; + pbr_input.material.deferred_lighting_pass_id = deferred_lighting_pass_id; + + // Neubelt and Pettineo 2013, "Crafting a Next-gen Material Pipeline for The Order: 1886" + let NdotV = max(dot(pbr_input.N, pbr_input.V), 0.0001); + + // Fill in the sample bias so we can sample from textures. + var bias: SampleBias; +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv = in.ddx_uv; + bias.ddy_uv = in.ddy_uv; +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias = view.mip_bias; +#endif // MESHLET_MESH_MATERIAL_PASS + +// TODO: Transforming UVs mean we need to apply derivative chain rule for meshlet mesh material pass +#ifdef VERTEX_UVS + +#ifdef BINDLESS + let uv_transform = pbr_bindings::material_array[material_indices[slot].material].uv_transform; +#else // BINDLESS + let uv_transform = pbr_bindings::material.uv_transform; +#endif // BINDLESS + +pbr_input.material.uv_transform = uv_transform; + +#ifdef VERTEX_UVS_A + var uv = (uv_transform * vec3(in.uv, 1.0)).xy; +#endif + +// TODO: Transforming UVs mean we need to apply derivative chain rule for meshlet mesh material pass +#ifdef VERTEX_UVS_B + var uv_b = (uv_transform * vec3(in.uv_b, 1.0)).xy; +#else + var uv_b = uv; +#endif + +#ifdef VERTEX_TANGENTS + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_DEPTH_MAP_BIT) != 0u) { + let V = pbr_input.V; + let TBN = pbr_functions::calculate_tbn_mikktspace(in.world_normal, in.world_tangent); + let T = TBN[0]; + let B = TBN[1]; + let N = TBN[2]; + // Transform V from fragment to camera in world space to tangent space. 
+ let Vt = vec3(dot(V, T), dot(V, B), dot(V, N)); +#ifdef VERTEX_UVS_A + // TODO: Transforming UVs mean we need to apply derivative chain rule for meshlet mesh material pass + uv = parallaxed_uv( +#ifdef BINDLESS + pbr_bindings::material_array[material_indices[slot].material].parallax_depth_scale, + pbr_bindings::material_array[material_indices[slot].material].max_parallax_layer_count, + pbr_bindings::material_array[material_indices[slot].material].max_relief_mapping_search_steps, +#else // BINDLESS + pbr_bindings::material.parallax_depth_scale, + pbr_bindings::material.max_parallax_layer_count, + pbr_bindings::material.max_relief_mapping_search_steps, +#endif // BINDLESS + uv, + // Flip the direction of Vt to go toward the surface to make the + // parallax mapping algorithm easier to understand and reason + // about. + -Vt, + slot, + ); +#endif + +#ifdef VERTEX_UVS_B + // TODO: Transforming UVs mean we need to apply derivative chain rule for meshlet mesh material pass + uv_b = parallaxed_uv( +#ifdef BINDLESS + pbr_bindings::material_array[material_indices[slot].material].parallax_depth_scale, + pbr_bindings::material_array[material_indices[slot].material].max_parallax_layer_count, + pbr_bindings::material_array[material_indices[slot].material].max_relief_mapping_search_steps, +#else // BINDLESS + pbr_bindings::material.parallax_depth_scale, + pbr_bindings::material.max_parallax_layer_count, + pbr_bindings::material.max_relief_mapping_search_steps, +#endif // BINDLESS + uv_b, + // Flip the direction of Vt to go toward the surface to make the + // parallax mapping algorithm easier to understand and reason + // about. 
+ -Vt, + slot, + ); +#else + uv_b = uv; +#endif + } +#endif // VERTEX_TANGENTS + + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_BASE_COLOR_TEXTURE_BIT) != 0u) { + pbr_input.material.base_color *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].base_color_texture], + bindless_samplers_filtering[material_indices[slot].base_color_sampler], +#else // BINDLESS + pbr_bindings::base_color_texture, + pbr_bindings::base_color_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_BASE_COLOR_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ); + +#ifdef ALPHA_TO_COVERAGE + // Sharpen alpha edges. + // + // https://bgolus.medium.com/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f + let alpha_mode = flags & pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS; + if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ALPHA_TO_COVERAGE { + +#ifdef BINDLESS + let alpha_cutoff = pbr_bindings::material_array[material_indices[slot].material].alpha_cutoff; +#else // BINDLESS + let alpha_cutoff = pbr_bindings::material.alpha_cutoff; +#endif // BINDLESS + + pbr_input.material.base_color.a = (pbr_input.material.base_color.a - alpha_cutoff) / + max(fwidth(pbr_input.material.base_color.a), 0.0001) + 0.5; + } +#endif // ALPHA_TO_COVERAGE + + } +#endif // VERTEX_UVS + + pbr_input.material.flags = flags; + + // NOTE: Unlit bit not set means == 0 is true, so the true case is if lit + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_UNLIT_BIT) == 0u) { +#ifdef BINDLESS + pbr_input.material.ior = pbr_bindings::material_array[material_indices[slot].material].ior; + pbr_input.material.attenuation_color = + 
pbr_bindings::material_array[material_indices[slot].material].attenuation_color; + pbr_input.material.attenuation_distance = + pbr_bindings::material_array[material_indices[slot].material].attenuation_distance; + pbr_input.material.alpha_cutoff = + pbr_bindings::material_array[material_indices[slot].material].alpha_cutoff; +#else // BINDLESS + pbr_input.material.ior = pbr_bindings::material.ior; + pbr_input.material.attenuation_color = pbr_bindings::material.attenuation_color; + pbr_input.material.attenuation_distance = pbr_bindings::material.attenuation_distance; + pbr_input.material.alpha_cutoff = pbr_bindings::material.alpha_cutoff; +#endif // BINDLESS + + // reflectance +#ifdef BINDLESS + pbr_input.material.reflectance = + pbr_bindings::material_array[material_indices[slot].material].reflectance; +#else // BINDLESS + pbr_input.material.reflectance = pbr_bindings::material.reflectance; +#endif // BINDLESS + +#ifdef PBR_SPECULAR_TEXTURES_SUPPORTED +#ifdef VERTEX_UVS + + // Specular texture + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_SPECULAR_TEXTURE_BIT) != 0u) { + let specular = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].specular_texture], + bindless_samplers_filtering[material_indices[slot].specular_sampler], +#else // BINDLESS + pbr_bindings::specular_texture, + pbr_bindings::specular_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_SPECULAR_UV_B + uv_b, +#else // STANDARD_MATERIAL_SPECULAR_UV_B + uv, +#endif // STANDARD_MATERIAL_SPECULAR_UV_B +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).a; + // This 0.5 factor is from the `KHR_materials_specular` specification: + // + pbr_input.material.reflectance *= specular * 0.5; + } + + // Specular tint texture + if ((flags & 
pbr_types::STANDARD_MATERIAL_FLAGS_SPECULAR_TINT_TEXTURE_BIT) != 0u) { + let specular_tint = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].specular_tint_texture], + bindless_samplers_filtering[material_indices[slot].specular_tint_sampler], +#else // BINDLESS + pbr_bindings::specular_tint_texture, + pbr_bindings::specular_tint_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_SPECULAR_TINT_UV_B + uv_b, +#else // STANDARD_MATERIAL_SPECULAR_TINT_UV_B + uv, +#endif // STANDARD_MATERIAL_SPECULAR_TINT_UV_B +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb; + pbr_input.material.reflectance *= specular_tint; + } + +#endif // VERTEX_UVS +#endif // PBR_SPECULAR_TEXTURES_SUPPORTED + + // emissive +#ifdef BINDLESS + var emissive: vec4 = pbr_bindings::material_array[material_indices[slot].material].emissive; +#else // BINDLESS + var emissive: vec4 = pbr_bindings::material.emissive; +#endif // BINDLESS + +#ifdef VERTEX_UVS + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_EMISSIVE_TEXTURE_BIT) != 0u) { + emissive = vec4(emissive.rgb * +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].emissive_texture], + bindless_samplers_filtering[material_indices[slot].emissive_sampler], +#else // BINDLESS + pbr_bindings::emissive_texture, + pbr_bindings::emissive_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_EMISSIVE_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb, + emissive.a); + } 
+#endif + pbr_input.material.emissive = emissive; + + // metallic and perceptual roughness +#ifdef BINDLESS + var metallic: f32 = pbr_bindings::material_array[material_indices[slot].material].metallic; + var perceptual_roughness: f32 = pbr_bindings::material_array[material_indices[slot].material].perceptual_roughness; +#else // BINDLESS + var metallic: f32 = pbr_bindings::material.metallic; + var perceptual_roughness: f32 = pbr_bindings::material.perceptual_roughness; +#endif // BINDLESS + +#ifdef VERTEX_UVS + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_METALLIC_ROUGHNESS_TEXTURE_BIT) != 0u) { + let metallic_roughness = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].metallic_roughness_texture], + bindless_samplers_filtering[material_indices[slot].metallic_roughness_sampler], +#else // BINDLESS + pbr_bindings::metallic_roughness_texture, + pbr_bindings::metallic_roughness_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_METALLIC_ROUGHNESS_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ); + // Sampling from GLTF standard channels for now + metallic *= metallic_roughness.b; + perceptual_roughness *= metallic_roughness.g; + } +#endif + pbr_input.material.metallic = metallic; + pbr_input.material.perceptual_roughness = perceptual_roughness; + + // Clearcoat factor +#ifdef BINDLESS + pbr_input.material.clearcoat = + pbr_bindings::material_array[material_indices[slot].material].clearcoat; +#else // BINDLESS + pbr_input.material.clearcoat = pbr_bindings::material.clearcoat; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_CLEARCOAT_TEXTURE_BIT) != 0u) { + 
pbr_input.material.clearcoat *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].clearcoat_texture], + bindless_samplers_filtering[material_indices[slot].clearcoat_sampler], +#else // BINDLESS + pbr_bindings::clearcoat_texture, + pbr_bindings::clearcoat_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_CLEARCOAT_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).r; + } +#endif // PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED +#endif // VERTEX_UVS + + // Clearcoat roughness +#ifdef BINDLESS + pbr_input.material.clearcoat_perceptual_roughness = + pbr_bindings::material_array[material_indices[slot].material].clearcoat_perceptual_roughness; +#else // BINDLESS + pbr_input.material.clearcoat_perceptual_roughness = + pbr_bindings::material.clearcoat_perceptual_roughness; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_CLEARCOAT_ROUGHNESS_TEXTURE_BIT) != 0u) { + pbr_input.material.clearcoat_perceptual_roughness *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].clearcoat_roughness_texture], + bindless_samplers_filtering[material_indices[slot].clearcoat_roughness_sampler], +#else // BINDLESS + pbr_bindings::clearcoat_roughness_texture, + pbr_bindings::clearcoat_roughness_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_CLEARCOAT_ROUGHNESS_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // 
MESHLET_MESH_MATERIAL_PASS + ).g; + } +#endif // PBR_MULTI_LAYER_MATERIAL_TEXTURES_SUPPORTED +#endif // VERTEX_UVS + +#ifdef BINDLESS + var specular_transmission: f32 = pbr_bindings::material_array[material_indices[slot].material].specular_transmission; +#else // BINDLESS + var specular_transmission: f32 = pbr_bindings::material.specular_transmission; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_SPECULAR_TRANSMISSION_TEXTURE_BIT) != 0u) { + specular_transmission *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[ + material_indices[slot].specular_transmission_texture + ], + bindless_samplers_filtering[ + material_indices[slot].specular_transmission_sampler + ], +#else // BINDLESS + pbr_bindings::specular_transmission_texture, + pbr_bindings::specular_transmission_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_SPECULAR_TRANSMISSION_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).r; + } +#endif +#endif + pbr_input.material.specular_transmission = specular_transmission; + +#ifdef BINDLESS + var thickness: f32 = pbr_bindings::material_array[material_indices[slot].material].thickness; +#else // BINDLESS + var thickness: f32 = pbr_bindings::material.thickness; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_THICKNESS_TEXTURE_BIT) != 0u) { + thickness *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].thickness_texture], + 
bindless_samplers_filtering[material_indices[slot].thickness_sampler], +#else // BINDLESS + pbr_bindings::thickness_texture, + pbr_bindings::thickness_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_THICKNESS_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).g; + } +#endif +#endif + // scale thickness, accounting for non-uniform scaling (e.g. a “squished” mesh) + // TODO: Meshlet support +#ifndef MESHLET_MESH_MATERIAL_PASS + thickness *= length( + (transpose(mesh[in.instance_index].world_from_local) * vec4(pbr_input.N, 0.0)).xyz + ); +#endif + pbr_input.material.thickness = thickness; + +#ifdef BINDLESS + var diffuse_transmission = + pbr_bindings::material_array[material_indices[slot].material].diffuse_transmission; +#else // BINDLESS + var diffuse_transmission = pbr_bindings::material.diffuse_transmission; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_DIFFUSE_TRANSMISSION_TEXTURE_BIT) != 0u) { + diffuse_transmission *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].diffuse_transmission_texture], + bindless_samplers_filtering[material_indices[slot].diffuse_transmission_sampler], +#else // BINDLESS + pbr_bindings::diffuse_transmission_texture, + pbr_bindings::diffuse_transmission_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).a; + } +#endif +#endif + pbr_input.material.diffuse_transmission = diffuse_transmission; + + var diffuse_occlusion: vec3 
= vec3(1.0); + var specular_occlusion: f32 = 1.0; +#ifdef VERTEX_UVS + if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_OCCLUSION_TEXTURE_BIT) != 0u) { + diffuse_occlusion *= +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].occlusion_texture], + bindless_samplers_filtering[material_indices[slot].occlusion_sampler], +#else // BINDLESS + pbr_bindings::occlusion_texture, + pbr_bindings::occlusion_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_OCCLUSION_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).r; + } +#endif +#ifdef SCREEN_SPACE_AMBIENT_OCCLUSION + let ssao = textureLoad(screen_space_ambient_occlusion_texture, vec2(in.position.xy), 0i).r; + let ssao_multibounce = ssao_multibounce(ssao, pbr_input.material.base_color.rgb); + diffuse_occlusion = min(diffuse_occlusion, ssao_multibounce); + // Use SSAO to estimate the specular occlusion. 
+ // Lagarde and Rousiers 2014, "Moving Frostbite to Physically Based Rendering" + let roughness = lighting::perceptualRoughnessToRoughness(pbr_input.material.perceptual_roughness); + specular_occlusion = saturate(pow(NdotV + ssao, exp2(-16.0 * roughness - 1.0)) - 1.0 + ssao); +#endif + pbr_input.diffuse_occlusion = diffuse_occlusion; + pbr_input.specular_occlusion = specular_occlusion; + + // N (normal vector) +#ifndef LOAD_PREPASS_NORMALS + + pbr_input.N = normalize(pbr_input.world_normal); + pbr_input.clearcoat_N = pbr_input.N; + +#ifdef VERTEX_UVS +#ifdef VERTEX_TANGENTS + + let TBN = pbr_functions::calculate_tbn_mikktspace(pbr_input.world_normal, in.world_tangent); + +#ifdef STANDARD_MATERIAL_NORMAL_MAP + + let Nt = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].normal_map_texture], + bindless_samplers_filtering[material_indices[slot].normal_map_sampler], +#else // BINDLESS + pbr_bindings::normal_map_texture, + pbr_bindings::normal_map_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_NORMAL_MAP_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb; + + pbr_input.N = pbr_functions::apply_normal_mapping(flags, TBN, double_sided, is_front, Nt); + +#endif // STANDARD_MATERIAL_NORMAL_MAP + +#ifdef STANDARD_MATERIAL_CLEARCOAT + + // Note: `KHR_materials_clearcoat` specifies that, if there's no + // clearcoat normal map, we must set the normal to the mesh's normal, + // and not to the main layer's bumped normal. 
+ +#ifdef STANDARD_MATERIAL_CLEARCOAT_NORMAL_MAP + + let clearcoat_Nt = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].clearcoat_normal_texture], + bindless_samplers_filtering[material_indices[slot].clearcoat_normal_sampler], +#else // BINDLESS + pbr_bindings::clearcoat_normal_texture, + pbr_bindings::clearcoat_normal_sampler, +#endif // BINDLESS +#ifdef STANDARD_MATERIAL_CLEARCOAT_NORMAL_UV_B + uv_b, +#else + uv, +#endif +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb; + + pbr_input.clearcoat_N = pbr_functions::apply_normal_mapping( + flags, + TBN, + double_sided, + is_front, + clearcoat_Nt, + ); + +#endif // STANDARD_MATERIAL_CLEARCOAT_NORMAL_MAP + +#endif // STANDARD_MATERIAL_CLEARCOAT + +#endif // VERTEX_TANGENTS +#endif // VERTEX_UVS + + // Take anisotropy into account. + // + // This code comes from the `KHR_materials_anisotropy` spec: + // +#ifdef PBR_ANISOTROPY_TEXTURE_SUPPORTED +#ifdef VERTEX_TANGENTS +#ifdef STANDARD_MATERIAL_ANISOTROPY + +#ifdef BINDLESS + var anisotropy_strength = + pbr_bindings::material_array[material_indices[slot].material].anisotropy_strength; + var anisotropy_direction = + pbr_bindings::material_array[material_indices[slot].material].anisotropy_rotation; +#else // BINDLESS + var anisotropy_strength = pbr_bindings::material.anisotropy_strength; + var anisotropy_direction = pbr_bindings::material.anisotropy_rotation; +#endif // BINDLESS + + // Adjust based on the anisotropy map if there is one. 
+ if ((flags & pbr_types::STANDARD_MATERIAL_FLAGS_ANISOTROPY_TEXTURE_BIT) != 0u) { + let anisotropy_texel = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].anisotropy_texture], + bindless_samplers_filtering[material_indices[slot].anisotropy_sampler], +#else // BINDLESS + pbr_bindings::anisotropy_texture, + pbr_bindings::anisotropy_sampler, +#endif +#ifdef STANDARD_MATERIAL_ANISOTROPY_UV_B + uv_b, +#else // STANDARD_MATERIAL_ANISOTROPY_UV_B + uv, +#endif // STANDARD_MATERIAL_ANISOTROPY_UV_B +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb; + + let anisotropy_direction_from_texture = normalize(anisotropy_texel.rg * 2.0 - 1.0); + // Rotate by the anisotropy direction. + anisotropy_direction = + mat2x2(anisotropy_direction.xy, anisotropy_direction.yx * vec2(-1.0, 1.0)) * + anisotropy_direction_from_texture; + anisotropy_strength *= anisotropy_texel.b; + } + + pbr_input.anisotropy_strength = anisotropy_strength; + + let anisotropy_T = normalize(TBN * vec3(anisotropy_direction, 0.0)); + let anisotropy_B = normalize(cross(pbr_input.world_normal, anisotropy_T)); + pbr_input.anisotropy_T = anisotropy_T; + pbr_input.anisotropy_B = anisotropy_B; + +#endif // STANDARD_MATERIAL_ANISOTROPY +#endif // VERTEX_TANGENTS +#endif // PBR_ANISOTROPY_TEXTURE_SUPPORTED + +#endif // LOAD_PREPASS_NORMALS + +// TODO: Meshlet support +#ifdef LIGHTMAP + +#ifdef BINDLESS + let lightmap_exposure = + pbr_bindings::material_array[material_indices[slot].material].lightmap_exposure; +#else // BINDLESS + let lightmap_exposure = pbr_bindings::material.lightmap_exposure; +#endif // BINDLESS + + pbr_input.lightmap_light = lightmap(in.uv_b, lightmap_exposure, in.instance_index); +#endif + } + + return pbr_input; +} diff --git 
a/crates/libmarathon/src/render/pbr/render/pbr_functions.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_functions.wgsl new file mode 100644 index 0000000..2c86295 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_functions.wgsl @@ -0,0 +1,883 @@ +#define_import_path bevy_pbr::pbr_functions + +#import bevy_pbr::{ + pbr_types, + pbr_bindings, + mesh_view_bindings as view_bindings, + mesh_view_types, + lighting, + lighting::{LAYER_BASE, LAYER_CLEARCOAT}, + transmission, + clustered_forward as clustering, + shadows, + ambient, + irradiance_volume, + mesh_types::{MESH_FLAGS_SHADOW_RECEIVER_BIT, MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT}, +} +#import bevy_render::maths::{E, powsafe} + +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::VertexOutput +#else ifdef PREPASS_PIPELINE +#import bevy_pbr::prepass_io::VertexOutput +#else // PREPASS_PIPELINE +#import bevy_pbr::forward_io::VertexOutput +#endif // PREPASS_PIPELINE + +#ifdef ENVIRONMENT_MAP +#import bevy_pbr::environment_map +#endif + +#ifdef TONEMAP_IN_SHADER +#import bevy_core_pipeline::tonemapping::{tone_mapping, screen_space_dither} +#endif + + +// Biasing info needed to sample from a texture. How this is done depends on +// whether we're rendering meshlets or regular meshes. +struct SampleBias { +#ifdef MESHLET_MESH_MATERIAL_PASS + ddx_uv: vec2, + ddy_uv: vec2, +#else // MESHLET_MESH_MATERIAL_PASS + mip_bias: f32, +#endif // MESHLET_MESH_MATERIAL_PASS +} + +// This is the standard 4x4 ordered dithering pattern from [1]. +// +// We can't use `array, 4>` because they can't be indexed dynamically +// due to Naga limitations. So instead we pack into a single `vec4` and extract +// individual bytes. +// +// [1]: https://en.wikipedia.org/wiki/Ordered_dithering#Threshold_map +const DITHER_THRESHOLD_MAP: vec4 = vec4( + 0x0a020800, + 0x060e040c, + 0x09010b03, + 0x050d070f +); + +// Processes a visibility range dither value and discards the fragment if +// needed. 
+// +// Visibility ranges, also known as HLODs, are crossfades between different +// levels of detail. +// +// The `dither` value ranges from [-16, 16]. When zooming out, positive values +// are used for meshes that are in the process of disappearing, while negative +// values are used for meshes that are in the process of appearing. In other +// words, when the camera is moving backwards, the `dither` value counts up from +// -16 to 0 when the object is fading in, stays at 0 while the object is +// visible, and then counts up to 16 while the object is fading out. +// Distinguishing between negative and positive values allows the dither +// patterns for different LOD levels of a single mesh to mesh together properly. +#ifdef VISIBILITY_RANGE_DITHER +fn visibility_range_dither(frag_coord: vec4, dither: i32) { + // If `dither` is 0, the object is visible. + if (dither == 0) { + return; + } + + // If `dither` is less than -15 or greater than 15, the object is culled. + if (dither <= -16 || dither >= 16) { + discard; + } + + // Otherwise, check the dither pattern. + let coords = vec2(floor(frag_coord.xy)) % 4u; + let threshold = i32((DITHER_THRESHOLD_MAP[coords.y] >> (coords.x * 8)) & 0xff); + if ((dither >= 0 && dither + threshold >= 16) || (dither < 0 && 1 + dither + threshold <= 0)) { + discard; + } +} +#endif + +fn alpha_discard(material: pbr_types::StandardMaterial, output_color: vec4) -> vec4 { + var color = output_color; + let alpha_mode = material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS; + if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_OPAQUE { + // NOTE: If rendering as opaque, alpha should be ignored so set to 1.0 + color.a = 1.0; + } + +#ifdef MAY_DISCARD + // NOTE: `MAY_DISCARD` is only defined in the alpha to coverage case if MSAA + // was off. This special situation causes alpha to coverage to fall back to + // alpha mask. 
+ else if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_MASK || + alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ALPHA_TO_COVERAGE { + if color.a >= material.alpha_cutoff { + // NOTE: If rendering as masked alpha and >= the cutoff, render as fully opaque + color.a = 1.0; + } else { + // NOTE: output_color.a < in.material.alpha_cutoff should not be rendered + discard; + } + } +#endif + + return color; +} + +fn prepare_world_normal( + world_normal: vec3, + double_sided: bool, + is_front: bool, +) -> vec3 { + var output: vec3 = world_normal; +#ifndef VERTEX_TANGENTS +#ifndef STANDARD_MATERIAL_NORMAL_MAP + // NOTE: When NOT using normal-mapping, if looking at the back face of a double-sided + // material, the normal needs to be inverted. This is a branchless version of that. + output = (f32(!double_sided || is_front) * 2.0 - 1.0) * output; +#endif +#endif + return output; +} + +// Calculates the three TBN vectors according to [mikktspace]. Returns a matrix +// with T, B, N columns in that order. +// +// [mikktspace]: http://www.mikktspace.com/ +fn calculate_tbn_mikktspace(world_normal: vec3, world_tangent: vec4) -> mat3x3 { + // NOTE: The mikktspace method of normal mapping explicitly requires that the world normal NOT + // be re-normalized in the fragment shader. This is primarily to match the way mikktspace + // bakes vertex tangents and normal maps so that this is the exact inverse. Blender, Unity, + // Unreal Engine, Godot, and more all use the mikktspace method. Do not change this code + // unless you really know what you are doing. + // http://www.mikktspace.com/ + var N: vec3 = world_normal; + + // NOTE: The mikktspace method of normal mapping explicitly requires that these NOT be + // normalized nor any Gram-Schmidt applied to ensure the vertex normal is orthogonal to the + // vertex tangent! Do not change this code unless you really know what you are doing. 
+ // http://www.mikktspace.com/ + var T: vec3 = world_tangent.xyz; + var B: vec3 = world_tangent.w * cross(N, T); + +#ifdef MESHLET_MESH_MATERIAL_PASS + // https://www.jeremyong.com/graphics/2023/12/16/surface-gradient-bump-mapping/#a-note-on-mikktspace-usage + let inverse_length_n = 1.0 / length(N); + T *= inverse_length_n; + B *= inverse_length_n; + N *= inverse_length_n; +#endif + + return mat3x3(T, B, N); +} + +fn apply_normal_mapping( + standard_material_flags: u32, + TBN: mat3x3, + double_sided: bool, + is_front: bool, + in_Nt: vec3, +) -> vec3 { + // Unpack the TBN vectors. + var T = TBN[0]; + var B = TBN[1]; + var N = TBN[2]; + + // Nt is the tangent-space normal. + var Nt = in_Nt; + if (standard_material_flags & pbr_types::STANDARD_MATERIAL_FLAGS_TWO_COMPONENT_NORMAL_MAP) != 0u { + // Only use the xy components and derive z for 2-component normal maps. + Nt = vec3(Nt.rg * 2.0 - 1.0, 0.0); + Nt.z = sqrt(1.0 - Nt.x * Nt.x - Nt.y * Nt.y); + } else { + Nt = Nt * 2.0 - 1.0; + } + // Normal maps authored for DirectX require flipping the y component + if (standard_material_flags & pbr_types::STANDARD_MATERIAL_FLAGS_FLIP_NORMAL_MAP_Y) != 0u { + Nt.y = -Nt.y; + } + + if double_sided && !is_front { + Nt = -Nt; + } + + // NOTE: The mikktspace method of normal mapping applies maps the tangent-space normal from + // the normal map texture in this way to be an EXACT inverse of how the normal map baker + // calculates the normal maps so there is no error introduced. Do not change this code + // unless you really know what you are doing. + // http://www.mikktspace.com/ + N = Nt.x * T + Nt.y * B + Nt.z * N; + + return normalize(N); +} + +#ifdef STANDARD_MATERIAL_ANISOTROPY + +// Modifies the normal to achieve a better approximate direction from the +// environment map when using anisotropy. 
+// +// This follows the suggested implementation in the `KHR_materials_anisotropy` specification: +// https://github.com/KhronosGroup/glTF/blob/main/extensions/2.0/Khronos/KHR_materials_anisotropy/README.md#image-based-lighting +fn bend_normal_for_anisotropy(lighting_input: ptr) { + // Unpack. + let N = (*lighting_input).layers[LAYER_BASE].N; + let roughness = (*lighting_input).layers[LAYER_BASE].roughness; + let V = (*lighting_input).V; + let anisotropy = (*lighting_input).anisotropy; + let Ba = (*lighting_input).Ba; + + var bent_normal = normalize(cross(cross(Ba, V), Ba)); + + // The `KHR_materials_anisotropy` spec states: + // + // > This heuristic can probably be improved upon + let a = pow(2.0, pow(2.0, 1.0 - anisotropy * (1.0 - roughness))); + bent_normal = normalize(mix(bent_normal, N, a)); + + // The `KHR_materials_anisotropy` spec states: + // + // > Mixing the reflection with the normal is more accurate both with and + // > without anisotropy and keeps rough objects from gathering light from + // > behind their tangent plane. + let R = normalize(mix(reflect(-V, bent_normal), bent_normal, roughness * roughness)); + + (*lighting_input).layers[LAYER_BASE].N = bent_normal; + (*lighting_input).layers[LAYER_BASE].R = R; +} + +#endif // STANDARD_MATERIAL_ANISOTROPY + +// NOTE: Correctly calculates the view vector depending on whether +// the projection is orthographic or perspective. 
+fn calculate_view( + world_position: vec4, + is_orthographic: bool, +) -> vec3 { + var V: vec3; + if is_orthographic { + // Orthographic view vector + V = normalize(vec3(view_bindings::view.clip_from_world[0].z, view_bindings::view.clip_from_world[1].z, view_bindings::view.clip_from_world[2].z)); + } else { + // Only valid for a perspective projection + V = normalize(view_bindings::view.world_position.xyz - world_position.xyz); + } + return V; +} + +// Diffuse strength is inversely related to metallicity, specular and diffuse transmission +fn calculate_diffuse_color( + base_color: vec3, + metallic: f32, + specular_transmission: f32, + diffuse_transmission: f32 +) -> vec3 { + return base_color * (1.0 - metallic) * (1.0 - specular_transmission) * + (1.0 - diffuse_transmission); +} + +// Remapping [0,1] reflectance to F0 +// See https://google.github.io/filament/Filament.html#materialsystem/parameterization/remapping +fn calculate_F0(base_color: vec3, metallic: f32, reflectance: vec3) -> vec3 { + return 0.16 * reflectance * reflectance * (1.0 - metallic) + base_color * metallic; +} + +#ifndef PREPASS_FRAGMENT +fn apply_pbr_lighting( + in: pbr_types::PbrInput, +) -> vec4 { + var output_color: vec4 = in.material.base_color; + + let emissive = in.material.emissive; + + // calculate non-linear roughness from linear perceptualRoughness + let metallic = in.material.metallic; + let perceptual_roughness = in.material.perceptual_roughness; + let roughness = lighting::perceptualRoughnessToRoughness(perceptual_roughness); + let ior = in.material.ior; + let thickness = in.material.thickness; + let reflectance = in.material.reflectance; + let diffuse_transmission = in.material.diffuse_transmission; + let specular_transmission = in.material.specular_transmission; + + let specular_transmissive_color = specular_transmission * in.material.base_color.rgb; + + let diffuse_occlusion = in.diffuse_occlusion; + let specular_occlusion = in.specular_occlusion; + + // Neubelt and Pettineo 
2013, "Crafting a Next-gen Material Pipeline for The Order: 1886" + let NdotV = max(dot(in.N, in.V), 0.0001); + let R = reflect(-in.V, in.N); + +#ifdef STANDARD_MATERIAL_CLEARCOAT + // Do the above calculations again for the clearcoat layer. Remember that + // the clearcoat can have its own roughness and its own normal. + let clearcoat = in.material.clearcoat; + let clearcoat_perceptual_roughness = in.material.clearcoat_perceptual_roughness; + let clearcoat_roughness = lighting::perceptualRoughnessToRoughness(clearcoat_perceptual_roughness); + let clearcoat_N = in.clearcoat_N; + let clearcoat_NdotV = max(dot(clearcoat_N, in.V), 0.0001); + let clearcoat_R = reflect(-in.V, clearcoat_N); +#endif // STANDARD_MATERIAL_CLEARCOAT + + let diffuse_color = calculate_diffuse_color( + output_color.rgb, + metallic, + specular_transmission, + diffuse_transmission + ); + + // Diffuse transmissive strength is inversely related to metallicity and specular transmission, but directly related to diffuse transmission + let diffuse_transmissive_color = output_color.rgb * (1.0 - metallic) * (1.0 - specular_transmission) * diffuse_transmission; + + // Calculate the world position of the second Lambertian lobe used for diffuse transmission, by subtracting material thickness + let diffuse_transmissive_lobe_world_position = in.world_position - vec4(in.world_normal, 0.0) * thickness; + + let F0 = calculate_F0(output_color.rgb, metallic, reflectance); + let F_ab = lighting::F_AB(perceptual_roughness, NdotV); + + var direct_light: vec3 = vec3(0.0); + + // Transmitted Light (Specular and Diffuse) + var transmitted_light: vec3 = vec3(0.0); + + // Pack all the values into a structure. 
+ var lighting_input: lighting::LightingInput; + lighting_input.layers[LAYER_BASE].NdotV = NdotV; + lighting_input.layers[LAYER_BASE].N = in.N; + lighting_input.layers[LAYER_BASE].R = R; + lighting_input.layers[LAYER_BASE].perceptual_roughness = perceptual_roughness; + lighting_input.layers[LAYER_BASE].roughness = roughness; + lighting_input.P = in.world_position.xyz; + lighting_input.V = in.V; + lighting_input.diffuse_color = diffuse_color; + lighting_input.F0_ = F0; + lighting_input.F_ab = F_ab; +#ifdef STANDARD_MATERIAL_CLEARCOAT + lighting_input.layers[LAYER_CLEARCOAT].NdotV = clearcoat_NdotV; + lighting_input.layers[LAYER_CLEARCOAT].N = clearcoat_N; + lighting_input.layers[LAYER_CLEARCOAT].R = clearcoat_R; + lighting_input.layers[LAYER_CLEARCOAT].perceptual_roughness = clearcoat_perceptual_roughness; + lighting_input.layers[LAYER_CLEARCOAT].roughness = clearcoat_roughness; + lighting_input.clearcoat_strength = clearcoat; +#endif // STANDARD_MATERIAL_CLEARCOAT +#ifdef STANDARD_MATERIAL_ANISOTROPY + lighting_input.anisotropy = in.anisotropy_strength; + lighting_input.Ta = in.anisotropy_T; + lighting_input.Ba = in.anisotropy_B; +#endif // STANDARD_MATERIAL_ANISOTROPY + + // And do the same for transmissive if we need to. 
+#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + var transmissive_lighting_input: lighting::LightingInput; + transmissive_lighting_input.layers[LAYER_BASE].NdotV = 1.0; + transmissive_lighting_input.layers[LAYER_BASE].N = -in.N; + transmissive_lighting_input.layers[LAYER_BASE].R = vec3(0.0); + transmissive_lighting_input.layers[LAYER_BASE].perceptual_roughness = 1.0; + transmissive_lighting_input.layers[LAYER_BASE].roughness = 1.0; + transmissive_lighting_input.P = diffuse_transmissive_lobe_world_position.xyz; + transmissive_lighting_input.V = -in.V; + transmissive_lighting_input.diffuse_color = diffuse_transmissive_color; + transmissive_lighting_input.F0_ = vec3(0.0); + transmissive_lighting_input.F_ab = vec2(0.1); +#ifdef STANDARD_MATERIAL_CLEARCOAT + transmissive_lighting_input.layers[LAYER_CLEARCOAT].NdotV = 0.0; + transmissive_lighting_input.layers[LAYER_CLEARCOAT].N = vec3(0.0); + transmissive_lighting_input.layers[LAYER_CLEARCOAT].R = vec3(0.0); + transmissive_lighting_input.layers[LAYER_CLEARCOAT].perceptual_roughness = 0.0; + transmissive_lighting_input.layers[LAYER_CLEARCOAT].roughness = 0.0; + transmissive_lighting_input.clearcoat_strength = 0.0; +#endif // STANDARD_MATERIAL_CLEARCOAT +#ifdef STANDARD_MATERIAL_ANISOTROPY + transmissive_lighting_input.anisotropy = in.anisotropy_strength; + transmissive_lighting_input.Ta = in.anisotropy_T; + transmissive_lighting_input.Ba = in.anisotropy_B; +#endif // STANDARD_MATERIAL_ANISOTROPY +#endif // STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + + let view_z = dot(vec4( + view_bindings::view.view_from_world[0].z, + view_bindings::view.view_from_world[1].z, + view_bindings::view.view_from_world[2].z, + view_bindings::view.view_from_world[3].z + ), in.world_position); + let cluster_index = clustering::fragment_cluster_index(in.frag_coord.xy, view_z, in.is_orthographic); + var clusterable_object_index_ranges = + clustering::unpack_clusterable_object_index_ranges(cluster_index); + + // Point lights (direct) + for (var i: u32 
= clusterable_object_index_ranges.first_point_light_index_offset; + i < clusterable_object_index_ranges.first_spot_light_index_offset; + i = i + 1u) { + let light_id = clustering::get_clusterable_object_id(i); + + // If we're lightmapped, disable diffuse contribution from the light if + // requested, to avoid double-counting light. +#ifdef LIGHTMAP + let enable_diffuse = + (view_bindings::clusterable_objects.data[light_id].flags & + mesh_view_types::POINT_LIGHT_FLAGS_AFFECTS_LIGHTMAPPED_MESH_DIFFUSE_BIT) != 0u; +#else // LIGHTMAP + let enable_diffuse = true; +#endif // LIGHTMAP + + var shadow: f32 = 1.0; + if ((in.flags & MESH_FLAGS_SHADOW_RECEIVER_BIT) != 0u + && (view_bindings::clusterable_objects.data[light_id].flags & mesh_view_types::POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + shadow = shadows::fetch_point_shadow(light_id, in.world_position, in.world_normal); + } + + let light_contrib = lighting::point_light(light_id, &lighting_input, enable_diffuse, true); + direct_light += light_contrib * shadow; + +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + // NOTE: We use the diffuse transmissive color, the second Lambertian lobe's calculated + // world position, inverted normal and view vectors, and the following simplified + // values for a fully diffuse transmitted light contribution approximation: + // + // roughness = 1.0; + // NdotV = 1.0; + // R = vec3(0.0) // doesn't really matter + // F_ab = vec2(0.1) + // F0 = vec3(0.0) + var transmitted_shadow: f32 = 1.0; + if ((in.flags & (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT)) == (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT) + && (view_bindings::clusterable_objects.data[light_id].flags & mesh_view_types::POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + transmitted_shadow = shadows::fetch_point_shadow(light_id, diffuse_transmissive_lobe_world_position, -in.world_normal); + } + + let transmitted_light_contrib = + lighting::point_light(light_id, 
&transmissive_lighting_input, enable_diffuse, true); + transmitted_light += transmitted_light_contrib * transmitted_shadow; +#endif + } + + // Spot lights (direct) + for (var i: u32 = clusterable_object_index_ranges.first_spot_light_index_offset; + i < clusterable_object_index_ranges.first_reflection_probe_index_offset; + i = i + 1u) { + let light_id = clustering::get_clusterable_object_id(i); + + // If we're lightmapped, disable diffuse contribution from the light if + // requested, to avoid double-counting light. +#ifdef LIGHTMAP + let enable_diffuse = + (view_bindings::clusterable_objects.data[light_id].flags & + mesh_view_types::POINT_LIGHT_FLAGS_AFFECTS_LIGHTMAPPED_MESH_DIFFUSE_BIT) != 0u; +#else // LIGHTMAP + let enable_diffuse = true; +#endif // LIGHTMAP + + var shadow: f32 = 1.0; + if ((in.flags & MESH_FLAGS_SHADOW_RECEIVER_BIT) != 0u + && (view_bindings::clusterable_objects.data[light_id].flags & + mesh_view_types::POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + shadow = shadows::fetch_spot_shadow( + light_id, + in.world_position, + in.world_normal, + view_bindings::clusterable_objects.data[light_id].shadow_map_near_z, + ); + } + + let light_contrib = lighting::spot_light(light_id, &lighting_input, enable_diffuse); + direct_light += light_contrib * shadow; + +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + // NOTE: We use the diffuse transmissive color, the second Lambertian lobe's calculated + // world position, inverted normal and view vectors, and the following simplified + // values for a fully diffuse transmitted light contribution approximation: + // + // roughness = 1.0; + // NdotV = 1.0; + // R = vec3(0.0) // doesn't really matter + // F_ab = vec2(0.1) + // F0 = vec3(0.0) + var transmitted_shadow: f32 = 1.0; + if ((in.flags & (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT)) == (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT) + && (view_bindings::clusterable_objects.data[light_id].flags & 
mesh_view_types::POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + transmitted_shadow = shadows::fetch_spot_shadow( + light_id, + diffuse_transmissive_lobe_world_position, + -in.world_normal, + view_bindings::clusterable_objects.data[light_id].shadow_map_near_z, + ); + } + + let transmitted_light_contrib = + lighting::spot_light(light_id, &transmissive_lighting_input, enable_diffuse); + transmitted_light += transmitted_light_contrib * transmitted_shadow; +#endif + } + + // directional lights (direct) + let n_directional_lights = view_bindings::lights.n_directional_lights; + for (var i: u32 = 0u; i < n_directional_lights; i = i + 1u) { + // check if this light should be skipped, which occurs if this light does not intersect with the view + // note point and spot lights aren't skippable, as the relevant lights are filtered in `assign_lights_to_clusters` + let light = &view_bindings::lights.directional_lights[i]; + + // If we're lightmapped, disable diffuse contribution from the light if + // requested, to avoid double-counting light. 
+#ifdef LIGHTMAP + let enable_diffuse = + ((*light).flags & + mesh_view_types::DIRECTIONAL_LIGHT_FLAGS_AFFECTS_LIGHTMAPPED_MESH_DIFFUSE_BIT) != + 0u; +#else // LIGHTMAP + let enable_diffuse = true; +#endif // LIGHTMAP + + var shadow: f32 = 1.0; + if ((in.flags & MESH_FLAGS_SHADOW_RECEIVER_BIT) != 0u + && (view_bindings::lights.directional_lights[i].flags & mesh_view_types::DIRECTIONAL_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + shadow = shadows::fetch_directional_shadow(i, in.world_position, in.world_normal, view_z); + } + + var light_contrib = lighting::directional_light(i, &lighting_input, enable_diffuse); + +#ifdef DIRECTIONAL_LIGHT_SHADOW_MAP_DEBUG_CASCADES + light_contrib = shadows::cascade_debug_visualization(light_contrib, i, view_z); +#endif + direct_light += light_contrib * shadow; + +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + // NOTE: We use the diffuse transmissive color, the second Lambertian lobe's calculated + // world position, inverted normal and view vectors, and the following simplified + // values for a fully diffuse transmitted light contribution approximation: + // + // roughness = 1.0; + // NdotV = 1.0; + // R = vec3(0.0) // doesn't really matter + // F_ab = vec2(0.1) + // F0 = vec3(0.0) + var transmitted_shadow: f32 = 1.0; + if ((in.flags & (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT)) == (MESH_FLAGS_SHADOW_RECEIVER_BIT | MESH_FLAGS_TRANSMITTED_SHADOW_RECEIVER_BIT) + && (view_bindings::lights.directional_lights[i].flags & mesh_view_types::DIRECTIONAL_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + transmitted_shadow = shadows::fetch_directional_shadow(i, diffuse_transmissive_lobe_world_position, -in.world_normal, view_z); + } + + let transmitted_light_contrib = + lighting::directional_light(i, &transmissive_lighting_input, enable_diffuse); + transmitted_light += transmitted_light_contrib * transmitted_shadow; +#endif + } + +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + // NOTE: We use the diffuse 
transmissive color, the second Lambertian lobe's calculated + // world position, inverted normal and view vectors, and the following simplified + // values for a fully diffuse transmitted light contribution approximation: + // + // perceptual_roughness = 1.0; + // NdotV = 1.0; + // F0 = vec3(0.0) + // diffuse_occlusion = vec3(1.0) + transmitted_light += ambient::ambient_light(diffuse_transmissive_lobe_world_position, -in.N, -in.V, 1.0, diffuse_transmissive_color, vec3(0.0), 1.0, vec3(1.0)); +#endif + + // Diffuse indirect lighting can come from a variety of sources. The + // priority goes like this: + // + // 1. Lightmap (highest) + // 2. Irradiance volume + // 3. Environment map (lowest) + // + // When we find a source of diffuse indirect lighting, we stop accumulating + // any more diffuse indirect light. This avoids double-counting if, for + // example, both lightmaps and irradiance volumes are present. + + var indirect_light = vec3(0.0f); + var found_diffuse_indirect = false; + +#ifdef LIGHTMAP + indirect_light += in.lightmap_light * diffuse_color; + found_diffuse_indirect = true; +#endif + +#ifdef IRRADIANCE_VOLUME + // Irradiance volume light (indirect) + if (!found_diffuse_indirect) { + let irradiance_volume_light = irradiance_volume::irradiance_volume_light( + in.world_position.xyz, + in.N, + &clusterable_object_index_ranges, + ); + indirect_light += irradiance_volume_light * diffuse_color * diffuse_occlusion; + found_diffuse_indirect = true; + } +#endif + + // Environment map light (indirect) +#ifdef ENVIRONMENT_MAP + // If screen space reflections are going to be used for this material, don't + // accumulate environment map light yet. The SSR shader will do it. 
+#ifdef SCREEN_SPACE_REFLECTIONS + let use_ssr = perceptual_roughness <= + view_bindings::ssr_settings.perceptual_roughness_threshold; +#else // SCREEN_SPACE_REFLECTIONS + let use_ssr = false; +#endif // SCREEN_SPACE_REFLECTIONS + + if (!use_ssr) { +#ifdef STANDARD_MATERIAL_ANISOTROPY + var bent_normal_lighting_input = lighting_input; + bend_normal_for_anisotropy(&bent_normal_lighting_input); + let environment_map_lighting_input = &bent_normal_lighting_input; +#else // STANDARD_MATERIAL_ANISOTROPY + let environment_map_lighting_input = &lighting_input; +#endif // STANDARD_MATERIAL_ANISOTROPY + + let environment_light = environment_map::environment_map_light( + environment_map_lighting_input, + &clusterable_object_index_ranges, + found_diffuse_indirect, + ); + + indirect_light += environment_light.diffuse * diffuse_occlusion + + environment_light.specular * specular_occlusion; + } +#endif // ENVIRONMENT_MAP + + // Ambient light (indirect) + // If we are lightmapped, disable the ambient contribution if requested. + // This is to avoid double-counting ambient light. 
(It might be part of the lightmap) +#ifdef LIGHTMAP + let enable_ambient = view_bindings::lights.ambient_light_affects_lightmapped_meshes != 0u; +#else // LIGHTMAP + let enable_ambient = true; +#endif // LIGHTMAP + if (enable_ambient) { + indirect_light += ambient::ambient_light(in.world_position, in.N, in.V, NdotV, diffuse_color, F0, perceptual_roughness, diffuse_occlusion); + } + + // we'll use the specular component of the transmitted environment + // light in the call to `specular_transmissive_light()` below + var specular_transmitted_environment_light = vec3(0.0); + +#ifdef ENVIRONMENT_MAP + +#ifdef STANDARD_MATERIAL_DIFFUSE_OR_SPECULAR_TRANSMISSION + // NOTE: We use the diffuse transmissive color, inverted normal and view vectors, + // and the following simplified values for the transmitted environment light contribution + // approximation: + // + // diffuse_color = vec3(1.0) // later we use `diffuse_transmissive_color` and `specular_transmissive_color` + // NdotV = 1.0; + // R = T // see definition below + // F0 = vec3(1.0) + // diffuse_occlusion = 1.0 + // + // (This one is slightly different from the other light types above, because the environment + // map light returns both diffuse and specular components separately, and we want to use both) + + let T = -normalize( + in.V + // start with view vector at entry point + refract(in.V, -in.N, 1.0 / ior) * thickness // add refracted vector scaled by thickness, towards exit point + ); // normalize to find exit point view vector + + var transmissive_environment_light_input: lighting::LightingInput; + transmissive_environment_light_input.diffuse_color = vec3(1.0); + transmissive_environment_light_input.layers[LAYER_BASE].NdotV = 1.0; + transmissive_environment_light_input.P = in.world_position.xyz; + transmissive_environment_light_input.layers[LAYER_BASE].N = -in.N; + transmissive_environment_light_input.V = in.V; + transmissive_environment_light_input.layers[LAYER_BASE].R = T; + 
transmissive_environment_light_input.layers[LAYER_BASE].perceptual_roughness = perceptual_roughness; + transmissive_environment_light_input.layers[LAYER_BASE].roughness = roughness; + transmissive_environment_light_input.F0_ = vec3(1.0); + transmissive_environment_light_input.F_ab = vec2(0.1); +#ifdef STANDARD_MATERIAL_CLEARCOAT + // No clearcoat. + transmissive_environment_light_input.clearcoat_strength = 0.0; + transmissive_environment_light_input.layers[LAYER_CLEARCOAT].NdotV = 0.0; + transmissive_environment_light_input.layers[LAYER_CLEARCOAT].N = in.N; + transmissive_environment_light_input.layers[LAYER_CLEARCOAT].R = vec3(0.0); + transmissive_environment_light_input.layers[LAYER_CLEARCOAT].perceptual_roughness = 0.0; + transmissive_environment_light_input.layers[LAYER_CLEARCOAT].roughness = 0.0; +#endif // STANDARD_MATERIAL_CLEARCOAT + + let transmitted_environment_light = environment_map::environment_map_light( + &transmissive_environment_light_input, + &clusterable_object_index_ranges, + false, + ); + +#ifdef STANDARD_MATERIAL_DIFFUSE_TRANSMISSION + transmitted_light += transmitted_environment_light.diffuse * diffuse_transmissive_color; +#endif // STANDARD_MATERIAL_DIFFUSE_TRANSMISSION +#ifdef STANDARD_MATERIAL_SPECULAR_TRANSMISSION + specular_transmitted_environment_light = transmitted_environment_light.specular * specular_transmissive_color; +#endif // STANDARD_MATERIAL_SPECULAR_TRANSMISSION + +#endif // STANDARD_MATERIAL_SPECULAR_OR_DIFFUSE_TRANSMISSION + +#endif // ENVIRONMENT_MAP + + var emissive_light = emissive.rgb * output_color.a; + + // "The clearcoat layer is on top of emission in the layering stack. + // Consequently, the emission is darkened by the Fresnel term." 
+ // + // +#ifdef STANDARD_MATERIAL_CLEARCOAT + emissive_light = emissive_light * (0.04 + (1.0 - 0.04) * pow(1.0 - clearcoat_NdotV, 5.0)); +#endif + + emissive_light = emissive_light * mix(1.0, view_bindings::view.exposure, emissive.a); + +#ifdef STANDARD_MATERIAL_SPECULAR_TRANSMISSION + transmitted_light += transmission::specular_transmissive_light(in.world_position, in.frag_coord.xyz, view_z, in.N, in.V, F0, ior, thickness, perceptual_roughness, specular_transmissive_color, specular_transmitted_environment_light).rgb; + + if (in.material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_ATTENUATION_ENABLED_BIT) != 0u { + // We reuse the `atmospheric_fog()` function here, as it's fundamentally + // equivalent to the attenuation that takes place inside the material volume, + // and will allow us to eventually hook up subsurface scattering more easily + var attenuation_fog: mesh_view_types::Fog; + attenuation_fog.base_color.a = 1.0; + attenuation_fog.be = pow(1.0 - in.material.attenuation_color.rgb, vec3(E)) / in.material.attenuation_distance; + // TODO: Add the subsurface scattering factor below + // attenuation_fog.bi = /* ... 
*/ + transmitted_light = bevy_pbr::fog::atmospheric_fog( + attenuation_fog, vec4(transmitted_light, 1.0), thickness, + vec3(0.0) // TODO: Pass in (pre-attenuated) scattered light contribution here + ).rgb; + } +#endif + + // Total light + output_color = vec4( + (view_bindings::view.exposure * (transmitted_light + direct_light + indirect_light)) + emissive_light, + output_color.a + ); + + output_color = clustering::cluster_debug_visualization( + output_color, + view_z, + in.is_orthographic, + clusterable_object_index_ranges, + cluster_index, + ); + + return output_color; +} +#endif // PREPASS_FRAGMENT + +#ifdef DISTANCE_FOG +fn apply_fog(fog_params: mesh_view_types::Fog, input_color: vec4, fragment_world_position: vec3, view_world_position: vec3) -> vec4 { + let view_to_world = fragment_world_position.xyz - view_world_position.xyz; + + // `length()` is used here instead of just `view_to_world.z` since that produces more + // high quality results, especially for denser/smaller fogs. we get a "curved" + // fog shape that remains consistent with camera rotation, instead of a "linear" + // fog shape that looks a bit fake + let distance = length(view_to_world); + + var scattering = vec3(0.0); + if fog_params.directional_light_color.a > 0.0 { + let view_to_world_normalized = view_to_world / distance; + let n_directional_lights = view_bindings::lights.n_directional_lights; + for (var i: u32 = 0u; i < n_directional_lights; i = i + 1u) { + let light = view_bindings::lights.directional_lights[i]; + scattering += pow( + max( + dot(view_to_world_normalized, light.direction_to_light), + 0.0 + ), + fog_params.directional_light_exponent + ) * light.color.rgb * view_bindings::view.exposure; + } + } + + if fog_params.mode == mesh_view_types::FOG_MODE_LINEAR { + return bevy_pbr::fog::linear_fog(fog_params, input_color, distance, scattering); + } else if fog_params.mode == mesh_view_types::FOG_MODE_EXPONENTIAL { + return bevy_pbr::fog::exponential_fog(fog_params, input_color, 
distance, scattering); + } else if fog_params.mode == mesh_view_types::FOG_MODE_EXPONENTIAL_SQUARED { + return bevy_pbr::fog::exponential_squared_fog(fog_params, input_color, distance, scattering); + } else if fog_params.mode == mesh_view_types::FOG_MODE_ATMOSPHERIC { + return bevy_pbr::fog::atmospheric_fog(fog_params, input_color, distance, scattering); + } else { + return input_color; + } +} +#endif // DISTANCE_FOG + +#ifdef PREMULTIPLY_ALPHA +fn premultiply_alpha(standard_material_flags: u32, color: vec4) -> vec4 { +// `Blend`, `Premultiplied` and `Alpha` all share the same `BlendState`. Depending +// on the alpha mode, we premultiply the color channels by the alpha channel value, +// (and also optionally replace the alpha value with 0.0) so that the result produces +// the desired blend mode when sent to the blending operation. +#ifdef BLEND_PREMULTIPLIED_ALPHA + // For `BlendState::PREMULTIPLIED_ALPHA_BLENDING` the blend function is: + // + // result = 1 * src_color + (1 - src_alpha) * dst_color + let alpha_mode = standard_material_flags & pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS; + if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ADD { + // Here, we premultiply `src_color` by `src_alpha`, and replace `src_alpha` with 0.0: + // + // src_color *= src_alpha + // src_alpha = 0.0 + // + // We end up with: + // + // result = 1 * (src_alpha * src_color) + (1 - 0) * dst_color + // result = src_alpha * src_color + 1 * dst_color + // + // Which is the blend operation for additive blending + return vec4(color.rgb * color.a, 0.0); + } else { + // Here, we don't do anything, so that we get premultiplied alpha blending. 
(As expected) + return color.rgba; + } +#endif +// `Multiply` uses its own `BlendState`, but we still need to premultiply here in the +// shader so that we get correct results as we tweak the alpha channel +#ifdef BLEND_MULTIPLY + // The blend function is: + // + // result = dst_color * src_color + (1 - src_alpha) * dst_color + // + // We premultiply `src_color` by `src_alpha`: + // + // src_color *= src_alpha + // + // We end up with: + // + // result = dst_color * (src_color * src_alpha) + (1 - src_alpha) * dst_color + // result = src_alpha * (src_color * dst_color) + (1 - src_alpha) * dst_color + // + // Which is the blend operation for multiplicative blending with arbitrary mixing + // controlled by the source alpha channel + return vec4(color.rgb * color.a, color.a); +#endif +} +#endif + +// fog, alpha premultiply +// for non-hdr cameras, tonemapping and debanding +fn main_pass_post_lighting_processing( + pbr_input: pbr_types::PbrInput, + input_color: vec4, +) -> vec4 { + var output_color = input_color; + +#ifdef DISTANCE_FOG + // fog + if ((pbr_input.material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_FOG_ENABLED_BIT) != 0u) { + output_color = apply_fog(view_bindings::fog, output_color, pbr_input.world_position.xyz, view_bindings::view.world_position.xyz); + } +#endif // DISTANCE_FOG + +#ifdef TONEMAP_IN_SHADER + output_color = tone_mapping(output_color, view_bindings::view.color_grading); +#ifdef DEBAND_DITHER + var output_rgb = output_color.rgb; + output_rgb = powsafe(output_rgb, 1.0 / 2.2); + output_rgb += screen_space_dither(pbr_input.frag_coord.xy); + // This conversion back to linear space is required because our output texture format is + // SRGB; the GPU will assume our output is linear and will apply an SRGB conversion. 
+ output_rgb = powsafe(output_rgb, 2.2); + output_color = vec4(output_rgb, output_color.a); +#endif +#endif +#ifdef PREMULTIPLY_ALPHA + output_color = premultiply_alpha(pbr_input.material.flags, output_color); +#endif + return output_color; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr_lighting.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_lighting.wgsl new file mode 100644 index 0000000..7496dea --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_lighting.wgsl @@ -0,0 +1,856 @@ +#define_import_path bevy_pbr::lighting + +#import bevy_pbr::{ + mesh_view_types::POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE, + mesh_view_bindings as view_bindings, +} +#import bevy_render::maths::PI + +const LAYER_BASE: u32 = 0; +const LAYER_CLEARCOAT: u32 = 1; + +// From the Filament design doc +// https://google.github.io/filament/Filament.html#table_symbols +// Symbol Definition +// v View unit vector +// l Incident light unit vector +// n Surface normal unit vector +// h Half unit vector between l and v +// f BRDF +// f_d Diffuse component of a BRDF +// f_r Specular component of a BRDF +// α Roughness, remapped from using input perceptualRoughness +// σ Diffuse reflectance +// Ω Spherical domain +// f0 Reflectance at normal incidence +// f90 Reflectance at grazing angle +// χ+(a) Heaviside function (1 if a>0 and 0 otherwise) +// nior Index of refraction (IOR) of an interface +// ⟨n⋅l⟩ Dot product clamped to [0..1] +// ⟨a⟩ Saturated value (clamped to [0..1]) + +// The Bidirectional Reflectance Distribution Function (BRDF) describes the surface response of a standard material +// and consists of two components, the diffuse component (f_d) and the specular component (f_r): +// f(v,l) = f_d(v,l) + f_r(v,l) +// +// The form of the microfacet model is the same for diffuse and specular +// f_r(v,l) = f_d(v,l) = 1 / { |n⋅v||n⋅l| } ∫_Ω D(m,α) G(v,l,m) f_m(v,l,m) (v⋅m) (l⋅m) dm +// +// In which: +// D, also called the Normal Distribution Function (NDF) models the 
distribution of the microfacets +// G models the visibility (or occlusion or shadow-masking) of the microfacets +// f_m is the microfacet BRDF and differs between specular and diffuse components +// +// The above integration needs to be approximated. + +// Input to a lighting function for a single layer (either the base layer or the +// clearcoat layer). +struct LayerLightingInput { + // The normal vector. + N: vec3, + // The reflected vector. + R: vec3, + // The normal vector ⋅ the view vector. + NdotV: f32, + + // The perceptual roughness of the layer. + perceptual_roughness: f32, + // The roughness of the layer. + roughness: f32, +} + +// Input to a lighting function (`point_light`, `spot_light`, +// `directional_light`). +struct LightingInput { +#ifdef STANDARD_MATERIAL_CLEARCOAT + layers: array, +#else // STANDARD_MATERIAL_CLEARCOAT + layers: array, +#endif // STANDARD_MATERIAL_CLEARCOAT + + // The world-space position. + P: vec3, + // The vector to the view. + V: vec3, + + // The diffuse color of the material. + diffuse_color: vec3, + + // Specular reflectance at the normal incidence angle. + // + // This should be read F₀, but due to Naga limitations we can't name it that. + F0_: vec3, + // Constants for the BRDF approximation. + // + // See `EnvBRDFApprox` in + // . + // What we call `F_ab` they call `AB`. + F_ab: vec2, + +#ifdef STANDARD_MATERIAL_CLEARCOAT + // The strength of the clearcoat layer. + clearcoat_strength: f32, +#endif // STANDARD_MATERIAL_CLEARCOAT + +#ifdef STANDARD_MATERIAL_ANISOTROPY + // The anisotropy strength, reflecting the amount of increased roughness in + // the tangent direction. + anisotropy: f32, + // The tangent direction for anisotropy: i.e. the direction in which + // roughness increases. + Ta: vec3, + // The bitangent direction, which is the cross product of the normal with + // the tangent direction. 
+ Ba: vec3, +#endif // STANDARD_MATERIAL_ANISOTROPY +} + +// Values derived from the `LightingInput` for both diffuse and specular lights. +struct DerivedLightingInput { + // The half-vector between L, the incident light vector, and V, the view + // vector. + H: vec3, + // The normal vector ⋅ the incident light vector. + NdotL: f32, + // The normal vector ⋅ the half-vector. + NdotH: f32, + // The incident light vector ⋅ the half-vector. + LdotH: f32, +} + +// distanceAttenuation is simply the square falloff of light intensity +// combined with a smooth attenuation at the edge of the light radius +// +// light radius is a non-physical construct for efficiency purposes, +// because otherwise every light affects every fragment in the scene +fn getDistanceAttenuation(distanceSquare: f32, inverseRangeSquared: f32) -> f32 { + let factor = distanceSquare * inverseRangeSquared; + let smoothFactor = saturate(1.0 - factor * factor); + let attenuation = smoothFactor * smoothFactor; + return attenuation * 1.0 / max(distanceSquare, 0.0001); +} + +// Normal distribution function (specular D) +// Based on https://google.github.io/filament/Filament.html#citation-walter07 + +// D_GGX(h,α) = α^2 / { π ((n⋅h)^2 (α2−1) + 1)^2 } + +// Simple implementation, has precision problems when using fp16 instead of fp32 +// see https://google.github.io/filament/Filament.html#listing_speculardfp16 +fn D_GGX(roughness: f32, NdotH: f32) -> f32 { + let oneMinusNdotHSquared = 1.0 - NdotH * NdotH; + let a = NdotH * roughness; + let k = roughness / (oneMinusNdotHSquared + a * a); + let d = k * k * (1.0 / PI); + return d; +} + +// An approximation of the anisotropic GGX distribution function. +// +// 1 +// D(𝐡) = ─────────────────────────────────────────────────── +// παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² + (𝐡 ⋅ 𝐧)²)² +// +// * `T` = 𝐭 = the tangent direction = the direction of increased roughness. +// +// * `B` = 𝐛 = the bitangent direction = the direction of decreased roughness. 
+// +// * `at` = αₜ = the alpha-roughness in the tangent direction. +// +// * `ab` = α_b = the alpha-roughness in the bitangent direction. +// +// This is from the `KHR_materials_anisotropy` spec: +// +fn D_GGX_anisotropic(at: f32, ab: f32, NdotH: f32, TdotH: f32, BdotH: f32) -> f32 { + let a2 = at * ab; + let f = vec3(ab * TdotH, at * BdotH, a2 * NdotH); + let w2 = a2 / dot(f, f); + let d = a2 * w2 * w2 * (1.0 / PI); + return d; +} + +// Visibility function (Specular G) +// V(v,l,a) = G(v,l,α) / { 4 (n⋅v) (n⋅l) } +// such that f_r becomes +// f_r(v,l) = D(h,α) V(v,l,α) F(v,h,f0) +// where +// V(v,l,α) = 0.5 / { n⋅l sqrt((n⋅v)^2 (1−α2) + α2) + n⋅v sqrt((n⋅l)^2 (1−α2) + α2) } +// Note the two sqrt's, that may be slow on mobile, see https://google.github.io/filament/Filament.html#listing_approximatedspecularv +fn V_SmithGGXCorrelated(roughness: f32, NdotV: f32, NdotL: f32) -> f32 { + let a2 = roughness * roughness; + let lambdaV = NdotL * sqrt((NdotV - a2 * NdotV) * NdotV + a2); + let lambdaL = NdotV * sqrt((NdotL - a2 * NdotL) * NdotL + a2); + let v = 0.5 / (lambdaV + lambdaL); + return v; +} + +// The visibility function, anisotropic variant. 
+fn V_GGX_anisotropic( + at: f32, + ab: f32, + NdotL: f32, + NdotV: f32, + BdotV: f32, + TdotV: f32, + TdotL: f32, + BdotL: f32, +) -> f32 { + let GGX_V = NdotL * length(vec3(at * TdotV, ab * BdotV, NdotV)); + let GGX_L = NdotV * length(vec3(at * TdotL, ab * BdotL, NdotL)); + let v = 0.5 / (GGX_V + GGX_L); + return saturate(v); +} + +// Probability-density function that matches the bounded VNDF sampler +// https://gpuopen.com/download/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf (Listing 2) +fn ggx_vndf_pdf(i: vec3, NdotH: f32, roughness: f32) -> f32 { + let ndf = D_GGX(roughness, NdotH); + + // Common terms + let ai = roughness * i.xy; + let len2 = dot(ai, ai); + let t = sqrt(len2 + i.z * i.z); + if i.z >= 0.0 { + let a = roughness; + let s = 1.0 + length(i.xy); + let a2 = a * a; + let s2 = s * s; + let k = (1.0 - a2) * s2 / (s2 + a2 * i.z * i.z); + return ndf / (2.0 * (k * i.z + t)); + } + + // Backfacing case + return ndf * (t - i.z) / (2.0 * len2); +} + +// https://gpuopen.com/download/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf (Listing 1) +fn sample_visible_ggx( + xi: vec2, + roughness: f32, + normal: vec3, + view: vec3, +) -> vec3 { + let n = normal; + let alpha = roughness; + + // Decompose view into components parallel/perpendicular to the normal + let wi_n = dot(view, n); + let wi_z = -n * wi_n; + let wi_xy = view + wi_z; + + // Warp view vector to the unit-roughness configuration + let wi_std = -normalize(alpha * wi_xy + wi_z); + + // Compute wi_std.z once for reuse + let wi_std_z = dot(wi_std, n); + + // Bounded VNDF sampling + // Compute the bound parameter k (Eq. 5) and the scaled z–limit b (Eq. 
6) + let s = 1.0 + length(wi_xy); + let a = clamp(alpha, 0.0, 1.0); + let a2 = a * a; + let s2 = s * s; + let k = (1.0 - a2) * s2 / (s2 + a2 * wi_n * wi_n); + let b = select(wi_std_z, k * wi_std_z, wi_n > 0.0); + + // Sample a spherical cap in (-b, 1] + let z = 1.0 - xi.y * (1.0 + b); + let sin_theta = sqrt(max(0.0, 1.0 - z * z)); + let phi = 2.0 * PI * xi.x - PI; + let x = sin_theta * cos(phi); + let y = sin_theta * sin(phi); + let c_std = vec3f(x, y, z); + + // Reflect the sample so that the normal aligns with +Z + let up = vec3f(0.0, 0.0, 1.0); + let wr = n + up; + let c = dot(wr, c_std) * wr / wr.z - c_std; + + // Half-vector in the standard frame + let wm_std = c + wi_std; + let wm_std_z = n * dot(n, wm_std); + let wm_std_xy = wm_std_z - wm_std; + + // Unwarp back to original roughness and compute microfacet normal + let H = normalize(alpha * wm_std_xy + wm_std_z); + + // Reflect view to obtain the outgoing (light) direction + return reflect(-view, H); +} + +// Smith geometric shadowing function +fn G_Smith(NdotV: f32, NdotL: f32, roughness: f32) -> f32 { + let k = roughness / 2.0; + let GGXL = NdotL / (NdotL * (1.0 - k) + k); + let GGXV = NdotV / (NdotV * (1.0 - k) + k); + return GGXL * GGXV; +} + +// A simpler, but nonphysical, alternative to Smith-GGX. We use this for +// clearcoat, per the Filament spec. 
+// +// https://google.github.io/filament/Filament.html#materialsystem/clearcoatmodel#toc4.9.1 +fn V_Kelemen(LdotH: f32) -> f32 { + return 0.25 / (LdotH * LdotH); +} + +// Fresnel function +// see https://google.github.io/filament/Filament.html#citation-schlick94 +// F_Schlick(v,h,f_0,f_90) = f_0 + (f_90 − f_0) (1 − v⋅h)^5 +fn F_Schlick_vec(f0: vec3, f90: f32, VdotH: f32) -> vec3 { + // not using mix to keep the vec3 and float versions identical + return f0 + (f90 - f0) * pow(1.0 - VdotH, 5.0); +} + +fn F_Schlick(f0: f32, f90: f32, VdotH: f32) -> f32 { + // not using mix to keep the vec3 and float versions identical + return f0 + (f90 - f0) * pow(1.0 - VdotH, 5.0); +} + +fn fresnel(f0: vec3, LdotH: f32) -> vec3 { + // f_90 suitable for ambient occlusion + // see https://google.github.io/filament/Filament.html#lighting/occlusion + let f90 = saturate(dot(f0, vec3(50.0 * 0.33))); + return F_Schlick_vec(f0, f90, LdotH); +} + +// Given distribution, visibility, and Fresnel term, calculates the final +// specular light. +// +// Multiscattering approximation: +// +fn specular_multiscatter( + D: f32, + V: f32, + F: vec3, + F0: vec3, + F_ab: vec2, + specular_intensity: f32, +) -> vec3 { + var Fr = (specular_intensity * D * V) * F; + Fr *= 1.0 + F0 * (1.0 / F_ab.x - 1.0); + return Fr; +} + +// Specular BRDF +// https://google.github.io/filament/Filament.html#materialsystem/specularbrdf + +// N, V, and L must all be normalized. +fn derive_lighting_input(N: vec3, V: vec3, L: vec3) -> DerivedLightingInput { + var input: DerivedLightingInput; + var H: vec3 = normalize(L + V); + input.H = H; + input.NdotL = saturate(dot(N, L)); + input.NdotH = saturate(dot(N, H)); + input.LdotH = saturate(dot(L, H)); + return input; +} + +// Returns L in the `xyz` components and the specular intensity in the `w` component. +fn compute_specular_layer_values_for_point_light( + input: ptr, + layer: u32, + V: vec3, + light_to_frag: vec3, + light_position_radius: f32, +) -> vec4 { + // Unpack. 
+ let R = (*input).layers[layer].R; + let a = (*input).layers[layer].roughness; + + // Representative Point Area Lights. + // see http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf p14-16 + var LtFdotR = dot(light_to_frag, R); + + // HACK: the following line is an amendment to fix a discontinuity when a surface + // intersects the light sphere. See https://github.com/bevyengine/bevy/issues/13318 + // + // This sentence in the reference is crux of the problem: "We approximate finding the point with the + // smallest angle to the reflection ray by finding the point with the smallest distance to the ray." + // This approximation turns out to be completely wrong for points inside or near the sphere. + // Clamping this dot product to be positive ensures `centerToRay` lies on ray and not behind it. + // Any non-zero epsilon works here, it just has to be positive to avoid a singularity at zero. + // However, this is still far from physically accurate. Deriving an exact solution would help, + // but really we should adopt a superior solution to area lighting, such as: + // Physically Based Area Lights by Michal Drobot, or + // Polygonal-Light Shading with Linearly Transformed Cosines by Eric Heitz et al. + LtFdotR = max(0.0001, LtFdotR); + + let centerToRay = LtFdotR * R - light_to_frag; + let closestPoint = light_to_frag + centerToRay * saturate( + light_position_radius * inverseSqrt(dot(centerToRay, centerToRay))); + let LspecLengthInverse = inverseSqrt(dot(closestPoint, closestPoint)); + let normalizationFactor = a / saturate(a + (light_position_radius * 0.5 * LspecLengthInverse)); + let intensity = normalizationFactor * normalizationFactor; + + let L: vec3 = closestPoint * LspecLengthInverse; // normalize() equivalent? 
+ return vec4(L, intensity); +} + +// Cook-Torrance approximation of the microfacet model integration using Fresnel law F to model f_m +// f_r(v,l) = { D(h,α) G(v,l,α) F(v,h,f0) } / { 4 (n⋅v) (n⋅l) } +fn specular( + input: ptr, + derived_input: ptr, + specular_intensity: f32, +) -> vec3 { + // Unpack. + let roughness = (*input).layers[LAYER_BASE].roughness; + let NdotV = (*input).layers[LAYER_BASE].NdotV; + let F0 = (*input).F0_; + let NdotL = (*derived_input).NdotL; + let NdotH = (*derived_input).NdotH; + let LdotH = (*derived_input).LdotH; + + // Calculate distribution. + let D = D_GGX(roughness, NdotH); + // Calculate visibility. + let V = V_SmithGGXCorrelated(roughness, NdotV, NdotL); + // Calculate the Fresnel term. + let F = fresnel(F0, LdotH); + + // Calculate the specular light. + let Fr = specular_multiscatter(D, V, F, F0, (*input).F_ab, specular_intensity); + return Fr; +} + +// Calculates the specular light for the clearcoat layer. Returns Fc, the +// Fresnel term, in the first channel, and Frc, the specular clearcoat light, in +// the second channel. +// +// +fn specular_clearcoat( + input: ptr, + derived_input: ptr, + clearcoat_strength: f32, + specular_intensity: f32, +) -> vec2 { + // Unpack. + let roughness = (*input).layers[LAYER_CLEARCOAT].roughness; + let NdotH = (*derived_input).NdotH; + let LdotH = (*derived_input).LdotH; + + // Calculate distribution. + let Dc = D_GGX(roughness, NdotH); + // Calculate visibility. + let Vc = V_Kelemen(LdotH); + // Calculate the Fresnel term. + let Fc = F_Schlick(0.04, 1.0, LdotH) * clearcoat_strength; + // Calculate the specular light. + let Frc = (specular_intensity * Dc * Vc) * Fc; + return vec2(Fc, Frc); +} + +#ifdef STANDARD_MATERIAL_ANISOTROPY + +fn specular_anisotropy( + input: ptr, + derived_input: ptr, + L: vec3, + specular_intensity: f32, +) -> vec3 { + // Unpack. 
+ let roughness = (*input).layers[LAYER_BASE].roughness; + let NdotV = (*input).layers[LAYER_BASE].NdotV; + let V = (*input).V; + let F0 = (*input).F0_; + let anisotropy = (*input).anisotropy; + let Ta = (*input).Ta; + let Ba = (*input).Ba; + let H = (*derived_input).H; + let NdotL = (*derived_input).NdotL; + let NdotH = (*derived_input).NdotH; + let LdotH = (*derived_input).LdotH; + + let TdotL = dot(Ta, L); + let BdotL = dot(Ba, L); + let TdotH = dot(Ta, H); + let BdotH = dot(Ba, H); + let TdotV = dot(Ta, V); + let BdotV = dot(Ba, V); + + let ab = roughness * roughness; + let at = mix(ab, 1.0, anisotropy * anisotropy); + + let Da = D_GGX_anisotropic(at, ab, NdotH, TdotH, BdotH); + let Va = V_GGX_anisotropic(at, ab, NdotL, NdotV, BdotV, TdotV, TdotL, BdotL); + let Fa = fresnel(F0, LdotH); + + // Calculate the specular light. + let Fr = specular_multiscatter(Da, Va, Fa, F0, (*input).F_ab, specular_intensity); + return Fr; +} + +#endif // STANDARD_MATERIAL_ANISOTROPY + +// Diffuse BRDF +// https://google.github.io/filament/Filament.html#materialsystem/diffusebrdf +// fd(v,l) = σ/π * 1 / { |n⋅v||n⋅l| } ∫Ω D(m,α) G(v,l,m) (v⋅m) (l⋅m) dm +// +// simplest approximation +// float Fd_Lambert() { +// return 1.0 / PI; +// } +// +// vec3 Fd = diffuseColor * Fd_Lambert(); +// +// Disney approximation +// See https://google.github.io/filament/Filament.html#citation-burley12 +// minimal quality difference +fn Fd_Burley( + input: ptr, + derived_input: ptr, +) -> f32 { + // Unpack. 
+ let roughness = (*input).layers[LAYER_BASE].roughness; + let NdotV = (*input).layers[LAYER_BASE].NdotV; + let NdotL = (*derived_input).NdotL; + let LdotH = (*derived_input).LdotH; + + let f90 = 0.5 + 2.0 * roughness * LdotH * LdotH; + let lightScatter = F_Schlick(1.0, f90, NdotL); + let viewScatter = F_Schlick(1.0, f90, NdotV); + return lightScatter * viewScatter * (1.0 / PI); +} + +// Scale/bias approximation +// https://www.unrealengine.com/en-US/blog/physically-based-shading-on-mobile +// TODO: Use a LUT (more accurate) +fn F_AB(perceptual_roughness: f32, NdotV: f32) -> vec2 { + let c0 = vec4(-1.0, -0.0275, -0.572, 0.022); + let c1 = vec4(1.0, 0.0425, 1.04, -0.04); + let r = perceptual_roughness * c0 + c1; + let a004 = min(r.x * r.x, exp2(-9.28 * NdotV)) * r.x + r.y; + return vec2(-1.04, 1.04) * a004 + r.zw; +} + +fn EnvBRDFApprox(F0: vec3, F_ab: vec2) -> vec3 { + return F0 * F_ab.x + F_ab.y; +} + +fn perceptualRoughnessToRoughness(perceptualRoughness: f32) -> f32 { + // clamp perceptual roughness to prevent precision problems + // According to Filament design 0.089 is recommended for mobile + // Filament uses 0.045 for non-mobile + let clampedPerceptualRoughness = clamp(perceptualRoughness, 0.089, 1.0); + return clampedPerceptualRoughness * clampedPerceptualRoughness; +} + +// this must align with CubemapLayout in decal/clustered.rs +const CUBEMAP_TYPE_CROSS_VERTICAL: u32 = 0; +const CUBEMAP_TYPE_CROSS_HORIZONTAL: u32 = 1; +const CUBEMAP_TYPE_SEQUENCE_VERTICAL: u32 = 2; +const CUBEMAP_TYPE_SEQUENCE_HORIZONTAL: u32 = 3; + +const X_PLUS: u32 = 0; +const X_MINUS: u32 = 1; +const Y_PLUS: u32 = 2; +const Y_MINUS: u32 = 3; +const Z_MINUS: u32 = 4; +const Z_PLUS: u32 = 5; + +fn cubemap_uv(direction: vec3, cubemap_type: u32) -> vec2 { + let abs_direction = abs(direction); + let max_axis = max(abs_direction.x, max(abs_direction.y, abs_direction.z)); + + let face_index = select( + select(X_PLUS, X_MINUS, direction.x < 0.0), + select( + select(Y_PLUS, Y_MINUS, 
direction.y < 0.0), + select(Z_PLUS, Z_MINUS, direction.z < 0.0), + max_axis != abs_direction.y + ), + max_axis != abs_direction.x + ); + + var face_uv: vec2; + var divisor: f32; + var corner_uv: vec2 = vec2(0, 0); + var face_size: vec2; + + switch face_index { + case X_PLUS: { face_uv = vec2(direction.z, -direction.y); divisor = direction.x; } + case X_MINUS: { face_uv = vec2(-direction.z, -direction.y); divisor = -direction.x; } + case Y_PLUS: { face_uv = vec2(direction.x, -direction.z); divisor = direction.y; } + case Y_MINUS: { face_uv = vec2(direction.x, direction.z); divisor = -direction.y; } + case Z_PLUS: { face_uv = vec2(direction.x, direction.y); divisor = direction.z; } + case Z_MINUS: { face_uv = vec2(direction.x, -direction.y); divisor = -direction.z; } + default: {} + } + face_uv = (face_uv / divisor) * 0.5 + 0.5; + + switch cubemap_type { + case CUBEMAP_TYPE_CROSS_VERTICAL: { + face_size = vec2(1.0/3.0, 1.0/4.0); + corner_uv = vec2((0x111102u >> (4 * face_index)) & 0xFu, (0x132011u >> (4 * face_index)) & 0xFu); + } + case CUBEMAP_TYPE_CROSS_HORIZONTAL: { + face_size = vec2(1.0/4.0, 1.0/3.0); + corner_uv = vec2((0x131102u >> (4 * face_index)) & 0xFu, (0x112011u >> (4 * face_index)) & 0xFu); + } + case CUBEMAP_TYPE_SEQUENCE_HORIZONTAL: { + face_size = vec2(1.0/6.0, 1.0); + corner_uv.x = face_index; + } + case CUBEMAP_TYPE_SEQUENCE_VERTICAL: { + face_size = vec2(1.0, 1.0/6.0); + corner_uv.y = face_index; + } + default: {} + } + + return (vec2(corner_uv) + face_uv) * face_size; +} + +fn point_light( + light_id: u32, + input: ptr, + enable_diffuse: bool, + enable_texture: bool, +) -> vec3 { + // Unpack. 
+ let diffuse_color = (*input).diffuse_color; + let P = (*input).P; + let N = (*input).layers[LAYER_BASE].N; + let V = (*input).V; + + let light = &view_bindings::clusterable_objects.data[light_id]; + let light_to_frag = (*light).position_radius.xyz - P; + let L = normalize(light_to_frag); + let distance_square = dot(light_to_frag, light_to_frag); + let rangeAttenuation = getDistanceAttenuation(distance_square, (*light).color_inverse_square_range.w); + + // Base layer + + let specular_L_intensity = compute_specular_layer_values_for_point_light( + input, + LAYER_BASE, + V, + light_to_frag, + (*light).position_radius.w, + ); + var specular_derived_input = derive_lighting_input(N, V, specular_L_intensity.xyz); + + let specular_intensity = specular_L_intensity.w; + +#ifdef STANDARD_MATERIAL_ANISOTROPY + let specular_light = specular_anisotropy(input, &specular_derived_input, L, specular_intensity); +#else // STANDARD_MATERIAL_ANISOTROPY + let specular_light = specular(input, &specular_derived_input, specular_intensity); +#endif // STANDARD_MATERIAL_ANISOTROPY + + // Clearcoat + +#ifdef STANDARD_MATERIAL_CLEARCOAT + // Unpack. + let clearcoat_N = (*input).layers[LAYER_CLEARCOAT].N; + let clearcoat_strength = (*input).clearcoat_strength; + + // Perform specular input calculations again for the clearcoat layer. We + // can't reuse the above because the clearcoat normal might be different + // from the main layer normal. + let clearcoat_specular_L_intensity = compute_specular_layer_values_for_point_light( + input, + LAYER_CLEARCOAT, + V, + light_to_frag, + (*light).position_radius.w, + ); + var clearcoat_specular_derived_input = + derive_lighting_input(clearcoat_N, V, clearcoat_specular_L_intensity.xyz); + + // Calculate the specular light. 
+ let clearcoat_specular_intensity = clearcoat_specular_L_intensity.w; + let Fc_Frc = specular_clearcoat( + input, + &clearcoat_specular_derived_input, + clearcoat_strength, + clearcoat_specular_intensity + ); + let inv_Fc = 1.0 - Fc_Frc.r; // Inverse Fresnel term. + let Frc = Fc_Frc.g; // Clearcoat light. +#endif // STANDARD_MATERIAL_CLEARCOAT + + // Diffuse. + // Comes after specular since its N⋅L is used in the lighting equation. + var derived_input = derive_lighting_input(N, V, L); + var diffuse = vec3(0.0); + if (enable_diffuse) { + diffuse = diffuse_color * Fd_Burley(input, &derived_input); + } + + // See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminanceEquation + // Lout = f(v,l) Φ / { 4 π d^2 }⟨n⋅l⟩ + // where + // f(v,l) = (f_d(v,l) + f_r(v,l)) * light_color + // Φ is luminous power in lumens + // our rangeAttenuation = 1 / d^2 multiplied with an attenuation factor for smoothing at the edge of the non-physical maximum light radius + + // For a point light, luminous intensity, I, in lumens per steradian is given by: + // I = Φ / 4 π + // The derivation of this can be seen here: https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower + + // NOTE: (*light).color.rgb is premultiplied with (*light).intensity / 4 π (which would be the luminous intensity) on the CPU + + var color: vec3; +#ifdef STANDARD_MATERIAL_CLEARCOAT + // Account for the Fresnel term from the clearcoat darkening the main layer. 
+ // + // + color = (diffuse + specular_light * inv_Fc) * inv_Fc + Frc; +#else // STANDARD_MATERIAL_CLEARCOAT + color = diffuse + specular_light; +#endif // STANDARD_MATERIAL_CLEARCOAT + + var texture_sample = 1f; + +#ifdef LIGHT_TEXTURES + if enable_texture && (*light).decal_index != 0xFFFFFFFFu { + let relative_position = (view_bindings::clustered_decals.decals[(*light).decal_index].local_from_world * vec4(P, 1.0)).xyz; + let cubemap_type = view_bindings::clustered_decals.decals[(*light).decal_index].tag; + let decal_uv = cubemap_uv(relative_position, cubemap_type); + let image_index = view_bindings::clustered_decals.decals[(*light).decal_index].image_index; + + texture_sample = textureSampleLevel( + view_bindings::clustered_decal_textures[image_index], + view_bindings::clustered_decal_sampler, + decal_uv, + 0.0 + ).r; + } +#endif + + return color * (*light).color_inverse_square_range.rgb * + (rangeAttenuation * derived_input.NdotL) * texture_sample; +} + +fn spot_light( + light_id: u32, + input: ptr, + enable_diffuse: bool +) -> vec3 { + // reuse the point light calculations + let point_light = point_light(light_id, input, enable_diffuse, false); + + let light = &view_bindings::clusterable_objects.data[light_id]; + + // reconstruct spot dir from x/z and y-direction flag + var spot_dir = vec3((*light).light_custom_data.x, 0.0, (*light).light_custom_data.y); + spot_dir.y = sqrt(max(0.0, 1.0 - spot_dir.x * spot_dir.x - spot_dir.z * spot_dir.z)); + if ((*light).flags & POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE) != 0u { + spot_dir.y = -spot_dir.y; + } + let light_to_frag = (*light).position_radius.xyz - (*input).P.xyz; + + // calculate attenuation based on filament formula https://google.github.io/filament/Filament.html#listing_glslpunctuallight + // spot_scale and spot_offset have been precomputed + // note we normalize here to get "l" from the filament listing. 
spot_dir is already normalized + let cd = dot(-spot_dir, normalize(light_to_frag)); + let attenuation = saturate(cd * (*light).light_custom_data.z + (*light).light_custom_data.w); + let spot_attenuation = attenuation * attenuation; + + var texture_sample = 1f; + +#ifdef LIGHT_TEXTURES + if (*light).decal_index != 0xFFFFFFFFu { + let local_position = (view_bindings::clustered_decals.decals[(*light).decal_index].local_from_world * + vec4((*input).P, 1.0)).xyz; + if local_position.z < 0.0 { + let decal_uv = (local_position.xy / (local_position.z * (*light).spot_light_tan_angle)) * vec2(-0.5, 0.5) + 0.5; + let image_index = view_bindings::clustered_decals.decals[(*light).decal_index].image_index; + + texture_sample = textureSampleLevel( + view_bindings::clustered_decal_textures[image_index], + view_bindings::clustered_decal_sampler, + decal_uv, + 0.0 + ).r; + } + } +#endif + + return point_light * spot_attenuation * texture_sample; +} + +fn directional_light( + light_id: u32, + input: ptr, + enable_diffuse: bool +) -> vec3 { + // Unpack. 
+ let diffuse_color = (*input).diffuse_color; + let NdotV = (*input).layers[LAYER_BASE].NdotV; + let N = (*input).layers[LAYER_BASE].N; + let V = (*input).V; + let roughness = (*input).layers[LAYER_BASE].roughness; + + let light = &view_bindings::lights.directional_lights[light_id]; + + let L = (*light).direction_to_light.xyz; + var derived_input = derive_lighting_input(N, V, L); + + var diffuse = vec3(0.0); + if (enable_diffuse) { + diffuse = diffuse_color * Fd_Burley(input, &derived_input); + } + +#ifdef STANDARD_MATERIAL_ANISOTROPY + let specular_light = specular_anisotropy(input, &derived_input, L, 1.0); +#else // STANDARD_MATERIAL_ANISOTROPY + let specular_light = specular(input, &derived_input, 1.0); +#endif // STANDARD_MATERIAL_ANISOTROPY + +#ifdef STANDARD_MATERIAL_CLEARCOAT + let clearcoat_N = (*input).layers[LAYER_CLEARCOAT].N; + let clearcoat_strength = (*input).clearcoat_strength; + + // Perform specular input calculations again for the clearcoat layer. We + // can't reuse the above because the clearcoat normal might be different + // from the main layer normal. + var derived_clearcoat_input = derive_lighting_input(clearcoat_N, V, L); + + let Fc_Frc = + specular_clearcoat(input, &derived_clearcoat_input, clearcoat_strength, 1.0); + let inv_Fc = 1.0 - Fc_Frc.r; + let Frc = Fc_Frc.g; +#endif // STANDARD_MATERIAL_CLEARCOAT + + var color: vec3; +#ifdef STANDARD_MATERIAL_CLEARCOAT + // Account for the Fresnel term from the clearcoat darkening the main layer. 
+ // + // + color = (diffuse + specular_light * inv_Fc) * inv_Fc * derived_input.NdotL + + Frc * derived_clearcoat_input.NdotL; +#else // STANDARD_MATERIAL_CLEARCOAT + color = (diffuse + specular_light) * derived_input.NdotL; +#endif // STANDARD_MATERIAL_CLEARCOAT + + var texture_sample = 1f; + +#ifdef LIGHT_TEXTURES + if (*light).decal_index != 0xFFFFFFFFu { + let local_position = (view_bindings::clustered_decals.decals[(*light).decal_index].local_from_world * + vec4((*input).P, 1.0)).xyz; + let decal_uv = local_position.xy * vec2(-0.5, 0.5) + 0.5; + + // if tiled or within tile + if (view_bindings::clustered_decals.decals[(*light).decal_index].tag != 0u) + || all(clamp(decal_uv, vec2(0.0), vec2(1.0)) == decal_uv) + { + let image_index = view_bindings::clustered_decals.decals[(*light).decal_index].image_index; + + texture_sample = textureSampleLevel( + view_bindings::clustered_decal_textures[image_index], + view_bindings::clustered_decal_sampler, + decal_uv - floor(decal_uv), + 0.0 + ).r; + } else { + texture_sample = 0f; + } + } +#endif + + return color * (*light).color.rgb * texture_sample; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr_prepass.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_prepass.wgsl new file mode 100644 index 0000000..68c3602 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_prepass.wgsl @@ -0,0 +1,151 @@ +#import bevy_pbr::{ + pbr_prepass_functions, + pbr_bindings, + pbr_bindings::material, + pbr_types, + pbr_functions, + pbr_functions::SampleBias, + prepass_io, + mesh_bindings::mesh, + mesh_view_bindings::view, +} + +#import bevy_render::bindless::{bindless_samplers_filtering, bindless_textures_2d} + +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::resolve_vertex_output +#endif + +#ifdef BINDLESS +#import bevy_pbr::pbr_bindings::material_indices +#endif // BINDLESS + +#ifdef PREPASS_FRAGMENT +@fragment +fn fragment( +#ifdef MESHLET_MESH_MATERIAL_PASS + 
@builtin(position) frag_coord: vec4, +#else + in: prepass_io::VertexOutput, + @builtin(front_facing) is_front: bool, +#endif +) -> prepass_io::FragmentOutput { +#ifdef MESHLET_MESH_MATERIAL_PASS + let in = resolve_vertex_output(frag_coord); + let is_front = true; +#else // MESHLET_MESH_MATERIAL_PASS + +#ifdef BINDLESS + let slot = mesh[in.instance_index].material_and_lightmap_bind_group_slot & 0xffffu; + let flags = pbr_bindings::material_array[material_indices[slot].material].flags; + let uv_transform = pbr_bindings::material_array[material_indices[slot].material].uv_transform; +#else // BINDLESS + let flags = pbr_bindings::material.flags; + let uv_transform = pbr_bindings::material.uv_transform; +#endif // BINDLESS + + // If we're in the crossfade section of a visibility range, conditionally + // discard the fragment according to the visibility pattern. +#ifdef VISIBILITY_RANGE_DITHER + pbr_functions::visibility_range_dither(in.position, in.visibility_range_dither); +#endif // VISIBILITY_RANGE_DITHER + + pbr_prepass_functions::prepass_alpha_discard(in); +#endif // MESHLET_MESH_MATERIAL_PASS + + var out: prepass_io::FragmentOutput; + +#ifdef UNCLIPPED_DEPTH_ORTHO_EMULATION + out.frag_depth = in.unclipped_depth; +#endif // UNCLIPPED_DEPTH_ORTHO_EMULATION + +#ifdef NORMAL_PREPASS + // NOTE: Unlit bit not set means == 0 is true, so the true case is if lit + if (flags & pbr_types::STANDARD_MATERIAL_FLAGS_UNLIT_BIT) == 0u { + let double_sided = (flags & pbr_types::STANDARD_MATERIAL_FLAGS_DOUBLE_SIDED_BIT) != 0u; + + let world_normal = pbr_functions::prepare_world_normal( + in.world_normal, + double_sided, + is_front, + ); + + var normal = world_normal; + +#ifdef VERTEX_UVS +#ifdef VERTEX_TANGENTS +#ifdef STANDARD_MATERIAL_NORMAL_MAP + +// TODO: Transforming UVs mean we need to apply derivative chain rule for meshlet mesh material pass +#ifdef STANDARD_MATERIAL_NORMAL_MAP_UV_B + let uv = (uv_transform * vec3(in.uv_b, 1.0)).xy; +#else + let uv = (uv_transform * 
vec3(in.uv, 1.0)).xy; +#endif + + // Fill in the sample bias so we can sample from textures. + var bias: SampleBias; +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv = in.ddx_uv; + bias.ddy_uv = in.ddy_uv; +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias = view.mip_bias; +#endif // MESHLET_MESH_MATERIAL_PASS + + let Nt = +#ifdef MESHLET_MESH_MATERIAL_PASS + textureSampleGrad( +#else // MESHLET_MESH_MATERIAL_PASS + textureSampleBias( +#endif // MESHLET_MESH_MATERIAL_PASS +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].normal_map_texture], + bindless_samplers_filtering[material_indices[slot].normal_map_sampler], +#else // BINDLESS + pbr_bindings::normal_map_texture, + pbr_bindings::normal_map_sampler, +#endif // BINDLESS + uv, +#ifdef MESHLET_MESH_MATERIAL_PASS + bias.ddx_uv, + bias.ddy_uv, +#else // MESHLET_MESH_MATERIAL_PASS + bias.mip_bias, +#endif // MESHLET_MESH_MATERIAL_PASS + ).rgb; + let TBN = pbr_functions::calculate_tbn_mikktspace(normal, in.world_tangent); + + normal = pbr_functions::apply_normal_mapping( + flags, + TBN, + double_sided, + is_front, + Nt, + ); + +#endif // STANDARD_MATERIAL_NORMAL_MAP +#endif // VERTEX_TANGENTS +#endif // VERTEX_UVS + + out.normal = vec4(normal * 0.5 + vec3(0.5), 1.0); + } else { + out.normal = vec4(in.world_normal * 0.5 + vec3(0.5), 1.0); + } +#endif // NORMAL_PREPASS + +#ifdef MOTION_VECTOR_PREPASS +#ifdef MESHLET_MESH_MATERIAL_PASS + out.motion_vector = in.motion_vector; +#else + out.motion_vector = pbr_prepass_functions::calculate_motion_vector(in.world_position, in.previous_world_position); +#endif +#endif + + return out; +} +#else +@fragment +fn fragment(in: prepass_io::VertexOutput) { + pbr_prepass_functions::prepass_alpha_discard(in); +} +#endif // PREPASS_FRAGMENT diff --git a/crates/libmarathon/src/render/pbr/render/pbr_prepass_functions.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_prepass_functions.wgsl new file mode 100644 index 0000000..d2d2c71 --- /dev/null +++ 
b/crates/libmarathon/src/render/pbr/render/pbr_prepass_functions.wgsl @@ -0,0 +1,102 @@ +#define_import_path bevy_pbr::pbr_prepass_functions + +#import bevy_render::bindless::{bindless_samplers_filtering, bindless_textures_2d} + +#import bevy_pbr::{ + prepass_io::VertexOutput, + prepass_bindings::previous_view_uniforms, + mesh_bindings::mesh, + mesh_view_bindings::view, + pbr_bindings, + pbr_types, +} + +#ifdef BINDLESS +#import bevy_pbr::pbr_bindings::material_indices +#endif // BINDLESS + +// Cutoff used for the premultiplied alpha modes BLEND, ADD, and ALPHA_TO_COVERAGE. +const PREMULTIPLIED_ALPHA_CUTOFF = 0.05; + +// We can use a simplified version of alpha_discard() here since we only need to handle the alpha_cutoff +fn prepass_alpha_discard(in: VertexOutput) { + +#ifdef MAY_DISCARD +#ifdef BINDLESS + let slot = mesh[in.instance_index].material_and_lightmap_bind_group_slot & 0xffffu; + var output_color: vec4 = pbr_bindings::material_array[material_indices[slot].material].base_color; + let flags = pbr_bindings::material_array[material_indices[slot].material].flags; +#else // BINDLESS + var output_color: vec4 = pbr_bindings::material.base_color; + let flags = pbr_bindings::material.flags; +#endif // BINDLESS + +#ifdef VERTEX_UVS +#ifdef STANDARD_MATERIAL_BASE_COLOR_UV_B + var uv = in.uv_b; +#else // STANDARD_MATERIAL_BASE_COLOR_UV_B + var uv = in.uv; +#endif // STANDARD_MATERIAL_BASE_COLOR_UV_B + +#ifdef BINDLESS + let uv_transform = pbr_bindings::material_array[material_indices[slot].material].uv_transform; +#else // BINDLESS + let uv_transform = pbr_bindings::material.uv_transform; +#endif // BINDLESS + + uv = (uv_transform * vec3(uv, 1.0)).xy; + if (flags & pbr_types::STANDARD_MATERIAL_FLAGS_BASE_COLOR_TEXTURE_BIT) != 0u { + output_color = output_color * textureSampleBias( +#ifdef BINDLESS + bindless_textures_2d[material_indices[slot].base_color_texture], + bindless_samplers_filtering[material_indices[slot].base_color_sampler], +#else // BINDLESS + 
pbr_bindings::base_color_texture, + pbr_bindings::base_color_sampler, +#endif // BINDLESS + uv, + view.mip_bias + ); + } +#endif // VERTEX_UVS + + let alpha_mode = flags & pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS; + if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_MASK { +#ifdef BINDLESS + let alpha_cutoff = pbr_bindings::material_array[material_indices[slot].material].alpha_cutoff; +#else // BINDLESS + let alpha_cutoff = pbr_bindings::material.alpha_cutoff; +#endif // BINDLESS + if output_color.a < alpha_cutoff { + discard; + } + } else if (alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_BLEND || + alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ADD || + alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ALPHA_TO_COVERAGE) { + if output_color.a < PREMULTIPLIED_ALPHA_CUTOFF { + discard; + } + } else if alpha_mode == pbr_types::STANDARD_MATERIAL_FLAGS_ALPHA_MODE_PREMULTIPLIED { + if all(output_color < vec4(PREMULTIPLIED_ALPHA_CUTOFF)) { + discard; + } + } + +#endif // MAY_DISCARD +} + +#ifdef MOTION_VECTOR_PREPASS +fn calculate_motion_vector(world_position: vec4, previous_world_position: vec4) -> vec2 { + let clip_position_t = view.unjittered_clip_from_world * world_position; + let clip_position = clip_position_t.xy / clip_position_t.w; + let previous_clip_position_t = previous_view_uniforms.clip_from_world * previous_world_position; + let previous_clip_position = previous_clip_position_t.xy / previous_clip_position_t.w; + // These motion vectors are used as offsets to UV positions and are stored + // in the range -1,1 to allow offsetting from the one corner to the + // diagonally-opposite corner in UV coordinates, in either direction. + // A difference between diagonally-opposite corners of clip space is in the + // range -2,2, so this needs to be scaled by 0.5. And the V direction goes + // down where clip space y goes up, so y needs to be flipped. 
+ return (clip_position - previous_clip_position) * vec2(0.5, -0.5); +} +#endif // MOTION_VECTOR_PREPASS diff --git a/crates/libmarathon/src/render/pbr/render/pbr_transmission.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_transmission.wgsl new file mode 100644 index 0000000..720a42b --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_transmission.wgsl @@ -0,0 +1,192 @@ +#define_import_path bevy_pbr::transmission + +#import bevy_pbr::{ + lighting, + prepass_utils, + utils::interleaved_gradient_noise, + utils, + mesh_view_bindings as view_bindings, +}; + +#import bevy_render::maths::PI + +#ifdef TONEMAP_IN_SHADER +#import bevy_core_pipeline::tonemapping::approximate_inverse_tone_mapping +#endif + +fn specular_transmissive_light(world_position: vec4, frag_coord: vec3, view_z: f32, N: vec3, V: vec3, F0: vec3, ior: f32, thickness: f32, perceptual_roughness: f32, specular_transmissive_color: vec3, transmitted_environment_light_specular: vec3) -> vec3 { + // Calculate the ratio between refraction indexes. 
Assume air/vacuum for the space outside the mesh + let eta = 1.0 / ior; + + // Calculate incidence vector (opposite to view vector) and its dot product with the mesh normal + let I = -V; + let NdotI = dot(N, I); + + // Calculate refracted direction using Snell's law + let k = 1.0 - eta * eta * (1.0 - NdotI * NdotI); + let T = eta * I - (eta * NdotI + sqrt(k)) * N; + + // Calculate the exit position of the refracted ray, by propagating refracted direction through thickness + let exit_position = world_position.xyz + T * thickness; + + // Transform exit_position into clip space + let clip_exit_position = view_bindings::view.clip_from_world * vec4(exit_position, 1.0); + + // Scale / offset position so that coordinate is in right space for sampling transmissive background texture + let offset_position = (clip_exit_position.xy / clip_exit_position.w) * vec2(0.5, -0.5) + 0.5; + + // Fetch background color + var background_color: vec4; + if perceptual_roughness == 0.0 { + // If the material has zero roughness, we can use a faster approach without the blur + background_color = fetch_transmissive_background_non_rough(offset_position, frag_coord); + } else { + background_color = fetch_transmissive_background(offset_position, frag_coord, view_z, perceptual_roughness); + } + + // Compensate for exposure, since the background color is coming from an already exposure-adjusted texture + background_color = vec4(background_color.rgb / view_bindings::view.exposure, background_color.a); + + // Dot product of the refracted direction with the exit normal (Note: We assume the exit normal is the entry normal but inverted) + let MinusNdotT = dot(-N, T); + + // Calculate 1.0 - fresnel factor (how much light is _NOT_ reflected, i.e. 
how much is transmitted) + let F = vec3(1.0) - lighting::fresnel(F0, MinusNdotT); + + // Calculate final color by applying fresnel multiplied specular transmissive color to a mix of background color and transmitted specular environment light + return F * specular_transmissive_color * mix(transmitted_environment_light_specular, background_color.rgb, background_color.a); +} + +fn fetch_transmissive_background_non_rough(offset_position: vec2, frag_coord: vec3) -> vec4 { + var background_color = textureSampleLevel( + view_bindings::view_transmission_texture, + view_bindings::view_transmission_sampler, + offset_position, + 0.0 + ); + +#ifdef DEPTH_PREPASS +#ifndef WEBGL2 + // Use depth prepass data to reject values that are in front of the current fragment + if prepass_utils::prepass_depth(vec4(offset_position * view_bindings::view.viewport.zw, 0.0, 0.0), 0u) > frag_coord.z { + background_color.a = 0.0; + } +#endif +#endif + +#ifdef TONEMAP_IN_SHADER + background_color = approximate_inverse_tone_mapping(background_color, view_bindings::view.color_grading); +#endif + + return background_color; +} + +fn fetch_transmissive_background(offset_position: vec2, frag_coord: vec3, view_z: f32, perceptual_roughness: f32) -> vec4 { + // Calculate view aspect ratio, used to scale offset so that it's proportionate + let aspect = view_bindings::view.viewport.z / view_bindings::view.viewport.w; + + // Calculate how “blurry” the transmission should be. + // Blur is more or less eyeballed to look approximately “right”, since the “correct” + // approach would involve projecting many scattered rays and figuring out their individual + // exit positions. IRL, light rays can be scattered when entering/exiting a material (due to + // roughness) or inside the material (due to subsurface scattering). Here, we only consider + // the first scenario. 
+ // + // Blur intensity is: + // - proportional to the square of `perceptual_roughness` + // - proportional to the inverse of view z + let blur_intensity = (perceptual_roughness * perceptual_roughness) / view_z; + +#ifdef SCREEN_SPACE_SPECULAR_TRANSMISSION_BLUR_TAPS + let num_taps = #{SCREEN_SPACE_SPECULAR_TRANSMISSION_BLUR_TAPS}; // Controlled by the `Camera3d::screen_space_specular_transmission_quality` property +#else + let num_taps = 8; // Fallback to 8 taps, if not specified +#endif + let num_spirals = i32(ceil(f32(num_taps) / 8.0)); +#ifdef TEMPORAL_JITTER + let random_angle = interleaved_gradient_noise(frag_coord.xy, view_bindings::globals.frame_count); +#else + let random_angle = interleaved_gradient_noise(frag_coord.xy, 0u); +#endif + // Pixel checkerboard pattern (helps make the interleaved gradient noise pattern less visible) + let pixel_checkboard = ( +#ifdef TEMPORAL_JITTER + // 0 or 1 on even/odd pixels, alternates every frame + (i32(frag_coord.x) + i32(frag_coord.y) + i32(view_bindings::globals.frame_count)) % 2 +#else + // 0 or 1 on even/odd pixels + (i32(frag_coord.x) + i32(frag_coord.y)) % 2 +#endif + ); + + var result = vec4(0.0); + for (var i: i32 = 0; i < num_taps; i = i + 1) { + let current_spiral = (i >> 3u); + let angle = (random_angle + f32(current_spiral) / f32(num_spirals)) * 2.0 * PI; + let m = vec2(sin(angle), cos(angle)); + let rotation_matrix = mat2x2( + m.y, -m.x, + m.x, m.y + ); + + // Get spiral offset + var spiral_offset: vec2; + switch i & 7 { + // https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare (slides 120-135) + // TODO: Figure out a more reasonable way of doing this, as WGSL + // seems to only allow constant indexes into constant arrays at the moment. + // The downstream shader compiler should be able to optimize this into a single + // constant when unrolling the for loop, but it's still not ideal. 
+ case 0: { spiral_offset = utils::SPIRAL_OFFSET_0_; } // Note: We go even first and then odd, so that the lowest + case 1: { spiral_offset = utils::SPIRAL_OFFSET_2_; } // quality possible (which does 4 taps) still does a full spiral + case 2: { spiral_offset = utils::SPIRAL_OFFSET_4_; } // instead of just the first half of it + case 3: { spiral_offset = utils::SPIRAL_OFFSET_6_; } + case 4: { spiral_offset = utils::SPIRAL_OFFSET_1_; } + case 5: { spiral_offset = utils::SPIRAL_OFFSET_3_; } + case 6: { spiral_offset = utils::SPIRAL_OFFSET_5_; } + case 7: { spiral_offset = utils::SPIRAL_OFFSET_7_; } + default: {} + } + + // Make each consecutive spiral slightly smaller than the previous one + spiral_offset *= 1.0 - (0.5 * f32(current_spiral + 1) / f32(num_spirals)); + + // Rotate and correct for aspect ratio + let rotated_spiral_offset = (rotation_matrix * spiral_offset) * vec2(1.0, aspect); + + // Calculate final offset position, with blur and spiral offset + let modified_offset_position = offset_position + rotated_spiral_offset * blur_intensity * (1.0 - f32(pixel_checkboard) * 0.1); + + // Sample the view transmission texture at the offset position + noise offset, to get the background color + var sample = textureSampleLevel( + view_bindings::view_transmission_texture, + view_bindings::view_transmission_sampler, + modified_offset_position, + 0.0 + ); + +#ifdef DEPTH_PREPASS +#ifndef WEBGL2 + // Use depth prepass data to reject values that are in front of the current fragment + if prepass_utils::prepass_depth(vec4(modified_offset_position * view_bindings::view.viewport.zw, 0.0, 0.0), 0u) > frag_coord.z { + sample = vec4(0.0); + } +#endif +#endif + + // As blur intensity grows higher, gradually limit *very bright* color RGB values towards a + // maximum length of 1.0 to prevent stray “firefly” pixel artifacts. This can potentially make + // very strong emissive meshes appear much dimmer, but the artifacts are noticeable enough to + // warrant this treatment. 
+ let normalized_rgb = normalize(sample.rgb); + result += vec4(min(sample.rgb, normalized_rgb / saturate(blur_intensity / 2.0)), sample.a); + } + + result /= f32(num_taps); + +#ifdef TONEMAP_IN_SHADER + result = approximate_inverse_tone_mapping(result, view_bindings::view.color_grading); +#endif + + return result; +} diff --git a/crates/libmarathon/src/render/pbr/render/pbr_types.wgsl b/crates/libmarathon/src/render/pbr/render/pbr_types.wgsl new file mode 100644 index 0000000..b8b51c5 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/pbr_types.wgsl @@ -0,0 +1,151 @@ +#define_import_path bevy_pbr::pbr_types + +// Since this is a hot path, try to keep the alignment and size of the struct members in mind. +// You can find the alignment and sizes at . +struct StandardMaterial { + base_color: vec4, + emissive: vec4, + attenuation_color: vec4, + uv_transform: mat3x3, + reflectance: vec3, + perceptual_roughness: f32, + metallic: f32, + diffuse_transmission: f32, + specular_transmission: f32, + thickness: f32, + ior: f32, + attenuation_distance: f32, + clearcoat: f32, + clearcoat_perceptual_roughness: f32, + anisotropy_strength: f32, + anisotropy_rotation: vec2, + // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. + flags: u32, + alpha_cutoff: f32, + parallax_depth_scale: f32, + max_parallax_layer_count: f32, + lightmap_exposure: f32, + max_relief_mapping_search_steps: u32, + /// ID for specifying which deferred lighting pass should be used for rendering this material, if any. + deferred_lighting_pass_id: u32, +}; + +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// NOTE: if these flags are updated or changed. Be sure to also update +// deferred_flags_from_mesh_material_flags and mesh_material_flags_from_deferred_flags +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+const STANDARD_MATERIAL_FLAGS_BASE_COLOR_TEXTURE_BIT: u32 = 1u << 0u; +const STANDARD_MATERIAL_FLAGS_EMISSIVE_TEXTURE_BIT: u32 = 1u << 1u; +const STANDARD_MATERIAL_FLAGS_METALLIC_ROUGHNESS_TEXTURE_BIT: u32 = 1u << 2u; +const STANDARD_MATERIAL_FLAGS_OCCLUSION_TEXTURE_BIT: u32 = 1u << 3u; +const STANDARD_MATERIAL_FLAGS_DOUBLE_SIDED_BIT: u32 = 1u << 4u; +const STANDARD_MATERIAL_FLAGS_UNLIT_BIT: u32 = 1u << 5u; +const STANDARD_MATERIAL_FLAGS_TWO_COMPONENT_NORMAL_MAP: u32 = 1u << 6u; +const STANDARD_MATERIAL_FLAGS_FLIP_NORMAL_MAP_Y: u32 = 1u << 7u; +const STANDARD_MATERIAL_FLAGS_FOG_ENABLED_BIT: u32 = 1u << 8u; +const STANDARD_MATERIAL_FLAGS_DEPTH_MAP_BIT: u32 = 1u << 9u; +const STANDARD_MATERIAL_FLAGS_SPECULAR_TRANSMISSION_TEXTURE_BIT: u32 = 1u << 10u; +const STANDARD_MATERIAL_FLAGS_THICKNESS_TEXTURE_BIT: u32 = 1u << 11u; +const STANDARD_MATERIAL_FLAGS_DIFFUSE_TRANSMISSION_TEXTURE_BIT: u32 = 1u << 12u; +const STANDARD_MATERIAL_FLAGS_ATTENUATION_ENABLED_BIT: u32 = 1u << 13u; +const STANDARD_MATERIAL_FLAGS_CLEARCOAT_TEXTURE_BIT: u32 = 1u << 14u; +const STANDARD_MATERIAL_FLAGS_CLEARCOAT_ROUGHNESS_TEXTURE_BIT: u32 = 1u << 15u; +const STANDARD_MATERIAL_FLAGS_CLEARCOAT_NORMAL_TEXTURE_BIT: u32 = 1u << 16u; +const STANDARD_MATERIAL_FLAGS_ANISOTROPY_TEXTURE_BIT: u32 = 1u << 17u; +const STANDARD_MATERIAL_FLAGS_SPECULAR_TEXTURE_BIT: u32 = 1u << 18u; +const STANDARD_MATERIAL_FLAGS_SPECULAR_TINT_TEXTURE_BIT: u32 = 1u << 19u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_RESERVED_BITS: u32 = 7u << 29u; // (0b111u << 29u) +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_OPAQUE: u32 = 0u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_MASK: u32 = 1u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_BLEND: u32 = 2u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_PREMULTIPLIED: u32 = 3u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ADD: u32 = 4u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_MULTIPLY: u32 = 5u << 29u; +const STANDARD_MATERIAL_FLAGS_ALPHA_MODE_ALPHA_TO_COVERAGE: u32 = 
6u << 29u; + + +// Creates a StandardMaterial with default values +fn standard_material_new() -> StandardMaterial { + var material: StandardMaterial; + + // NOTE: Keep in-sync with src/pbr_material.rs! + material.base_color = vec4(1.0, 1.0, 1.0, 1.0); + material.emissive = vec4(0.0, 0.0, 0.0, 1.0); + material.perceptual_roughness = 0.5; + material.metallic = 0.00; + material.reflectance = vec3(0.5); + material.diffuse_transmission = 0.0; + material.specular_transmission = 0.0; + material.thickness = 0.0; + material.ior = 1.5; + material.attenuation_distance = 1.0; + material.attenuation_color = vec4(1.0, 1.0, 1.0, 1.0); + material.clearcoat = 0.0; + material.clearcoat_perceptual_roughness = 0.0; + material.flags = STANDARD_MATERIAL_FLAGS_ALPHA_MODE_OPAQUE; + material.alpha_cutoff = 0.5; + material.parallax_depth_scale = 0.1; + material.max_parallax_layer_count = 16.0; + material.max_relief_mapping_search_steps = 5u; + material.deferred_lighting_pass_id = 1u; + // scale 1, translation 0, rotation 0 + material.uv_transform = mat3x3(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0); + + return material; +} + +struct PbrInput { + material: StandardMaterial, + // Note: this gets monochromized upon deferred PbrInput reconstruction. + diffuse_occlusion: vec3, + // Note: this is 1.0 (entirely unoccluded) when SSAO and SSR are off. + specular_occlusion: f32, + frag_coord: vec4, + world_position: vec4, + // Normalized world normal used for shadow mapping as normal-mapping is not used for shadow + // mapping + world_normal: vec3, + // Normalized normal-mapped world normal used for lighting + N: vec3, + // Normalized view vector in world space, pointing from the fragment world position toward the + // view world position + V: vec3, + lightmap_light: vec3, + clearcoat_N: vec3, + anisotropy_strength: f32, + // These two aren't specific to anisotropy, but we only fill them in if + // we're doing anisotropy, so they're prefixed with `anisotropy_`. 
+ anisotropy_T: vec3, + anisotropy_B: vec3, + is_orthographic: bool, + flags: u32, +}; + +// Creates a PbrInput with default values +fn pbr_input_new() -> PbrInput { + var pbr_input: PbrInput; + + pbr_input.material = standard_material_new(); + pbr_input.diffuse_occlusion = vec3(1.0); + // If SSAO is enabled, then this gets overwritten with proper specular occlusion. If its not, then we get specular environment map unoccluded (we have no data with which to occlude it with). + pbr_input.specular_occlusion = 1.0; + + pbr_input.frag_coord = vec4(0.0, 0.0, 0.0, 1.0); + pbr_input.world_position = vec4(0.0, 0.0, 0.0, 1.0); + pbr_input.world_normal = vec3(0.0, 0.0, 1.0); + + pbr_input.is_orthographic = false; + + pbr_input.N = vec3(0.0, 0.0, 1.0); + pbr_input.V = vec3(1.0, 0.0, 0.0); + + pbr_input.clearcoat_N = vec3(0.0); + pbr_input.anisotropy_T = vec3(0.0); + pbr_input.anisotropy_B = vec3(0.0); + + pbr_input.lightmap_light = vec3(0.0); + + pbr_input.flags = 0u; + + return pbr_input; +} diff --git a/crates/libmarathon/src/render/pbr/render/reset_indirect_batch_sets.wgsl b/crates/libmarathon/src/render/pbr/render/reset_indirect_batch_sets.wgsl new file mode 100644 index 0000000..9309594 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/reset_indirect_batch_sets.wgsl @@ -0,0 +1,25 @@ +// Resets the indirect draw counts to zero. +// +// This shader is needed because we reuse the same indirect batch set count +// buffer (i.e. the buffer that gets passed to `multi_draw_indirect_count` to +// determine how many objects to draw) between phases (early, late, and main). +// Before launching `build_indirect_params.wgsl`, we need to reinitialize the +// value to 0. + +#import bevy_pbr::mesh_preprocess_types::IndirectBatchSet + +@group(0) @binding(0) var indirect_batch_sets: array; + +@compute +@workgroup_size(64) +fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Figure out our instance index. 
If this thread doesn't correspond to any + // index, bail. + let instance_index = global_invocation_id.x; + if (instance_index >= arrayLength(&indirect_batch_sets)) { + return; + } + + // Reset the number of batch sets to 0. + atomicStore(&indirect_batch_sets[instance_index].indirect_parameters_count, 0u); +} diff --git a/crates/libmarathon/src/render/pbr/render/rgb9e5.wgsl b/crates/libmarathon/src/render/pbr/render/rgb9e5.wgsl new file mode 100644 index 0000000..c635c83 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/rgb9e5.wgsl @@ -0,0 +1,63 @@ +#define_import_path bevy_pbr::rgb9e5 + +const RGB9E5_EXPONENT_BITS = 5u; +const RGB9E5_MANTISSA_BITS = 9; +const RGB9E5_MANTISSA_BITSU = 9u; +const RGB9E5_EXP_BIAS = 15; +const RGB9E5_MAX_VALID_BIASED_EXP = 31u; + +//#define MAX_RGB9E5_EXP (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS) +//#define RGB9E5_MANTISSA_VALUES (1< i32 { + let f = bitcast(x); + let biasedexponent = (f & 0x7F800000u) >> 23u; + return i32(biasedexponent) - 127; +} + +// https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt +fn vec3_to_rgb9e5_(rgb_in: vec3) -> u32 { + let rgb = clamp(rgb_in, vec3(0.0), vec3(MAX_RGB9E5_)); + + let maxrgb = max(rgb.r, max(rgb.g, rgb.b)); + var exp_shared = max(-RGB9E5_EXP_BIAS - 1, floor_log2_(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + var denom = exp2(f32(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + let maxm = i32(floor(maxrgb / denom + 0.5)); + if (maxm == RGB9E5_MANTISSA_VALUES) { + denom *= 2.0; + exp_shared += 1; + } + + let n = vec3(floor(rgb / denom + 0.5)); + + return (u32(exp_shared) << 27u) | (n.b << 18u) | (n.g << 9u) | (n.r << 0u); +} + +// Builtin extractBits() is not working on WEBGL or DX12 +// DX12: HLSL: Unimplemented("write_expr_math ExtractBits") +fn extract_bits(value: u32, offset: u32, bits: u32) -> u32 { + let mask = (1u << bits) - 1u; + return (value >> offset) & mask; +} + +fn rgb9e5_to_vec3_(v: u32) -> vec3 { + let exponent = 
i32(extract_bits(v, 27u, RGB9E5_EXPONENT_BITS)) - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS; + let scale = exp2(f32(exponent)); + + return vec3( + f32(extract_bits(v, 0u, RGB9E5_MANTISSA_BITSU)), + f32(extract_bits(v, 9u, RGB9E5_MANTISSA_BITSU)), + f32(extract_bits(v, 18u, RGB9E5_MANTISSA_BITSU)) + ) * scale; +} diff --git a/crates/libmarathon/src/render/pbr/render/shadow_sampling.wgsl b/crates/libmarathon/src/render/pbr/render/shadow_sampling.wgsl new file mode 100644 index 0000000..2b35e57 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/shadow_sampling.wgsl @@ -0,0 +1,599 @@ +#define_import_path bevy_pbr::shadow_sampling + +#import bevy_pbr::{ + mesh_view_bindings as view_bindings, + utils::interleaved_gradient_noise, + utils, +} +#import bevy_render::maths::{orthonormalize, PI} + +// Do the lookup, using HW 2x2 PCF and comparison +fn sample_shadow_map_hardware(light_local: vec2, depth: f32, array_index: i32) -> f32 { +#ifdef NO_ARRAY_TEXTURES_SUPPORT + return textureSampleCompare( + view_bindings::directional_shadow_textures, + view_bindings::directional_shadow_textures_comparison_sampler, + light_local, + depth, + ); +#else + return textureSampleCompareLevel( + view_bindings::directional_shadow_textures, + view_bindings::directional_shadow_textures_comparison_sampler, + light_local, + array_index, + depth, + ); +#endif +} + +// Does a single sample of the blocker search, a part of the PCSS algorithm. +// This is the variant used for directional lights. +fn search_for_blockers_in_shadow_map_hardware( + light_local: vec2, + depth: f32, + array_index: i32, +) -> vec2 { +#ifdef WEBGL2 + // Make sure that the WebGL 2 compiler doesn't see `sampled_depth` sampled + // with different samplers, or it'll blow up. 
+ return vec2(0.0); +#else // WEBGL2 + +#ifdef PCSS_SAMPLERS_AVAILABLE + +#ifdef NO_ARRAY_TEXTURES_SUPPORT + let sampled_depth = textureSampleLevel( + view_bindings::directional_shadow_textures, + view_bindings::directional_shadow_textures_linear_sampler, + light_local, + 0u, + ); +#else // NO_ARRAY_TEXTURES_SUPPORT + let sampled_depth = textureSampleLevel( + view_bindings::directional_shadow_textures, + view_bindings::directional_shadow_textures_linear_sampler, + light_local, + array_index, + 0u, + ); +#endif // NO_ARRAY_TEXTURES_SUPPORT + return select(vec2(0.0), vec2(sampled_depth, 1.0), sampled_depth >= depth); + +#else // PCSS_SAMPLERS_AVAILABLE + return vec2(0.0); +#endif // PCSS_SAMPLERS_AVAILABLE + +#endif // WEBGL2 +} + +// Numbers determined by trial and error that gave nice results. +const SPOT_SHADOW_TEXEL_SIZE: f32 = 0.0134277345; +const POINT_SHADOW_SCALE: f32 = 0.003; +const POINT_SHADOW_TEMPORAL_OFFSET_SCALE: f32 = 0.5; + +// These are the standard MSAA sample point positions from D3D. They were chosen +// to get a reasonable distribution that's not too regular. +// +// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels?redirectedfrom=MSDN +const D3D_SAMPLE_POINT_POSITIONS: array, 8> = array( + vec2( 0.125, -0.375), + vec2(-0.125, 0.375), + vec2( 0.625, 0.125), + vec2(-0.375, -0.625), + vec2(-0.625, 0.625), + vec2(-0.875, -0.125), + vec2( 0.375, 0.875), + vec2( 0.875, -0.875), +); + +// And these are the coefficients corresponding to the probability distribution +// function of a 2D Gaussian lobe with zero mean and the identity covariance +// matrix at those points. 
+const D3D_SAMPLE_POINT_COEFFS: array = array( + 0.157112, + 0.157112, + 0.138651, + 0.130251, + 0.114946, + 0.114946, + 0.107982, + 0.079001, +); + +// https://web.archive.org/web/20230210095515/http://the-witness.net/news/2013/09/shadow-mapping-summary-part-1 +fn sample_shadow_map_castano_thirteen(light_local: vec2, depth: f32, array_index: i32) -> f32 { + let shadow_map_size = vec2(textureDimensions(view_bindings::directional_shadow_textures)); + let inv_shadow_map_size = 1.0 / shadow_map_size; + + let uv = light_local * shadow_map_size; + var base_uv = floor(uv + 0.5); + let s = (uv.x + 0.5 - base_uv.x); + let t = (uv.y + 0.5 - base_uv.y); + base_uv -= 0.5; + base_uv *= inv_shadow_map_size; + + let uw0 = (4.0 - 3.0 * s); + let uw1 = 7.0; + let uw2 = (1.0 + 3.0 * s); + + let u0 = (3.0 - 2.0 * s) / uw0 - 2.0; + let u1 = (3.0 + s) / uw1; + let u2 = s / uw2 + 2.0; + + let vw0 = (4.0 - 3.0 * t); + let vw1 = 7.0; + let vw2 = (1.0 + 3.0 * t); + + let v0 = (3.0 - 2.0 * t) / vw0 - 2.0; + let v1 = (3.0 + t) / vw1; + let v2 = t / vw2 + 2.0; + + var sum = 0.0; + + sum += uw0 * vw0 * sample_shadow_map_hardware(base_uv + (vec2(u0, v0) * inv_shadow_map_size), depth, array_index); + sum += uw1 * vw0 * sample_shadow_map_hardware(base_uv + (vec2(u1, v0) * inv_shadow_map_size), depth, array_index); + sum += uw2 * vw0 * sample_shadow_map_hardware(base_uv + (vec2(u2, v0) * inv_shadow_map_size), depth, array_index); + + sum += uw0 * vw1 * sample_shadow_map_hardware(base_uv + (vec2(u0, v1) * inv_shadow_map_size), depth, array_index); + sum += uw1 * vw1 * sample_shadow_map_hardware(base_uv + (vec2(u1, v1) * inv_shadow_map_size), depth, array_index); + sum += uw2 * vw1 * sample_shadow_map_hardware(base_uv + (vec2(u2, v1) * inv_shadow_map_size), depth, array_index); + + sum += uw0 * vw2 * sample_shadow_map_hardware(base_uv + (vec2(u0, v2) * inv_shadow_map_size), depth, array_index); + sum += uw1 * vw2 * sample_shadow_map_hardware(base_uv + (vec2(u1, v2) * inv_shadow_map_size), depth, 
array_index); + sum += uw2 * vw2 * sample_shadow_map_hardware(base_uv + (vec2(u2, v2) * inv_shadow_map_size), depth, array_index); + + return sum * (1.0 / 144.0); +} + +fn map(min1: f32, max1: f32, min2: f32, max2: f32, value: f32) -> f32 { + return min2 + (value - min1) * (max2 - min2) / (max1 - min1); +} + +// Creates a random rotation matrix using interleaved gradient noise. +// +// See: https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare/ +fn random_rotation_matrix(scale: vec2, temporal: bool) -> mat2x2 { + let random_angle = 2.0 * PI * interleaved_gradient_noise( + scale, select(1u, view_bindings::globals.frame_count, temporal)); + let m = vec2(sin(random_angle), cos(random_angle)); + return mat2x2( + m.y, -m.x, + m.x, m.y + ); +} + +// Calculates the distance between spiral samples for the given texel size and +// penumbra size. This is used for the Jimenez '14 (i.e. temporal) variant of +// shadow sampling. +fn calculate_uv_offset_scale_jimenez_fourteen(texel_size: f32, blur_size: f32) -> vec2 { + let shadow_map_size = vec2(textureDimensions(view_bindings::directional_shadow_textures)); + + // Empirically chosen fudge factor to make PCF look better across different CSM cascades + let f = map(0.00390625, 0.022949219, 0.015, 0.035, texel_size); + return f * blur_size / (texel_size * shadow_map_size); +} + +fn sample_shadow_map_jimenez_fourteen( + light_local: vec2, + depth: f32, + array_index: i32, + texel_size: f32, + blur_size: f32, + temporal: bool, +) -> f32 { + let shadow_map_size = vec2(textureDimensions(view_bindings::directional_shadow_textures)); + let rotation_matrix = random_rotation_matrix(light_local * shadow_map_size, temporal); + let uv_offset_scale = calculate_uv_offset_scale_jimenez_fourteen(texel_size, blur_size); + + // https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare (slides 120-135) + let sample_offset0 = (rotation_matrix * utils::SPIRAL_OFFSET_0_) * 
uv_offset_scale; + let sample_offset1 = (rotation_matrix * utils::SPIRAL_OFFSET_1_) * uv_offset_scale; + let sample_offset2 = (rotation_matrix * utils::SPIRAL_OFFSET_2_) * uv_offset_scale; + let sample_offset3 = (rotation_matrix * utils::SPIRAL_OFFSET_3_) * uv_offset_scale; + let sample_offset4 = (rotation_matrix * utils::SPIRAL_OFFSET_4_) * uv_offset_scale; + let sample_offset5 = (rotation_matrix * utils::SPIRAL_OFFSET_5_) * uv_offset_scale; + let sample_offset6 = (rotation_matrix * utils::SPIRAL_OFFSET_6_) * uv_offset_scale; + let sample_offset7 = (rotation_matrix * utils::SPIRAL_OFFSET_7_) * uv_offset_scale; + + var sum = 0.0; + sum += sample_shadow_map_hardware(light_local + sample_offset0, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset1, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset2, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset3, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset4, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset5, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset6, depth, array_index); + sum += sample_shadow_map_hardware(light_local + sample_offset7, depth, array_index); + return sum / 8.0; +} + +// Performs the blocker search portion of percentage-closer soft shadows (PCSS). +// This is the variation used for directional lights. +// +// We can't use Castano '13 here because that has a hard-wired fixed size, while +// the PCSS algorithm requires a search size that varies based on the size of +// the light. So we instead use the D3D sample point positions, spaced according +// to the search size, to provide a sample pattern in a similar manner to the +// cubemap sampling approach we use for PCF. +// +// `search_size` is the size of the search region in texels. 
+fn search_for_blockers_in_shadow_map( + light_local: vec2, + depth: f32, + array_index: i32, + texel_size: f32, + search_size: f32, +) -> f32 { + let shadow_map_size = vec2(textureDimensions(view_bindings::directional_shadow_textures)); + let uv_offset_scale = search_size / (texel_size * shadow_map_size); + + let offset0 = D3D_SAMPLE_POINT_POSITIONS[0] * uv_offset_scale; + let offset1 = D3D_SAMPLE_POINT_POSITIONS[1] * uv_offset_scale; + let offset2 = D3D_SAMPLE_POINT_POSITIONS[2] * uv_offset_scale; + let offset3 = D3D_SAMPLE_POINT_POSITIONS[3] * uv_offset_scale; + let offset4 = D3D_SAMPLE_POINT_POSITIONS[4] * uv_offset_scale; + let offset5 = D3D_SAMPLE_POINT_POSITIONS[5] * uv_offset_scale; + let offset6 = D3D_SAMPLE_POINT_POSITIONS[6] * uv_offset_scale; + let offset7 = D3D_SAMPLE_POINT_POSITIONS[7] * uv_offset_scale; + + var sum = vec2(0.0); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset0, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset1, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset2, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset3, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset4, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset5, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset6, depth, array_index); + sum += search_for_blockers_in_shadow_map_hardware(light_local + offset7, depth, array_index); + + if (sum.y == 0.0) { + return 0.0; + } + return sum.x / sum.y; +} + +fn sample_shadow_map(light_local: vec2, depth: f32, array_index: i32, texel_size: f32) -> f32 { +#ifdef SHADOW_FILTER_METHOD_GAUSSIAN + return sample_shadow_map_castano_thirteen(light_local, depth, array_index); +#else ifdef SHADOW_FILTER_METHOD_TEMPORAL + return sample_shadow_map_jimenez_fourteen( + 
light_local, depth, array_index, texel_size, 1.0, true); +#else ifdef SHADOW_FILTER_METHOD_HARDWARE_2X2 + return sample_shadow_map_hardware(light_local, depth, array_index); +#else + // This needs a default return value to avoid shader compilation errors if it's compiled with no SHADOW_FILTER_METHOD_* defined. + // (eg. if the normal prepass is enabled it ends up compiling this due to the normal prepass depending on pbr_functions, which depends on shadows) + // This should never actually get used, as anyone using bevy's lighting/shadows should always have a SHADOW_FILTER_METHOD defined. + // Set to 0 to make it obvious that something is wrong. + return 0.0; +#endif +} + +// Samples the shadow map for a directional light when percentage-closer soft +// shadows are being used. +// +// We first search for a *blocker*, which is the average depth value of any +// shadow map samples that are adjacent to the sample we're considering. That +// allows us to determine the penumbra size; a larger gap between the blocker +// and the depth of this sample results in a wider penumbra. Finally, we sample +// the shadow map the same way we do in PCF, using that penumbra width. +// +// A good overview of the technique: +// +fn sample_shadow_map_pcss( + light_local: vec2, + depth: f32, + array_index: i32, + texel_size: f32, + light_size: f32, +) -> f32 { + // Determine the average Z value of the closest blocker. + let z_blocker = search_for_blockers_in_shadow_map( + light_local, depth, array_index, texel_size, light_size); + + // Don't let the blur size go below 0.5, or shadows will look unacceptably aliased. + let blur_size = max((z_blocker - depth) * light_size / depth, 0.5); + + // FIXME: We can't use Castano '13 here because that has a hard-wired fixed + // size. So we instead use Jimenez '14 unconditionally. In the non-temporal + // variant this is unfortunately rather noisy. 
This may be improvable in the + // future by generating a mip chain of the shadow map and using that to + // provide better blurs. +#ifdef SHADOW_FILTER_METHOD_TEMPORAL + return sample_shadow_map_jimenez_fourteen( + light_local, depth, array_index, texel_size, blur_size, true); +#else // SHADOW_FILTER_METHOD_TEMPORAL + return sample_shadow_map_jimenez_fourteen( + light_local, depth, array_index, texel_size, blur_size, false); +#endif // SHADOW_FILTER_METHOD_TEMPORAL +} + +// NOTE: Due to the non-uniform control flow in `shadows::fetch_point_shadow`, +// we must use the Level variant of textureSampleCompare to avoid undefined +// behavior due to some of the fragments in a quad (2x2 fragments) being +// processed not being sampled, and this messing with mip-mapping functionality. +// The shadow maps have no mipmaps so Level just samples from LOD 0. +fn sample_shadow_cubemap_hardware(light_local: vec3, depth: f32, light_id: u32) -> f32 { +#ifdef NO_CUBE_ARRAY_TEXTURES_SUPPORT + return textureSampleCompare( + view_bindings::point_shadow_textures, + view_bindings::point_shadow_textures_comparison_sampler, + light_local, + depth + ); +#else + return textureSampleCompareLevel( + view_bindings::point_shadow_textures, + view_bindings::point_shadow_textures_comparison_sampler, + light_local, + i32(light_id), + depth + ); +#endif +} + +// Performs one sample of the blocker search. This variation of the blocker +// search function is for point and spot lights. +fn search_for_blockers_in_shadow_cubemap_hardware( + light_local: vec3, + depth: f32, + light_id: u32, +) -> vec2 { +#ifdef WEBGL2 + // Make sure that the WebGL 2 compiler doesn't see `sampled_depth` sampled + // with different samplers, or it'll blow up. 
+ return vec2(0.0); +#else // WEBGL2 + +#ifdef PCSS_SAMPLERS_AVAILABLE + +#ifdef NO_CUBE_ARRAY_TEXTURES_SUPPORT + let sampled_depth = textureSample( + view_bindings::point_shadow_textures, + view_bindings::point_shadow_textures_linear_sampler, + light_local, + ); +#else + let sampled_depth = textureSample( + view_bindings::point_shadow_textures, + view_bindings::point_shadow_textures_linear_sampler, + light_local, + i32(light_id), + ); +#endif + + return select(vec2(0.0), vec2(sampled_depth, 1.0), sampled_depth >= depth); + +#else // PCSS_SAMPLERS_AVAILABLE + return vec2(0.0); +#endif // PCSS_SAMPLERS_AVAILABLE + +#endif // WEBGL2 +} + +fn sample_shadow_cubemap_at_offset( + position: vec2, + coeff: f32, + x_basis: vec3, + y_basis: vec3, + light_local: vec3, + depth: f32, + light_id: u32, +) -> f32 { + return sample_shadow_cubemap_hardware( + light_local + position.x * x_basis + position.y * y_basis, + depth, + light_id + ) * coeff; +} + +// Computes the search position and performs one sample of the blocker search. +// This variation of the blocker search function is for point and spot lights. +// +// `x_basis`, `y_basis`, and `light_local` form an orthonormal basis over which +// the blocker search happens. +fn search_for_blockers_in_shadow_cubemap_at_offset( + position: vec2, + x_basis: vec3, + y_basis: vec3, + light_local: vec3, + depth: f32, + light_id: u32, +) -> vec2 { + return search_for_blockers_in_shadow_cubemap_hardware( + light_local + position.x * x_basis + position.y * y_basis, + depth, + light_id + ); +} + +// This more or less does what Castano13 does, but in 3D space. Castano13 is +// essentially an optimized 2D Gaussian filter that takes advantage of the +// bilinear filtering hardware to reduce the number of samples needed. This +// trick doesn't apply to cubemaps, so we manually apply a Gaussian filter over +// the standard 8xMSAA pattern instead. 
+fn sample_shadow_cubemap_gaussian( + light_local: vec3, + depth: f32, + scale: f32, + distance_to_light: f32, + light_id: u32, +) -> f32 { + // Create an orthonormal basis so we can apply a 2D sampling pattern to a + // cubemap. + let basis = orthonormalize(normalize(light_local)) * scale * distance_to_light; + + var sum: f32 = 0.0; + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[0], D3D_SAMPLE_POINT_COEFFS[0], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[1], D3D_SAMPLE_POINT_COEFFS[1], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[2], D3D_SAMPLE_POINT_COEFFS[2], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[3], D3D_SAMPLE_POINT_COEFFS[3], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[4], D3D_SAMPLE_POINT_COEFFS[4], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[5], D3D_SAMPLE_POINT_COEFFS[5], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[6], D3D_SAMPLE_POINT_COEFFS[6], + basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[7], D3D_SAMPLE_POINT_COEFFS[7], + basis[0], basis[1], light_local, depth, light_id); + return sum; +} + +// This is a port of the Jimenez14 filter above to the 3D space. It jitters the +// points in the spiral pattern after first creating a 2D orthonormal basis +// along the principal light direction. 
+fn sample_shadow_cubemap_jittered( + light_local: vec3, + depth: f32, + scale: f32, + distance_to_light: f32, + light_id: u32, + temporal: bool, +) -> f32 { + // Create an orthonormal basis so we can apply a 2D sampling pattern to a + // cubemap. + let basis = orthonormalize(normalize(light_local)) * scale * distance_to_light; + + let rotation_matrix = random_rotation_matrix(vec2(1.0), temporal); + + let sample_offset0 = rotation_matrix * utils::SPIRAL_OFFSET_0_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset1 = rotation_matrix * utils::SPIRAL_OFFSET_1_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset2 = rotation_matrix * utils::SPIRAL_OFFSET_2_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset3 = rotation_matrix * utils::SPIRAL_OFFSET_3_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset4 = rotation_matrix * utils::SPIRAL_OFFSET_4_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset5 = rotation_matrix * utils::SPIRAL_OFFSET_5_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset6 = rotation_matrix * utils::SPIRAL_OFFSET_6_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + let sample_offset7 = rotation_matrix * utils::SPIRAL_OFFSET_7_ * + POINT_SHADOW_TEMPORAL_OFFSET_SCALE; + + var sum: f32 = 0.0; + sum += sample_shadow_cubemap_at_offset( + sample_offset0, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset1, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset2, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset3, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset4, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset5, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += 
sample_shadow_cubemap_at_offset( + sample_offset6, 0.125, basis[0], basis[1], light_local, depth, light_id); + sum += sample_shadow_cubemap_at_offset( + sample_offset7, 0.125, basis[0], basis[1], light_local, depth, light_id); + return sum; +} + +fn sample_shadow_cubemap( + light_local: vec3, + distance_to_light: f32, + depth: f32, + light_id: u32, +) -> f32 { +#ifdef SHADOW_FILTER_METHOD_GAUSSIAN + return sample_shadow_cubemap_gaussian( + light_local, depth, POINT_SHADOW_SCALE, distance_to_light, light_id); +#else ifdef SHADOW_FILTER_METHOD_TEMPORAL + return sample_shadow_cubemap_jittered( + light_local, depth, POINT_SHADOW_SCALE, distance_to_light, light_id, true); +#else ifdef SHADOW_FILTER_METHOD_HARDWARE_2X2 + return sample_shadow_cubemap_hardware(light_local, depth, light_id); +#else + // This needs a default return value to avoid shader compilation errors if it's compiled with no SHADOW_FILTER_METHOD_* defined. + // (eg. if the normal prepass is enabled it ends up compiling this due to the normal prepass depending on pbr_functions, which depends on shadows) + // This should never actually get used, as anyone using bevy's lighting/shadows should always have a SHADOW_FILTER_METHOD defined. + // Set to 0 to make it obvious that something is wrong. + return 0.0; +#endif +} + +// Searches for PCSS blockers in a cubemap. This is the variant of the blocker +// search used for point and spot lights. +// +// This follows the logic in `sample_shadow_cubemap_gaussian`, but uses linear +// sampling instead of percentage-closer filtering. +// +// The `scale` parameter represents the size of the light. +fn search_for_blockers_in_shadow_cubemap( + light_local: vec3, + depth: f32, + scale: f32, + distance_to_light: f32, + light_id: u32, +) -> f32 { + // Create an orthonormal basis so we can apply a 2D sampling pattern to a + // cubemap. 
+ let basis = orthonormalize(normalize(light_local)) * scale * distance_to_light; + + var sum: vec2 = vec2(0.0); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[0], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[1], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[2], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[3], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[4], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[5], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[6], basis[0], basis[1], light_local, depth, light_id); + sum += search_for_blockers_in_shadow_cubemap_at_offset( + D3D_SAMPLE_POINT_POSITIONS[7], basis[0], basis[1], light_local, depth, light_id); + + if (sum.y == 0.0) { + return 0.0; + } + return sum.x / sum.y; +} + +// Samples the shadow map for a point or spot light when percentage-closer soft +// shadows are being used. +// +// A good overview of the technique: +// +fn sample_shadow_cubemap_pcss( + light_local: vec3, + distance_to_light: f32, + depth: f32, + light_id: u32, + light_size: f32, +) -> f32 { + let z_blocker = search_for_blockers_in_shadow_cubemap( + light_local, depth, light_size, distance_to_light, light_id); + + // Don't let the blur size go below 0.5, or shadows will look unacceptably aliased. 
+ let blur_size = max((z_blocker - depth) * light_size / depth, 0.5); + +#ifdef SHADOW_FILTER_METHOD_TEMPORAL + return sample_shadow_cubemap_jittered( + light_local, depth, POINT_SHADOW_SCALE * blur_size, distance_to_light, light_id, true); +#else + return sample_shadow_cubemap_jittered( + light_local, depth, POINT_SHADOW_SCALE * blur_size, distance_to_light, light_id, false); +#endif +} diff --git a/crates/libmarathon/src/render/pbr/render/shadows.wgsl b/crates/libmarathon/src/render/pbr/render/shadows.wgsl new file mode 100644 index 0000000..a3727b4 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/shadows.wgsl @@ -0,0 +1,231 @@ +#define_import_path bevy_pbr::shadows + +#import bevy_pbr::{ + mesh_view_types::POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE, + mesh_view_bindings as view_bindings, + shadow_sampling::{ + SPOT_SHADOW_TEXEL_SIZE, sample_shadow_cubemap, sample_shadow_cubemap_pcss, + sample_shadow_map, sample_shadow_map_pcss, + } +} + +#import bevy_render::{ + color_operations::hsv_to_rgb, + maths::{orthonormalize, PI_2} +} + +const flip_z: vec3 = vec3(1.0, 1.0, -1.0); + +fn fetch_point_shadow(light_id: u32, frag_position: vec4, surface_normal: vec3) -> f32 { + let light = &view_bindings::clusterable_objects.data[light_id]; + + // because the shadow maps align with the axes and the frustum planes are at 45 degrees + // we can get the worldspace depth by taking the largest absolute axis + let surface_to_light = (*light).position_radius.xyz - frag_position.xyz; + let surface_to_light_abs = abs(surface_to_light); + let distance_to_light = max(surface_to_light_abs.x, max(surface_to_light_abs.y, surface_to_light_abs.z)); + + // The normal bias here is already scaled by the texel size at 1 world unit from the light. + // The texel size increases proportionally with distance from the light so multiplying by + // distance to light scales the normal bias to the texel size at the fragment distance. 
+ let normal_offset = (*light).shadow_normal_bias * distance_to_light * surface_normal.xyz; + let depth_offset = (*light).shadow_depth_bias * normalize(surface_to_light.xyz); + let offset_position = frag_position.xyz + normal_offset + depth_offset; + + // similar largest-absolute-axis trick as above, but now with the offset fragment position + let frag_ls = offset_position.xyz - (*light).position_radius.xyz ; + let abs_position_ls = abs(frag_ls); + let major_axis_magnitude = max(abs_position_ls.x, max(abs_position_ls.y, abs_position_ls.z)); + + // NOTE: These simplifications come from multiplying: + // projection * vec4(0, 0, -major_axis_magnitude, 1.0) + // and keeping only the terms that have any impact on the depth. + // Projection-agnostic approach: + let zw = -major_axis_magnitude * (*light).light_custom_data.xy + (*light).light_custom_data.zw; + let depth = zw.x / zw.y; + + // If soft shadows are enabled, use the PCSS path. Cubemaps assume a + // left-handed coordinate space, so we have to flip the z-axis when + // sampling. + if ((*light).soft_shadow_size > 0.0) { + return sample_shadow_cubemap_pcss( + frag_ls * flip_z, + distance_to_light, + depth, + light_id, + (*light).soft_shadow_size, + ); + } + + // Do the lookup, using HW PCF and comparison. Cubemaps assume a left-handed + // coordinate space, so we have to flip the z-axis when sampling. 
+ return sample_shadow_cubemap(frag_ls * flip_z, distance_to_light, depth, light_id); +} + +fn fetch_spot_shadow( + light_id: u32, + frag_position: vec4, + surface_normal: vec3, + near_z: f32, +) -> f32 { + let light = &view_bindings::clusterable_objects.data[light_id]; + + let surface_to_light = (*light).position_radius.xyz - frag_position.xyz; + + // construct the light view matrix + var spot_dir = vec3((*light).light_custom_data.x, 0.0, (*light).light_custom_data.y); + // reconstruct spot dir from x/z and y-direction flag + spot_dir.y = sqrt(max(0.0, 1.0 - spot_dir.x * spot_dir.x - spot_dir.z * spot_dir.z)); + if (((*light).flags & POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE) != 0u) { + spot_dir.y = -spot_dir.y; + } + + // view matrix z_axis is the reverse of transform.forward() + let fwd = -spot_dir; + let distance_to_light = dot(fwd, surface_to_light); + let offset_position = + -surface_to_light + + ((*light).shadow_depth_bias * normalize(surface_to_light)) + + (surface_normal.xyz * (*light).shadow_normal_bias) * distance_to_light; + + let light_inv_rot = orthonormalize(fwd); + + // because the matrix is a pure rotation matrix, the inverse is just the transpose, and to calculate + // the product of the transpose with a vector we can just post-multiply instead of pre-multiplying. + // this allows us to keep the matrix construction code identical between CPU and GPU. + let projected_position = offset_position * light_inv_rot; + + // divide xy by perspective matrix "f" and by -projected.z (projected.z is -projection matrix's w) + // to get ndc coordinates + let f_div_minus_z = 1.0 / ((*light).spot_light_tan_angle * -projected_position.z); + let shadow_xy_ndc = projected_position.xy * f_div_minus_z; + // convert to uv coordinates + let shadow_uv = shadow_xy_ndc * vec2(0.5, -0.5) + vec2(0.5, 0.5); + + let depth = near_z / -projected_position.z; + + // If soft shadows are enabled, use the PCSS path. 
+ let array_index = i32(light_id) + view_bindings::lights.spot_light_shadowmap_offset; + if ((*light).soft_shadow_size > 0.0) { + return sample_shadow_map_pcss( + shadow_uv, depth, array_index, SPOT_SHADOW_TEXEL_SIZE, (*light).soft_shadow_size); + } + + return sample_shadow_map(shadow_uv, depth, array_index, SPOT_SHADOW_TEXEL_SIZE); +} + +fn get_cascade_index(light_id: u32, view_z: f32) -> u32 { + let light = &view_bindings::lights.directional_lights[light_id]; + + for (var i: u32 = 0u; i < (*light).num_cascades; i = i + 1u) { + if (-view_z < (*light).cascades[i].far_bound) { + return i; + } + } + return (*light).num_cascades; +} + +// Converts from world space to the uv position in the light's shadow map. +// +// The depth is stored in the return value's z coordinate. If the return value's +// w coordinate is 0.0, then we landed outside the shadow map entirely. +fn world_to_directional_light_local( + light_id: u32, + cascade_index: u32, + offset_position: vec4 +) -> vec4 { + let light = &view_bindings::lights.directional_lights[light_id]; + let cascade = &(*light).cascades[cascade_index]; + + let offset_position_clip = (*cascade).clip_from_world * offset_position; + if (offset_position_clip.w <= 0.0) { + return vec4(0.0); + } + let offset_position_ndc = offset_position_clip.xyz / offset_position_clip.w; + // No shadow outside the orthographic projection volume + if (any(offset_position_ndc.xy < vec2(-1.0)) || offset_position_ndc.z < 0.0 + || any(offset_position_ndc > vec3(1.0))) { + return vec4(0.0); + } + + // compute texture coordinates for shadow lookup, compensating for the Y-flip difference + // between the NDC and texture coordinates + let flip_correction = vec2(0.5, -0.5); + let light_local = offset_position_ndc.xy * flip_correction + vec2(0.5, 0.5); + + let depth = offset_position_ndc.z; + + return vec4(light_local, depth, 1.0); +} + +fn sample_directional_cascade( + light_id: u32, + cascade_index: u32, + frag_position: vec4, + surface_normal: vec3, +) -> 
f32 { + let light = &view_bindings::lights.directional_lights[light_id]; + let cascade = &(*light).cascades[cascade_index]; + + // The normal bias is scaled to the texel size. + let normal_offset = (*light).shadow_normal_bias * (*cascade).texel_size * surface_normal.xyz; + let depth_offset = (*light).shadow_depth_bias * (*light).direction_to_light.xyz; + let offset_position = vec4(frag_position.xyz + normal_offset + depth_offset, frag_position.w); + + let light_local = world_to_directional_light_local(light_id, cascade_index, offset_position); + if (light_local.w == 0.0) { + return 1.0; + } + + let array_index = i32((*light).depth_texture_base_index + cascade_index); + let texel_size = (*cascade).texel_size; + + // If soft shadows are enabled, use the PCSS path. + if ((*light).soft_shadow_size > 0.0) { + return sample_shadow_map_pcss( + light_local.xy, light_local.z, array_index, texel_size, (*light).soft_shadow_size); + } + + return sample_shadow_map(light_local.xy, light_local.z, array_index, texel_size); +} + +fn fetch_directional_shadow(light_id: u32, frag_position: vec4, surface_normal: vec3, view_z: f32) -> f32 { + let light = &view_bindings::lights.directional_lights[light_id]; + let cascade_index = get_cascade_index(light_id, view_z); + + if (cascade_index >= (*light).num_cascades) { + return 1.0; + } + + var shadow = sample_directional_cascade(light_id, cascade_index, frag_position, surface_normal); + + // Blend with the next cascade, if there is one. 
+ let next_cascade_index = cascade_index + 1u; + if (next_cascade_index < (*light).num_cascades) { + let this_far_bound = (*light).cascades[cascade_index].far_bound; + let next_near_bound = (1.0 - (*light).cascades_overlap_proportion) * this_far_bound; + if (-view_z >= next_near_bound) { + let next_shadow = sample_directional_cascade(light_id, next_cascade_index, frag_position, surface_normal); + shadow = mix(shadow, next_shadow, (-view_z - next_near_bound) / (this_far_bound - next_near_bound)); + } + } + return shadow; +} + +fn cascade_debug_visualization( + output_color: vec3, + light_id: u32, + view_z: f32, +) -> vec3 { + let overlay_alpha = 0.95; + let cascade_index = get_cascade_index(light_id, view_z); + let cascade_color_hsv = vec3( + f32(cascade_index) / f32(#{MAX_CASCADES_PER_LIGHT}u + 1u) * PI_2, + 1.0, + 0.5 + ); + let cascade_color = hsv_to_rgb(cascade_color_hsv); + return vec3( + (1.0 - overlay_alpha) * output_color.rgb + overlay_alpha * cascade_color + ); +} diff --git a/crates/libmarathon/src/render/pbr/render/skin.rs b/crates/libmarathon/src/render/pbr/render/skin.rs new file mode 100644 index 0000000..9b26f4e --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/skin.rs @@ -0,0 +1,623 @@ +use core::mem::{self, size_of}; +use std::sync::OnceLock; + +use bevy_asset::{prelude::AssetChanged, Assets}; +use bevy_camera::visibility::ViewVisibility; +use bevy_ecs::prelude::*; +use bevy_math::Mat4; +use bevy_mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}; +use bevy_platform::collections::hash_map::Entry; +use crate::render::render_resource::{Buffer, BufferDescriptor}; +use crate::render::sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet}; +use crate::render::{ + batching::NoAutomaticBatching, + render_resource::BufferUsages, + renderer::{RenderDevice, RenderQueue}, + Extract, +}; +use bevy_transform::prelude::GlobalTransform; +use offset_allocator::{Allocation, Allocator}; +use smallvec::SmallVec; +use tracing::error; + +/// 
Maximum number of joints supported for skinned meshes. +/// +/// It is used to allocate buffers. +/// The correctness of the value depends on the GPU/platform. +/// The current value is chosen because it is guaranteed to work everywhere. +/// To allow for bigger values, a check must be made for the limits +/// of the GPU at runtime, which would mean not using consts anymore. +pub const MAX_JOINTS: usize = 256; + +/// The total number of joints we support. +/// +/// This is 256 GiB worth of joint matrices, which we will never hit under any +/// reasonable circumstances. +const MAX_TOTAL_JOINTS: u32 = 1024 * 1024 * 1024; + +/// The number of joints that we allocate at a time. +/// +/// Some hardware requires that uniforms be allocated on 256-byte boundaries, so +/// we need to allocate 4 64-byte matrices at a time to satisfy alignment +/// requirements. +const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::()) as u32; + +/// The maximum ratio of the number of entities whose transforms changed to the +/// total number of joints before we re-extract all joints. +/// +/// We use this as a heuristic to decide whether it's worth switching over to +/// fine-grained detection to determine which skins need extraction. If the +/// number of changed entities is over this threshold, we skip change detection +/// and simply re-extract the transforms of all joints. +const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25; + +/// The location of the first joint matrix in the skin uniform buffer. +#[derive(Clone, Copy)] +pub struct SkinByteOffset { + /// The byte offset of the first joint matrix. + pub byte_offset: u32, +} + +impl SkinByteOffset { + /// Index to be in address space based on the size of a skin uniform. + const fn from_index(index: usize) -> Self { + SkinByteOffset { + byte_offset: (index * size_of::()) as u32, + } + } + + /// Returns this skin index in elements (not bytes). + /// + /// Each element is a 4x4 matrix. 
+ pub fn index(&self) -> u32 { + self.byte_offset / size_of::() as u32 + } +} + +/// The GPU buffers containing joint matrices for all skinned meshes. +/// +/// This is double-buffered: we store the joint matrices of each mesh for the +/// previous frame in addition to those of each mesh for the current frame. This +/// is for motion vector calculation. Every frame, we swap buffers and overwrite +/// the joint matrix buffer from two frames ago with the data for the current +/// frame. +/// +/// Notes on implementation: see comment on top of the `extract_skins` system. +#[derive(Resource)] +pub struct SkinUniforms { + /// The CPU-side buffer that stores the joint matrices for skinned meshes in + /// the current frame. + pub current_staging_buffer: Vec, + /// The GPU-side buffer that stores the joint matrices for skinned meshes in + /// the current frame. + pub current_buffer: Buffer, + /// The GPU-side buffer that stores the joint matrices for skinned meshes in + /// the previous frame. + pub prev_buffer: Buffer, + /// The offset allocator that manages the placement of the joints within the + /// [`Self::current_buffer`]. + allocator: Allocator, + /// Allocation information that we keep about each skin. + skin_uniform_info: MainEntityHashMap, + /// Maps each joint entity to the skins it's associated with. + /// + /// We use this in conjunction with change detection to only update the + /// skins that need updating each frame. + /// + /// Note that conceptually this is a hash map of sets, but we use a + /// [`SmallVec`] to avoid allocations for the vast majority of the cases in + /// which each bone belongs to exactly one skin. + joint_to_skins: MainEntityHashMap>, + /// The total number of joints in the scene. + /// + /// We use this as part of our heuristic to decide whether to use + /// fine-grained change detection. 
+ total_joints: usize, +} + +impl FromWorld for SkinUniforms { + fn from_world(world: &mut World) -> Self { + let device = world.resource::(); + let buffer_usages = (if skins_use_uniform_buffers(device) { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + }) | BufferUsages::COPY_DST; + + // Create the current and previous buffer with the minimum sizes. + // + // These will be swapped every frame. + let current_buffer = device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + size: MAX_JOINTS as u64 * size_of::() as u64, + usage: buffer_usages, + mapped_at_creation: false, + }); + let prev_buffer = device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + size: MAX_JOINTS as u64 * size_of::() as u64, + usage: buffer_usages, + mapped_at_creation: false, + }); + + Self { + current_staging_buffer: vec![], + current_buffer, + prev_buffer, + allocator: Allocator::new(MAX_TOTAL_JOINTS), + skin_uniform_info: MainEntityHashMap::default(), + joint_to_skins: MainEntityHashMap::default(), + total_joints: 0, + } + } +} + +impl SkinUniforms { + /// Returns the current offset in joints of the skin in the buffer. + pub fn skin_index(&self, skin: MainEntity) -> Option { + self.skin_uniform_info + .get(&skin) + .map(SkinUniformInfo::offset) + } + + /// Returns the current offset in bytes of the skin in the buffer. + pub fn skin_byte_offset(&self, skin: MainEntity) -> Option { + self.skin_uniform_info.get(&skin).map(|skin_uniform_info| { + SkinByteOffset::from_index(skin_uniform_info.offset() as usize) + }) + } + + /// Returns an iterator over all skins in the scene. + pub fn all_skins(&self) -> impl Iterator { + self.skin_uniform_info.keys() + } +} + +/// Allocation information about each skin. +struct SkinUniformInfo { + /// The allocation of the joints within the [`SkinUniforms::current_buffer`]. + allocation: Allocation, + /// The entities that comprise the joints. 
+ joints: Vec, +} + +impl SkinUniformInfo { + /// The offset in joints within the [`SkinUniforms::current_staging_buffer`]. + fn offset(&self) -> u32 { + self.allocation.offset * JOINTS_PER_ALLOCATION_UNIT + } +} + +/// Returns true if skinning must use uniforms (and dynamic offsets) because +/// storage buffers aren't supported on the current platform. +pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool { + static SKINS_USE_UNIFORM_BUFFERS: OnceLock = OnceLock::new(); + *SKINS_USE_UNIFORM_BUFFERS + .get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0) +} + +/// Uploads the buffers containing the joints to the GPU. +pub fn prepare_skins( + render_device: Res, + render_queue: Res, + uniform: ResMut, +) { + let uniform = uniform.into_inner(); + + if uniform.current_staging_buffer.is_empty() { + return; + } + + // Swap current and previous buffers. + mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); + + // Resize the buffers if necessary. Include extra space equal to `MAX_JOINTS` + // because we need to be able to bind a full uniform buffer's worth of data + // if skins use uniform buffers on this platform. + let needed_size = (uniform.current_staging_buffer.len() as u64 + MAX_JOINTS as u64) + * size_of::() as u64; + if uniform.current_buffer.size() < needed_size { + let mut new_size = uniform.current_buffer.size(); + while new_size < needed_size { + // 1.5× growth factor. + new_size = (new_size + new_size / 2).next_multiple_of(4); + } + + // Create the new buffers. 
+ let buffer_usages = if skins_use_uniform_buffers(&render_device) { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + } | BufferUsages::COPY_DST; + uniform.current_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + usage: buffer_usages, + size: new_size, + mapped_at_creation: false, + }); + uniform.prev_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + usage: buffer_usages, + size: new_size, + mapped_at_creation: false, + }); + + // We've created a new `prev_buffer` but we don't have the previous joint + // data needed to fill it out correctly. Use the current joint data + // instead. + // + // TODO: This is a bug - will cause motion blur to ignore joint movement + // for one frame. + render_queue.write_buffer( + &uniform.prev_buffer, + 0, + bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]), + ); + } + + // Write the data from `uniform.current_staging_buffer` into + // `uniform.current_buffer`. + render_queue.write_buffer( + &uniform.current_buffer, + 0, + bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]), + ); + + // We don't need to write `uniform.prev_buffer` because we already wrote it + // last frame, and the data should still be on the GPU. +} + +// Notes on implementation: +// We define the uniform binding as an array, N> in the shader, +// where N is the maximum number of Mat4s we can fit in the uniform binding, +// which may be as little as 16kB or 64kB. But, we may not need all N. +// We may only need, for example, 10. 
+// +// If we used uniform buffers ‘normally’ then we would have to write a full +// binding of data for each dynamic offset binding, which is wasteful, makes +// the buffer much larger than it needs to be, and uses more memory bandwidth +// to transfer the data, which then costs frame time So @superdump came up +// with this design: just bind data at the specified offset and interpret +// the data at that offset as an array regardless of what is there. +// +// So instead of writing N Mat4s when you only need 10, you write 10, and +// then pad up to the next dynamic offset alignment. Then write the next. +// And for the last dynamic offset binding, make sure there is a full binding +// of data after it so that the buffer is of size +// `last dynamic offset` + `array>`. +// +// Then when binding the first dynamic offset, the first 10 entries in the array +// are what you expect, but if you read the 11th you’re reading ‘invalid’ data +// which could be padding or could be from the next binding. +// +// In this way, we can pack ‘variable sized arrays’ into uniform buffer bindings +// which normally only support fixed size arrays. You just have to make sure +// in the shader that you only read the values that are valid for that binding. +pub fn extract_skins( + skin_uniforms: ResMut, + skinned_meshes: Extract>, + changed_skinned_meshes: Extract< + Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<( + Changed, + Changed, + AssetChanged, + )>, + >, + >, + skinned_mesh_inverse_bindposes: Extract>>, + changed_transforms: Extract>>, + joints: Extract>, + mut removed_skinned_meshes_query: Extract>, +) { + let skin_uniforms = skin_uniforms.into_inner(); + + // Find skins that have become visible or invisible on this frame. Allocate, + // reallocate, or free space for them as necessary. 
+ add_or_delete_skins( + skin_uniforms, + &changed_skinned_meshes, + &skinned_mesh_inverse_bindposes, + &joints, + ); + + // Extract the transforms for all joints from the scene, and write them into + // the staging buffer at the appropriate spot. + extract_joints( + skin_uniforms, + &skinned_meshes, + &changed_skinned_meshes, + &skinned_mesh_inverse_bindposes, + &changed_transforms, + &joints, + ); + + // Delete skins that became invisible. + for skinned_mesh_entity in removed_skinned_meshes_query.read() { + // Only remove a skin if we didn't pick it up in `add_or_delete_skins`. + // It's possible that a necessary component was removed and re-added in + // the same frame. + if !changed_skinned_meshes.contains(skinned_mesh_entity) { + remove_skin(skin_uniforms, skinned_mesh_entity.into()); + } + } +} + +/// Searches for all skins that have become visible or invisible this frame and +/// allocations for them as necessary. +fn add_or_delete_skins( + skin_uniforms: &mut SkinUniforms, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<( + Changed, + Changed, + AssetChanged, + )>, + >, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // Find every skinned mesh that changed one of (1) visibility; (2) joint + // entities (part of `SkinnedMesh`); (3) the associated + // `SkinnedMeshInverseBindposes` asset. + for (skinned_mesh_entity, skinned_mesh_view_visibility, skinned_mesh) in changed_skinned_meshes + { + // Remove the skin if it existed last frame. + let skinned_mesh_entity = MainEntity::from(skinned_mesh_entity); + remove_skin(skin_uniforms, skinned_mesh_entity); + + // If the skin is invisible, we're done. + if !(*skinned_mesh_view_visibility).get() { + continue; + } + + // Initialize the skin. 
+ add_skin( + skinned_mesh_entity, + skinned_mesh, + skin_uniforms, + skinned_mesh_inverse_bindposes, + joints, + ); + } +} + +/// Extracts the global transforms of all joints and updates the staging buffer +/// as necessary. +fn extract_joints( + skin_uniforms: &mut SkinUniforms, + skinned_meshes: &Query<(Entity, &SkinnedMesh)>, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<( + Changed, + Changed, + AssetChanged, + )>, + >, + skinned_mesh_inverse_bindposes: &Assets, + changed_transforms: &Query<(Entity, &GlobalTransform), Changed>, + joints: &Query<&GlobalTransform>, +) { + // If the number of entities that changed transforms exceeds a certain + // fraction (currently 25%) of the total joints in the scene, then skip + // fine-grained change detection. + // + // Note that this is a crude heuristic, for performance reasons. It doesn't + // consider the ratio of modified *joints* to total joints, only the ratio + // of modified *entities* to total joints. Thus in the worst case we might + // end up re-extracting all skins even though none of the joints changed. + // But making the heuristic finer-grained would make it slower to evaluate, + // and we don't want to lose performance. + let threshold = + (skin_uniforms.total_joints as f64 * JOINT_EXTRACTION_THRESHOLD_FACTOR).floor() as usize; + + if changed_transforms.iter().nth(threshold).is_some() { + // Go ahead and re-extract all skins in the scene. + for (skin_entity, skin) in skinned_meshes { + extract_joints_for_skin( + skin_entity.into(), + skin, + skin_uniforms, + changed_skinned_meshes, + skinned_mesh_inverse_bindposes, + joints, + ); + } + return; + } + + // Use fine-grained change detection to figure out only the skins that need + // to have their joints re-extracted. 
+ let dirty_skins: MainEntityHashSet = changed_transforms + .iter() + .flat_map(|(joint, _)| skin_uniforms.joint_to_skins.get(&MainEntity::from(joint))) + .flat_map(|skin_joint_mappings| skin_joint_mappings.iter()) + .copied() + .collect(); + + // Re-extract the joints for only those skins. + for skin_entity in dirty_skins { + let Ok((_, skin)) = skinned_meshes.get(*skin_entity) else { + continue; + }; + extract_joints_for_skin( + skin_entity, + skin, + skin_uniforms, + changed_skinned_meshes, + skinned_mesh_inverse_bindposes, + joints, + ); + } +} + +/// Extracts all joints for a single skin and writes their transforms into the +/// CPU staging buffer. +fn extract_joints_for_skin( + skin_entity: MainEntity, + skin: &SkinnedMesh, + skin_uniforms: &mut SkinUniforms, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<( + Changed, + Changed, + AssetChanged, + )>, + >, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // If we initialized the skin this frame, we already populated all + // the joints, so there's no need to populate them again. + if changed_skinned_meshes.contains(*skin_entity) { + return; + } + + // Fetch information about the skin. + let Some(skin_uniform_info) = skin_uniforms.skin_uniform_info.get(&skin_entity) else { + return; + }; + let Some(skinned_mesh_inverse_bindposes) = + skinned_mesh_inverse_bindposes.get(&skin.inverse_bindposes) + else { + return; + }; + + // Calculate and write in the new joint matrices. 
+ for (joint_index, (&joint, skinned_mesh_inverse_bindpose)) in skin + .joints + .iter() + .zip(skinned_mesh_inverse_bindposes.iter()) + .enumerate() + { + let Ok(joint_transform) = joints.get(joint) else { + continue; + }; + + let joint_matrix = joint_transform.affine() * *skinned_mesh_inverse_bindpose; + skin_uniforms.current_staging_buffer[skin_uniform_info.offset() as usize + joint_index] = + joint_matrix; + } +} + +/// Allocates space for a new skin in the buffers, and populates its joints. +fn add_skin( + skinned_mesh_entity: MainEntity, + skinned_mesh: &SkinnedMesh, + skin_uniforms: &mut SkinUniforms, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // Allocate space for the joints. + let Some(allocation) = skin_uniforms.allocator.allocate( + skinned_mesh + .joints + .len() + .div_ceil(JOINTS_PER_ALLOCATION_UNIT as usize) as u32, + ) else { + error!( + "Out of space for skin: {:?}. Tried to allocate space for {:?} joints.", + skinned_mesh_entity, + skinned_mesh.joints.len() + ); + return; + }; + + // Store that allocation. + let skin_uniform_info = SkinUniformInfo { + allocation, + joints: skinned_mesh + .joints + .iter() + .map(|entity| MainEntity::from(*entity)) + .collect(), + }; + + let skinned_mesh_inverse_bindposes = + skinned_mesh_inverse_bindposes.get(&skinned_mesh.inverse_bindposes); + + for (joint_index, &joint) in skinned_mesh.joints.iter().enumerate() { + // Calculate the initial joint matrix. + let skinned_mesh_inverse_bindpose = + skinned_mesh_inverse_bindposes.and_then(|skinned_mesh_inverse_bindposes| { + skinned_mesh_inverse_bindposes.get(joint_index) + }); + let joint_matrix = match (skinned_mesh_inverse_bindpose, joints.get(joint)) { + (Some(skinned_mesh_inverse_bindpose), Ok(transform)) => { + transform.affine() * *skinned_mesh_inverse_bindpose + } + _ => Mat4::IDENTITY, + }; + + // Write in the new joint matrix, growing the staging buffer if + // necessary. 
+ let buffer_index = skin_uniform_info.offset() as usize + joint_index; + if skin_uniforms.current_staging_buffer.len() < buffer_index + 1 { + skin_uniforms + .current_staging_buffer + .resize(buffer_index + 1, Mat4::IDENTITY); + } + skin_uniforms.current_staging_buffer[buffer_index] = joint_matrix; + + // Record the inverse mapping from the joint back to the skin. We use + // this in order to perform fine-grained joint extraction. + skin_uniforms + .joint_to_skins + .entry(MainEntity::from(joint)) + .or_default() + .push(skinned_mesh_entity); + } + + // Record the number of joints. + skin_uniforms.total_joints += skinned_mesh.joints.len(); + + skin_uniforms + .skin_uniform_info + .insert(skinned_mesh_entity, skin_uniform_info); +} + +/// Deallocates a skin and removes it from the [`SkinUniforms`]. +fn remove_skin(skin_uniforms: &mut SkinUniforms, skinned_mesh_entity: MainEntity) { + let Some(old_skin_uniform_info) = skin_uniforms.skin_uniform_info.remove(&skinned_mesh_entity) + else { + return; + }; + + // Free the allocation. + skin_uniforms + .allocator + .free(old_skin_uniform_info.allocation); + + // Remove the inverse mapping from each joint back to the skin. + for &joint in &old_skin_uniform_info.joints { + if let Entry::Occupied(mut entry) = skin_uniforms.joint_to_skins.entry(joint) { + entry.get_mut().retain(|skin| *skin != skinned_mesh_entity); + if entry.get_mut().is_empty() { + entry.remove(); + } + } + } + + // Update the total number of joints. + skin_uniforms.total_joints -= old_skin_uniform_info.joints.len(); +} + +// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per +// entity and so cannot currently be batched on WebGL 2. 
+pub fn no_automatic_skin_batching( + mut commands: Commands, + query: Query, Without)>, + render_device: Res, +) { + if !skins_use_uniform_buffers(&render_device) { + return; + } + + for entity in &query { + commands.entity(entity).try_insert(NoAutomaticBatching); + } +} diff --git a/crates/libmarathon/src/render/pbr/render/skinning.wgsl b/crates/libmarathon/src/render/pbr/render/skinning.wgsl new file mode 100644 index 0000000..6c4da07 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/skinning.wgsl @@ -0,0 +1,95 @@ +#define_import_path bevy_pbr::skinning + +#import bevy_pbr::mesh_types::SkinnedMesh +#import bevy_pbr::mesh_bindings::mesh + +#ifdef SKINNED + +#ifdef SKINS_USE_UNIFORM_BUFFERS +@group(2) @binding(1) var joint_matrices: SkinnedMesh; +#else // SKINS_USE_UNIFORM_BUFFERS +@group(2) @binding(1) var joint_matrices: array>; +#endif // SKINS_USE_UNIFORM_BUFFERS + +// An array of matrices specifying the joint positions from the previous frame. +// +// This is used for motion vector computation. +// +// If this is the first frame, or we're otherwise prevented from using data from +// the previous frame, this is simply the same as `joint_matrices` above. 
+#ifdef SKINS_USE_UNIFORM_BUFFERS +@group(2) @binding(6) var prev_joint_matrices: SkinnedMesh; +#else // SKINS_USE_UNIFORM_BUFFERS +@group(2) @binding(6) var prev_joint_matrices: array>; +#endif // SKINS_USE_UNIFORM_BUFFERS + +fn skin_model( + indexes: vec4, + weights: vec4, + instance_index: u32, +) -> mat4x4 { +#ifdef SKINS_USE_UNIFORM_BUFFERS + return weights.x * joint_matrices.data[indexes.x] + + weights.y * joint_matrices.data[indexes.y] + + weights.z * joint_matrices.data[indexes.z] + + weights.w * joint_matrices.data[indexes.w]; +#else // SKINS_USE_UNIFORM_BUFFERS + var skin_index = mesh[instance_index].current_skin_index; + return weights.x * joint_matrices[skin_index + indexes.x] + + weights.y * joint_matrices[skin_index + indexes.y] + + weights.z * joint_matrices[skin_index + indexes.z] + + weights.w * joint_matrices[skin_index + indexes.w]; +#endif // SKINS_USE_UNIFORM_BUFFERS +} + +// Returns the skinned position of a vertex with the given weights from the +// previous frame. +// +// This is used for motion vector computation. 
+fn skin_prev_model( + indexes: vec4, + weights: vec4, + instance_index: u32, +) -> mat4x4 { +#ifdef SKINS_USE_UNIFORM_BUFFERS + return weights.x * prev_joint_matrices.data[indexes.x] + + weights.y * prev_joint_matrices.data[indexes.y] + + weights.z * prev_joint_matrices.data[indexes.z] + + weights.w * prev_joint_matrices.data[indexes.w]; +#else // SKINS_USE_UNIFORM_BUFFERS + let skin_index = mesh[instance_index].current_skin_index; + return weights.x * prev_joint_matrices[skin_index + indexes.x] + + weights.y * prev_joint_matrices[skin_index + indexes.y] + + weights.z * prev_joint_matrices[skin_index + indexes.z] + + weights.w * prev_joint_matrices[skin_index + indexes.w]; +#endif // SKINS_USE_UNIFORM_BUFFERS +} + +fn inverse_transpose_3x3m(in: mat3x3) -> mat3x3 { + let x = cross(in[1], in[2]); + let y = cross(in[2], in[0]); + let z = cross(in[0], in[1]); + let det = dot(in[2], z); + return mat3x3( + x / det, + y / det, + z / det + ); +} + +fn skin_normals( + world_from_local: mat4x4, + normal: vec3, +) -> vec3 { + return normalize( + inverse_transpose_3x3m( + mat3x3( + world_from_local[0].xyz, + world_from_local[1].xyz, + world_from_local[2].xyz + ) + ) * normal + ); +} + +#endif diff --git a/crates/libmarathon/src/render/pbr/render/utils.wgsl b/crates/libmarathon/src/render/pbr/render/utils.wgsl new file mode 100644 index 0000000..8e91aeb --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/utils.wgsl @@ -0,0 +1,205 @@ +#define_import_path bevy_pbr::utils + +#import bevy_pbr::rgb9e5 +#import bevy_render::maths::{PI, PI_2, orthonormalize} + +// Generates a random u32 in range [0, u32::MAX]. +// +// `state` is a mutable reference to a u32 used as the seed. +// +// Values are generated via "white noise", with no correlation between values. +// In shaders, you often want spatial and/or temporal correlation. Use a different RNG method for these use cases. 
+// +// https://www.pcg-random.org +// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering +fn rand_u(state: ptr) -> u32 { + *state = *state * 747796405u + 2891336453u; + let word = ((*state >> ((*state >> 28u) + 4u)) ^ *state) * 277803737u; + return (word >> 22u) ^ word; +} + +// Generates a random f32 in range [0, 1.0]. +fn rand_f(state: ptr) -> f32 { + *state = *state * 747796405u + 2891336453u; + let word = ((*state >> ((*state >> 28u) + 4u)) ^ *state) * 277803737u; + return f32((word >> 22u) ^ word) * bitcast(0x2f800004u); +} + +// Generates a random vec2 where each value is in range [0, 1.0]. +fn rand_vec2f(state: ptr) -> vec2 { + return vec2(rand_f(state), rand_f(state)); +} + +// Generates a random u32 in range [0, n). +fn rand_range_u(n: u32, state: ptr) -> u32 { + return rand_u(state) % n; +} + +// returns the (0-1, 0-1) position within the given viewport for the current buffer coords . +// buffer coords can be obtained from `@builtin(position).xy`. +// the view uniform struct contains the current camera viewport in `view.viewport`. +// topleft = 0,0 +fn coords_to_viewport_uv(position: vec2, viewport: vec4) -> vec2 { + return (position - viewport.xy) / viewport.zw; +} + +// https://jcgt.org/published/0003/02/01/paper.pdf + +// For encoding normals or unit direction vectors as octahedral coordinates. +fn octahedral_encode(v: vec3) -> vec2 { + var n = v / (abs(v.x) + abs(v.y) + abs(v.z)); + let octahedral_wrap = (1.0 - abs(n.yx)) * select(vec2(-1.0), vec2(1.0), n.xy > vec2f(0.0)); + let n_xy = select(octahedral_wrap, n.xy, n.z >= 0.0); + return n_xy * 0.5 + 0.5; +} + +// For decoding normals or unit direction vectors from octahedral coordinates. +fn octahedral_decode(v: vec2) -> vec3 { + let f = v * 2.0 - 1.0; + return octahedral_decode_signed(f); +} + +// Like octahedral_decode, but for input in [-1, 1] instead of [0, 1]. 
+fn octahedral_decode_signed(v: vec2) -> vec3 { + var n = vec3(v.xy, 1.0 - abs(v.x) - abs(v.y)); + let t = saturate(-n.z); + let w = select(vec2(t), vec2(-t), n.xy >= vec2(0.0)); + n = vec3(n.xy + w, n.z); + return normalize(n); +} + +// https://blog.demofox.org/2022/01/01/interleaved-gradient-noise-a-different-kind-of-low-discrepancy-sequence +fn interleaved_gradient_noise(pixel_coordinates: vec2, frame: u32) -> f32 { + let xy = pixel_coordinates + 5.588238 * f32(frame % 64u); + return fract(52.9829189 * fract(0.06711056 * xy.x + 0.00583715 * xy.y)); +} + +// Hammersley sequence for quasi-random points +fn hammersley_2d(i: u32, n: u32) -> vec2f { + let inv_n = 1.0 / f32(n); + let vdc = f32(reverseBits(i)) * 2.3283064365386963e-10; // 1/2^32 + return vec2f(f32(i) * inv_n, vdc); +} + +// https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare (slides 120-135) +// TODO: Use an array here instead of a bunch of constants, once arrays work properly under DX12. 
+// NOTE: The names have a final underscore to avoid the following error: +// `Composable module identifiers must not require substitution according to naga writeback rules` +const SPIRAL_OFFSET_0_ = vec2(-0.7071, 0.7071); +const SPIRAL_OFFSET_1_ = vec2(-0.0000, -0.8750); +const SPIRAL_OFFSET_2_ = vec2( 0.5303, 0.5303); +const SPIRAL_OFFSET_3_ = vec2(-0.6250, -0.0000); +const SPIRAL_OFFSET_4_ = vec2( 0.3536, -0.3536); +const SPIRAL_OFFSET_5_ = vec2(-0.0000, 0.3750); +const SPIRAL_OFFSET_6_ = vec2(-0.1768, -0.1768); +const SPIRAL_OFFSET_7_ = vec2( 0.1250, 0.0000); + +// https://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.9.pdf#0004286901.INDD%3ASec28%3A303 +fn sample_cosine_hemisphere(normal: vec3, rng: ptr) -> vec3 { + let cos_theta = 1.0 - 2.0 * rand_f(rng); + let phi = PI_2 * rand_f(rng); + let sin_theta = sqrt(max(1.0 - cos_theta * cos_theta, 0.0)); + let x = normal.x + sin_theta * cos(phi); + let y = normal.y + sin_theta * sin(phi); + let z = normal.z + cos_theta; + return vec3(x, y, z); +} +// https://www.pbr-book.org/3ed-2018/Monte_Carlo_Integration/2D_Sampling_with_Multidimensional_Transformations#UniformlySamplingaHemisphere +fn sample_uniform_hemisphere(normal: vec3, rng: ptr) -> vec3 { + let cos_theta = rand_f(rng); + let phi = PI_2 * rand_f(rng); + let sin_theta = sqrt(max(1.0 - cos_theta * cos_theta, 0.0)); + let x = sin_theta * cos(phi); + let y = sin_theta * sin(phi); + let z = cos_theta; + return orthonormalize(normal) * vec3(x, y, z); +} + +fn uniform_hemisphere_inverse_pdf() -> f32 { + return PI_2; +} + +// https://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.9.pdf#0004286901.INDD%3ASec19%3A294 +fn sample_disk(disk_radius: f32, rng: ptr) -> vec2 { + let ab = 2.0 * rand_vec2f(rng) - 1.0; + let a = ab.x; + var b = ab.y; + if (b == 0.0) { b = 1.0; } + + var phi: f32; + var r: f32; + if (a * a > b * b) { + r = disk_radius * a; + phi = (PI / 4.0) * (b / a); + } else { + r = disk_radius * b; + phi = (PI 
/ 2.0) - (PI / 4.0) * (a / b); + } + + let x = r * cos(phi); + let y = r * sin(phi); + return vec2(x, y); +} + +// Convert UV and face index to direction vector +fn sample_cube_dir(uv: vec2f, face: u32) -> vec3f { + // Convert from [0,1] to [-1,1] + let uvc = 2.0 * uv - 1.0; + + // Generate direction based on the cube face + var dir: vec3f; + switch(face) { + case 0u: { dir = vec3f( 1.0, -uvc.y, -uvc.x); } // +X + case 1u: { dir = vec3f(-1.0, -uvc.y, uvc.x); } // -X + case 2u: { dir = vec3f( uvc.x, 1.0, uvc.y); } // +Y + case 3u: { dir = vec3f( uvc.x, -1.0, -uvc.y); } // -Y + case 4u: { dir = vec3f( uvc.x, -uvc.y, 1.0); } // +Z + case 5u: { dir = vec3f(-uvc.x, -uvc.y, -1.0); } // -Z + default: { dir = vec3f(0.0); } + } + return normalize(dir); +} + +// Convert direction vector to cube face UV +struct CubeUV { + uv: vec2f, + face: u32, +} +fn dir_to_cube_uv(dir: vec3f) -> CubeUV { + let abs_dir = abs(dir); + var face: u32 = 0u; + var uv: vec2f = vec2f(0.0); + + // Find the dominant axis to determine face + if (abs_dir.x >= abs_dir.y && abs_dir.x >= abs_dir.z) { + // X axis is dominant + if (dir.x > 0.0) { + face = 0u; // +X + uv = vec2f(-dir.z, -dir.y) / dir.x; + } else { + face = 1u; // -X + uv = vec2f(dir.z, -dir.y) / abs_dir.x; + } + } else if (abs_dir.y >= abs_dir.x && abs_dir.y >= abs_dir.z) { + // Y axis is dominant + if (dir.y > 0.0) { + face = 2u; // +Y + uv = vec2f(dir.x, dir.z) / dir.y; + } else { + face = 3u; // -Y + uv = vec2f(dir.x, -dir.z) / abs_dir.y; + } + } else { + // Z axis is dominant + if (dir.z > 0.0) { + face = 4u; // +Z + uv = vec2f(dir.x, -dir.y) / dir.z; + } else { + face = 5u; // -Z + uv = vec2f(-dir.x, -dir.y) / abs_dir.z; + } + } + + // Convert from [-1,1] to [0,1] + return CubeUV(uv * 0.5 + 0.5, face); +} diff --git a/crates/libmarathon/src/render/pbr/render/view_transformations.wgsl b/crates/libmarathon/src/render/pbr/render/view_transformations.wgsl new file mode 100644 index 0000000..dfb4d6e --- /dev/null +++ 
b/crates/libmarathon/src/render/pbr/render/view_transformations.wgsl @@ -0,0 +1,238 @@ +#define_import_path bevy_pbr::view_transformations + +#import bevy_pbr::mesh_view_bindings as view_bindings +#import bevy_pbr::prepass_bindings + +/// World space: +/// +y is up + +/// View space: +/// -z is forward, +x is right, +y is up +/// Forward is from the camera position into the scene. +/// (0.0, 0.0, -1.0) is linear distance of 1.0 in front of the camera's view relative to the camera's rotation +/// (0.0, 1.0, 0.0) is linear distance of 1.0 above the camera's view relative to the camera's rotation + +/// NDC (normalized device coordinate): +/// https://www.w3.org/TR/webgpu/#coordinate-systems +/// (-1.0, -1.0) in NDC is located at the bottom-left corner of NDC +/// (1.0, 1.0) in NDC is located at the top-right corner of NDC +/// Z is depth where: +/// 1.0 is near clipping plane +/// Perspective projection: 0.0 is inf far away +/// Orthographic projection: 0.0 is far clipping plane + +/// Clip space: +/// This is NDC before the perspective divide, still in homogenous coordinate space. +/// Dividing a clip space point by its w component yields a point in NDC space. 
+ +/// UV space: +/// 0.0, 0.0 is the top left +/// 1.0, 1.0 is the bottom right + + +// ----------------- +// TO WORLD -------- +// ----------------- + +/// Convert a view space position to world space +fn position_view_to_world(view_pos: vec3) -> vec3 { + let world_pos = view_bindings::view.world_from_view * vec4(view_pos, 1.0); + return world_pos.xyz; +} + +/// Convert a clip space position to world space +fn position_clip_to_world(clip_pos: vec4) -> vec3 { + let world_pos = view_bindings::view.world_from_clip * clip_pos; + return world_pos.xyz; +} + +/// Convert a ndc space position to world space +fn position_ndc_to_world(ndc_pos: vec3) -> vec3 { + let world_pos = view_bindings::view.world_from_clip * vec4(ndc_pos, 1.0); + return world_pos.xyz / world_pos.w; +} + +/// Convert a view space direction to world space +fn direction_view_to_world(view_dir: vec3) -> vec3 { + let world_dir = view_bindings::view.world_from_view * vec4(view_dir, 0.0); + return world_dir.xyz; +} + +/// Convert a clip space direction to world space +fn direction_clip_to_world(clip_dir: vec4) -> vec3 { + let world_dir = view_bindings::view.world_from_clip * clip_dir; + return world_dir.xyz; +} + +// ----------------- +// TO VIEW --------- +// ----------------- + +/// Convert a world space position to view space +fn position_world_to_view(world_pos: vec3) -> vec3 { + let view_pos = view_bindings::view.view_from_world * vec4(world_pos, 1.0); + return view_pos.xyz; +} + +/// Convert a clip space position to view space +fn position_clip_to_view(clip_pos: vec4) -> vec3 { + let view_pos = view_bindings::view.view_from_clip * clip_pos; + return view_pos.xyz; +} + +/// Convert a ndc space position to view space +fn position_ndc_to_view(ndc_pos: vec3) -> vec3 { + let view_pos = view_bindings::view.view_from_clip * vec4(ndc_pos, 1.0); + return view_pos.xyz / view_pos.w; +} + +/// Convert a world space direction to view space +fn direction_world_to_view(world_dir: vec3) -> vec3 { + let view_dir = 
view_bindings::view.view_from_world * vec4(world_dir, 0.0); + return view_dir.xyz; +} + +/// Convert a clip space direction to view space +fn direction_clip_to_view(clip_dir: vec4) -> vec3 { + let view_dir = view_bindings::view.view_from_clip * clip_dir; + return view_dir.xyz; +} + +// ----------------- +// TO PREV. VIEW --- +// ----------------- + +fn position_world_to_prev_view(world_pos: vec3) -> vec3 { + let view_pos = prepass_bindings::previous_view_uniforms.view_from_world * + vec4(world_pos, 1.0); + return view_pos.xyz; +} + +fn position_world_to_prev_ndc(world_pos: vec3) -> vec3 { + let ndc_pos = prepass_bindings::previous_view_uniforms.clip_from_world * + vec4(world_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + +// ----------------- +// TO CLIP --------- +// ----------------- + +/// Convert a world space position to clip space +fn position_world_to_clip(world_pos: vec3) -> vec4 { + let clip_pos = view_bindings::view.clip_from_world * vec4(world_pos, 1.0); + return clip_pos; +} + +/// Convert a view space position to clip space +fn position_view_to_clip(view_pos: vec3) -> vec4 { + let clip_pos = view_bindings::view.clip_from_view * vec4(view_pos, 1.0); + return clip_pos; +} + +/// Convert a world space direction to clip space +fn direction_world_to_clip(world_dir: vec3) -> vec4 { + let clip_dir = view_bindings::view.clip_from_world * vec4(world_dir, 0.0); + return clip_dir; +} + +/// Convert a view space direction to clip space +fn direction_view_to_clip(view_dir: vec3) -> vec4 { + let clip_dir = view_bindings::view.clip_from_view * vec4(view_dir, 0.0); + return clip_dir; +} + +// ----------------- +// TO NDC ---------- +// ----------------- + +/// Convert a world space position to ndc space +fn position_world_to_ndc(world_pos: vec3) -> vec3 { + let ndc_pos = view_bindings::view.clip_from_world * vec4(world_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + +/// Convert a view space position to ndc space +fn position_view_to_ndc(view_pos: vec3) -> vec3 { 
+ let ndc_pos = view_bindings::view.clip_from_view * vec4(view_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + +// ----------------- +// DEPTH ----------- +// ----------------- + +/// Retrieve the perspective camera near clipping plane +fn perspective_camera_near() -> f32 { + return view_bindings::view.clip_from_view[3][2]; +} + +/// Convert ndc depth to linear view z. +/// Note: Depth values in front of the camera will be negative as -z is forward +fn depth_ndc_to_view_z(ndc_depth: f32) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near() / ndc_depth; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return -(view_bindings::view.clip_from_view[3][2] - ndc_depth) / view_bindings::view.clip_from_view[2][2]; +#else + let view_pos = view_bindings::view.view_from_clip * vec4(0.0, 0.0, ndc_depth, 1.0); + return view_pos.z / view_pos.w; +#endif +} + +/// Convert linear view z to ndc depth. +/// Note: View z input should be negative for values in front of the camera as -z is forward +fn view_z_to_depth_ndc(view_z: f32) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near() / view_z; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return view_bindings::view.clip_from_view[3][2] + view_z * view_bindings::view.clip_from_view[2][2]; +#else + let ndc_pos = view_bindings::view.clip_from_view * vec4(0.0, 0.0, view_z, 1.0); + return ndc_pos.z / ndc_pos.w; +#endif +} + +fn prev_view_z_to_depth_ndc(view_z: f32) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near() / view_z; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return prepass_bindings::previous_view_uniforms.clip_from_view[3][2] + + view_z * prepass_bindings::previous_view_uniforms.clip_from_view[2][2]; +#else + let ndc_pos = prepass_bindings::previous_view_uniforms.clip_from_view * + vec4(0.0, 0.0, view_z, 1.0); + return ndc_pos.z / ndc_pos.w; +#endif +} + +// ----------------- +// UV -------------- +// ----------------- + +/// Convert ndc space xy 
coordinate [-1.0 .. 1.0] to uv [0.0 .. 1.0] +fn ndc_to_uv(ndc: vec2) -> vec2 { + return ndc * vec2(0.5, -0.5) + vec2(0.5); +} + +/// Convert uv [0.0 .. 1.0] coordinate to ndc space xy [-1.0 .. 1.0] +fn uv_to_ndc(uv: vec2) -> vec2 { + return uv * vec2(2.0, -2.0) + vec2(-1.0, 1.0); +} + +/// returns the (0.0, 0.0) .. (1.0, 1.0) position within the viewport for the current render target +/// [0 .. render target viewport size] eg. [(0.0, 0.0) .. (1280.0, 720.0)] to [(0.0, 0.0) .. (1.0, 1.0)] +fn frag_coord_to_uv(frag_coord: vec2) -> vec2 { + return (frag_coord - view_bindings::view.viewport.xy) / view_bindings::view.viewport.zw; +} + +/// Convert frag coord to ndc +fn frag_coord_to_ndc(frag_coord: vec4) -> vec3 { + return vec3(uv_to_ndc(frag_coord_to_uv(frag_coord.xy)), frag_coord.z); +} + +/// Convert ndc space xy coordinate [-1.0 .. 1.0] to [0 .. render target +/// viewport size] +fn ndc_to_frag_coord(ndc: vec2) -> vec2 { + return ndc_to_uv(ndc) * view_bindings::view.viewport.zw; +} diff --git a/crates/libmarathon/src/render/pbr/render/wireframe.wgsl b/crates/libmarathon/src/render/pbr/render/wireframe.wgsl new file mode 100644 index 0000000..3873ffa --- /dev/null +++ b/crates/libmarathon/src/render/pbr/render/wireframe.wgsl @@ -0,0 +1,12 @@ +#import bevy_pbr::forward_io::VertexOutput + +struct PushConstants { + color: vec4 +} + +var push_constants: PushConstants; + +@fragment +fn fragment(in: VertexOutput) -> @location(0) vec4 { + return push_constants.color; +} diff --git a/crates/libmarathon/src/render/pbr/ssao/mod.rs b/crates/libmarathon/src/render/pbr/ssao/mod.rs new file mode 100644 index 0000000..e566f93 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssao/mod.rs @@ -0,0 +1,757 @@ +use crate::render::pbr::NodePbr; +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, load_embedded_asset, Handle}; +use bevy_camera::{Camera, Camera3d}; +use crate::render::{ + core_3d::graph::{Core3d, Node3d}, + prepass::{DepthPrepass, NormalPrepass, 
ViewPrepassTextures}, +}; +use bevy_ecs::{ + prelude::{Component, Entity}, + query::{Has, QueryItem, With}, + reflect::ReflectComponent, + resource::Resource, + schedule::IntoScheduleConfigs, + system::{Commands, Query, Res, ResMut}, + world::{FromWorld, World}, +}; +use bevy_image::ToExtents; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + camera::{ExtractedCamera, TemporalJitter}, + diagnostic::RecordDiagnostics, + extract_component::ExtractComponent, + globals::{GlobalsBuffer, GlobalsUniform}, + render_graph::{NodeRunError, RenderGraphContext, RenderGraphExt, ViewNode, ViewNodeRunner}, + render_resource::{ + binding_types::{ + sampler, texture_2d, texture_depth_2d, texture_storage_2d, uniform_buffer, + }, + *, + }, + renderer::{RenderAdapter, RenderContext, RenderDevice, RenderQueue}, + sync_component::SyncComponentPlugin, + sync_world::RenderEntity, + texture::{CachedTexture, TextureCache}, + view::{Msaa, ViewUniform, ViewUniformOffset, ViewUniforms}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; +use bevy_shader::{load_shader_library, Shader, ShaderDefVal}; +use bevy_utils::prelude::default; +use core::mem; +use tracing::{error, warn}; + +/// Plugin for screen space ambient occlusion. +pub struct ScreenSpaceAmbientOcclusionPlugin; + +impl Plugin for ScreenSpaceAmbientOcclusionPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "ssao_utils.wgsl"); + + embedded_asset!(app, "preprocess_depth.wgsl"); + embedded_asset!(app, "ssao.wgsl"); + embedded_asset!(app, "spatial_denoise.wgsl"); + + app.add_plugins(SyncComponentPlugin::::default()); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + if !render_app + .world() + .resource::() + .get_texture_format_features(TextureFormat::R16Float) + .allowed_usages + .contains(TextureUsages::STORAGE_BINDING) + { + warn!("ScreenSpaceAmbientOcclusionPlugin not loaded. 
GPU lacks support: TextureFormat::R16Float does not support TextureUsages::STORAGE_BINDING."); + return; + } + + if render_app + .world() + .resource::() + .limits() + .max_storage_textures_per_shader_stage + < 5 + { + warn!("ScreenSpaceAmbientOcclusionPlugin not loaded. GPU lacks support: Limits::max_storage_textures_per_shader_stage is less than 5."); + return; + } + + render_app + .init_resource::() + .init_resource::>() + .add_systems(ExtractSchedule, extract_ssao_settings) + .add_systems( + Render, + ( + prepare_ssao_pipelines.in_set(RenderSystems::Prepare), + prepare_ssao_textures.in_set(RenderSystems::PrepareResources), + prepare_ssao_bind_groups.in_set(RenderSystems::PrepareBindGroups), + ), + ) + .add_render_graph_node::>( + Core3d, + NodePbr::ScreenSpaceAmbientOcclusion, + ) + .add_render_graph_edges( + Core3d, + ( + // END_PRE_PASSES -> SCREEN_SPACE_AMBIENT_OCCLUSION -> MAIN_PASS + Node3d::EndPrepasses, + NodePbr::ScreenSpaceAmbientOcclusion, + Node3d::StartMainPass, + ), + ); + } +} + +/// Component to apply screen space ambient occlusion to a 3d camera. +/// +/// Screen space ambient occlusion (SSAO) approximates small-scale, +/// local occlusion of _indirect_ diffuse light between objects, based on what's visible on-screen. +/// SSAO does not apply to direct lighting, such as point or directional lights. +/// +/// This darkens creases, e.g. on staircases, and gives nice contact shadows +/// where objects meet, giving entities a more "grounded" feel. +/// +/// # Usage Notes +/// +/// Requires that you add [`ScreenSpaceAmbientOcclusionPlugin`] to your app. +/// +/// It strongly recommended that you use SSAO in conjunction with +/// TAA (`TemporalAntiAliasing`). +/// Doing so greatly reduces SSAO noise. +/// +/// SSAO is not supported on `WebGL2`, and is not currently supported on `WebGPU`. 
+#[derive(Component, ExtractComponent, Reflect, PartialEq, Clone, Debug)] +#[reflect(Component, Debug, Default, PartialEq, Clone)] +#[require(DepthPrepass, NormalPrepass)] +#[doc(alias = "Ssao")] +pub struct ScreenSpaceAmbientOcclusion { + /// Quality of the SSAO effect. + pub quality_level: ScreenSpaceAmbientOcclusionQualityLevel, + /// A constant estimated thickness of objects. + /// + /// This value is used to decide how far behind an object a ray of light needs to be in order + /// to pass behind it. Any ray closer than that will be occluded. + pub constant_object_thickness: f32, +} + +impl Default for ScreenSpaceAmbientOcclusion { + fn default() -> Self { + Self { + quality_level: ScreenSpaceAmbientOcclusionQualityLevel::default(), + constant_object_thickness: 0.25, + } + } +} + +#[derive(Reflect, PartialEq, Eq, Hash, Clone, Copy, Default, Debug)] +#[reflect(PartialEq, Hash, Clone, Default)] +pub enum ScreenSpaceAmbientOcclusionQualityLevel { + Low, + Medium, + #[default] + High, + Ultra, + Custom { + /// Higher slice count means less noise, but worse performance. + slice_count: u32, + /// Samples per slice side is also tweakable, but recommended to be left at 2 or 3. 
+ samples_per_slice_side: u32, + }, +} + +impl ScreenSpaceAmbientOcclusionQualityLevel { + fn sample_counts(&self) -> (u32, u32) { + match self { + Self::Low => (1, 2), // 4 spp (1 * (2 * 2)), plus optional temporal samples + Self::Medium => (2, 2), // 8 spp (2 * (2 * 2)), plus optional temporal samples + Self::High => (3, 3), // 18 spp (3 * (3 * 2)), plus optional temporal samples + Self::Ultra => (9, 3), // 54 spp (9 * (3 * 2)), plus optional temporal samples + Self::Custom { + slice_count: slices, + samples_per_slice_side, + } => (*slices, *samples_per_slice_side), + } + } +} + +#[derive(Default)] +struct SsaoNode {} + +impl ViewNode for SsaoNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static SsaoPipelineId, + &'static SsaoBindGroups, + &'static ViewUniformOffset, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (camera, pipeline_id, bind_groups, view_uniform_offset): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + let pipelines = world.resource::(); + let pipeline_cache = world.resource::(); + let ( + Some(camera_size), + Some(preprocess_depth_pipeline), + Some(spatial_denoise_pipeline), + Some(ssao_pipeline), + ) = ( + camera.physical_viewport_size, + pipeline_cache.get_compute_pipeline(pipelines.preprocess_depth_pipeline), + pipeline_cache.get_compute_pipeline(pipelines.spatial_denoise_pipeline), + pipeline_cache.get_compute_pipeline(pipeline_id.0), + ) + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let command_encoder = render_context.command_encoder(); + command_encoder.push_debug_group("ssao"); + let time_span = diagnostics.time_span(command_encoder, "ssao"); + + { + let mut preprocess_depth_pass = + command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some("ssao_preprocess_depth"), + timestamp_writes: None, + }); + preprocess_depth_pass.set_pipeline(preprocess_depth_pipeline); + 
preprocess_depth_pass.set_bind_group(0, &bind_groups.preprocess_depth_bind_group, &[]); + preprocess_depth_pass.set_bind_group( + 1, + &bind_groups.common_bind_group, + &[view_uniform_offset.offset], + ); + preprocess_depth_pass.dispatch_workgroups( + camera_size.x.div_ceil(16), + camera_size.y.div_ceil(16), + 1, + ); + } + + { + let mut ssao_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some("ssao"), + timestamp_writes: None, + }); + ssao_pass.set_pipeline(ssao_pipeline); + ssao_pass.set_bind_group(0, &bind_groups.ssao_bind_group, &[]); + ssao_pass.set_bind_group( + 1, + &bind_groups.common_bind_group, + &[view_uniform_offset.offset], + ); + ssao_pass.dispatch_workgroups(camera_size.x.div_ceil(8), camera_size.y.div_ceil(8), 1); + } + + { + let mut spatial_denoise_pass = + command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some("ssao_spatial_denoise"), + timestamp_writes: None, + }); + spatial_denoise_pass.set_pipeline(spatial_denoise_pipeline); + spatial_denoise_pass.set_bind_group(0, &bind_groups.spatial_denoise_bind_group, &[]); + spatial_denoise_pass.set_bind_group( + 1, + &bind_groups.common_bind_group, + &[view_uniform_offset.offset], + ); + spatial_denoise_pass.dispatch_workgroups( + camera_size.x.div_ceil(8), + camera_size.y.div_ceil(8), + 1, + ); + } + + time_span.end(command_encoder); + command_encoder.pop_debug_group(); + Ok(()) + } +} + +#[derive(Resource)] +struct SsaoPipelines { + preprocess_depth_pipeline: CachedComputePipelineId, + spatial_denoise_pipeline: CachedComputePipelineId, + + common_bind_group_layout: BindGroupLayout, + preprocess_depth_bind_group_layout: BindGroupLayout, + ssao_bind_group_layout: BindGroupLayout, + spatial_denoise_bind_group_layout: BindGroupLayout, + + hilbert_index_lut: TextureView, + point_clamp_sampler: Sampler, + linear_clamp_sampler: Sampler, + + shader: Handle, +} + +impl FromWorld for SsaoPipelines { + fn from_world(world: &mut World) -> Self { + let render_device = 
world.resource::(); + let render_queue = world.resource::(); + let pipeline_cache = world.resource::(); + + let hilbert_index_lut = render_device + .create_texture_with_data( + render_queue, + &(TextureDescriptor { + label: Some("ssao_hilbert_index_lut"), + size: Extent3d { + width: HILBERT_WIDTH as u32, + height: HILBERT_WIDTH as u32, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R16Uint, + usage: TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }), + TextureDataOrder::default(), + bytemuck::cast_slice(&generate_hilbert_index_lut()), + ) + .create_view(&TextureViewDescriptor::default()); + + let point_clamp_sampler = render_device.create_sampler(&SamplerDescriptor { + min_filter: FilterMode::Nearest, + mag_filter: FilterMode::Nearest, + mipmap_filter: FilterMode::Nearest, + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + ..Default::default() + }); + let linear_clamp_sampler = render_device.create_sampler(&SamplerDescriptor { + min_filter: FilterMode::Linear, + mag_filter: FilterMode::Linear, + mipmap_filter: FilterMode::Nearest, + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + ..Default::default() + }); + + let common_bind_group_layout = render_device.create_bind_group_layout( + "ssao_common_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + sampler(SamplerBindingType::NonFiltering), + sampler(SamplerBindingType::Filtering), + uniform_buffer::(true), + ), + ), + ); + + let preprocess_depth_bind_group_layout = render_device.create_bind_group_layout( + "ssao_preprocess_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_depth_2d(), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + 
texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + ), + ), + ); + + let ssao_bind_group_layout = render_device.create_bind_group_layout( + "ssao_ssao_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: true }), + texture_2d(TextureSampleType::Float { filterable: false }), + texture_2d(TextureSampleType::Uint), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::WriteOnly), + uniform_buffer::(false), + uniform_buffer::(false), + ), + ), + ); + + let spatial_denoise_bind_group_layout = render_device.create_bind_group_layout( + "ssao_spatial_denoise_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + texture_2d(TextureSampleType::Uint), + texture_storage_2d(TextureFormat::R16Float, StorageTextureAccess::WriteOnly), + ), + ), + ); + + let preprocess_depth_pipeline = + pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("ssao_preprocess_depth_pipeline".into()), + layout: vec![ + preprocess_depth_bind_group_layout.clone(), + common_bind_group_layout.clone(), + ], + shader: load_embedded_asset!(world, "preprocess_depth.wgsl"), + ..default() + }); + + let spatial_denoise_pipeline = + pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("ssao_spatial_denoise_pipeline".into()), + layout: vec![ + spatial_denoise_bind_group_layout.clone(), + common_bind_group_layout.clone(), + ], + shader: load_embedded_asset!(world, "spatial_denoise.wgsl"), + ..default() + }); + + Self { + preprocess_depth_pipeline, + spatial_denoise_pipeline, + + common_bind_group_layout, + 
preprocess_depth_bind_group_layout, + ssao_bind_group_layout, + spatial_denoise_bind_group_layout, + + hilbert_index_lut, + point_clamp_sampler, + linear_clamp_sampler, + + shader: load_embedded_asset!(world, "ssao.wgsl"), + } + } +} + +#[derive(PartialEq, Eq, Hash, Clone)] +struct SsaoPipelineKey { + quality_level: ScreenSpaceAmbientOcclusionQualityLevel, + temporal_jitter: bool, +} + +impl SpecializedComputePipeline for SsaoPipelines { + type Key = SsaoPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let (slice_count, samples_per_slice_side) = key.quality_level.sample_counts(); + + let mut shader_defs = vec![ + ShaderDefVal::Int("SLICE_COUNT".to_string(), slice_count as i32), + ShaderDefVal::Int( + "SAMPLES_PER_SLICE_SIDE".to_string(), + samples_per_slice_side as i32, + ), + ]; + + if key.temporal_jitter { + shader_defs.push("TEMPORAL_JITTER".into()); + } + + ComputePipelineDescriptor { + label: Some("ssao_ssao_pipeline".into()), + layout: vec![ + self.ssao_bind_group_layout.clone(), + self.common_bind_group_layout.clone(), + ], + shader: self.shader.clone(), + shader_defs, + ..default() + } + } +} + +fn extract_ssao_settings( + mut commands: Commands, + cameras: Extract< + Query< + (RenderEntity, &Camera, &ScreenSpaceAmbientOcclusion, &Msaa), + (With, With, With), + >, + >, +) { + for (entity, camera, ssao_settings, msaa) in &cameras { + if *msaa != Msaa::Off { + error!( + "SSAO is being used which requires Msaa::Off, but Msaa is currently set to Msaa::{:?}", + *msaa + ); + return; + } + let mut entity_commands = commands + .get_entity(entity) + .expect("SSAO entity wasn't synced."); + if camera.is_active { + entity_commands.insert(ssao_settings.clone()); + } else { + entity_commands.remove::(); + } + } +} + +#[derive(Component)] +pub struct ScreenSpaceAmbientOcclusionResources { + preprocessed_depth_texture: CachedTexture, + ssao_noisy_texture: CachedTexture, // Pre-spatially denoised texture + pub 
screen_space_ambient_occlusion_texture: CachedTexture, // Spatially denoised texture + depth_differences_texture: CachedTexture, + thickness_buffer: Buffer, +} + +fn prepare_ssao_textures( + mut commands: Commands, + mut texture_cache: ResMut, + render_device: Res, + views: Query<(Entity, &ExtractedCamera, &ScreenSpaceAmbientOcclusion)>, +) { + for (entity, camera, ssao_settings) in &views { + let Some(physical_viewport_size) = camera.physical_viewport_size else { + continue; + }; + let size = physical_viewport_size.to_extents(); + + let preprocessed_depth_texture = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("ssao_preprocessed_depth_texture"), + size, + mip_level_count: 5, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R16Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let ssao_noisy_texture = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("ssao_noisy_texture"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R16Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let ssao_texture = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("ssao_texture"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R16Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let depth_differences_texture = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("ssao_depth_differences_texture"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Uint, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + let thickness_buffer = 
render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("thickness_buffer"), + contents: &ssao_settings.constant_object_thickness.to_le_bytes(), + usage: BufferUsages::UNIFORM, + }); + + commands + .entity(entity) + .insert(ScreenSpaceAmbientOcclusionResources { + preprocessed_depth_texture, + ssao_noisy_texture, + screen_space_ambient_occlusion_texture: ssao_texture, + depth_differences_texture, + thickness_buffer, + }); + } +} + +#[derive(Component)] +struct SsaoPipelineId(CachedComputePipelineId); + +fn prepare_ssao_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + pipeline: Res, + views: Query<(Entity, &ScreenSpaceAmbientOcclusion, Has)>, +) { + for (entity, ssao_settings, temporal_jitter) in &views { + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &pipeline, + SsaoPipelineKey { + quality_level: ssao_settings.quality_level, + temporal_jitter, + }, + ); + + commands.entity(entity).insert(SsaoPipelineId(pipeline_id)); + } +} + +#[derive(Component)] +struct SsaoBindGroups { + common_bind_group: BindGroup, + preprocess_depth_bind_group: BindGroup, + ssao_bind_group: BindGroup, + spatial_denoise_bind_group: BindGroup, +} + +fn prepare_ssao_bind_groups( + mut commands: Commands, + render_device: Res, + pipelines: Res, + view_uniforms: Res, + global_uniforms: Res, + views: Query<( + Entity, + &ScreenSpaceAmbientOcclusionResources, + &ViewPrepassTextures, + )>, +) { + let (Some(view_uniforms), Some(globals_uniforms)) = ( + view_uniforms.uniforms.binding(), + global_uniforms.buffer.binding(), + ) else { + return; + }; + + for (entity, ssao_resources, prepass_textures) in &views { + let common_bind_group = render_device.create_bind_group( + "ssao_common_bind_group", + &pipelines.common_bind_group_layout, + &BindGroupEntries::sequential(( + &pipelines.point_clamp_sampler, + &pipelines.linear_clamp_sampler, + view_uniforms.clone(), + )), + ); + + let create_depth_view = |mip_level| { + ssao_resources 
+ .preprocessed_depth_texture + .texture + .create_view(&TextureViewDescriptor { + label: Some("ssao_preprocessed_depth_texture_mip_view"), + base_mip_level: mip_level, + format: Some(TextureFormat::R16Float), + dimension: Some(TextureViewDimension::D2), + mip_level_count: Some(1), + ..default() + }) + }; + + let preprocess_depth_bind_group = render_device.create_bind_group( + "ssao_preprocess_depth_bind_group", + &pipelines.preprocess_depth_bind_group_layout, + &BindGroupEntries::sequential(( + prepass_textures.depth_view().unwrap(), + &create_depth_view(0), + &create_depth_view(1), + &create_depth_view(2), + &create_depth_view(3), + &create_depth_view(4), + )), + ); + + let ssao_bind_group = render_device.create_bind_group( + "ssao_ssao_bind_group", + &pipelines.ssao_bind_group_layout, + &BindGroupEntries::sequential(( + &ssao_resources.preprocessed_depth_texture.default_view, + prepass_textures.normal_view().unwrap(), + &pipelines.hilbert_index_lut, + &ssao_resources.ssao_noisy_texture.default_view, + &ssao_resources.depth_differences_texture.default_view, + globals_uniforms.clone(), + ssao_resources.thickness_buffer.as_entire_binding(), + )), + ); + + let spatial_denoise_bind_group = render_device.create_bind_group( + "ssao_spatial_denoise_bind_group", + &pipelines.spatial_denoise_bind_group_layout, + &BindGroupEntries::sequential(( + &ssao_resources.ssao_noisy_texture.default_view, + &ssao_resources.depth_differences_texture.default_view, + &ssao_resources + .screen_space_ambient_occlusion_texture + .default_view, + )), + ); + + commands.entity(entity).insert(SsaoBindGroups { + common_bind_group, + preprocess_depth_bind_group, + ssao_bind_group, + spatial_denoise_bind_group, + }); + } +} + +fn generate_hilbert_index_lut() -> [[u16; 64]; 64] { + use core::array::from_fn; + from_fn(|x| from_fn(|y| hilbert_index(x as u16, y as u16))) +} + +// https://www.shadertoy.com/view/3tB3z3 +const HILBERT_WIDTH: u16 = 64; +fn hilbert_index(mut x: u16, mut y: u16) -> u16 { + 
let mut index = 0; + + let mut level: u16 = HILBERT_WIDTH / 2; + while level > 0 { + let region_x = (x & level > 0) as u16; + let region_y = (y & level > 0) as u16; + index += level * level * ((3 * region_x) ^ region_y); + + if region_y == 0 { + if region_x == 1 { + x = HILBERT_WIDTH - 1 - x; + y = HILBERT_WIDTH - 1 - y; + } + + mem::swap(&mut x, &mut y); + } + + level /= 2; + } + + index +} diff --git a/crates/libmarathon/src/render/pbr/ssao/preprocess_depth.wgsl b/crates/libmarathon/src/render/pbr/ssao/preprocess_depth.wgsl new file mode 100644 index 0000000..a386b09 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssao/preprocess_depth.wgsl @@ -0,0 +1,102 @@ +// Inputs a depth texture and outputs a MIP-chain of depths. +// +// Because SSAO's performance is bound by texture reads, this increases +// performance over using the full resolution depth for every sample. + +// Reference: https://research.nvidia.com/sites/default/files/pubs/2012-06_Scalable-Ambient-Obscurance/McGuire12SAO.pdf, section 2.2 + +#import bevy_render::view::View + +@group(0) @binding(0) var input_depth: texture_depth_2d; +@group(0) @binding(1) var preprocessed_depth_mip0: texture_storage_2d; +@group(0) @binding(2) var preprocessed_depth_mip1: texture_storage_2d; +@group(0) @binding(3) var preprocessed_depth_mip2: texture_storage_2d; +@group(0) @binding(4) var preprocessed_depth_mip3: texture_storage_2d; +@group(0) @binding(5) var preprocessed_depth_mip4: texture_storage_2d; +@group(1) @binding(0) var point_clamp_sampler: sampler; +@group(1) @binding(1) var linear_clamp_sampler: sampler; +@group(1) @binding(2) var view: View; + + +// Using 4 depths from the previous MIP, compute a weighted average for the depth of the current MIP +fn weighted_average(depth0: f32, depth1: f32, depth2: f32, depth3: f32) -> f32 { + let depth_range_scale_factor = 0.75; + let effect_radius = depth_range_scale_factor * 0.5 * 1.457; + let falloff_range = 0.615 * effect_radius; + let falloff_from = effect_radius 
* (1.0 - 0.615); + let falloff_mul = -1.0 / falloff_range; + let falloff_add = falloff_from / falloff_range + 1.0; + + let min_depth = min(min(depth0, depth1), min(depth2, depth3)); + let weight0 = saturate((depth0 - min_depth) * falloff_mul + falloff_add); + let weight1 = saturate((depth1 - min_depth) * falloff_mul + falloff_add); + let weight2 = saturate((depth2 - min_depth) * falloff_mul + falloff_add); + let weight3 = saturate((depth3 - min_depth) * falloff_mul + falloff_add); + let weight_total = weight0 + weight1 + weight2 + weight3; + + return ((weight0 * depth0) + (weight1 * depth1) + (weight2 * depth2) + (weight3 * depth3)) / weight_total; +} + +// Used to share the depths from the previous MIP level between all invocations in a workgroup +var previous_mip_depth: array, 8>; + +@compute +@workgroup_size(8, 8, 1) +fn preprocess_depth(@builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3) { + let base_coordinates = vec2(global_id.xy); + + // MIP 0 - Copy 4 texels from the input depth (per invocation, 8x8 invocations per workgroup) + let pixel_coordinates0 = base_coordinates * 2i; + let pixel_coordinates1 = pixel_coordinates0 + vec2(1i, 0i); + let pixel_coordinates2 = pixel_coordinates0 + vec2(0i, 1i); + let pixel_coordinates3 = pixel_coordinates0 + vec2(1i, 1i); + let depths_uv = vec2(pixel_coordinates0) / view.viewport.zw; + let depths = textureGather(0, input_depth, point_clamp_sampler, depths_uv, vec2(1i, 1i)); + textureStore(preprocessed_depth_mip0, pixel_coordinates0, vec4(depths.w, 0.0, 0.0, 0.0)); + textureStore(preprocessed_depth_mip0, pixel_coordinates1, vec4(depths.z, 0.0, 0.0, 0.0)); + textureStore(preprocessed_depth_mip0, pixel_coordinates2, vec4(depths.x, 0.0, 0.0, 0.0)); + textureStore(preprocessed_depth_mip0, pixel_coordinates3, vec4(depths.y, 0.0, 0.0, 0.0)); + + // MIP 1 - Weighted average of MIP 0's depth values (per invocation, 8x8 invocations per workgroup) + let depth_mip1 = 
weighted_average(depths.w, depths.z, depths.x, depths.y); + textureStore(preprocessed_depth_mip1, base_coordinates, vec4(depth_mip1, 0.0, 0.0, 0.0)); + previous_mip_depth[local_id.x][local_id.y] = depth_mip1; + + workgroupBarrier(); + + // MIP 2 - Weighted average of MIP 1's depth values (per invocation, 4x4 invocations per workgroup) + if all(local_id.xy % vec2(2u) == vec2(0u)) { + let depth0 = previous_mip_depth[local_id.x + 0u][local_id.y + 0u]; + let depth1 = previous_mip_depth[local_id.x + 1u][local_id.y + 0u]; + let depth2 = previous_mip_depth[local_id.x + 0u][local_id.y + 1u]; + let depth3 = previous_mip_depth[local_id.x + 1u][local_id.y + 1u]; + let depth_mip2 = weighted_average(depth0, depth1, depth2, depth3); + textureStore(preprocessed_depth_mip2, base_coordinates / 2i, vec4(depth_mip2, 0.0, 0.0, 0.0)); + previous_mip_depth[local_id.x][local_id.y] = depth_mip2; + } + + workgroupBarrier(); + + // MIP 3 - Weighted average of MIP 2's depth values (per invocation, 2x2 invocations per workgroup) + if all(local_id.xy % vec2(4u) == vec2(0u)) { + let depth0 = previous_mip_depth[local_id.x + 0u][local_id.y + 0u]; + let depth1 = previous_mip_depth[local_id.x + 2u][local_id.y + 0u]; + let depth2 = previous_mip_depth[local_id.x + 0u][local_id.y + 2u]; + let depth3 = previous_mip_depth[local_id.x + 2u][local_id.y + 2u]; + let depth_mip3 = weighted_average(depth0, depth1, depth2, depth3); + textureStore(preprocessed_depth_mip3, base_coordinates / 4i, vec4(depth_mip3, 0.0, 0.0, 0.0)); + previous_mip_depth[local_id.x][local_id.y] = depth_mip3; + } + + workgroupBarrier(); + + // MIP 4 - Weighted average of MIP 3's depth values (per invocation, 1 invocation per workgroup) + if all(local_id.xy % vec2(8u) == vec2(0u)) { + let depth0 = previous_mip_depth[local_id.x + 0u][local_id.y + 0u]; + let depth1 = previous_mip_depth[local_id.x + 4u][local_id.y + 0u]; + let depth2 = previous_mip_depth[local_id.x + 0u][local_id.y + 4u]; + let depth3 = previous_mip_depth[local_id.x + 
4u][local_id.y + 4u]; + let depth_mip4 = weighted_average(depth0, depth1, depth2, depth3); + textureStore(preprocessed_depth_mip4, base_coordinates / 8i, vec4(depth_mip4, 0.0, 0.0, 0.0)); + } +} diff --git a/crates/libmarathon/src/render/pbr/ssao/spatial_denoise.wgsl b/crates/libmarathon/src/render/pbr/ssao/spatial_denoise.wgsl new file mode 100644 index 0000000..1c04f9c --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssao/spatial_denoise.wgsl @@ -0,0 +1,85 @@ +// 3x3 bilateral filter (edge-preserving blur) +// https://people.csail.mit.edu/sparis/bf_course/course_notes.pdf + +// Note: Does not use the Gaussian kernel part of a typical bilateral blur +// From the paper: "use the information gathered on a neighborhood of 4 × 4 using a bilateral filter for +// reconstruction, using _uniform_ convolution weights" + +// Note: The paper does a 4x4 (not quite centered) filter, offset by +/- 1 pixel every other frame +// XeGTAO does a 3x3 filter, on two pixels at a time per compute thread, applied twice +// We do a 3x3 filter, on 1 pixel per compute thread, applied once + +#import bevy_render::view::View + +@group(0) @binding(0) var ambient_occlusion_noisy: texture_2d; +@group(0) @binding(1) var depth_differences: texture_2d; +@group(0) @binding(2) var ambient_occlusion: texture_storage_2d; +@group(1) @binding(0) var point_clamp_sampler: sampler; +@group(1) @binding(1) var linear_clamp_sampler: sampler; +@group(1) @binding(2) var view: View; + +@compute +@workgroup_size(8, 8, 1) +fn spatial_denoise(@builtin(global_invocation_id) global_id: vec3) { + let pixel_coordinates = vec2(global_id.xy); + let uv = vec2(pixel_coordinates) / view.viewport.zw; + + let edges0 = textureGather(0, depth_differences, point_clamp_sampler, uv); + let edges1 = textureGather(0, depth_differences, point_clamp_sampler, uv, vec2(2i, 0i)); + let edges2 = textureGather(0, depth_differences, point_clamp_sampler, uv, vec2(0i, 2i)); + let visibility0 = textureGather(0, ambient_occlusion_noisy,
point_clamp_sampler, uv); + let visibility1 = textureGather(0, ambient_occlusion_noisy, point_clamp_sampler, uv, vec2(2i, 0i)); + let visibility2 = textureGather(0, ambient_occlusion_noisy, point_clamp_sampler, uv, vec2(0i, 2i)); + let visibility3 = textureGather(0, ambient_occlusion_noisy, point_clamp_sampler, uv, vec2(2i, 2i)); + + let left_edges = unpack4x8unorm(edges0.x); + let right_edges = unpack4x8unorm(edges1.x); + let top_edges = unpack4x8unorm(edges0.z); + let bottom_edges = unpack4x8unorm(edges2.w); + var center_edges = unpack4x8unorm(edges0.y); + center_edges *= vec4(left_edges.y, right_edges.x, top_edges.w, bottom_edges.z); + + let center_weight = 1.2; + let left_weight = center_edges.x; + let right_weight = center_edges.y; + let top_weight = center_edges.z; + let bottom_weight = center_edges.w; + let top_left_weight = 0.425 * (top_weight * top_edges.x + left_weight * left_edges.z); + let top_right_weight = 0.425 * (top_weight * top_edges.y + right_weight * right_edges.z); + let bottom_left_weight = 0.425 * (bottom_weight * bottom_edges.x + left_weight * left_edges.w); + let bottom_right_weight = 0.425 * (bottom_weight * bottom_edges.y + right_weight * right_edges.w); + + let center_visibility = visibility0.y; + let left_visibility = visibility0.x; + let right_visibility = visibility0.z; + let top_visibility = visibility1.x; + let bottom_visibility = visibility2.z; + let top_left_visibility = visibility0.w; + let top_right_visibility = visibility1.w; + let bottom_left_visibility = visibility2.w; + let bottom_right_visibility = visibility3.w; + + var sum = center_visibility; + sum += left_visibility * left_weight; + sum += right_visibility * right_weight; + sum += top_visibility * top_weight; + sum += bottom_visibility * bottom_weight; + sum += top_left_visibility * top_left_weight; + sum += top_right_visibility * top_right_weight; + sum += bottom_left_visibility * bottom_left_weight; + sum += bottom_right_visibility * bottom_right_weight; + + var 
sum_weight = center_weight; + sum_weight += left_weight; + sum_weight += right_weight; + sum_weight += top_weight; + sum_weight += bottom_weight; + sum_weight += top_left_weight; + sum_weight += top_right_weight; + sum_weight += bottom_left_weight; + sum_weight += bottom_right_weight; + + let denoised_visibility = sum / sum_weight; + + textureStore(ambient_occlusion, pixel_coordinates, vec4(denoised_visibility, 0.0, 0.0, 0.0)); +} diff --git a/crates/libmarathon/src/render/pbr/ssao/ssao.wgsl b/crates/libmarathon/src/render/pbr/ssao/ssao.wgsl new file mode 100644 index 0000000..ac64d56 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssao/ssao.wgsl @@ -0,0 +1,200 @@ +// Visibility Bitmask Ambient Occlusion (VBAO) +// Paper: https://ar5iv.labs.arxiv.org/html/2301.11376 + +// Source code heavily based on XeGTAO v1.30 from Intel +// https://github.com/GameTechDev/XeGTAO/blob/0d177ce06bfa642f64d8af4de1197ad1bcb862d4/Source/Rendering/Shaders/XeGTAO.hlsli + +// Source code based on the existing XeGTAO implementation and +// https://cdrinmatane.github.io/posts/ssaovb-code/ + +// Source code based on SSRT3 implementation +// https://github.com/cdrinmatane/SSRT3 + +#import bevy_render::maths::fast_acos + +#import bevy_render::{ + view::View, + globals::Globals, + maths::{PI, HALF_PI}, +} + +@group(0) @binding(0) var preprocessed_depth: texture_2d; +@group(0) @binding(1) var normals: texture_2d; +@group(0) @binding(2) var hilbert_index_lut: texture_2d; +@group(0) @binding(3) var ambient_occlusion: texture_storage_2d; +@group(0) @binding(4) var depth_differences: texture_storage_2d; +@group(0) @binding(5) var globals: Globals; +@group(0) @binding(6) var thickness: f32; +@group(1) @binding(0) var point_clamp_sampler: sampler; +@group(1) @binding(1) var linear_clamp_sampler: sampler; +@group(1) @binding(2) var view: View; + +fn load_noise(pixel_coordinates: vec2) -> vec2 { + var index = textureLoad(hilbert_index_lut, pixel_coordinates % 64, 0).r; + +#ifdef TEMPORAL_JITTER + 
index += 288u * (globals.frame_count % 64u); +#endif + + // R2 sequence - http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences + return fract(0.5 + f32(index) * vec2(0.75487766624669276005, 0.5698402909980532659114)); +} + +// Calculate differences in depth between neighbor pixels (later used by the spatial denoiser pass to preserve object edges) +fn calculate_neighboring_depth_differences(pixel_coordinates: vec2) -> f32 { + // Sample the pixel's depth and 4 depths around it + let uv = vec2(pixel_coordinates) / view.viewport.zw; + let depths_upper_left = textureGather(0, preprocessed_depth, point_clamp_sampler, uv); + let depths_bottom_right = textureGather(0, preprocessed_depth, point_clamp_sampler, uv, vec2(1i, 1i)); + let depth_center = depths_upper_left.y; + let depth_left = depths_upper_left.x; + let depth_top = depths_upper_left.z; + let depth_bottom = depths_bottom_right.x; + let depth_right = depths_bottom_right.z; + + // Calculate the depth differences (large differences represent object edges) + var edge_info = vec4(depth_left, depth_right, depth_top, depth_bottom) - depth_center; + let slope_left_right = (edge_info.y - edge_info.x) * 0.5; + let slope_top_bottom = (edge_info.w - edge_info.z) * 0.5; + let edge_info_slope_adjusted = edge_info + vec4(slope_left_right, -slope_left_right, slope_top_bottom, -slope_top_bottom); + edge_info = min(abs(edge_info), abs(edge_info_slope_adjusted)); + let bias = 0.25; // Using the bias and then saturating nudges the values a bit + let scale = depth_center * 0.011; // Weight the edges by their distance from the camera + edge_info = saturate((1.0 + bias) - edge_info / scale); // Apply the bias and scale, and invert edge_info so that small values become large, and vice versa + + // Pack the edge info into the texture + let edge_info_packed = vec4(pack4x8unorm(edge_info), 0u, 0u, 0u); + textureStore(depth_differences, pixel_coordinates, edge_info_packed); + + return depth_center; +} + +fn 
load_normal_view_space(uv: vec2) -> vec3 { + var world_normal = textureSampleLevel(normals, point_clamp_sampler, uv, 0.0).xyz; + world_normal = (world_normal * 2.0) - 1.0; + let view_from_world = mat3x3( + view.view_from_world[0].xyz, + view.view_from_world[1].xyz, + view.view_from_world[2].xyz, + ); + return view_from_world * world_normal; +} + +fn reconstruct_view_space_position(depth: f32, uv: vec2) -> vec3 { + let clip_xy = vec2(uv.x * 2.0 - 1.0, 1.0 - 2.0 * uv.y); + let t = view.view_from_clip * vec4(clip_xy, depth, 1.0); + let view_xyz = t.xyz / t.w; + return view_xyz; +} + +fn load_and_reconstruct_view_space_position(uv: vec2, sample_mip_level: f32) -> vec3 { + let depth = textureSampleLevel(preprocessed_depth, linear_clamp_sampler, uv, sample_mip_level).r; + return reconstruct_view_space_position(depth, uv); +} + +fn updateSectors( + min_horizon: f32, + max_horizon: f32, + samples_per_slice: f32, + bitmask: u32, +) -> u32 { + let start_horizon = u32(min_horizon * samples_per_slice); + let angle_horizon = u32(ceil((max_horizon - min_horizon) * samples_per_slice)); + + return insertBits(bitmask, 0xFFFFFFFFu, start_horizon, angle_horizon); +} + +fn processSample( + delta_position: vec3, + view_vec: vec3, + sampling_direction: f32, + n: vec2, + samples_per_slice: f32, + bitmask: ptr, +) { + let delta_position_back_face = delta_position - view_vec * thickness; + + var front_back_horizon = vec2( + fast_acos(dot(normalize(delta_position), view_vec)), + fast_acos(dot(normalize(delta_position_back_face), view_vec)), + ); + + front_back_horizon = saturate(fma(vec2(sampling_direction), -front_back_horizon, n)); + front_back_horizon = select(front_back_horizon.xy, front_back_horizon.yx, sampling_direction >= 0.0); + + *bitmask = updateSectors(front_back_horizon.x, front_back_horizon.y, samples_per_slice, *bitmask); +} + +@compute +@workgroup_size(8, 8, 1) +fn ssao(@builtin(global_invocation_id) global_id: vec3) { + let slice_count = f32(#SLICE_COUNT); + let 
samples_per_slice_side = f32(#SAMPLES_PER_SLICE_SIDE); + let effect_radius = 0.5 * 1.457; + let falloff_range = 0.615 * effect_radius; + let falloff_from = effect_radius * (1.0 - 0.615); + let falloff_mul = -1.0 / falloff_range; + let falloff_add = falloff_from / falloff_range + 1.0; + + let pixel_coordinates = vec2(global_id.xy); + let uv = (vec2(pixel_coordinates) + 0.5) / view.viewport.zw; + + var pixel_depth = calculate_neighboring_depth_differences(pixel_coordinates); + pixel_depth += 0.00001; // Avoid depth precision issues + + let pixel_position = reconstruct_view_space_position(pixel_depth, uv); + let pixel_normal = load_normal_view_space(uv); + let view_vec = normalize(-pixel_position); + + let noise = load_noise(pixel_coordinates); + let sample_scale = (-0.5 * effect_radius * view.clip_from_view[0][0]) / pixel_position.z; + + var visibility = 0.0; + var occluded_sample_count = 0u; + for (var slice_t = 0.0; slice_t < slice_count; slice_t += 1.0) { + let slice = slice_t + noise.x; + let phi = (PI / slice_count) * slice; + let omega = vec2(cos(phi), sin(phi)); + + let direction = vec3(omega.xy, 0.0); + let orthographic_direction = direction - (dot(direction, view_vec) * view_vec); + let axis = cross(direction, view_vec); + let projected_normal = pixel_normal - axis * dot(pixel_normal, axis); + let projected_normal_length = length(projected_normal); + + let sign_norm = sign(dot(orthographic_direction, projected_normal)); + let cos_norm = saturate(dot(projected_normal, view_vec) / projected_normal_length); + let n = vec2((HALF_PI - sign_norm * fast_acos(cos_norm)) * (1.0 / PI)); + + var bitmask = 0u; + + let sample_mul = vec2(omega.x, -omega.y) * sample_scale; + for (var sample_t = 0.0; sample_t < samples_per_slice_side; sample_t += 1.0) { + var sample_noise = (slice_t + sample_t * samples_per_slice_side) * 0.6180339887498948482; + sample_noise = fract(noise.y + sample_noise); + + var s = (sample_t + sample_noise) / samples_per_slice_side; + s *= s; // 
https://github.com/GameTechDev/XeGTAO#sample-distribution + let sample = s * sample_mul; + + // * view.viewport.zw gets us from [0, 1] to [0, viewport_size], which is needed for this to get the correct mip levels + let sample_mip_level = clamp(log2(length(sample * view.viewport.zw)) - 3.3, 0.0, 5.0); // https://github.com/GameTechDev/XeGTAO#memory-bandwidth-bottleneck + let sample_position_1 = load_and_reconstruct_view_space_position(uv + sample, sample_mip_level); + let sample_position_2 = load_and_reconstruct_view_space_position(uv - sample, sample_mip_level); + + let sample_difference_1 = sample_position_1 - pixel_position; + let sample_difference_2 = sample_position_2 - pixel_position; + + processSample(sample_difference_1, view_vec, -1.0, n, samples_per_slice_side * 2.0, &bitmask); + processSample(sample_difference_2, view_vec, 1.0, n, samples_per_slice_side * 2.0, &bitmask); + } + + occluded_sample_count += countOneBits(bitmask); + } + + visibility = 1.0 - f32(occluded_sample_count) / (slice_count * 2.0 * samples_per_slice_side); + + visibility = clamp(visibility, 0.03, 1.0); + + textureStore(ambient_occlusion, pixel_coordinates, vec4(visibility, 0.0, 0.0, 0.0)); +} diff --git a/crates/libmarathon/src/render/pbr/ssao/ssao_utils.wgsl b/crates/libmarathon/src/render/pbr/ssao/ssao_utils.wgsl new file mode 100644 index 0000000..be19fa6 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssao/ssao_utils.wgsl @@ -0,0 +1,13 @@ +#define_import_path bevy_pbr::ssao_utils + +#import bevy_render::maths::{PI, HALF_PI} + +// Approximates single-bounce ambient occlusion to multi-bounce ambient occlusion +// https://blog.selfshadow.com/publications/s2016-shading-course/activision/s2016_pbs_activision_occlusion.pdf#page=78 +fn ssao_multibounce(visibility: f32, base_color: vec3) -> vec3 { + let a = 2.0404 * base_color - 0.3324; + let b = -4.7951 * base_color + 0.6417; + let c = 2.7552 * base_color + 0.6903; + let x = vec3(visibility); + return max(x, ((x * a + b) * x + c) * 
x); +} diff --git a/crates/libmarathon/src/render/pbr/ssr/mod.rs b/crates/libmarathon/src/render/pbr/ssr/mod.rs new file mode 100644 index 0000000..93ed839 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssr/mod.rs @@ -0,0 +1,578 @@ +//! Screen space reflections implemented via raymarching. + +use bevy_app::{App, Plugin}; +use bevy_asset::{load_embedded_asset, AssetServer, Handle}; +use crate::render::{ + core_3d::{ + graph::{Core3d, Node3d}, + DEPTH_TEXTURE_SAMPLING_SUPPORTED, + }, + prepass::{DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass}, + FullscreenShader, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::{Has, QueryItem, With}, + reflect::ReflectComponent, + resource::Resource, + schedule::IntoScheduleConfigs as _, + system::{lifetimeless::Read, Commands, Query, Res, ResMut}, + world::World, +}; +use bevy_image::BevyDefault as _; +use bevy_light::EnvironmentMapLight; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + diagnostic::RecordDiagnostics, + extract_component::{ExtractComponent, ExtractComponentPlugin}, + render_graph::{ + NodeRunError, RenderGraph, RenderGraphContext, RenderGraphExt, ViewNode, ViewNodeRunner, + }, + render_resource::{ + binding_types, AddressMode, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, + CachedRenderPipelineId, ColorTargetState, ColorWrites, DynamicUniformBuffer, FilterMode, + FragmentState, Operations, PipelineCache, RenderPassColorAttachment, RenderPassDescriptor, + RenderPipelineDescriptor, Sampler, SamplerBindingType, SamplerDescriptor, ShaderStages, + ShaderType, SpecializedRenderPipeline, SpecializedRenderPipelines, TextureFormat, + TextureSampleType, + }, + renderer::{RenderAdapter, RenderContext, RenderDevice, RenderQueue}, + view::{ExtractedView, Msaa, ViewTarget, ViewUniformOffset}, + Render, RenderApp, RenderStartup, RenderSystems, +}; +use bevy_shader::{load_shader_library, Shader}; +use 
bevy_utils::{once, prelude::default}; +use tracing::info; + +use crate::render::pbr::{ + binding_arrays_are_usable, graph::NodePbr, MeshPipelineViewLayoutKey, MeshPipelineViewLayouts, + MeshViewBindGroup, RenderViewLightProbes, ViewEnvironmentMapUniformOffset, + ViewFogUniformOffset, ViewLightProbesUniformOffset, ViewLightsUniformOffset, +}; + +/// Enables screen-space reflections for a camera. +/// +/// Screen-space reflections are currently only supported with deferred rendering. +pub struct ScreenSpaceReflectionsPlugin; + +/// Add this component to a camera to enable *screen-space reflections* (SSR). +/// +/// Screen-space reflections currently require deferred rendering in order to +/// appear. Therefore, they also need the [`DepthPrepass`] and [`DeferredPrepass`] +/// components, which are inserted automatically. +/// +/// SSR currently performs no roughness filtering for glossy reflections, so +/// only very smooth surfaces will reflect objects in screen space. You can +/// adjust the `perceptual_roughness_threshold` in order to tune the threshold +/// below which screen-space reflections will be traced. +/// +/// As with all screen-space techniques, SSR can only reflect objects on screen. +/// When objects leave the camera, they will disappear from reflections. +/// An alternative that doesn't suffer from this problem is the combination of +/// a [`LightProbe`](bevy_light::LightProbe) and [`EnvironmentMapLight`]. The advantage of SSR is +/// that it can reflect all objects, not just static ones. +/// +/// SSR is an approximation technique and produces artifacts in some situations. +/// Hand-tuning the settings in this component will likely be useful. +/// +/// Screen-space reflections are presently unsupported on WebGL 2 because of a +/// bug whereby Naga doesn't generate correct GLSL when sampling depth buffers, +/// which is required for screen-space raymarching. 
+#[derive(Clone, Copy, Component, Reflect)] +#[reflect(Component, Default, Clone)] +#[require(DepthPrepass, DeferredPrepass)] +#[doc(alias = "Ssr")] +pub struct ScreenSpaceReflections { + /// The maximum PBR roughness level that will enable screen space + /// reflections. + pub perceptual_roughness_threshold: f32, + + /// When marching the depth buffer, we only have 2.5D information and don't + /// know how thick surfaces are. We shall assume that the depth buffer + /// fragments are cuboids with a constant thickness defined by this + /// parameter. + pub thickness: f32, + + /// The number of steps to be taken at regular intervals to find an initial + /// intersection. Must not be zero. + /// + /// Higher values result in higher-quality reflections, because the + /// raymarching shader is less likely to miss objects. However, they take + /// more GPU time. + pub linear_steps: u32, + + /// Exponent to be applied in the linear part of the march. + /// + /// A value of 1.0 will result in equidistant steps, and higher values will + /// compress the earlier steps, and expand the later ones. This might be + /// desirable in order to get more detail close to objects. + /// + /// For optimal performance, this should be a small unsigned integer, such + /// as 1 or 2. + pub linear_march_exponent: f32, + + /// Number of steps in a bisection (binary search) to perform once the + /// linear search has found an intersection. Helps narrow down the hit, + /// increasing the chance of the secant method finding an accurate hit + /// point. + pub bisection_steps: u32, + + /// Approximate the root position using the secant method—by solving for + /// line-line intersection between the ray approach rate and the surface + /// gradient. + pub use_secant: bool, +} + +/// A version of [`ScreenSpaceReflections`] for upload to the GPU. +/// +/// For more information on these fields, see the corresponding documentation in +/// [`ScreenSpaceReflections`]. 
+#[derive(Clone, Copy, Component, ShaderType)] +pub struct ScreenSpaceReflectionsUniform { + perceptual_roughness_threshold: f32, + thickness: f32, + linear_steps: u32, + linear_march_exponent: f32, + bisection_steps: u32, + /// A boolean converted to a `u32`. + use_secant: u32, +} + +/// The node in the render graph that traces screen space reflections. +#[derive(Default)] +pub struct ScreenSpaceReflectionsNode; + +/// Identifies which screen space reflections render pipeline a view needs. +#[derive(Component, Deref, DerefMut)] +pub struct ScreenSpaceReflectionsPipelineId(pub CachedRenderPipelineId); + +/// Information relating to the render pipeline for the screen space reflections +/// shader. +#[derive(Resource)] +pub struct ScreenSpaceReflectionsPipeline { + mesh_view_layouts: MeshPipelineViewLayouts, + color_sampler: Sampler, + depth_linear_sampler: Sampler, + depth_nearest_sampler: Sampler, + bind_group_layout: BindGroupLayout, + binding_arrays_are_usable: bool, + fullscreen_shader: FullscreenShader, + fragment_shader: Handle, +} + +/// A GPU buffer that stores the screen space reflection settings for each view. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct ScreenSpaceReflectionsBuffer(pub DynamicUniformBuffer); + +/// A component that stores the offset within the +/// [`ScreenSpaceReflectionsBuffer`] for each view. +#[derive(Component, Default, Deref, DerefMut)] +pub struct ViewScreenSpaceReflectionsUniformOffset(u32); + +/// Identifies a specific configuration of the SSR pipeline shader. 
+#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct ScreenSpaceReflectionsPipelineKey { + mesh_pipeline_view_key: MeshPipelineViewLayoutKey, + is_hdr: bool, + has_environment_maps: bool, +} + +impl Plugin for ScreenSpaceReflectionsPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "ssr.wgsl"); + load_shader_library!(app, "raymarch.wgsl"); + + app.add_plugins(ExtractComponentPlugin::::default()); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::() + .init_resource::>() + .add_systems( + RenderStartup, + ( + init_screen_space_reflections_pipeline, + add_screen_space_reflections_render_graph_edges, + ), + ) + .add_systems(Render, prepare_ssr_pipelines.in_set(RenderSystems::Prepare)) + .add_systems( + Render, + prepare_ssr_settings.in_set(RenderSystems::PrepareResources), + ) + // Note: we add this node here but then we add edges in + // `add_screen_space_reflections_render_graph_edges`. + .add_render_graph_node::>( + Core3d, + NodePbr::ScreenSpaceReflections, + ); + } +} + +fn add_screen_space_reflections_render_graph_edges(mut render_graph: ResMut) { + let subgraph = render_graph.sub_graph_mut(Core3d); + + subgraph.add_node_edge(NodePbr::ScreenSpaceReflections, Node3d::MainOpaquePass); + + if subgraph + .get_node_state(NodePbr::DeferredLightingPass) + .is_ok() + { + subgraph.add_node_edge( + NodePbr::DeferredLightingPass, + NodePbr::ScreenSpaceReflections, + ); + } +} + +impl Default for ScreenSpaceReflections { + // Reasonable default values. + // + // These are from + // . 
+ fn default() -> Self { + Self { + perceptual_roughness_threshold: 0.1, + linear_steps: 16, + bisection_steps: 4, + use_secant: true, + thickness: 0.25, + linear_march_exponent: 1.0, + } + } +} + +impl ViewNode for ScreenSpaceReflectionsNode { + type ViewQuery = ( + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + ); + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + view_target, + view_uniform_offset, + view_lights_offset, + view_fog_offset, + view_light_probes_offset, + view_ssr_offset, + view_environment_map_offset, + view_bind_group, + ssr_pipeline_id, + ): QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + // Grab the render pipeline. + let pipeline_cache = world.resource::(); + let Some(render_pipeline) = pipeline_cache.get_render_pipeline(**ssr_pipeline_id) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + // Set up a standard pair of postprocessing textures. + let postprocess = view_target.post_process_write(); + + // Create the bind group for this view. + let ssr_pipeline = world.resource::(); + let ssr_bind_group = render_context.render_device().create_bind_group( + "SSR bind group", + &ssr_pipeline.bind_group_layout, + &BindGroupEntries::sequential(( + postprocess.source, + &ssr_pipeline.color_sampler, + &ssr_pipeline.depth_linear_sampler, + &ssr_pipeline.depth_nearest_sampler, + )), + ); + + // Build the SSR render pass. + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("ssr"), + color_attachments: &[Some(RenderPassColorAttachment { + view: postprocess.destination, + depth_slice: None, + resolve_target: None, + ops: Operations::default(), + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + let pass_span = diagnostics.pass_span(&mut render_pass, "ssr"); + + // Set bind groups. 
+ render_pass.set_render_pipeline(render_pipeline); + render_pass.set_bind_group( + 0, + &view_bind_group.main, + &[ + view_uniform_offset.offset, + view_lights_offset.offset, + view_fog_offset.offset, + **view_light_probes_offset, + **view_ssr_offset, + **view_environment_map_offset, + ], + ); + render_pass.set_bind_group(1, &view_bind_group.binding_array, &[]); + + // Perform the SSR render pass. + render_pass.set_bind_group(2, &ssr_bind_group, &[]); + render_pass.draw(0..3, 0..1); + + pass_span.end(&mut render_pass); + + Ok(()) + } +} + +pub fn init_screen_space_reflections_pipeline( + mut commands: Commands, + render_device: Res, + render_adapter: Res, + mesh_view_layouts: Res, + fullscreen_shader: Res, + asset_server: Res, +) { + // Create the bind group layout. + let bind_group_layout = render_device.create_bind_group_layout( + "SSR bind group layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + binding_types::texture_2d(TextureSampleType::Float { filterable: true }), + binding_types::sampler(SamplerBindingType::Filtering), + binding_types::sampler(SamplerBindingType::Filtering), + binding_types::sampler(SamplerBindingType::NonFiltering), + ), + ), + ); + + // Create the samplers we need. 
+ + let color_sampler = render_device.create_sampler(&SamplerDescriptor { + label: "SSR color sampler".into(), + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + mag_filter: FilterMode::Linear, + min_filter: FilterMode::Linear, + ..default() + }); + + let depth_linear_sampler = render_device.create_sampler(&SamplerDescriptor { + label: "SSR depth linear sampler".into(), + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + mag_filter: FilterMode::Linear, + min_filter: FilterMode::Linear, + ..default() + }); + + let depth_nearest_sampler = render_device.create_sampler(&SamplerDescriptor { + label: "SSR depth nearest sampler".into(), + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + mag_filter: FilterMode::Nearest, + min_filter: FilterMode::Nearest, + ..default() + }); + + commands.insert_resource(ScreenSpaceReflectionsPipeline { + mesh_view_layouts: mesh_view_layouts.clone(), + color_sampler, + depth_linear_sampler, + depth_nearest_sampler, + bind_group_layout, + binding_arrays_are_usable: binding_arrays_are_usable(&render_device, &render_adapter), + fullscreen_shader: fullscreen_shader.clone(), + // Even though ssr was loaded using load_shader_library, we can still access it like a + // normal embedded asset (so we can use it as both a library or a kernel). + fragment_shader: load_embedded_asset!(asset_server.as_ref(), "ssr.wgsl"), + }); +} + +/// Sets up screen space reflection pipelines for each applicable view. 
+pub fn prepare_ssr_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + ssr_pipeline: Res, + views: Query< + ( + Entity, + &ExtractedView, + Has>, + Has, + Has, + ), + ( + With, + With, + With, + ), + >, +) { + for ( + entity, + extracted_view, + has_environment_maps, + has_normal_prepass, + has_motion_vector_prepass, + ) in &views + { + // SSR is only supported in the deferred pipeline, which has no MSAA + // support. Thus we can assume MSAA is off. + let mut mesh_pipeline_view_key = MeshPipelineViewLayoutKey::from(Msaa::Off) + | MeshPipelineViewLayoutKey::DEPTH_PREPASS + | MeshPipelineViewLayoutKey::DEFERRED_PREPASS; + mesh_pipeline_view_key.set( + MeshPipelineViewLayoutKey::NORMAL_PREPASS, + has_normal_prepass, + ); + mesh_pipeline_view_key.set( + MeshPipelineViewLayoutKey::MOTION_VECTOR_PREPASS, + has_motion_vector_prepass, + ); + + // Build the pipeline. + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &ssr_pipeline, + ScreenSpaceReflectionsPipelineKey { + mesh_pipeline_view_key, + is_hdr: extracted_view.hdr, + has_environment_maps, + }, + ); + + // Note which pipeline ID was used. + commands + .entity(entity) + .insert(ScreenSpaceReflectionsPipelineId(pipeline_id)); + } +} + +/// Gathers up screen space reflection settings for each applicable view and +/// writes them into a GPU buffer. 
+pub fn prepare_ssr_settings( + mut commands: Commands, + views: Query<(Entity, Option<&ScreenSpaceReflectionsUniform>), With>, + mut ssr_settings_buffer: ResMut, + render_device: Res, + render_queue: Res, +) { + let Some(mut writer) = + ssr_settings_buffer.get_writer(views.iter().len(), &render_device, &render_queue) + else { + return; + }; + + for (view, ssr_uniform) in views.iter() { + let uniform_offset = match ssr_uniform { + None => 0, + Some(ssr_uniform) => writer.write(ssr_uniform), + }; + commands + .entity(view) + .insert(ViewScreenSpaceReflectionsUniformOffset(uniform_offset)); + } +} + +impl ExtractComponent for ScreenSpaceReflections { + type QueryData = Read; + + type QueryFilter = (); + + type Out = ScreenSpaceReflectionsUniform; + + fn extract_component(settings: QueryItem<'_, '_, Self::QueryData>) -> Option { + if !DEPTH_TEXTURE_SAMPLING_SUPPORTED { + once!(info!( + "Disabling screen-space reflections on this platform because depth textures \ + aren't supported correctly" + )); + return None; + } + + Some((*settings).into()) + } +} + +impl SpecializedRenderPipeline for ScreenSpaceReflectionsPipeline { + type Key = ScreenSpaceReflectionsPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + let layout = self + .mesh_view_layouts + .get_view_layout(key.mesh_pipeline_view_key); + let layout = vec![ + layout.main_layout.clone(), + layout.binding_array_layout.clone(), + self.bind_group_layout.clone(), + ]; + + let mut shader_defs = vec![ + "DEPTH_PREPASS".into(), + "DEFERRED_PREPASS".into(), + "SCREEN_SPACE_REFLECTIONS".into(), + ]; + + if key.has_environment_maps { + shader_defs.push("ENVIRONMENT_MAP".into()); + } + + if self.binding_arrays_are_usable { + shader_defs.push("MULTIPLE_LIGHT_PROBES_IN_ARRAY".into()); + } + + RenderPipelineDescriptor { + label: Some("SSR pipeline".into()), + layout, + vertex: self.fullscreen_shader.to_vertex_state(), + fragment: Some(FragmentState { + shader: self.fragment_shader.clone(), + 
shader_defs, + targets: vec![Some(ColorTargetState { + format: if key.is_hdr { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }, + blend: None, + write_mask: ColorWrites::ALL, + })], + ..default() + }), + ..default() + } + } +} + +impl From for ScreenSpaceReflectionsUniform { + fn from(settings: ScreenSpaceReflections) -> Self { + Self { + perceptual_roughness_threshold: settings.perceptual_roughness_threshold, + thickness: settings.thickness, + linear_steps: settings.linear_steps, + linear_march_exponent: settings.linear_march_exponent, + bisection_steps: settings.bisection_steps, + use_secant: settings.use_secant as u32, + } + } +} diff --git a/crates/libmarathon/src/render/pbr/ssr/raymarch.wgsl b/crates/libmarathon/src/render/pbr/ssr/raymarch.wgsl new file mode 100644 index 0000000..12140c9 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssr/raymarch.wgsl @@ -0,0 +1,511 @@ +// Copyright (c) 2023 Tomasz Stachowiak +// +// This contribution is dual licensed under EITHER OF +// +// Apache License, Version 2.0, (http://www.apache.org/licenses/LICENSE-2.0) +// MIT license (http://opensource.org/licenses/MIT) +// +// at your option. +// +// This is a port of the original [`raymarch.hlsl`] to WGSL. It's deliberately +// kept as close as possible so that patches to the original `raymarch.hlsl` +// have the greatest chances of applying to this version. +// +// [`raymarch.hlsl`]: +// https://gist.github.com/h3r2tic/9c8356bdaefbe80b1a22ae0aaee192db + +#define_import_path bevy_pbr::raymarch + +#import bevy_pbr::mesh_view_bindings::depth_prepass_texture +#import bevy_pbr::view_transformations::{ + direction_world_to_clip, + ndc_to_uv, + perspective_camera_near, + position_world_to_ndc, +} + +// Allows us to sample from the depth buffer with bilinear filtering. +@group(2) @binding(2) var depth_linear_sampler: sampler; + +// Allows us to sample from the depth buffer with nearest-neighbor filtering. 
+@group(2) @binding(3) var depth_nearest_sampler: sampler; + +// Main code + +struct HybridRootFinder { + linear_steps: u32, + bisection_steps: u32, + use_secant: bool, + linear_march_exponent: f32, + + jitter: f32, + min_t: f32, + max_t: f32, +} + +fn hybrid_root_finder_new_with_linear_steps(v: u32) -> HybridRootFinder { + var res: HybridRootFinder; + res.linear_steps = v; + res.bisection_steps = 0u; + res.use_secant = false; + res.linear_march_exponent = 1.0; + res.jitter = 1.0; + res.min_t = 0.0; + res.max_t = 1.0; + return res; +} + +fn hybrid_root_finder_find_root( + root_finder: ptr, + start: vec3, + end: vec3, + distance_fn: ptr, + hit_t: ptr, + miss_t: ptr, + hit_d: ptr, +) -> bool { + let dir = end - start; + + var min_t = (*root_finder).min_t; + var max_t = (*root_finder).max_t; + + var min_d = DistanceWithPenetration(0.0, false, 0.0); + var max_d = DistanceWithPenetration(0.0, false, 0.0); + + let step_size = (max_t - min_t) / f32((*root_finder).linear_steps); + + var intersected = false; + + // + // Ray march using linear steps + + if ((*root_finder).linear_steps > 0u) { + let candidate_t = mix( + min_t, + max_t, + pow( + (*root_finder).jitter / f32((*root_finder).linear_steps), + (*root_finder).linear_march_exponent + ) + ); + + let candidate = start + dir * candidate_t; + let candidate_d = depth_raymarch_distance_fn_evaluate(distance_fn, candidate); + intersected = candidate_d.distance < 0.0 && candidate_d.valid; + + if (intersected) { + max_t = candidate_t; + max_d = candidate_d; + // The `[min_t .. max_t]` interval contains an intersection. End the linear search. + } else { + // No intersection yet. Carry on. 
+ min_t = candidate_t; + min_d = candidate_d; + + for (var step = 1u; step < (*root_finder).linear_steps; step += 1u) { + let candidate_t = mix( + (*root_finder).min_t, + (*root_finder).max_t, + pow( + (f32(step) + (*root_finder).jitter) / f32((*root_finder).linear_steps), + (*root_finder).linear_march_exponent + ) + ); + + let candidate = start + dir * candidate_t; + let candidate_d = depth_raymarch_distance_fn_evaluate(distance_fn, candidate); + intersected = candidate_d.distance < 0.0 && candidate_d.valid; + + if (intersected) { + max_t = candidate_t; + max_d = candidate_d; + // The `[min_t .. max_t]` interval contains an intersection. + // End the linear search. + break; + } else { + // No intersection yet. Carry on. + min_t = candidate_t; + min_d = candidate_d; + } + } + } + } + + *miss_t = min_t; + *hit_t = min_t; + + // + // Refine the hit using bisection + + if (intersected) { + for (var step = 0u; step < (*root_finder).bisection_steps; step += 1u) { + let mid_t = (min_t + max_t) * 0.5; + let candidate = start + dir * mid_t; + let candidate_d = depth_raymarch_distance_fn_evaluate(distance_fn, candidate); + + if (candidate_d.distance < 0.0 && candidate_d.valid) { + // Intersection at the mid point. Refine the first half. + max_t = mid_t; + max_d = candidate_d; + } else { + // No intersection yet at the mid point. Refine the second half. + min_t = mid_t; + min_d = candidate_d; + } + } + + if ((*root_finder).use_secant) { + // Finish with one application of the secant method + let total_d = min_d.distance + -max_d.distance; + + let mid_t = mix(min_t, max_t, min_d.distance / total_d); + let candidate = start + dir * mid_t; + let candidate_d = depth_raymarch_distance_fn_evaluate(distance_fn, candidate); + + // Only accept the result of the secant method if it improves upon + // the previous result. + // + // Technically root_finder should be `abs(candidate_d.distance) < + // min(min_d.distance, -max_d.distance) * frac`, but root_finder seems + // sufficient. 
+ if (abs(candidate_d.distance) < min_d.distance * 0.9 && candidate_d.valid) { + *hit_t = mid_t; + *hit_d = candidate_d; + } else { + *hit_t = max_t; + *hit_d = max_d; + } + + return true; + } else { + *hit_t = max_t; + *hit_d = max_d; + return true; + } + } else { + // Mark the conservative miss distance. + *hit_t = min_t; + return false; + } +} + +struct DistanceWithPenetration { + /// Distance to the surface of which a root we're trying to find + distance: f32, + + /// Whether to consider this sample valid for intersection. + /// Mostly relevant for allowing the ray marcher to travel behind surfaces, + /// as it will mark surfaces it travels under as invalid. + valid: bool, + + /// Conservative estimate of depth to which the ray penetrates the marched surface. + penetration: f32, +} + +struct DepthRaymarchDistanceFn { + depth_tex_size: vec2, + + march_behind_surfaces: bool, + depth_thickness: f32, + + use_sloppy_march: bool, +} + +fn depth_raymarch_distance_fn_evaluate( + distance_fn: ptr, + ray_point_cs: vec3, +) -> DistanceWithPenetration { + let interp_uv = ndc_to_uv(ray_point_cs.xy); + + let ray_depth = 1.0 / ray_point_cs.z; + + // We're using both point-sampled and bilinear-filtered values from the depth buffer. + // + // That's really stupid but works like magic. For samples taken near the ray origin, + // the discrete nature of the depth buffer becomes a problem. It's not a land of continuous surfaces, + // but a bunch of stacked duplo bricks. + // + // Technically we should be taking discrete steps in distance_fn duplo land, but then we're at the mercy + // of arbitrary quantization of our directions -- and sometimes we'll take a step which would + // claim that the ray is occluded -- even though the underlying smooth surface wouldn't occlude it. + // + // If we instead take linear taps from the depth buffer, we reconstruct the linear surface. 
+ // That fixes acne, but introduces false shadowing near object boundaries, as we now pretend + // that everything is shrink-wrapped by distance_fn continuous 2.5D surface, and our depth thickness + // heuristic ends up falling apart. + // + // The fix is to consider both the smooth and the discrete surfaces, and only claim occlusion + // when the ray descends below both. + // + // The two approaches end up fixing each other's artifacts: + // * The false occlusions due to duplo land are rejected because the ray stays above the smooth surface. + // * The shrink-wrap surface is no longer continuous, so it's possible for rays to miss it. + + let linear_depth = + 1.0 / textureSampleLevel(depth_prepass_texture, depth_linear_sampler, interp_uv, 0u); + let unfiltered_depth = + 1.0 / textureSampleLevel(depth_prepass_texture, depth_nearest_sampler, interp_uv, 0u); + + var max_depth: f32; + var min_depth: f32; + + if ((*distance_fn).use_sloppy_march) { + max_depth = unfiltered_depth; + min_depth = unfiltered_depth; + } else { + max_depth = max(linear_depth, unfiltered_depth); + min_depth = min(linear_depth, unfiltered_depth); + } + + let bias = 0.000002; + + var res: DistanceWithPenetration; + res.distance = max_depth * (1.0 + bias) - ray_depth; + + // distance_fn will be used at the end of the ray march to potentially discard the hit. + res.penetration = ray_depth - min_depth; + + if ((*distance_fn).march_behind_surfaces) { + res.valid = res.penetration < (*distance_fn).depth_thickness; + } else { + res.valid = true; + } + + return res; +} + +struct DepthRayMarchResult { + /// True if the raymarch hit something. + hit: bool, + + /// In case of a hit, the normalized distance to it. + /// + /// In case of a miss, the furthest the ray managed to travel, which could either be + /// exceeding the max range, or getting behind a surface further than the depth thickness. + /// + /// Range: `0..=1` as a lerp factor over `ray_start_cs..=ray_end_cs`. 
+ hit_t: f32, + + /// UV corresponding to `hit_t`. + hit_uv: vec2, + + /// The distance that the hit point penetrates into the hit surface. + /// Will normally be non-zero due to limited precision of the ray march. + /// + /// In case of a miss: undefined. + hit_penetration: f32, + + /// Ditto, within the range `0..DepthRayMarch::depth_thickness_linear_z` + /// + /// In case of a miss: undefined. + hit_penetration_frac: f32, +} + +struct DepthRayMarch { + /// Number of steps to be taken at regular intervals to find an initial intersection. + /// Must not be zero. + linear_steps: u32, + + /// Exponent to be applied in the linear part of the march. + /// + /// A value of 1.0 will result in equidistant steps, and higher values will compress + /// the earlier steps, and expand the later ones. This might be desirable in order + /// to get more detail close to objects in SSR or SSGI. + /// + /// For optimal performance, this should be a small compile-time unsigned integer, + /// such as 1 or 2. + linear_march_exponent: f32, + + /// Number of steps in a bisection (binary search) to perform once the linear search + /// has found an intersection. Helps narrow down the hit, increasing the chance of + /// the secant method finding an accurate hit point. + /// + /// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows. + bisection_steps: u32, + + /// Approximate the root position using the secant method -- by solving for line-line + /// intersection between the ray approach rate and the surface gradient. + /// + /// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows. + use_secant: bool, + + /// Jitter to apply to the first step of the linear search; 0..=1 range, mapping + /// to the extent of a single linear step in the first phase of the search. + /// Use 1.0 if you don't want jitter. + jitter: f32, + + /// Clip space coordinates (w=1) of the ray. 
+ ray_start_cs: vec3, + ray_end_cs: vec3, + + /// Should be used for contact shadows, but not for any color bounce, e.g. SSR. + /// + /// For SSR etc. this can easily create leaks, but with contact shadows it allows the rays + /// to pass over invalid occlusions (due to thickness), and find potentially valid ones ahead. + /// + /// Note that this will cause the linear search to potentially miss surfaces, + /// because when the ray overshoots and ends up penetrating a surface further than + /// `depth_thickness_linear_z`, the ray marcher will just carry on. + /// + /// For this reason, this may require a lot of samples, or high depth thickness, + /// so that `depth_thickness_linear_z >= world space ray length / linear_steps`. + march_behind_surfaces: bool, + + /// If `true`, the ray marcher only performs nearest lookups of the depth buffer, + /// resulting in aliasing and false occlusion when marching tiny detail. + /// It should work fine for longer traces with fewer rays though. + use_sloppy_march: bool, + + /// When marching the depth buffer, we only have 2.5D information, and don't know how + /// thick surfaces are. We shall assume that the depth buffer fragments are little squares + /// with a constant thickness defined by this parameter. + depth_thickness_linear_z: f32, + + /// Size of the depth buffer we're marching in, in pixels. 
+ depth_tex_size: vec2, +} + +fn depth_ray_march_new_from_depth(depth_tex_size: vec2) -> DepthRayMarch { + var res: DepthRayMarch; + res.jitter = 1.0; + res.linear_steps = 4u; + res.bisection_steps = 0u; + res.linear_march_exponent = 1.0; + res.depth_tex_size = depth_tex_size; + res.depth_thickness_linear_z = 1.0; + res.march_behind_surfaces = false; + res.use_sloppy_march = false; + return res; +} + +fn depth_ray_march_to_cs_dir_impl( + raymarch: ptr, + dir_cs: vec4, + infinite: bool, +) { + var end_cs = vec4((*raymarch).ray_start_cs, 1.0) + dir_cs; + + // Perform perspective division, but avoid dividing by zero for rays + // heading directly towards the eye. + end_cs /= select(-1.0, 1.0, end_cs.w >= 0.0) * max(1e-10, abs(end_cs.w)); + + // Clip ray start to the view frustum + var delta_cs = end_cs.xyz - (*raymarch).ray_start_cs; + let near_edge = select(vec3(-1.0, -1.0, 0.0), vec3(1.0, 1.0, 1.0), delta_cs < vec3(0.0)); + let dist_to_near_edge = (near_edge - (*raymarch).ray_start_cs) / delta_cs; + let max_dist_to_near_edge = max(dist_to_near_edge.x, dist_to_near_edge.y); + (*raymarch).ray_start_cs += delta_cs * max(0.0, max_dist_to_near_edge); + + // Clip ray end to the view frustum + + delta_cs = end_cs.xyz - (*raymarch).ray_start_cs; + let far_edge = select(vec3(-1.0, -1.0, 0.0), vec3(1.0, 1.0, 1.0), delta_cs >= vec3(0.0)); + let dist_to_far_edge = (far_edge - (*raymarch).ray_start_cs) / delta_cs; + let min_dist_to_far_edge = min( + min(dist_to_far_edge.x, dist_to_far_edge.y), + dist_to_far_edge.z + ); + + if (infinite) { + delta_cs *= min_dist_to_far_edge; + } else { + // If unbounded, would make the ray reach the end of the frustum + delta_cs *= min(1.0, min_dist_to_far_edge); + } + + (*raymarch).ray_end_cs = (*raymarch).ray_start_cs + delta_cs; +} + +/// March from a clip-space position (w = 1) +fn depth_ray_march_from_cs(raymarch: ptr, v: vec3) { + (*raymarch).ray_start_cs = v; +} + +/// March to a clip-space position (w = 1) +/// +/// Must be called after 
`from_cs`, as it will clip the world-space ray to the view frustum. +fn depth_ray_march_to_cs(raymarch: ptr, end_cs: vec3) { + let dir = vec4(end_cs - (*raymarch).ray_start_cs, 0.0) * sign(end_cs.z); + depth_ray_march_to_cs_dir_impl(raymarch, dir, false); +} + +/// March towards a clip-space direction. Infinite (ray is extended to cover the whole view frustum). +/// +/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. +fn depth_ray_march_to_cs_dir(raymarch: ptr, dir: vec4) { + depth_ray_march_to_cs_dir_impl(raymarch, dir, true); +} + +/// March to a world-space position. +/// +/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. +fn depth_ray_march_to_ws(raymarch: ptr, end: vec3) { + depth_ray_march_to_cs(raymarch, position_world_to_ndc(end)); +} + +/// March towards a world-space direction. Infinite (ray is extended to cover the whole view frustum). +/// +/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. +fn depth_ray_march_to_ws_dir(raymarch: ptr, dir: vec3) { + depth_ray_march_to_cs_dir_impl(raymarch, direction_world_to_clip(dir), true); +} + +/// Perform the ray march. 
+fn depth_ray_march_march(raymarch: ptr) -> DepthRayMarchResult { + var res = DepthRayMarchResult(false, 0.0, vec2(0.0), 0.0, 0.0); + + let ray_start_uv = ndc_to_uv((*raymarch).ray_start_cs.xy); + let ray_end_uv = ndc_to_uv((*raymarch).ray_end_cs.xy); + + let ray_uv_delta = ray_end_uv - ray_start_uv; + let ray_len_px = ray_uv_delta * (*raymarch).depth_tex_size; + + let min_px_per_step = 1u; + let step_count = max( + 2, + min(i32((*raymarch).linear_steps), i32(floor(length(ray_len_px) / f32(min_px_per_step)))) + ); + + let linear_z_to_scaled_linear_z = 1.0 / perspective_camera_near(); + let depth_thickness = (*raymarch).depth_thickness_linear_z * linear_z_to_scaled_linear_z; + + var distance_fn: DepthRaymarchDistanceFn; + distance_fn.depth_tex_size = (*raymarch).depth_tex_size; + distance_fn.march_behind_surfaces = (*raymarch).march_behind_surfaces; + distance_fn.depth_thickness = depth_thickness; + distance_fn.use_sloppy_march = (*raymarch).use_sloppy_march; + + var hit: DistanceWithPenetration; + + var hit_t = 0.0; + var miss_t = 0.0; + var root_finder = hybrid_root_finder_new_with_linear_steps(u32(step_count)); + root_finder.bisection_steps = (*raymarch).bisection_steps; + root_finder.use_secant = (*raymarch).use_secant; + root_finder.linear_march_exponent = (*raymarch).linear_march_exponent; + root_finder.jitter = (*raymarch).jitter; + let intersected = hybrid_root_finder_find_root( + &root_finder, + (*raymarch).ray_start_cs, + (*raymarch).ray_end_cs, + &distance_fn, + &hit_t, + &miss_t, + &hit + ); + + res.hit_t = hit_t; + + if (intersected && hit.penetration < depth_thickness && hit.distance < depth_thickness) { + res.hit = true; + res.hit_uv = mix(ray_start_uv, ray_end_uv, res.hit_t); + res.hit_penetration = hit.penetration / linear_z_to_scaled_linear_z; + res.hit_penetration_frac = hit.penetration / depth_thickness; + return res; + } + + res.hit_t = miss_t; + res.hit_uv = mix(ray_start_uv, ray_end_uv, res.hit_t); + + return res; +} diff --git 
a/crates/libmarathon/src/render/pbr/ssr/ssr.wgsl b/crates/libmarathon/src/render/pbr/ssr/ssr.wgsl new file mode 100644 index 0000000..d646ac6 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/ssr/ssr.wgsl @@ -0,0 +1,194 @@ +// A postprocessing pass that performs screen-space reflections. + +#define_import_path bevy_pbr::ssr + +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput +#import bevy_pbr::{ + clustered_forward, + lighting, + lighting::{LAYER_BASE, LAYER_CLEARCOAT}, + mesh_view_bindings::{view, depth_prepass_texture, deferred_prepass_texture, ssr_settings}, + pbr_deferred_functions::pbr_input_from_deferred_gbuffer, + pbr_deferred_types, + pbr_functions, + prepass_utils, + raymarch::{ + depth_ray_march_from_cs, + depth_ray_march_march, + depth_ray_march_new_from_depth, + depth_ray_march_to_ws_dir, + }, + utils, + view_transformations::{ + depth_ndc_to_view_z, + frag_coord_to_ndc, + ndc_to_frag_coord, + ndc_to_uv, + position_view_to_ndc, + position_world_to_ndc, + position_world_to_view, + }, +} +#import bevy_render::view::View + +#ifdef ENVIRONMENT_MAP +#import bevy_pbr::environment_map +#endif + +// The texture representing the color framebuffer. +@group(2) @binding(0) var color_texture: texture_2d; + +// The sampler that lets us sample from the color framebuffer. +@group(2) @binding(1) var color_sampler: sampler; + +// Group 1, bindings 2 and 3 are in `raymarch.wgsl`. + +// Returns the reflected color in the RGB channel and the specular occlusion in +// the alpha channel. +// +// The general approach here is similar to [1]. We first project the reflection +// ray into screen space. Then we perform uniform steps along that screen-space +// reflected ray, converting each step to view space. +// +// The arguments are: +// +// * `R_world`: The reflection vector in world space. +// +// * `P_world`: The current position in world space. 
+// +// [1]: https://lettier.github.io/3d-game-shaders-for-beginners/screen-space-reflection.html +fn evaluate_ssr(R_world: vec3, P_world: vec3) -> vec4 { + let depth_size = vec2(textureDimensions(depth_prepass_texture)); + + var raymarch = depth_ray_march_new_from_depth(depth_size); + depth_ray_march_from_cs(&raymarch, position_world_to_ndc(P_world)); + depth_ray_march_to_ws_dir(&raymarch, normalize(R_world)); + raymarch.linear_steps = ssr_settings.linear_steps; + raymarch.bisection_steps = ssr_settings.bisection_steps; + raymarch.use_secant = ssr_settings.use_secant != 0u; + raymarch.depth_thickness_linear_z = ssr_settings.thickness; + raymarch.jitter = 1.0; // Disable jitter for now. + raymarch.march_behind_surfaces = false; + + let raymarch_result = depth_ray_march_march(&raymarch); + if (raymarch_result.hit) { + return vec4( + textureSampleLevel(color_texture, color_sampler, raymarch_result.hit_uv, 0.0).rgb, + 0.0 + ); + } + + return vec4(0.0, 0.0, 0.0, 1.0); +} + +@fragment +fn fragment(in: FullscreenVertexOutput) -> @location(0) vec4 { + // Sample the depth. + var frag_coord = in.position; + frag_coord.z = prepass_utils::prepass_depth(in.position, 0u); + + // Load the G-buffer data. + let fragment = textureLoad(color_texture, vec2(frag_coord.xy), 0); + let gbuffer = textureLoad(deferred_prepass_texture, vec2(frag_coord.xy), 0); + let pbr_input = pbr_input_from_deferred_gbuffer(frag_coord, gbuffer); + + // Don't do anything if the surface is too rough, since we can't blur or do + // temporal accumulation yet. + let perceptual_roughness = pbr_input.material.perceptual_roughness; + if (perceptual_roughness > ssr_settings.perceptual_roughness_threshold) { + return fragment; + } + + // Unpack the PBR input. + var specular_occlusion = pbr_input.specular_occlusion; + let world_position = pbr_input.world_position.xyz; + let N = pbr_input.N; + let V = pbr_input.V; + + // Calculate the reflection vector. + let R = reflect(-V, N); + + // Do the raymarching. 
+ let ssr_specular = evaluate_ssr(R, world_position); + var indirect_light = ssr_specular.rgb; + specular_occlusion *= ssr_specular.a; + + // Sample the environment map if necessary. + // + // This will take the specular part of the environment map into account if + // the ray missed. Otherwise, it only takes the diffuse part. + // + // TODO: Merge this with the duplicated code in `apply_pbr_lighting`. +#ifdef ENVIRONMENT_MAP + // Unpack values required for environment mapping. + let base_color = pbr_input.material.base_color.rgb; + let metallic = pbr_input.material.metallic; + let reflectance = pbr_input.material.reflectance; + let specular_transmission = pbr_input.material.specular_transmission; + let diffuse_transmission = pbr_input.material.diffuse_transmission; + let diffuse_occlusion = pbr_input.diffuse_occlusion; + +#ifdef STANDARD_MATERIAL_CLEARCOAT + // Do the above calculations again for the clearcoat layer. Remember that + // the clearcoat can have its own roughness and its own normal. + let clearcoat = pbr_input.material.clearcoat; + let clearcoat_perceptual_roughness = pbr_input.material.clearcoat_perceptual_roughness; + let clearcoat_roughness = lighting::perceptualRoughnessToRoughness(clearcoat_perceptual_roughness); + let clearcoat_N = pbr_input.clearcoat_N; + let clearcoat_NdotV = max(dot(clearcoat_N, pbr_input.V), 0.0001); + let clearcoat_R = reflect(-pbr_input.V, clearcoat_N); +#endif // STANDARD_MATERIAL_CLEARCOAT + + // Calculate various other values needed for environment mapping. + let roughness = lighting::perceptualRoughnessToRoughness(perceptual_roughness); + let diffuse_color = pbr_functions::calculate_diffuse_color( + base_color, + metallic, + specular_transmission, + diffuse_transmission + ); + let NdotV = max(dot(N, V), 0.0001); + let F_ab = lighting::F_AB(perceptual_roughness, NdotV); + let F0 = pbr_functions::calculate_F0(base_color, metallic, reflectance); + + // Pack all the values into a structure. 
+ var lighting_input: lighting::LightingInput; + lighting_input.layers[LAYER_BASE].NdotV = NdotV; + lighting_input.layers[LAYER_BASE].N = N; + lighting_input.layers[LAYER_BASE].R = R; + lighting_input.layers[LAYER_BASE].perceptual_roughness = perceptual_roughness; + lighting_input.layers[LAYER_BASE].roughness = roughness; + lighting_input.P = world_position.xyz; + lighting_input.V = V; + lighting_input.diffuse_color = diffuse_color; + lighting_input.F0_ = F0; + lighting_input.F_ab = F_ab; +#ifdef STANDARD_MATERIAL_CLEARCOAT + lighting_input.layers[LAYER_CLEARCOAT].NdotV = clearcoat_NdotV; + lighting_input.layers[LAYER_CLEARCOAT].N = clearcoat_N; + lighting_input.layers[LAYER_CLEARCOAT].R = clearcoat_R; + lighting_input.layers[LAYER_CLEARCOAT].perceptual_roughness = clearcoat_perceptual_roughness; + lighting_input.layers[LAYER_CLEARCOAT].roughness = clearcoat_roughness; + lighting_input.clearcoat_strength = clearcoat; +#endif // STANDARD_MATERIAL_CLEARCOAT + + // Determine which cluster we're in. We'll need this to find the right + // reflection probe. + let cluster_index = clustered_forward::fragment_cluster_index( + frag_coord.xy, frag_coord.z, false); + var clusterable_object_index_ranges = + clustered_forward::unpack_clusterable_object_index_ranges(cluster_index); + + // Sample the environment map. + let environment_light = environment_map::environment_map_light( + &lighting_input, &clusterable_object_index_ranges, false); + + // Accumulate the environment map light. + indirect_light += view.exposure * + (environment_light.diffuse * diffuse_occlusion + + environment_light.specular * specular_occlusion); +#endif + + // Write the results. + return vec4(fragment.rgb + indirect_light, 1.0); +} diff --git a/crates/libmarathon/src/render/pbr/volumetric_fog/mod.rs b/crates/libmarathon/src/render/pbr/volumetric_fog/mod.rs new file mode 100644 index 0000000..160dc14 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/volumetric_fog/mod.rs @@ -0,0 +1,114 @@ +//! 
Volumetric fog and volumetric lighting, also known as light shafts or god +//! rays. +//! +//! This module implements a more physically-accurate, but slower, form of fog +//! than the [`crate::fog`] module does. Notably, this *volumetric fog* allows +//! for light beams from directional lights to shine through, creating what is +//! known as *light shafts* or *god rays*. +//! +//! To add volumetric fog to a scene, add [`bevy_light::VolumetricFog`] to the +//! camera, and add [`bevy_light::VolumetricLight`] to directional lights that you wish to +//! be volumetric. [`bevy_light::VolumetricFog`] feature numerous settings that +//! allow you to define the accuracy of the simulation, as well as the look of +//! the fog. Currently, only interaction with directional lights that have +//! shadow maps is supported. Note that the overhead of the effect scales +//! directly with the number of directional lights in use, so apply +//! [`bevy_light::VolumetricLight`] sparingly for the best results. +//! +//! The overall algorithm, which is implemented as a postprocessing effect, is a +//! combination of the techniques described in [Scratchapixel] and [this blog +//! post]. It uses raymarching in screen space, transformed into shadow map +//! space for sampling and combined with physically-based modeling of absorption +//! and scattering. Bevy employs the widely-used [Henyey-Greenstein phase +//! function] to model asymmetry; this essentially allows light shafts to fade +//! into and out of existence as the user views them. +//! +//! [Scratchapixel]: https://www.scratchapixel.com/lessons/3d-basic-rendering/volume-rendering-for-developers/intro-volume-rendering.html +//! +//! [this blog post]: https://www.alexandre-pestana.com/volumetric-lights/ +//! +//! 
[Henyey-Greenstein phase function]: https://www.pbr-book.org/4ed/Volume_Scattering/Phase_Functions#TheHenyeyndashGreensteinPhaseFunction + +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, Assets, Handle}; +use crate::render::core_3d::{ + graph::{Core3d, Node3d}, + prepare_core_3d_depth_textures, +}; +use bevy_ecs::{resource::Resource, schedule::IntoScheduleConfigs as _}; +use bevy_light::FogVolume; +use bevy_math::{ + primitives::{Cuboid, Plane3d}, + Vec2, Vec3, +}; +use bevy_mesh::{Mesh, Meshable}; +use crate::render::{ + render_graph::{RenderGraphExt, ViewNodeRunner}, + render_resource::SpecializedRenderPipelines, + sync_component::SyncComponentPlugin, + ExtractSchedule, Render, RenderApp, RenderStartup, RenderSystems, +}; +use render::{VolumetricFogNode, VolumetricFogPipeline, VolumetricFogUniformBuffer}; + +use crate::render::pbr::{graph::NodePbr, volumetric_fog::render::init_volumetric_fog_pipeline}; + +pub mod render; + +/// A plugin that implements volumetric fog. +pub struct VolumetricFogPlugin; + +#[derive(Resource)] +pub struct FogAssets { + plane_mesh: Handle, + cube_mesh: Handle, +} + +impl Plugin for VolumetricFogPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "volumetric_fog.wgsl"); + + let mut meshes = app.world_mut().resource_mut::>(); + let plane_mesh = meshes.add(Plane3d::new(Vec3::Z, Vec2::ONE).mesh()); + let cube_mesh = meshes.add(Cuboid::new(1.0, 1.0, 1.0).mesh()); + + app.add_plugins(SyncComponentPlugin::::default()); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .insert_resource(FogAssets { + plane_mesh, + cube_mesh, + }) + .init_resource::>() + .init_resource::() + .add_systems(RenderStartup, init_volumetric_fog_pipeline) + .add_systems(ExtractSchedule, render::extract_volumetric_fog) + .add_systems( + Render, + ( + render::prepare_volumetric_fog_pipelines.in_set(RenderSystems::Prepare), + 
render::prepare_volumetric_fog_uniforms.in_set(RenderSystems::Prepare), + render::prepare_view_depth_textures_for_volumetric_fog + .in_set(RenderSystems::Prepare) + .before(prepare_core_3d_depth_textures), + ), + ) + .add_render_graph_node::>( + Core3d, + NodePbr::VolumetricFog, + ) + .add_render_graph_edges( + Core3d, + // Volumetric fog should run after the main pass but before bloom, so + // we order it at the start of post processing. + ( + Node3d::EndMainPass, + NodePbr::VolumetricFog, + Node3d::StartMainPassPostProcessing, + ), + ); + } +} diff --git a/crates/libmarathon/src/render/pbr/volumetric_fog/render.rs b/crates/libmarathon/src/render/pbr/volumetric_fog/render.rs new file mode 100644 index 0000000..6e9955d --- /dev/null +++ b/crates/libmarathon/src/render/pbr/volumetric_fog/render.rs @@ -0,0 +1,882 @@ +//! Rendering of fog volumes. + +use core::array; + +use bevy_asset::{load_embedded_asset, AssetId, AssetServer, Handle}; +use bevy_camera::Camera3d; +use bevy_color::ColorToComponents as _; +use crate::render::prepass::{ + DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::{Has, QueryItem, With}, + resource::Resource, + system::{lifetimeless::Read, Commands, Local, Query, Res, ResMut}, + world::World, +}; +use bevy_image::{BevyDefault, Image}; +use bevy_light::{FogVolume, VolumetricFog, VolumetricLight}; +use bevy_math::{vec4, Affine3A, Mat4, Vec3, Vec3A, Vec4}; +use bevy_mesh::{Mesh, MeshVertexBufferLayoutRef}; +use crate::render::{ + diagnostic::RecordDiagnostics, + mesh::{allocator::MeshAllocator, RenderMesh, RenderMeshBufferInfo}, + render_asset::RenderAssets, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_resource::{ + binding_types::{ + sampler, texture_3d, texture_depth_2d, texture_depth_2d_multisampled, uniform_buffer, + }, + BindGroupLayout, BindGroupLayoutEntries, BindingResource, BlendComponent, 
BlendFactor, + BlendOperation, BlendState, CachedRenderPipelineId, ColorTargetState, ColorWrites, + DynamicBindGroupEntries, DynamicUniformBuffer, Face, FragmentState, LoadOp, Operations, + PipelineCache, PrimitiveState, RenderPassColorAttachment, RenderPassDescriptor, + RenderPipelineDescriptor, SamplerBindingType, ShaderStages, ShaderType, + SpecializedRenderPipeline, SpecializedRenderPipelines, StoreOp, TextureFormat, + TextureSampleType, TextureUsages, VertexState, + }, + renderer::{RenderContext, RenderDevice, RenderQueue}, + sync_world::RenderEntity, + texture::GpuImage, + view::{ExtractedView, Msaa, ViewDepthTexture, ViewTarget, ViewUniformOffset}, + Extract, +}; +use bevy_shader::Shader; +use bevy_transform::components::GlobalTransform; +use bevy_utils::prelude::default; +use bitflags::bitflags; + +use crate::render::pbr::{ + MeshPipelineViewLayoutKey, MeshPipelineViewLayouts, MeshViewBindGroup, + ViewEnvironmentMapUniformOffset, ViewFogUniformOffset, ViewLightProbesUniformOffset, + ViewLightsUniformOffset, ViewScreenSpaceReflectionsUniformOffset, +}; + +use super::FogAssets; + +bitflags! { + /// Flags that describe the bind group layout used to render volumetric fog. + #[derive(Clone, Copy, PartialEq)] + struct VolumetricFogBindGroupLayoutKey: u8 { + /// The framebuffer is multisampled. + const MULTISAMPLED = 0x1; + /// The volumetric fog has a 3D voxel density texture. + const DENSITY_TEXTURE = 0x2; + } +} + +bitflags! { + /// Flags that describe the rasterization pipeline used to render volumetric + /// fog. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + struct VolumetricFogPipelineKeyFlags: u8 { + /// The view's color format has high dynamic range. + const HDR = 0x1; + /// The volumetric fog has a 3D voxel density texture. + const DENSITY_TEXTURE = 0x2; + } +} + +/// The total number of bind group layouts. +/// +/// This is the total number of combinations of all +/// [`VolumetricFogBindGroupLayoutKey`] flags. 
+const VOLUMETRIC_FOG_BIND_GROUP_LAYOUT_COUNT: usize = + VolumetricFogBindGroupLayoutKey::all().bits() as usize + 1; + +/// A matrix that converts from local 1×1×1 space to UVW 3D density texture +/// space. +static UVW_FROM_LOCAL: Mat4 = Mat4::from_cols( + vec4(1.0, 0.0, 0.0, 0.0), + vec4(0.0, 1.0, 0.0, 0.0), + vec4(0.0, 0.0, 1.0, 0.0), + vec4(0.5, 0.5, 0.5, 1.0), +); + +/// The GPU pipeline for the volumetric fog postprocessing effect. +#[derive(Resource)] +pub struct VolumetricFogPipeline { + /// A reference to the shared set of mesh pipeline view layouts. + mesh_view_layouts: MeshPipelineViewLayouts, + + /// All bind group layouts. + /// + /// Since there aren't too many of these, we precompile them all. + volumetric_view_bind_group_layouts: [BindGroupLayout; VOLUMETRIC_FOG_BIND_GROUP_LAYOUT_COUNT], + + // The shader asset handle. + shader: Handle, +} + +/// The two render pipelines that we use for fog volumes: one for when a 3D +/// density texture is present and one for when it isn't. +#[derive(Component)] +pub struct ViewVolumetricFogPipelines { + /// The render pipeline that we use when no density texture is present, and + /// the density distribution is uniform. + pub textureless: CachedRenderPipelineId, + /// The render pipeline that we use when a density texture is present. + pub textured: CachedRenderPipelineId, +} + +/// The node in the render graph, part of the postprocessing stack, that +/// implements volumetric fog. +#[derive(Default)] +pub struct VolumetricFogNode; + +/// Identifies a single specialization of the volumetric fog shader. +#[derive(PartialEq, Eq, Hash, Clone)] +pub struct VolumetricFogPipelineKey { + /// The layout of the view, which is needed for the raymarching. + mesh_pipeline_view_key: MeshPipelineViewLayoutKey, + + /// The vertex buffer layout of the primitive. 
+ /// + /// Both planes (used when the camera is inside the fog volume) and cubes + /// (used when the camera is outside the fog volume) use identical vertex + /// buffer layouts, so we only need one of them. + vertex_buffer_layout: MeshVertexBufferLayoutRef, + + /// Flags that specify features on the pipeline key. + flags: VolumetricFogPipelineKeyFlags, +} + +/// The same as [`VolumetricFog`] and [`FogVolume`], but formatted for +/// the GPU. +/// +/// See the documentation of those structures for more information on these +/// fields. +#[derive(ShaderType)] +pub struct VolumetricFogUniform { + clip_from_local: Mat4, + + /// The transform from world space to 3D density texture UVW space. + uvw_from_world: Mat4, + + /// View-space plane equations of the far faces of the fog volume cuboid. + /// + /// The vector takes the form V = (N, -N⋅Q), where N is the normal of the + /// plane and Q is any point in it, in view space. The equation of the plane + /// for homogeneous point P = (Px, Py, Pz, Pw) is V⋅P = 0. + far_planes: [Vec4; 3], + + fog_color: Vec3, + light_tint: Vec3, + ambient_color: Vec3, + ambient_intensity: f32, + step_count: u32, + + /// The radius of a sphere that bounds the fog volume in view space. + bounding_radius: f32, + + absorption: f32, + scattering: f32, + density: f32, + density_texture_offset: Vec3, + scattering_asymmetry: f32, + light_intensity: f32, + jitter_strength: f32, +} + +/// Specifies the offset within the [`VolumetricFogUniformBuffer`] of the +/// [`VolumetricFogUniform`] for a specific view. +#[derive(Component, Deref, DerefMut)] +pub struct ViewVolumetricFog(Vec); + +/// Information that the render world needs to maintain about each fog volume. +pub struct ViewFogVolume { + /// The 3D voxel density texture for this volume, if present. + density_texture: Option>, + /// The offset of this view's [`VolumetricFogUniform`] structure within the + /// [`VolumetricFogUniformBuffer`]. 
+ uniform_buffer_offset: u32, + /// True if the camera is outside the fog volume; false if it's inside the + /// fog volume. + exterior: bool, +} + +/// The GPU buffer that stores the [`VolumetricFogUniform`] data. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct VolumetricFogUniformBuffer(pub DynamicUniformBuffer); + +pub fn init_volumetric_fog_pipeline( + mut commands: Commands, + render_device: Res, + mesh_view_layouts: Res, + asset_server: Res, +) { + // Create the bind group layout entries common to all bind group + // layouts. + let base_bind_group_layout_entries = &BindGroupLayoutEntries::single( + ShaderStages::VERTEX_FRAGMENT, + // `volumetric_fog` + uniform_buffer::(true), + ); + + // For every combination of `VolumetricFogBindGroupLayoutKey` bits, + // create a bind group layout. + let bind_group_layouts = array::from_fn(|bits| { + let flags = VolumetricFogBindGroupLayoutKey::from_bits_retain(bits as u8); + + let mut bind_group_layout_entries = base_bind_group_layout_entries.to_vec(); + + // `depth_texture` + bind_group_layout_entries.extend_from_slice(&BindGroupLayoutEntries::with_indices( + ShaderStages::FRAGMENT, + (( + 1, + if flags.contains(VolumetricFogBindGroupLayoutKey::MULTISAMPLED) { + texture_depth_2d_multisampled() + } else { + texture_depth_2d() + }, + ),), + )); + + // `density_texture` and `density_sampler` + if flags.contains(VolumetricFogBindGroupLayoutKey::DENSITY_TEXTURE) { + bind_group_layout_entries.extend_from_slice(&BindGroupLayoutEntries::with_indices( + ShaderStages::FRAGMENT, + ( + (2, texture_3d(TextureSampleType::Float { filterable: true })), + (3, sampler(SamplerBindingType::Filtering)), + ), + )); + } + + // Create the bind group layout. 
+ let description = flags.bind_group_layout_description(); + render_device.create_bind_group_layout(&*description, &bind_group_layout_entries) + }); + + commands.insert_resource(VolumetricFogPipeline { + mesh_view_layouts: mesh_view_layouts.clone(), + volumetric_view_bind_group_layouts: bind_group_layouts, + shader: load_embedded_asset!(asset_server.as_ref(), "volumetric_fog.wgsl"), + }); +} + +/// Extracts [`VolumetricFog`], [`FogVolume`], and [`VolumetricLight`]s +/// from the main world to the render world. +pub fn extract_volumetric_fog( + mut commands: Commands, + view_targets: Extract>, + fog_volumes: Extract>, + volumetric_lights: Extract>, +) { + if volumetric_lights.is_empty() { + // TODO: needs better way to handle clean up in render world + for (entity, ..) in view_targets.iter() { + commands + .entity(entity) + .remove::<(VolumetricFog, ViewVolumetricFogPipelines, ViewVolumetricFog)>(); + } + for (entity, ..) in fog_volumes.iter() { + commands.entity(entity).remove::(); + } + return; + } + + for (entity, volumetric_fog) in view_targets.iter() { + commands + .get_entity(entity) + .expect("Volumetric fog entity wasn't synced.") + .insert(*volumetric_fog); + } + + for (entity, fog_volume, fog_transform) in fog_volumes.iter() { + commands + .get_entity(entity) + .expect("Fog volume entity wasn't synced.") + .insert((*fog_volume).clone()) + .insert(*fog_transform); + } + + for (entity, volumetric_light) in volumetric_lights.iter() { + commands + .get_entity(entity) + .expect("Volumetric light entity wasn't synced.") + .insert(*volumetric_light); + } +} + +impl ViewNode for VolumetricFogNode { + type ViewQuery = ( + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + Read, + ); + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + view_target, + view_depth_texture, + view_volumetric_lighting_pipelines, + view_uniform_offset, + view_lights_offset, + view_fog_offset, + 
view_light_probes_offset, + view_fog_volumes, + view_bind_group, + view_ssr_offset, + msaa, + view_environment_map_offset, + ): QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let pipeline_cache = world.resource::(); + let volumetric_lighting_pipeline = world.resource::(); + let volumetric_lighting_uniform_buffers = world.resource::(); + let image_assets = world.resource::>(); + let mesh_allocator = world.resource::(); + + // Fetch the uniform buffer and binding. + let ( + Some(textureless_pipeline), + Some(textured_pipeline), + Some(volumetric_lighting_uniform_buffer_binding), + ) = ( + pipeline_cache.get_render_pipeline(view_volumetric_lighting_pipelines.textureless), + pipeline_cache.get_render_pipeline(view_volumetric_lighting_pipelines.textured), + volumetric_lighting_uniform_buffers.binding(), + ) + else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + render_context + .command_encoder() + .push_debug_group("volumetric_lighting"); + let time_span = + diagnostics.time_span(render_context.command_encoder(), "volumetric_lighting"); + + let fog_assets = world.resource::(); + let render_meshes = world.resource::>(); + + for view_fog_volume in view_fog_volumes.iter() { + // If the camera is outside the fog volume, pick the cube mesh; + // otherwise, pick the plane mesh. In the latter case we'll be + // effectively rendering a full-screen quad. + let mesh_handle = if view_fog_volume.exterior { + fog_assets.cube_mesh.clone() + } else { + fog_assets.plane_mesh.clone() + }; + + let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_handle.id()) + else { + continue; + }; + + let density_image = view_fog_volume + .density_texture + .and_then(|density_texture| image_assets.get(density_texture)); + + // Pick the right pipeline, depending on whether a density texture + // is present or not. 
+ let pipeline = if density_image.is_some() { + textured_pipeline + } else { + textureless_pipeline + }; + + // This should always succeed, but if the asset was unloaded don't + // panic. + let Some(render_mesh) = render_meshes.get(&mesh_handle) else { + return Ok(()); + }; + + // Create the bind group for the view. + // + // TODO: Cache this. + + let mut bind_group_layout_key = VolumetricFogBindGroupLayoutKey::empty(); + bind_group_layout_key.set( + VolumetricFogBindGroupLayoutKey::MULTISAMPLED, + !matches!(*msaa, Msaa::Off), + ); + + // Create the bind group entries. The ones relating to the density + // texture will only be filled in if that texture is present. + let mut bind_group_entries = DynamicBindGroupEntries::sequential(( + volumetric_lighting_uniform_buffer_binding.clone(), + BindingResource::TextureView(view_depth_texture.view()), + )); + if let Some(density_image) = density_image { + bind_group_layout_key.insert(VolumetricFogBindGroupLayoutKey::DENSITY_TEXTURE); + bind_group_entries = bind_group_entries.extend_sequential(( + BindingResource::TextureView(&density_image.texture_view), + BindingResource::Sampler(&density_image.sampler), + )); + } + + let volumetric_view_bind_group_layout = &volumetric_lighting_pipeline + .volumetric_view_bind_group_layouts[bind_group_layout_key.bits() as usize]; + + let volumetric_view_bind_group = render_context.render_device().create_bind_group( + None, + volumetric_view_bind_group_layout, + &bind_group_entries, + ); + + let render_pass_descriptor = RenderPassDescriptor { + label: Some("volumetric lighting pass"), + color_attachments: &[Some(RenderPassColorAttachment { + view: view_target.main_texture_view(), + depth_slice: None, + resolve_target: None, + ops: Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + let mut render_pass = render_context + .command_encoder() + 
.begin_render_pass(&render_pass_descriptor); + + render_pass.set_vertex_buffer(0, *vertex_buffer_slice.buffer.slice(..)); + render_pass.set_pipeline(pipeline); + render_pass.set_bind_group( + 0, + &view_bind_group.main, + &[ + view_uniform_offset.offset, + view_lights_offset.offset, + view_fog_offset.offset, + **view_light_probes_offset, + **view_ssr_offset, + **view_environment_map_offset, + ], + ); + render_pass.set_bind_group( + 1, + &volumetric_view_bind_group, + &[view_fog_volume.uniform_buffer_offset], + ); + + // Draw elements or arrays, as appropriate. + match &render_mesh.buffer_info { + RenderMeshBufferInfo::Indexed { + index_format, + count, + } => { + let Some(index_buffer_slice) = + mesh_allocator.mesh_index_slice(&mesh_handle.id()) + else { + continue; + }; + + render_pass + .set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format); + render_pass.draw_indexed( + index_buffer_slice.range.start..(index_buffer_slice.range.start + count), + vertex_buffer_slice.range.start as i32, + 0..1, + ); + } + RenderMeshBufferInfo::NonIndexed => { + render_pass.draw(vertex_buffer_slice.range, 0..1); + } + } + } + + time_span.end(render_context.command_encoder()); + render_context.command_encoder().pop_debug_group(); + + Ok(()) + } +} + +impl SpecializedRenderPipeline for VolumetricFogPipeline { + type Key = VolumetricFogPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + // We always use hardware 2x2 filtering for sampling the shadow map; the + // more accurate versions with percentage-closer filtering aren't worth + // the overhead. + let mut shader_defs = vec!["SHADOW_FILTER_METHOD_HARDWARE_2X2".into()]; + + // We need a separate layout for MSAA and non-MSAA, as well as one for + // the presence or absence of the density texture. 
+ let mut bind_group_layout_key = VolumetricFogBindGroupLayoutKey::empty(); + bind_group_layout_key.set( + VolumetricFogBindGroupLayoutKey::MULTISAMPLED, + key.mesh_pipeline_view_key + .contains(MeshPipelineViewLayoutKey::MULTISAMPLED), + ); + bind_group_layout_key.set( + VolumetricFogBindGroupLayoutKey::DENSITY_TEXTURE, + key.flags + .contains(VolumetricFogPipelineKeyFlags::DENSITY_TEXTURE), + ); + + let volumetric_view_bind_group_layout = + self.volumetric_view_bind_group_layouts[bind_group_layout_key.bits() as usize].clone(); + + // Both the cube and plane have the same vertex layout, so we don't need + // to distinguish between the two. + let vertex_format = key + .vertex_buffer_layout + .0 + .get_layout(&[Mesh::ATTRIBUTE_POSITION.at_shader_location(0)]) + .expect("Failed to get vertex layout for volumetric fog hull"); + + if key + .mesh_pipeline_view_key + .contains(MeshPipelineViewLayoutKey::MULTISAMPLED) + { + shader_defs.push("MULTISAMPLED".into()); + } + + if key + .flags + .contains(VolumetricFogPipelineKeyFlags::DENSITY_TEXTURE) + { + shader_defs.push("DENSITY_TEXTURE".into()); + } + + let layout = self + .mesh_view_layouts + .get_view_layout(key.mesh_pipeline_view_key); + let layout = vec![ + layout.main_layout.clone(), + volumetric_view_bind_group_layout.clone(), + ]; + + RenderPipelineDescriptor { + label: Some("volumetric lighting pipeline".into()), + layout, + vertex: VertexState { + shader: self.shader.clone(), + shader_defs: shader_defs.clone(), + buffers: vec![vertex_format], + ..default() + }, + primitive: PrimitiveState { + cull_mode: Some(Face::Back), + ..default() + }, + fragment: Some(FragmentState { + shader: self.shader.clone(), + shader_defs, + targets: vec![Some(ColorTargetState { + format: if key.flags.contains(VolumetricFogPipelineKeyFlags::HDR) { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }, + // Blend on top of what's already in the framebuffer. 
Doing + // the alpha blending with the hardware blender allows us to + // avoid having to use intermediate render targets. + blend: Some(BlendState { + color: BlendComponent { + src_factor: BlendFactor::One, + dst_factor: BlendFactor::OneMinusSrcAlpha, + operation: BlendOperation::Add, + }, + alpha: BlendComponent { + src_factor: BlendFactor::Zero, + dst_factor: BlendFactor::One, + operation: BlendOperation::Add, + }, + }), + write_mask: ColorWrites::ALL, + })], + ..default() + }), + ..default() + } + } +} + +/// Specializes volumetric fog pipelines for all views with that effect enabled. +pub fn prepare_volumetric_fog_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + volumetric_lighting_pipeline: Res, + fog_assets: Res, + view_targets: Query< + ( + Entity, + &ExtractedView, + &Msaa, + Has, + Has, + Has, + Has, + ), + With, + >, + meshes: Res>, +) { + let Some(plane_mesh) = meshes.get(&fog_assets.plane_mesh) else { + // There's an off chance that the mesh won't be prepared yet if `RenderAssetBytesPerFrame` limiting is in use. + return; + }; + + for ( + entity, + view, + msaa, + normal_prepass, + depth_prepass, + motion_vector_prepass, + deferred_prepass, + ) in view_targets.iter() + { + // Create a mesh pipeline view layout key corresponding to the view. + let mut mesh_pipeline_view_key = MeshPipelineViewLayoutKey::from(*msaa); + mesh_pipeline_view_key.set(MeshPipelineViewLayoutKey::NORMAL_PREPASS, normal_prepass); + mesh_pipeline_view_key.set(MeshPipelineViewLayoutKey::DEPTH_PREPASS, depth_prepass); + mesh_pipeline_view_key.set( + MeshPipelineViewLayoutKey::MOTION_VECTOR_PREPASS, + motion_vector_prepass, + ); + mesh_pipeline_view_key.set( + MeshPipelineViewLayoutKey::DEFERRED_PREPASS, + deferred_prepass, + ); + + let mut textureless_flags = VolumetricFogPipelineKeyFlags::empty(); + textureless_flags.set(VolumetricFogPipelineKeyFlags::HDR, view.hdr); + + // Specialize the pipeline. 
+ let textureless_pipeline_key = VolumetricFogPipelineKey { + mesh_pipeline_view_key, + vertex_buffer_layout: plane_mesh.layout.clone(), + flags: textureless_flags, + }; + let textureless_pipeline_id = pipelines.specialize( + &pipeline_cache, + &volumetric_lighting_pipeline, + textureless_pipeline_key.clone(), + ); + let textured_pipeline_id = pipelines.specialize( + &pipeline_cache, + &volumetric_lighting_pipeline, + VolumetricFogPipelineKey { + flags: textureless_pipeline_key.flags + | VolumetricFogPipelineKeyFlags::DENSITY_TEXTURE, + ..textureless_pipeline_key + }, + ); + + commands.entity(entity).insert(ViewVolumetricFogPipelines { + textureless: textureless_pipeline_id, + textured: textured_pipeline_id, + }); + } +} + +/// A system that converts [`VolumetricFog`] into [`VolumetricFogUniform`]s. +pub fn prepare_volumetric_fog_uniforms( + mut commands: Commands, + mut volumetric_lighting_uniform_buffer: ResMut, + view_targets: Query<(Entity, &ExtractedView, &VolumetricFog)>, + fog_volumes: Query<(Entity, &FogVolume, &GlobalTransform)>, + render_device: Res, + render_queue: Res, + mut local_from_world_matrices: Local>, +) { + // Do this up front to avoid O(n^2) matrix inversion. + local_from_world_matrices.clear(); + for (_, _, fog_transform) in fog_volumes.iter() { + local_from_world_matrices.push(fog_transform.affine().inverse()); + } + + let uniform_count = view_targets.iter().len() * local_from_world_matrices.len(); + + let Some(mut writer) = + volumetric_lighting_uniform_buffer.get_writer(uniform_count, &render_device, &render_queue) + else { + return; + }; + + for (view_entity, extracted_view, volumetric_fog) in view_targets.iter() { + let world_from_view = extracted_view.world_from_view.affine(); + + let mut view_fog_volumes = vec![]; + + for ((_, fog_volume, _), local_from_world) in + fog_volumes.iter().zip(local_from_world_matrices.iter()) + { + // Calculate the transforms to and from 1×1×1 local space. 
+ let local_from_view = *local_from_world * world_from_view; + let view_from_local = local_from_view.inverse(); + + // Determine whether the camera is inside or outside the volume, and + // calculate the clip space transform. + let interior = camera_is_inside_fog_volume(&local_from_view); + let hull_clip_from_local = calculate_fog_volume_clip_from_local_transforms( + interior, + &extracted_view.clip_from_view, + &view_from_local, + ); + + // Calculate the radius of the sphere that bounds the fog volume. + let bounding_radius = view_from_local + .transform_vector3a(Vec3A::splat(0.5)) + .length(); + + // Write out our uniform. + let uniform_buffer_offset = writer.write(&VolumetricFogUniform { + clip_from_local: hull_clip_from_local, + uvw_from_world: UVW_FROM_LOCAL * *local_from_world, + far_planes: get_far_planes(&view_from_local), + fog_color: fog_volume.fog_color.to_linear().to_vec3(), + light_tint: fog_volume.light_tint.to_linear().to_vec3(), + ambient_color: volumetric_fog.ambient_color.to_linear().to_vec3(), + ambient_intensity: volumetric_fog.ambient_intensity, + step_count: volumetric_fog.step_count, + bounding_radius, + absorption: fog_volume.absorption, + scattering: fog_volume.scattering, + density: fog_volume.density_factor, + density_texture_offset: fog_volume.density_texture_offset, + scattering_asymmetry: fog_volume.scattering_asymmetry, + light_intensity: fog_volume.light_intensity, + jitter_strength: volumetric_fog.jitter, + }); + + view_fog_volumes.push(ViewFogVolume { + uniform_buffer_offset, + exterior: !interior, + density_texture: fog_volume.density_texture.as_ref().map(Handle::id), + }); + } + + commands + .entity(view_entity) + .insert(ViewVolumetricFog(view_fog_volumes)); + } +} + +/// A system that marks all view depth textures as readable in shaders. +/// +/// The volumetric lighting pass needs to do this, and it doesn't happen by +/// default. 
+pub fn prepare_view_depth_textures_for_volumetric_fog( + mut view_targets: Query<&mut Camera3d>, + fog_volumes: Query<&VolumetricFog>, +) { + if fog_volumes.is_empty() { + return; + } + + for mut camera in view_targets.iter_mut() { + camera.depth_texture_usages.0 |= TextureUsages::TEXTURE_BINDING.bits(); + } +} + +fn get_far_planes(view_from_local: &Affine3A) -> [Vec4; 3] { + let (mut far_planes, mut next_index) = ([Vec4::ZERO; 3], 0); + + for &local_normal in &[ + Vec3A::X, + Vec3A::NEG_X, + Vec3A::Y, + Vec3A::NEG_Y, + Vec3A::Z, + Vec3A::NEG_Z, + ] { + let view_normal = view_from_local + .transform_vector3a(local_normal) + .normalize_or_zero(); + if view_normal.z <= 0.0 { + continue; + } + + let view_position = view_from_local.transform_point3a(-local_normal * 0.5); + let plane_coords = view_normal.extend(-view_normal.dot(view_position)); + + far_planes[next_index] = plane_coords; + next_index += 1; + if next_index == far_planes.len() { + continue; + } + } + + far_planes +} + +impl VolumetricFogBindGroupLayoutKey { + /// Creates an appropriate debug description for the bind group layout with + /// these flags. + fn bind_group_layout_description(&self) -> String { + if self.is_empty() { + return "volumetric lighting view bind group layout".to_owned(); + } + + format!( + "volumetric lighting view bind group layout ({})", + self.iter() + .filter_map(|flag| { + if flag == VolumetricFogBindGroupLayoutKey::DENSITY_TEXTURE { + Some("density texture") + } else if flag == VolumetricFogBindGroupLayoutKey::MULTISAMPLED { + Some("multisampled") + } else { + None + } + }) + .collect::>() + .join(", ") + ) + } +} + +/// Given the transform from the view to the 1×1×1 cube in local fog volume +/// space, returns true if the camera is inside the volume. 
+fn camera_is_inside_fog_volume(local_from_view: &Affine3A) -> bool { + local_from_view + .translation + .abs() + .cmple(Vec3A::splat(0.5)) + .all() +} + +/// Given the local transforms, returns the matrix that transforms model space +/// to clip space. +fn calculate_fog_volume_clip_from_local_transforms( + interior: bool, + clip_from_view: &Mat4, + view_from_local: &Affine3A, +) -> Mat4 { + if !interior { + return *clip_from_view * Mat4::from(*view_from_local); + } + + // If the camera is inside the fog volume, then we'll be rendering a full + // screen quad. The shader will start its raymarch at the fragment depth + // value, however, so we need to make sure that the depth of the full screen + // quad is at the near clip plane `z_near`. + let z_near = clip_from_view.w_axis[2]; + Mat4::from_cols( + vec4(z_near, 0.0, 0.0, 0.0), + vec4(0.0, z_near, 0.0, 0.0), + vec4(0.0, 0.0, 0.0, 0.0), + vec4(0.0, 0.0, z_near, z_near), + ) +} diff --git a/crates/libmarathon/src/render/pbr/volumetric_fog/volumetric_fog.wgsl b/crates/libmarathon/src/render/pbr/volumetric_fog/volumetric_fog.wgsl new file mode 100644 index 0000000..43e3fc9 --- /dev/null +++ b/crates/libmarathon/src/render/pbr/volumetric_fog/volumetric_fog.wgsl @@ -0,0 +1,486 @@ +// A postprocessing shader that implements volumetric fog via raymarching and +// sampling directional light shadow maps. +// +// The overall approach is a combination of the volumetric rendering in [1] and +// the shadow map raymarching in [2]. First, we raytrace the AABB of the fog +// volume in order to determine how long our ray is. Then we do a raymarch, with +// physically-based calculations at each step to determine how much light was +// absorbed, scattered out, and scattered in. To determine in-scattering, we +// sample the shadow map for the light to determine whether the point was in +// shadow or not. 
+// +// [1]: https://www.scratchapixel.com/lessons/3d-basic-rendering/volume-rendering-for-developers/intro-volume-rendering.html +// +// [2]: http://www.alexandre-pestana.com/volumetric-lights/ + +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput +#import bevy_pbr::mesh_functions::{get_world_from_local, mesh_position_local_to_clip} +#import bevy_pbr::mesh_view_bindings::{globals, lights, view, clusterable_objects} +#import bevy_pbr::mesh_view_types::{ + DIRECTIONAL_LIGHT_FLAGS_VOLUMETRIC_BIT, + POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT, + POINT_LIGHT_FLAGS_VOLUMETRIC_BIT, + POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE, + ClusterableObject +} +#import bevy_pbr::shadow_sampling::{ + sample_shadow_map_hardware, + sample_shadow_cubemap, + sample_shadow_map, + SPOT_SHADOW_TEXEL_SIZE +} +#import bevy_pbr::shadows::{get_cascade_index, world_to_directional_light_local} +#import bevy_pbr::utils::interleaved_gradient_noise +#import bevy_pbr::view_transformations::{ + depth_ndc_to_view_z, + frag_coord_to_ndc, + position_ndc_to_view, + position_ndc_to_world, + position_view_to_world +} +#import bevy_pbr::clustered_forward as clustering +#import bevy_pbr::lighting::getDistanceAttenuation; + +// The GPU version of [`VolumetricFog`]. See the comments in +// `volumetric_fog/mod.rs` for descriptions of the fields here. 
+struct VolumetricFog { + clip_from_local: mat4x4, + uvw_from_world: mat4x4, + far_planes: array, 3>, + fog_color: vec3, + light_tint: vec3, + ambient_color: vec3, + ambient_intensity: f32, + step_count: u32, + bounding_radius: f32, + absorption: f32, + scattering: f32, + density_factor: f32, + density_texture_offset: vec3, + scattering_asymmetry: f32, + light_intensity: f32, + jitter_strength: f32, +} + +@group(1) @binding(0) var volumetric_fog: VolumetricFog; + +#ifdef MULTISAMPLED +@group(1) @binding(1) var depth_texture: texture_depth_multisampled_2d; +#else +@group(1) @binding(1) var depth_texture: texture_depth_2d; +#endif + +#ifdef DENSITY_TEXTURE +@group(1) @binding(2) var density_texture: texture_3d; +@group(1) @binding(3) var density_sampler: sampler; +#endif // DENSITY_TEXTURE + +// 1 / (4π) +const FRAC_4_PI: f32 = 0.07957747154594767; + +struct Vertex { + @builtin(instance_index) instance_index: u32, + @location(0) position: vec3, +} + +@vertex +fn vertex(vertex: Vertex) -> @builtin(position) vec4 { + return volumetric_fog.clip_from_local * vec4(vertex.position, 1.0); +} + +// The common Henyey-Greenstein asymmetric phase function [1] [2]. +// +// This determines how much light goes toward the viewer as opposed to away from +// the viewer. From a visual point of view, it controls how the light shafts +// appear and disappear as the camera looks at the light source. +// +// [1]: https://www.scratchapixel.com/lessons/3d-basic-rendering/volume-rendering-for-developers/ray-marching-get-it-right.html +// +// [2]: https://www.pbr-book.org/4ed/Volume_Scattering/Phase_Functions#TheHenyeyndashGreensteinPhaseFunction +fn henyey_greenstein(neg_LdotV: f32) -> f32 { + let g = volumetric_fog.scattering_asymmetry; + let denom = 1.0 + g * g - 2.0 * g * neg_LdotV; + return FRAC_4_PI * (1.0 - g * g) / (denom * sqrt(denom)); +} + +@fragment +fn fragment(@builtin(position) position: vec4) -> @location(0) vec4 { + // Unpack the `volumetric_fog` settings. 
+ let uvw_from_world = volumetric_fog.uvw_from_world; + let fog_color = volumetric_fog.fog_color; + let ambient_color = volumetric_fog.ambient_color; + let ambient_intensity = volumetric_fog.ambient_intensity; + let step_count = volumetric_fog.step_count; + let bounding_radius = volumetric_fog.bounding_radius; + let absorption = volumetric_fog.absorption; + let scattering = volumetric_fog.scattering; + let density_factor = volumetric_fog.density_factor; + let density_texture_offset = volumetric_fog.density_texture_offset; + let light_tint = volumetric_fog.light_tint; + let light_intensity = volumetric_fog.light_intensity; + let jitter_strength = volumetric_fog.jitter_strength; + + // Unpack the view. + let exposure = view.exposure; + + // Sample the depth to put an upper bound on the length of the ray (as we + // shouldn't trace through solid objects). If this is multisample, just use + // sample 0; this is approximate but good enough. + let frag_coord = position; + let ndc_end_depth_from_buffer = textureLoad(depth_texture, vec2(frag_coord.xy), 0); + let view_end_depth_from_buffer = -position_ndc_to_view( + frag_coord_to_ndc(vec4(position.xy, ndc_end_depth_from_buffer, 1.0))).z; + + // Calculate the start position of the ray. Since we're only rendering front + // faces of the AABB, this is the current fragment's depth. + let view_start_pos = position_ndc_to_view(frag_coord_to_ndc(frag_coord)); + + // Calculate the end position of the ray. This requires us to raytrace the + // three back faces of the AABB to find the one that our ray intersects. + var end_depth_view = 0.0; + for (var plane_index = 0; plane_index < 3; plane_index += 1) { + let plane = volumetric_fog.far_planes[plane_index]; + let other_plane_a = volumetric_fog.far_planes[(plane_index + 1) % 3]; + let other_plane_b = volumetric_fog.far_planes[(plane_index + 2) % 3]; + + // Calculate the intersection of the ray and the plane. The ray must + // intersect in front of us (t > 0). 
+ let t = -plane.w / dot(plane.xyz, view_start_pos.xyz); + if (t < 0.0) { + continue; + } + let hit_pos = view_start_pos.xyz * t; + + // The intersection point must be in front of the other backfaces. + let other_sides = vec2( + dot(vec4(hit_pos, 1.0), other_plane_a) >= 0.0, + dot(vec4(hit_pos, 1.0), other_plane_b) >= 0.0 + ); + + // If those tests pass, we found our backface. + if (all(other_sides)) { + end_depth_view = -hit_pos.z; + break; + } + } + + // Starting at the end depth, which we got above, figure out how long the + // ray we want to trace is and the length of each increment. + end_depth_view = min(end_depth_view, view_end_depth_from_buffer); + + // We assume world and view have the same scale here. + let start_depth_view = -depth_ndc_to_view_z(frag_coord.z); + let ray_length_view = abs(end_depth_view - start_depth_view); + let inv_step_count = 1.0 / f32(step_count); + let step_size_world = ray_length_view * inv_step_count; + + let directional_light_count = lights.n_directional_lights; + + // Calculate the ray origin (`Ro`) and the ray direction (`Rd`) in NDC, + // view, and world coordinates. + let Rd_ndc = vec3(frag_coord_to_ndc(position).xy, 1.0); + let Rd_view = normalize(position_ndc_to_view(Rd_ndc)); + var Ro_world = position_view_to_world(view_start_pos.xyz); + let Rd_world = normalize(position_ndc_to_world(Rd_ndc) - view.world_position); + + // Offset by jitter. + let jitter = interleaved_gradient_noise(position.xy, globals.frame_count) * jitter_strength; + Ro_world += Rd_world * jitter; + + // Use Beer's law [1] [2] to calculate the maximum amount of light that each + // directional light could contribute, and modulate that value by the light + // tint and fog color. (The actual value will in turn be modulated by the + // phase according to the Henyey-Greenstein formula.) 
+ // + // [1]: https://www.scratchapixel.com/lessons/3d-basic-rendering/volume-rendering-for-developers/intro-volume-rendering.html + // + // [2]: https://en.wikipedia.org/wiki/Beer%E2%80%93Lambert_law + + // Use Beer's law again to accumulate the ambient light all along the path. + var accumulated_color = exp(-ray_length_view * (absorption + scattering)) * ambient_color * + ambient_intensity; + + // This is the amount of the background that shows through. We're actually + // going to recompute this over and over again for each directional light, + // coming up with the same values each time. + var background_alpha = 1.0; + + // If we have a density texture, transform to its local space. +#ifdef DENSITY_TEXTURE + let Ro_uvw = (uvw_from_world * vec4(Ro_world, 1.0)).xyz; + let Rd_step_uvw = mat3x3(uvw_from_world[0].xyz, uvw_from_world[1].xyz, uvw_from_world[2].xyz) * + (Rd_world * step_size_world); +#endif // DENSITY_TEXTURE + + for (var light_index = 0u; light_index < directional_light_count; light_index += 1u) { + // Volumetric lights are all sorted first, so the first time we come to + // a non-volumetric light, we know we've seen them all. + let light = &lights.directional_lights[light_index]; + if (((*light).flags & DIRECTIONAL_LIGHT_FLAGS_VOLUMETRIC_BIT) == 0) { + break; + } + + // Offset the depth value by the bias. + let depth_offset = (*light).shadow_depth_bias * (*light).direction_to_light.xyz; + + // Compute phase, which determines the fraction of light that's + // scattered toward the camera instead of away from it. + let neg_LdotV = dot(normalize((*light).direction_to_light.xyz), Rd_world); + let phase = henyey_greenstein(neg_LdotV); + + // Reset `background_alpha` for a new raymarch. + background_alpha = 1.0; + + // Start raymarching. + for (var step = 0u; step < step_count; step += 1u) { + // As an optimization, break if we've gotten too dark. + if (background_alpha < 0.001) { + break; + } + + // Calculate where we are in the ray. 
+ let P_world = Ro_world + Rd_world * f32(step) * step_size_world; + let P_view = Rd_view * f32(step) * step_size_world; + + var density = density_factor; +#ifdef DENSITY_TEXTURE + // Take the density texture into account, if there is one. + // + // The uvs should never go outside the (0, 0, 0) to (1, 1, 1) box, + // but sometimes due to floating point error they can. Handle this + // case. + let P_uvw = Ro_uvw + Rd_step_uvw * f32(step); + if (all(P_uvw >= vec3(0.0)) && all(P_uvw <= vec3(1.0))) { + density *= textureSampleLevel(density_texture, density_sampler, P_uvw + density_texture_offset, 0.0).r; + } else { + density = 0.0; + } +#endif // DENSITY_TEXTURE + + // Calculate absorption (amount of light absorbed by the fog) and + // out-scattering (amount of light the fog scattered away). + let sample_attenuation = exp(-step_size_world * density * (absorption + scattering)); + + // Process absorption and out-scattering. + background_alpha *= sample_attenuation; + + // Compute in-scattering (amount of light other fog particles + // scattered into this ray). This is where any directional light is + // scattered in. + + // Prepare to sample the shadow map. + let cascade_index = get_cascade_index(light_index, P_view.z); + let light_local = world_to_directional_light_local( + light_index, + cascade_index, + vec4(P_world + depth_offset, 1.0) + ); + + // If we're outside the shadow map entirely, local light attenuation + // is zero. + var local_light_attenuation = f32(light_local.w != 0.0); + + // Otherwise, sample the shadow map to determine whether, and by how + // much, this sample is in the light. 
+ if (local_light_attenuation != 0.0) { + let cascade = &(*light).cascades[cascade_index]; + let array_index = i32((*light).depth_texture_base_index + cascade_index); + local_light_attenuation = + sample_shadow_map_hardware(light_local.xy, light_local.z, array_index); + } + + if (local_light_attenuation != 0.0) { + let light_attenuation = exp(-density * bounding_radius * (absorption + scattering)); + let light_factors_per_step = fog_color * light_tint * light_attenuation * + scattering * density * step_size_world * light_intensity * exposure; + + // Modulate the factor we calculated above by the phase, fog color, + // light color, light tint. + let light_color_per_step = (*light).color.rgb * phase * light_factors_per_step; + + // Accumulate the light. + accumulated_color += light_color_per_step * local_light_attenuation * + background_alpha; + } + } + } + + // Point lights and Spot lights + let view_z = view_start_pos.z; + let is_orthographic = view.clip_from_view[3].w == 1.0; + let cluster_index = clustering::fragment_cluster_index(frag_coord.xy, view_z, is_orthographic); + var clusterable_object_index_ranges = + clustering::unpack_clusterable_object_index_ranges(cluster_index); + for (var i: u32 = clusterable_object_index_ranges.first_point_light_index_offset; + i < clusterable_object_index_ranges.first_reflection_probe_index_offset; + i = i + 1u) { + let light_id = clustering::get_clusterable_object_id(i); + let light = &clusterable_objects.data[light_id]; + if (((*light).flags & POINT_LIGHT_FLAGS_VOLUMETRIC_BIT) == 0) { + continue; + } + + // Reset `background_alpha` for a new raymarch. + background_alpha = 1.0; + + // Start raymarching. + for (var step = 0u; step < step_count; step += 1u) { + // As an optimization, break if we've gotten too dark. + if (background_alpha < 0.001) { + break; + } + + // Calculate where we are in the ray. 
+ let P_world = Ro_world + Rd_world * f32(step) * step_size_world; + let P_view = Rd_view * f32(step) * step_size_world; + + var density = density_factor; + + let light_to_frag = (*light).position_radius.xyz - P_world; + let V = Rd_world; + let L = normalize(light_to_frag); + let distance_square = dot(light_to_frag, light_to_frag); + let distance_atten = getDistanceAttenuation(distance_square, (*light).color_inverse_square_range.w); + var local_light_attenuation = distance_atten; + if (i < clusterable_object_index_ranges.first_spot_light_index_offset) { + var shadow: f32 = 1.0; + if (((*light).flags & POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + shadow = fetch_point_shadow_without_normal(light_id, vec4(P_world, 1.0)); + } + local_light_attenuation *= shadow; + } else { + // spot light attenuation + // reconstruct spot dir from x/z and y-direction flag + var spot_dir = vec3((*light).light_custom_data.x, 0.0, (*light).light_custom_data.y); + spot_dir.y = sqrt(max(0.0, 1.0 - spot_dir.x * spot_dir.x - spot_dir.z * spot_dir.z)); + if ((*light).flags & POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE) != 0u { + spot_dir.y = -spot_dir.y; + } + let light_to_frag = (*light).position_radius.xyz - P_world; + + // calculate attenuation based on filament formula https://google.github.io/filament/Filament.html#listing_glslpunctuallight + // spot_scale and spot_offset have been precomputed + // note we normalize here to get "l" from the filament listing. 
spot_dir is already normalized + let cd = dot(-spot_dir, normalize(light_to_frag)); + let attenuation = saturate(cd * (*light).light_custom_data.z + (*light).light_custom_data.w); + let spot_attenuation = attenuation * attenuation; + + var shadow: f32 = 1.0; + if (((*light).flags & POINT_LIGHT_FLAGS_SHADOWS_ENABLED_BIT) != 0u) { + shadow = fetch_spot_shadow_without_normal(light_id, vec4(P_world, 1.0)); + } + local_light_attenuation *= spot_attenuation * shadow; + } + + // Calculate absorption (amount of light absorbed by the fog) and + // out-scattering (amount of light the fog scattered away). + let sample_attenuation = exp(-step_size_world * density * (absorption + scattering)); + + // Process absorption and out-scattering. + background_alpha *= sample_attenuation; + + let light_attenuation = exp(-density * bounding_radius * (absorption + scattering)); + let light_factors_per_step = fog_color * light_tint * light_attenuation * + scattering * density * step_size_world * light_intensity * 0.1; + + // Modulate the factor we calculated above by the phase, fog color, + // light color, light tint. + let light_color_per_step = (*light).color_inverse_square_range.rgb * light_factors_per_step; + + // Accumulate the light. + accumulated_color += light_color_per_step * local_light_attenuation * + background_alpha; + } + } + + // We're done! Return the color with alpha so it can be blended onto the + // render target. 
+ return vec4(accumulated_color, 1.0 - background_alpha); +} + +fn fetch_point_shadow_without_normal(light_id: u32, frag_position: vec4) -> f32 { + let light = &clusterable_objects.data[light_id]; + + // because the shadow maps align with the axes and the frustum planes are at 45 degrees + // we can get the worldspace depth by taking the largest absolute axis + let surface_to_light = (*light).position_radius.xyz - frag_position.xyz; + let surface_to_light_abs = abs(surface_to_light); + let distance_to_light = max(surface_to_light_abs.x, max(surface_to_light_abs.y, surface_to_light_abs.z)); + + // The normal bias here is already scaled by the texel size at 1 world unit from the light. + // The texel size increases proportionally with distance from the light so multiplying by + // distance to light scales the normal bias to the texel size at the fragment distance. + let depth_offset = (*light).shadow_depth_bias * normalize(surface_to_light.xyz); + let offset_position = frag_position.xyz + depth_offset; + + // similar largest-absolute-axis trick as above, but now with the offset fragment position + let frag_ls = offset_position.xyz - (*light).position_radius.xyz ; + let abs_position_ls = abs(frag_ls); + let major_axis_magnitude = max(abs_position_ls.x, max(abs_position_ls.y, abs_position_ls.z)); + + // NOTE: These simplifications come from multiplying: + // projection * vec4(0, 0, -major_axis_magnitude, 1.0) + // and keeping only the terms that have any impact on the depth. + // Projection-agnostic approach: + let zw = -major_axis_magnitude * (*light).light_custom_data.xy + (*light).light_custom_data.zw; + let depth = zw.x / zw.y; + + // Do the lookup, using HW PCF and comparison. Cubemaps assume a left-handed coordinate space, + // so we have to flip the z-axis when sampling. 
+ let flip_z = vec3(1.0, 1.0, -1.0); + return sample_shadow_cubemap(frag_ls * flip_z, distance_to_light, depth, light_id); +} + +fn fetch_spot_shadow_without_normal(light_id: u32, frag_position: vec4) -> f32 { + let light = &clusterable_objects.data[light_id]; + + let surface_to_light = (*light).position_radius.xyz - frag_position.xyz; + + // construct the light view matrix + var spot_dir = vec3((*light).light_custom_data.x, 0.0, (*light).light_custom_data.y); + // reconstruct spot dir from x/z and y-direction flag + spot_dir.y = sqrt(max(0.0, 1.0 - spot_dir.x * spot_dir.x - spot_dir.z * spot_dir.z)); + if (((*light).flags & POINT_LIGHT_FLAGS_SPOT_LIGHT_Y_NEGATIVE) != 0u) { + spot_dir.y = -spot_dir.y; + } + + // view matrix z_axis is the reverse of transform.forward() + let fwd = -spot_dir; + let offset_position = + -surface_to_light + + ((*light).shadow_depth_bias * normalize(surface_to_light)); + + // the construction of the up and right vectors needs to precisely mirror the code + // in render/light.rs:spot_light_view_matrix + var sign = -1.0; + if (fwd.z >= 0.0) { + sign = 1.0; + } + let a = -1.0 / (fwd.z + sign); + let b = fwd.x * fwd.y * a; + let up_dir = vec3(1.0 + sign * fwd.x * fwd.x * a, sign * b, -sign * fwd.x); + let right_dir = vec3(-b, -sign - fwd.y * fwd.y * a, fwd.y); + let light_inv_rot = mat3x3(right_dir, up_dir, fwd); + + // because the matrix is a pure rotation matrix, the inverse is just the transpose, and to calculate + // the product of the transpose with a vector we can just post-multiply instead of pre-multiplying. + // this allows us to keep the matrix construction code identical between CPU and GPU. 
+ let projected_position = offset_position * light_inv_rot; + + // divide xy by perspective matrix "f" and by -projected.z (projected.z is -projection matrix's w) + // to get ndc coordinates + let f_div_minus_z = 1.0 / ((*light).spot_light_tan_angle * -projected_position.z); + let shadow_xy_ndc = projected_position.xy * f_div_minus_z; + // convert to uv coordinates + let shadow_uv = shadow_xy_ndc * vec2(0.5, -0.5) + vec2(0.5, 0.5); + + // 0.1 must match POINT_LIGHT_NEAR_Z + let depth = 0.1 / -projected_position.z; + + return sample_shadow_map( + shadow_uv, + depth, + i32(light_id) + lights.spot_light_shadowmap_offset, + SPOT_SHADOW_TEXEL_SIZE + ); +} \ No newline at end of file diff --git a/crates/libmarathon/src/render/pbr/wireframe.rs b/crates/libmarathon/src/render/pbr/wireframe.rs new file mode 100644 index 0000000..2dbf92b --- /dev/null +++ b/crates/libmarathon/src/render/pbr/wireframe.rs @@ -0,0 +1,915 @@ +use crate::render::pbr::{ + DrawMesh, MeshPipeline, MeshPipelineKey, RenderMeshInstanceFlags, RenderMeshInstances, + SetMeshBindGroup, SetMeshViewBindGroup, SetMeshViewBindingArrayBindGroup, ViewKeyCache, + ViewSpecializationTicks, +}; +use bevy_app::{App, Plugin, PostUpdate, Startup, Update}; +use bevy_asset::{ + embedded_asset, load_embedded_asset, prelude::AssetChanged, AsAssetId, Asset, AssetApp, + AssetEventSystems, AssetId, AssetServer, Assets, Handle, UntypedAssetId, +}; +use bevy_camera::{visibility::ViewVisibility, Camera, Camera3d}; +use bevy_color::{Color, ColorToComponents}; +use crate::render::core_3d::graph::{Core3d, Node3d}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Tick, + prelude::*, + query::QueryItem, + system::{lifetimeless::SRes, SystemChangeTick, SystemParamItem}, +}; +use bevy_mesh::{Mesh3d, MeshVertexBufferLayoutRef}; +use bevy_platform::{ + collections::{HashMap, HashSet}, + hash::FixedHasher, +}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + 
batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, + camera::{extract_cameras, ExtractedCamera}, + diagnostic::RecordDiagnostics, + extract_resource::ExtractResource, + mesh::{ + allocator::{MeshAllocator, SlabId}, + RenderMesh, + }, + prelude::*, + render_asset::{ + prepare_assets, PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets, + }, + render_graph::{NodeRunError, RenderGraphContext, RenderGraphExt, ViewNode, ViewNodeRunner}, + render_phase::{ + AddRenderCommand, BinnedPhaseItem, BinnedRenderPhasePlugin, BinnedRenderPhaseType, + CachedRenderPipelinePhaseItem, DrawFunctionId, DrawFunctions, PhaseItem, + PhaseItemBatchSetKey, PhaseItemExtraIndex, RenderCommand, RenderCommandResult, + SetItemPipeline, TrackedRenderPass, ViewBinnedRenderPhases, + }, + render_resource::*, + renderer::{RenderContext, RenderDevice}, + sync_world::{MainEntity, MainEntityHashMap}, + view::{ + ExtractedView, NoIndirectDrawing, RenderVisibilityRanges, RenderVisibleEntities, + RetainedViewEntity, ViewDepthTexture, ViewTarget, + }, + Extract, Render, RenderApp, RenderDebugFlags, RenderStartup, RenderSystems, +}; +use bevy_shader::Shader; +use core::{hash::Hash, ops::Range}; +use tracing::{error, warn}; + +/// A [`Plugin`] that draws wireframes. +/// +/// Wireframes currently do not work when using webgl or webgpu. +/// Supported rendering backends: +/// - DX12 +/// - Vulkan +/// - Metal +/// +/// This is a native only feature. +#[derive(Debug, Default)] +pub struct WireframePlugin { + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +impl WireframePlugin { + /// Creates a new [`WireframePlugin`] with the given debug flags. 
+ pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { debug_flags } + } +} + +impl Plugin for WireframePlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "render/wireframe.wgsl"); + + app.add_plugins(( + BinnedRenderPhasePlugin::::new(self.debug_flags), + RenderAssetPlugin::::default(), + )) + .init_asset::() + .init_resource::>() + .init_resource::() + .init_resource::() + .add_systems(Startup, setup_global_wireframe_material) + .add_systems( + Update, + ( + global_color_changed.run_if(resource_changed::), + wireframe_color_changed, + // Run `apply_global_wireframe_material` after `apply_wireframe_material` so that the global + // wireframe setting is applied to a mesh on the same frame its wireframe marker component is removed. + (apply_wireframe_material, apply_global_wireframe_material).chain(), + ), + ) + .add_systems( + PostUpdate, + check_wireframe_entities_needing_specialization + .after(AssetEventSystems) + .run_if(resource_exists::), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + let required_features = WgpuFeatures::POLYGON_MODE_LINE | WgpuFeatures::PUSH_CONSTANTS; + let render_device = render_app.world().resource::(); + if !render_device.features().contains(required_features) { + warn!( + "WireframePlugin not loaded. 
GPU lacks support for required features: {:?}.", + required_features + ); + return; + } + + render_app + .init_resource::() + .init_resource::() + .init_resource::>() + .add_render_command::() + .init_resource::() + .init_resource::>() + .add_render_graph_node::>(Core3d, Node3d::Wireframe) + .add_render_graph_edges( + Core3d, + ( + Node3d::EndMainPass, + Node3d::Wireframe, + Node3d::PostProcessing, + ), + ) + .add_systems(RenderStartup, init_wireframe_3d_pipeline) + .add_systems( + ExtractSchedule, + ( + extract_wireframe_3d_camera, + extract_wireframe_entities_needing_specialization.after(extract_cameras), + extract_wireframe_materials, + ), + ) + .add_systems( + Render, + ( + specialize_wireframes + .in_set(RenderSystems::PrepareMeshes) + .after(prepare_assets::) + .after(prepare_assets::), + queue_wireframes + .in_set(RenderSystems::QueueMeshes) + .after(prepare_assets::), + ), + ); + } +} + +/// Enables wireframe rendering for any entity it is attached to. +/// It will ignore the [`WireframeConfig`] global setting. +/// +/// This requires the [`WireframePlugin`] to be enabled. +#[derive(Component, Debug, Clone, Default, Reflect, Eq, PartialEq)] +#[reflect(Component, Default, Debug, PartialEq)] +pub struct Wireframe; + +pub struct Wireframe3d { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: Wireframe3dBatchSetKey, + /// The key, which determines which can be batched. + pub bin_key: Wireframe3dBinKey, + /// An entity from which data will be fetched, including the mesh if + /// applicable. + pub representative_entity: (Entity, MainEntity), + /// The ranges of instances. + pub batch_range: Range, + /// An extra index, which is either a dynamic offset or an index in the + /// indirect parameters list. 
+ pub extra_index: PhaseItemExtraIndex, +} + +impl PhaseItem for Wireframe3d { + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + fn batch_range(&self) -> &Range { + &self.batch_range + } + + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl CachedRenderPipelinePhaseItem for Wireframe3d { + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +impl BinnedPhaseItem for Wireframe3d { + type BinKey = Wireframe3dBinKey; + type BatchSetKey = Wireframe3dBatchSetKey; + + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Self { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Wireframe3dBatchSetKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + + /// The wireframe material asset ID. + pub asset_id: UntypedAssetId, + + /// The function used to draw. + pub draw_function: DrawFunctionId, + /// The ID of the slab of GPU memory that contains vertex data. + /// + /// For non-mesh items, you can fill this with 0 if your items can be + /// multi-drawn, or with a unique value if they can't. + pub vertex_slab: SlabId, + + /// The ID of the slab of GPU memory that contains index data, if present. + /// + /// For non-mesh items, you can safely fill this with `None`. 
+ pub index_slab: Option, +} + +impl PhaseItemBatchSetKey for Wireframe3dBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + +/// Data that must be identical in order to *batch* phase items together. +/// +/// Note that a *batch set* (if multi-draw is in use) contains multiple batches. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Wireframe3dBinKey { + /// The wireframe mesh asset ID. + pub asset_id: UntypedAssetId, +} + +pub struct SetWireframe3dPushConstants; + +impl RenderCommand

for SetWireframe3dPushConstants { + type Param = ( + SRes, + SRes>, + ); + type ViewQuery = (); + type ItemQuery = (); + + #[inline] + fn render<'w>( + item: &P, + _view: (), + _item_query: Option<()>, + (wireframe_instances, wireframe_assets): SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + let Some(wireframe_material) = wireframe_instances.get(&item.main_entity()) else { + return RenderCommandResult::Failure("No wireframe material found for entity"); + }; + let Some(wireframe_material) = wireframe_assets.get(*wireframe_material) else { + return RenderCommandResult::Failure("No wireframe material found for entity"); + }; + + pass.set_push_constants( + ShaderStages::FRAGMENT, + 0, + bytemuck::bytes_of(&wireframe_material.color), + ); + RenderCommandResult::Success + } +} + +pub type DrawWireframe3d = ( + SetItemPipeline, + SetMeshViewBindGroup<0>, + SetMeshViewBindingArrayBindGroup<1>, + SetMeshBindGroup<2>, + SetWireframe3dPushConstants, + DrawMesh, +); + +#[derive(Resource, Clone)] +pub struct Wireframe3dPipeline { + mesh_pipeline: MeshPipeline, + shader: Handle, +} + +pub fn init_wireframe_3d_pipeline( + mut commands: Commands, + mesh_pipeline: Res, + asset_server: Res, +) { + commands.insert_resource(Wireframe3dPipeline { + mesh_pipeline: mesh_pipeline.clone(), + shader: load_embedded_asset!(asset_server.as_ref(), "render/wireframe.wgsl"), + }); +} + +impl SpecializedMeshPipeline for Wireframe3dPipeline { + type Key = MeshPipelineKey; + + fn specialize( + &self, + key: Self::Key, + layout: &MeshVertexBufferLayoutRef, + ) -> Result { + let mut descriptor = self.mesh_pipeline.specialize(key, layout)?; + descriptor.label = Some("wireframe_3d_pipeline".into()); + descriptor.push_constant_ranges.push(PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..16, + }); + let fragment = descriptor.fragment.as_mut().unwrap(); + fragment.shader = self.shader.clone(); + descriptor.primitive.polygon_mode = 
PolygonMode::Line; + descriptor.depth_stencil.as_mut().unwrap().bias.slope_scale = 1.0; + Ok(descriptor) + } +} + +#[derive(Default)] +struct Wireframe3dNode; +impl ViewNode for Wireframe3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewTarget, + &'static ViewDepthTexture, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (camera, view, target, depth): QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let Some(wireframe_phase) = world.get_resource::>() + else { + return Ok(()); + }; + + let Some(wireframe_phase) = wireframe_phase.get(&view.retained_view_entity) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("wireframe_3d"), + color_attachments: &[Some(target.get_color_attachment())], + depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)), + timestamp_writes: None, + occlusion_query_set: None, + }); + let pass_span = diagnostics.pass_span(&mut render_pass, "wireframe_3d"); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + if let Err(err) = wireframe_phase.render(&mut render_pass, world, graph.view_entity()) { + error!("Error encountered while rendering the stencil phase {err:?}"); + return Err(NodeRunError::DrawError(err)); + } + + pass_span.end(&mut render_pass); + + Ok(()) + } +} + +/// Sets the color of the [`Wireframe`] of the entity it is attached to. +/// +/// If this component is present but there's no [`Wireframe`] component, +/// it will still affect the color of the wireframe when [`WireframeConfig::global`] is set to true. +/// +/// This overrides the [`WireframeConfig::default_color`]. 
+#[derive(Component, Debug, Clone, Default, Reflect)] +#[reflect(Component, Default, Debug)] +pub struct WireframeColor { + pub color: Color, +} + +#[derive(Component, Debug, Clone, Default)] +pub struct ExtractedWireframeColor { + pub color: [f32; 4], +} + +/// Disables wireframe rendering for any entity it is attached to. +/// It will ignore the [`WireframeConfig`] global setting. +/// +/// This requires the [`WireframePlugin`] to be enabled. +#[derive(Component, Debug, Clone, Default, Reflect, Eq, PartialEq)] +#[reflect(Component, Default, Debug, PartialEq)] +pub struct NoWireframe; + +#[derive(Resource, Debug, Clone, Default, ExtractResource, Reflect)] +#[reflect(Resource, Debug, Default)] +pub struct WireframeConfig { + /// Whether to show wireframes for all meshes. + /// Can be overridden for individual meshes by adding a [`Wireframe`] or [`NoWireframe`] component. + pub global: bool, + /// If [`Self::global`] is set, any [`Entity`] that does not have a [`Wireframe`] component attached to it will have + /// wireframes using this color. Otherwise, this will be the fallback color for any entity that has a [`Wireframe`], + /// but no [`WireframeColor`]. 
+ pub default_color: Color, +} + +#[derive(Asset, Reflect, Clone, Debug, Default)] +#[reflect(Clone, Default)] +pub struct WireframeMaterial { + pub color: Color, +} + +pub struct RenderWireframeMaterial { + pub color: [f32; 4], +} + +#[derive(Component, Clone, Debug, Default, Deref, DerefMut, Reflect, PartialEq, Eq)] +#[reflect(Component, Default, Clone, PartialEq)] +pub struct Mesh3dWireframe(pub Handle); + +impl AsAssetId for Mesh3dWireframe { + type Asset = WireframeMaterial; + + fn as_asset_id(&self) -> AssetId { + self.0.id() + } +} + +impl RenderAsset for RenderWireframeMaterial { + type SourceAsset = WireframeMaterial; + type Param = (); + + fn prepare_asset( + source_asset: Self::SourceAsset, + _asset_id: AssetId, + _param: &mut SystemParamItem, + _previous_asset: Option<&Self>, + ) -> Result> { + Ok(RenderWireframeMaterial { + color: source_asset.color.to_linear().to_f32_array(), + }) + } +} + +#[derive(Resource, Deref, DerefMut, Default)] +pub struct RenderWireframeInstances(MainEntityHashMap>); + +#[derive(Clone, Resource, Deref, DerefMut, Debug, Default)] +pub struct WireframeEntitiesNeedingSpecialization { + #[deref] + pub entities: Vec, +} + +#[derive(Resource, Deref, DerefMut, Clone, Debug, Default)] +pub struct WireframeEntitySpecializationTicks { + pub entities: MainEntityHashMap, +} + +/// Stores the [`SpecializedWireframeViewPipelineCache`] for each view. +#[derive(Resource, Deref, DerefMut, Default)] +pub struct SpecializedWireframePipelineCache { + // view entity -> view pipeline cache + #[deref] + map: HashMap, +} + +/// Stores the cached render pipeline ID for each entity in a single view, as +/// well as the last time it was changed. 
+#[derive(Deref, DerefMut, Default)] +pub struct SpecializedWireframeViewPipelineCache { + // material entity -> (tick, pipeline_id) + #[deref] + map: MainEntityHashMap<(Tick, CachedRenderPipelineId)>, +} + +#[derive(Resource)] +struct GlobalWireframeMaterial { + // This handle will be reused when the global config is enabled + handle: Handle, +} + +pub fn extract_wireframe_materials( + mut material_instances: ResMut, + changed_meshes_query: Extract< + Query< + (Entity, &ViewVisibility, &Mesh3dWireframe), + Or<(Changed, Changed)>, + >, + >, + mut removed_visibilities_query: Extract>, + mut removed_materials_query: Extract>, +) { + for (entity, view_visibility, material) in &changed_meshes_query { + if view_visibility.get() { + material_instances.insert(entity.into(), material.id()); + } else { + material_instances.remove(&MainEntity::from(entity)); + } + } + + for entity in removed_visibilities_query + .read() + .chain(removed_materials_query.read()) + { + // Only queue a mesh for removal if we didn't pick it up above. + // It's possible that a necessary component was removed and re-added in + // the same frame. 
+ if !changed_meshes_query.contains(entity) { + material_instances.remove(&MainEntity::from(entity)); + } + } +} + +fn setup_global_wireframe_material( + mut commands: Commands, + mut materials: ResMut>, + config: Res, +) { + // Create the handle used for the global material + commands.insert_resource(GlobalWireframeMaterial { + handle: materials.add(WireframeMaterial { + color: config.default_color, + }), + }); +} + +/// Updates the wireframe material of all entities without a [`WireframeColor`] or without a [`Wireframe`] component +fn global_color_changed( + config: Res, + mut materials: ResMut>, + global_material: Res, +) { + if let Some(global_material) = materials.get_mut(&global_material.handle) { + global_material.color = config.default_color; + } +} + +/// Updates the wireframe material when the color in [`WireframeColor`] changes +fn wireframe_color_changed( + mut materials: ResMut>, + mut colors_changed: Query< + (&mut Mesh3dWireframe, &WireframeColor), + (With, Changed), + >, +) { + for (mut handle, wireframe_color) in &mut colors_changed { + handle.0 = materials.add(WireframeMaterial { + color: wireframe_color.color, + }); + } +} + +/// Applies or remove the wireframe material to any mesh with a [`Wireframe`] component, and removes it +/// for any mesh with a [`NoWireframe`] component. 
+fn apply_wireframe_material( + mut commands: Commands, + mut materials: ResMut>, + wireframes: Query< + (Entity, Option<&WireframeColor>), + (With, Without), + >, + no_wireframes: Query, With)>, + mut removed_wireframes: RemovedComponents, + global_material: Res, +) { + for e in removed_wireframes.read().chain(no_wireframes.iter()) { + if let Ok(mut commands) = commands.get_entity(e) { + commands.remove::(); + } + } + + let mut material_to_spawn = vec![]; + for (e, maybe_color) in &wireframes { + let material = get_wireframe_material(maybe_color, &mut materials, &global_material); + material_to_spawn.push((e, Mesh3dWireframe(material))); + } + commands.try_insert_batch(material_to_spawn); +} + +type WireframeFilter = (With, Without, Without); + +/// Applies or removes a wireframe material on any mesh without a [`Wireframe`] or [`NoWireframe`] component. +fn apply_global_wireframe_material( + mut commands: Commands, + config: Res, + meshes_without_material: Query< + (Entity, Option<&WireframeColor>), + (WireframeFilter, Without), + >, + meshes_with_global_material: Query)>, + global_material: Res, + mut materials: ResMut>, +) { + if config.global { + let mut material_to_spawn = vec![]; + for (e, maybe_color) in &meshes_without_material { + let material = get_wireframe_material(maybe_color, &mut materials, &global_material); + // We only add the material handle but not the Wireframe component + // This makes it easy to detect which mesh is using the global material and which ones are user specified + material_to_spawn.push((e, Mesh3dWireframe(material))); + } + commands.try_insert_batch(material_to_spawn); + } else { + for e in &meshes_with_global_material { + commands.entity(e).remove::(); + } + } +} + +/// Gets a handle to a wireframe material with a fallback on the default material +fn get_wireframe_material( + maybe_color: Option<&WireframeColor>, + wireframe_materials: &mut Assets, + global_material: &GlobalWireframeMaterial, +) -> Handle { + if let 
Some(wireframe_color) = maybe_color { + wireframe_materials.add(WireframeMaterial { + color: wireframe_color.color, + }) + } else { + // If there's no color specified we can use the global material since it's already set to use the default_color + global_material.handle.clone() + } +} + +fn extract_wireframe_3d_camera( + mut wireframe_3d_phases: ResMut>, + cameras: Extract), With>>, + mut live_entities: Local>, + gpu_preprocessing_support: Res, +) { + live_entities.clear(); + for (main_entity, camera, no_indirect_drawing) in &cameras { + if !camera.is_active { + continue; + } + let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing { + GpuPreprocessingMode::Culling + } else { + GpuPreprocessingMode::PreprocessingOnly + }); + + let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0); + wireframe_3d_phases.prepare_for_new_frame(retained_view_entity, gpu_preprocessing_mode); + live_entities.insert(retained_view_entity); + } + + // Clear out all dead views. 
+ wireframe_3d_phases.retain(|camera_entity, _| live_entities.contains(camera_entity)); +} + +pub fn extract_wireframe_entities_needing_specialization( + entities_needing_specialization: Extract>, + mut entity_specialization_ticks: ResMut, + views: Query<&ExtractedView>, + mut specialized_wireframe_pipeline_cache: ResMut, + mut removed_meshes_query: Extract>, + ticks: SystemChangeTick, +) { + for entity in entities_needing_specialization.iter() { + // Update the entity's specialization tick with this run's tick + entity_specialization_ticks.insert((*entity).into(), ticks.this_run()); + } + + for entity in removed_meshes_query.read() { + for view in &views { + if let Some(specialized_wireframe_pipeline_cache) = + specialized_wireframe_pipeline_cache.get_mut(&view.retained_view_entity) + { + specialized_wireframe_pipeline_cache.remove(&MainEntity::from(entity)); + } + } + } +} + +pub fn check_wireframe_entities_needing_specialization( + needs_specialization: Query< + Entity, + Or<( + Changed, + AssetChanged, + Changed, + AssetChanged, + )>, + >, + mut entities_needing_specialization: ResMut, +) { + entities_needing_specialization.clear(); + for entity in &needs_specialization { + entities_needing_specialization.push(entity); + } +} + +pub fn specialize_wireframes( + render_meshes: Res>, + render_mesh_instances: Res, + render_wireframe_instances: Res, + render_visibility_ranges: Res, + wireframe_phases: Res>, + views: Query<(&ExtractedView, &RenderVisibleEntities)>, + view_key_cache: Res, + entity_specialization_ticks: Res, + view_specialization_ticks: Res, + mut specialized_material_pipeline_cache: ResMut, + mut pipelines: ResMut>, + pipeline: Res, + pipeline_cache: Res, + ticks: SystemChangeTick, +) { + // Record the retained IDs of all views so that we can expire old + // pipeline IDs. 
+ let mut all_views: HashSet = HashSet::default(); + + for (view, visible_entities) in &views { + all_views.insert(view.retained_view_entity); + + if !wireframe_phases.contains_key(&view.retained_view_entity) { + continue; + } + + let Some(view_key) = view_key_cache.get(&view.retained_view_entity) else { + continue; + }; + + let view_tick = view_specialization_ticks + .get(&view.retained_view_entity) + .unwrap(); + let view_specialized_material_pipeline_cache = specialized_material_pipeline_cache + .entry(view.retained_view_entity) + .or_default(); + + for (_, visible_entity) in visible_entities.iter::() { + if !render_wireframe_instances.contains_key(visible_entity) { + continue; + }; + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let entity_tick = entity_specialization_ticks.get(visible_entity).unwrap(); + let last_specialized_tick = view_specialized_material_pipeline_cache + .get(visible_entity) + .map(|(tick, _)| *tick); + let needs_specialization = last_specialized_tick.is_none_or(|tick| { + view_tick.is_newer_than(tick, ticks.this_run()) + || entity_tick.is_newer_than(tick, ticks.this_run()) + }); + if !needs_specialization { + continue; + } + let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else { + continue; + }; + + let mut mesh_key = *view_key; + mesh_key |= MeshPipelineKey::from_primitive_topology(mesh.primitive_topology()); + + if render_visibility_ranges.entity_has_crossfading_visibility_ranges(*visible_entity) { + mesh_key |= MeshPipelineKey::VISIBILITY_RANGE_DITHER; + } + + if view_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + // If the previous frame have skins or morph targets, note that. 
+ if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_SKIN; + } + if mesh_instance + .flags + .contains(RenderMeshInstanceFlags::HAS_PREVIOUS_MORPH) + { + mesh_key |= MeshPipelineKey::HAS_PREVIOUS_MORPH; + } + } + + let pipeline_id = + pipelines.specialize(&pipeline_cache, &pipeline, mesh_key, &mesh.layout); + let pipeline_id = match pipeline_id { + Ok(id) => id, + Err(err) => { + error!("{}", err); + continue; + } + }; + + view_specialized_material_pipeline_cache + .insert(*visible_entity, (ticks.this_run(), pipeline_id)); + } + } + + // Delete specialized pipelines belonging to views that have expired. + specialized_material_pipeline_cache + .retain(|retained_view_entity, _| all_views.contains(retained_view_entity)); +} + +fn queue_wireframes( + custom_draw_functions: Res>, + render_mesh_instances: Res, + gpu_preprocessing_support: Res, + mesh_allocator: Res, + specialized_wireframe_pipeline_cache: Res, + render_wireframe_instances: Res, + mut wireframe_3d_phases: ResMut>, + mut views: Query<(&ExtractedView, &RenderVisibleEntities)>, +) { + for (view, visible_entities) in &mut views { + let Some(wireframe_phase) = wireframe_3d_phases.get_mut(&view.retained_view_entity) else { + continue; + }; + let draw_wireframe = custom_draw_functions.read().id::(); + + let Some(view_specialized_material_pipeline_cache) = + specialized_wireframe_pipeline_cache.get(&view.retained_view_entity) + else { + continue; + }; + + for (render_entity, visible_entity) in visible_entities.iter::() { + let Some(wireframe_instance) = render_wireframe_instances.get(visible_entity) else { + continue; + }; + let Some((current_change_tick, pipeline_id)) = view_specialized_material_pipeline_cache + .get(visible_entity) + .map(|(current_change_tick, pipeline_id)| (*current_change_tick, *pipeline_id)) + else { + continue; + }; + + // Skip the entity if it's cached in a bin and up to date. 
+ if wireframe_phase.validate_cached_entity(*visible_entity, current_change_tick) { + continue; + } + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(*visible_entity) + else { + continue; + }; + let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id); + let bin_key = Wireframe3dBinKey { + asset_id: mesh_instance.mesh_asset_id.untyped(), + }; + let batch_set_key = Wireframe3dBatchSetKey { + pipeline: pipeline_id, + asset_id: wireframe_instance.untyped(), + draw_function: draw_wireframe, + vertex_slab: vertex_slab.unwrap_or_default(), + index_slab, + }; + wireframe_phase.add( + batch_set_key, + bin_key, + (*render_entity, *visible_entity), + mesh_instance.current_uniform_index, + BinnedRenderPhaseType::mesh( + mesh_instance.should_batch(), + &gpu_preprocessing_support, + ), + current_change_tick, + ); + } + } +} diff --git a/crates/libmarathon/src/render/pipelined_rendering.rs b/crates/libmarathon/src/render/pipelined_rendering.rs new file mode 100644 index 0000000..35ebc0b --- /dev/null +++ b/crates/libmarathon/src/render/pipelined_rendering.rs @@ -0,0 +1,204 @@ +use async_channel::{Receiver, Sender}; + +use bevy_app::{App, AppExit, AppLabel, Plugin, SubApp}; +use bevy_ecs::{ + resource::Resource, + schedule::MainThreadExecutor, + world::{Mut, World}, +}; +use bevy_tasks::ComputeTaskPool; + +use crate::render::RenderApp; + +/// A Label for the sub app that runs the parts of pipelined rendering that need to run on the main thread. +/// +/// The Main schedule of this app can be used to run logic after the render schedule starts, but +/// before I/O processing. This can be useful for something like frame pacing. +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, AppLabel)] +pub struct RenderExtractApp; + +/// Channels used by the main app to send and receive the render app. 
+#[derive(Resource)] +pub struct RenderAppChannels { + app_to_render_sender: Sender, + render_to_app_receiver: Receiver, + render_app_in_render_thread: bool, +} + +impl RenderAppChannels { + /// Create a `RenderAppChannels` from a [`async_channel::Receiver`] and [`async_channel::Sender`] + pub fn new( + app_to_render_sender: Sender, + render_to_app_receiver: Receiver, + ) -> Self { + Self { + app_to_render_sender, + render_to_app_receiver, + render_app_in_render_thread: false, + } + } + + /// Send the `render_app` to the rendering thread. + pub fn send_blocking(&mut self, render_app: SubApp) { + self.app_to_render_sender.send_blocking(render_app).unwrap(); + self.render_app_in_render_thread = true; + } + + /// Receive the `render_app` from the rendering thread. + /// Return `None` if the render thread has panicked. + pub async fn recv(&mut self) -> Option { + let render_app = self.render_to_app_receiver.recv().await.ok()?; + self.render_app_in_render_thread = false; + Some(render_app) + } +} + +impl Drop for RenderAppChannels { + fn drop(&mut self) { + if self.render_app_in_render_thread { + // Any non-send data in the render world was initialized on the main thread. + // So on dropping the main world and ending the app, we block and wait for + // the render world to return to drop it. Which allows the non-send data + // drop methods to run on the correct thread. + self.render_to_app_receiver.recv_blocking().ok(); + } + } +} + +/// The [`PipelinedRenderingPlugin`] can be added to your application to enable pipelined rendering. +/// +/// This moves rendering into a different thread, so that the Nth frame's rendering can +/// be run at the same time as the N + 1 frame's simulation. 
+/// +/// ```text +/// |--------------------|--------------------|--------------------|--------------------| +/// | simulation thread | frame 1 simulation | frame 2 simulation | frame 3 simulation | +/// |--------------------|--------------------|--------------------|--------------------| +/// | rendering thread | | frame 1 rendering | frame 2 rendering | +/// |--------------------|--------------------|--------------------|--------------------| +/// ``` +/// +/// The plugin is dependent on the [`RenderApp`] added by [`crate::RenderPlugin`] and so must +/// be added after that plugin. If it is not added after, the plugin will do nothing. +/// +/// A single frame of execution looks something like below +/// +/// ```text +/// |---------------------------------------------------------------------------| +/// | | | RenderExtractApp schedule | winit events | main schedule | +/// | sync | extract |----------------------------------------------------------| +/// | | | extract commands | rendering schedule | +/// |---------------------------------------------------------------------------| +/// ``` +/// +/// - `sync` is the step where the entity-entity mapping between the main and render world is updated. +/// This is run on the main app's thread. For more information checkout [`SyncWorldPlugin`]. +/// - `extract` is the step where data is copied from the main world to the render world. +/// This is run on the main app's thread. +/// - On the render thread, we first apply the `extract commands`. This is not run during extract, so the +/// main schedule can start sooner. +/// - Then the `rendering schedule` is run. See [`RenderSystems`](crate::RenderSystems) for the standard steps in this process. +/// - In parallel to the rendering thread the [`RenderExtractApp`] schedule runs. By +/// default, this schedule is empty. But it is useful if you need something to run before I/O processing. +/// - Next all the `winit events` are processed. 
+/// - And finally the `main app schedule` is run. +/// - Once both the `main app schedule` and the `render schedule` are finished running, `extract` is run again. +/// +/// [`SyncWorldPlugin`]: crate::sync_world::SyncWorldPlugin +#[derive(Default)] +pub struct PipelinedRenderingPlugin; + +impl Plugin for PipelinedRenderingPlugin { + fn build(&self, app: &mut App) { + // Don't add RenderExtractApp if RenderApp isn't initialized. + if app.get_sub_app(RenderApp).is_none() { + return; + } + app.insert_resource(MainThreadExecutor::new()); + + let mut sub_app = SubApp::new(); + sub_app.set_extract(renderer_extract); + app.insert_sub_app(RenderExtractApp, sub_app); + } + + // Sets up the render thread and inserts resources into the main app used for controlling the render thread. + fn cleanup(&self, app: &mut App) { + // skip setting up when headless + if app.get_sub_app(RenderExtractApp).is_none() { + return; + } + + let (app_to_render_sender, app_to_render_receiver) = async_channel::bounded::(1); + let (render_to_app_sender, render_to_app_receiver) = async_channel::bounded::(1); + + let mut render_app = app + .remove_sub_app(RenderApp) + .expect("Unable to get RenderApp. 
Another plugin may have removed the RenderApp before PipelinedRenderingPlugin"); + + // clone main thread executor to render world + let executor = app.world().get_resource::().unwrap(); + render_app.world_mut().insert_resource(executor.clone()); + + render_to_app_sender.send_blocking(render_app).unwrap(); + + app.insert_resource(RenderAppChannels::new( + app_to_render_sender, + render_to_app_receiver, + )); + + std::thread::spawn(move || { + #[cfg(feature = "trace")] + let _span = tracing::info_span!("render thread").entered(); + + let compute_task_pool = ComputeTaskPool::get(); + loop { + // run a scope here to allow main world to use this thread while it's waiting for the render app + let sent_app = compute_task_pool + .scope(|s| { + s.spawn(async { app_to_render_receiver.recv().await }); + }) + .pop(); + let Some(Ok(mut render_app)) = sent_app else { + break; + }; + + { + #[cfg(feature = "trace")] + let _sub_app_span = tracing::info_span!("sub app", name = ?RenderApp).entered(); + render_app.update(); + } + + if render_to_app_sender.send_blocking(render_app).is_err() { + break; + } + } + + tracing::debug!("exiting pipelined rendering thread"); + }); + } +} + +// This function waits for the rendering world to be received, +// runs extract, and then sends the rendering world back to the render thread. +fn renderer_extract(app_world: &mut World, _world: &mut World) { + app_world.resource_scope(|world, main_thread_executor: Mut| { + world.resource_scope(|world, mut render_channels: Mut| { + // we use a scope here to run any main thread tasks that the render world still needs to run + // while we wait for the render world to be received. 
+ if let Some(mut render_app) = ComputeTaskPool::get() + .scope_with_executor(true, Some(&*main_thread_executor.0), |s| { + s.spawn(async { render_channels.recv().await }); + }) + .pop() + .unwrap() + { + render_app.extract(world); + + render_channels.send_blocking(render_app); + } else { + // Renderer thread panicked + world.write_message(AppExit::error()); + } + }); + }); +} diff --git a/crates/libmarathon/src/render/prepass/mod.rs b/crates/libmarathon/src/render/prepass/mod.rs new file mode 100644 index 0000000..35cb1c5 --- /dev/null +++ b/crates/libmarathon/src/render/prepass/mod.rs @@ -0,0 +1,383 @@ +//! Run a prepass before the main pass to generate depth, normals, and/or motion vectors textures, sometimes called a thin g-buffer. +//! These textures are useful for various screen-space effects and reducing overdraw in the main pass. +//! +//! The prepass only runs for opaque meshes or meshes with an alpha mask. Transparent meshes are ignored. +//! +//! To enable the prepass, you need to add a prepass component to a [`bevy_camera::Camera3d`]. +//! +//! [`DepthPrepass`] +//! [`NormalPrepass`] +//! [`MotionVectorPrepass`] +//! +//! The textures are automatically added to the default mesh view bindings. You can also get the raw textures +//! by querying the [`ViewPrepassTextures`] component on any camera with a prepass component. +//! +//! The depth prepass will always run and generate the depth buffer as a side effect, but it won't copy it +//! to a separate texture unless the [`DepthPrepass`] is activated. This means that if any prepass component is present +//! it will always create a depth buffer that will be used by the main pass. +//! +//! When using the default mesh view bindings you should be able to use `prepass_depth()`, +//! `prepass_normal()`, and `prepass_motion_vector()` to load the related textures. +//! These functions are defined in `bevy_pbr::prepass_utils`. See the `shader_prepass` example that shows how to use them. +//! +//! 
The prepass runs for each `Material`. You can control if the prepass should run per-material by setting the `prepass_enabled` +//! flag on the `MaterialPlugin`. +//! +//! Currently only works for 3D. + +pub mod node; + +use core::ops::Range; + +use crate::render::deferred::{DEFERRED_LIGHTING_PASS_ID_FORMAT, DEFERRED_PREPASS_FORMAT}; +use bevy_asset::UntypedAssetId; +use bevy_ecs::prelude::*; +use bevy_math::Mat4; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::mesh::allocator::SlabId; +use crate::render::render_phase::PhaseItemBatchSetKey; +use crate::render::sync_world::MainEntity; +use crate::render::{ + render_phase::{ + BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, + PhaseItemExtraIndex, + }, + render_resource::{ + CachedRenderPipelineId, ColorTargetState, ColorWrites, DynamicUniformBuffer, Extent3d, + ShaderType, TextureFormat, TextureView, + }, + texture::ColorAttachment, +}; + +pub const NORMAL_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgb10a2Unorm; +pub const MOTION_VECTOR_PREPASS_FORMAT: TextureFormat = TextureFormat::Rg16Float; + +/// If added to a [`bevy_camera::Camera3d`] then depth values will be copied to a separate texture available to the main pass. +#[derive(Component, Default, Reflect, Clone)] +#[reflect(Component, Default, Clone)] +pub struct DepthPrepass; + +/// If added to a [`bevy_camera::Camera3d`] then vertex world normals will be copied to a separate texture available to the main pass. +/// Normals will have normal map textures already applied. +#[derive(Component, Default, Reflect, Clone)] +#[reflect(Component, Default, Clone)] +pub struct NormalPrepass; + +/// If added to a [`bevy_camera::Camera3d`] then screen space motion vectors will be copied to a separate texture available to the main pass. 
+#[derive(Component, Default, Reflect, Clone)] +#[reflect(Component, Default, Clone)] +pub struct MotionVectorPrepass; + +/// If added to a [`bevy_camera::Camera3d`] then deferred materials will be rendered to the deferred gbuffer texture and will be available to subsequent passes. +/// Note the default deferred lighting plugin also requires `DepthPrepass` to work correctly. +#[derive(Component, Default, Reflect)] +#[reflect(Component, Default)] +pub struct DeferredPrepass; + +/// View matrices from the previous frame. +/// +/// Useful for temporal rendering techniques that need access to last frame's camera data. +#[derive(Component, ShaderType, Clone)] +pub struct PreviousViewData { + pub view_from_world: Mat4, + pub clip_from_world: Mat4, + pub clip_from_view: Mat4, + pub world_from_clip: Mat4, + pub view_from_clip: Mat4, +} + +#[derive(Resource, Default)] +pub struct PreviousViewUniforms { + pub uniforms: DynamicUniformBuffer, +} + +#[derive(Component)] +pub struct PreviousViewUniformOffset { + pub offset: u32, +} + +/// Textures that are written to by the prepass. +/// +/// This component will only be present if any of the relevant prepass components are also present. +#[derive(Component)] +pub struct ViewPrepassTextures { + /// The depth texture generated by the prepass. + /// Exists only if [`DepthPrepass`] is added to the [`ViewTarget`](bevy_render::view::ViewTarget) + pub depth: Option, + /// The normals texture generated by the prepass. + /// Exists only if [`NormalPrepass`] is added to the [`ViewTarget`](bevy_render::view::ViewTarget) + pub normal: Option, + /// The motion vectors texture generated by the prepass. + /// Exists only if [`MotionVectorPrepass`] is added to the `ViewTarget` + pub motion_vectors: Option, + /// The deferred gbuffer generated by the deferred pass. + /// Exists only if [`DeferredPrepass`] is added to the `ViewTarget` + pub deferred: Option, + /// A texture that specifies the deferred lighting pass id for a material. 
+ /// Exists only if [`DeferredPrepass`] is added to the `ViewTarget` + pub deferred_lighting_pass_id: Option, + /// The size of the textures. + pub size: Extent3d, +} + +impl ViewPrepassTextures { + pub fn depth_view(&self) -> Option<&TextureView> { + self.depth.as_ref().map(|t| &t.texture.default_view) + } + + pub fn normal_view(&self) -> Option<&TextureView> { + self.normal.as_ref().map(|t| &t.texture.default_view) + } + + pub fn motion_vectors_view(&self) -> Option<&TextureView> { + self.motion_vectors + .as_ref() + .map(|t| &t.texture.default_view) + } + + pub fn deferred_view(&self) -> Option<&TextureView> { + self.deferred.as_ref().map(|t| &t.texture.default_view) + } +} + +/// Opaque phase of the 3D prepass. +/// +/// Sorted by pipeline, then by mesh to improve batching. +/// +/// Used to render all 3D meshes with materials that have no transparency. +pub struct Opaque3dPrepass { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: OpaqueNoLightmap3dBatchSetKey, + /// Information that separates items into bins. + pub bin_key: OpaqueNoLightmap3dBinKey, + + /// An entity from which Bevy fetches data common to all instances in this + /// batch, such as the mesh. + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +/// Information that must be identical in order to place opaque meshes in the +/// same *batch set* in the prepass and deferred pass. +/// +/// A batch set is a set of batches that can be multi-drawn together, if +/// multi-draw is in use. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct OpaqueNoLightmap3dBatchSetKey { + /// The ID of the GPU pipeline. + pub pipeline: CachedRenderPipelineId, + + /// The function used to draw the mesh. 
+ pub draw_function: DrawFunctionId, + + /// The ID of a bind group specific to the material. + /// + /// In the case of PBR, this is the `MaterialBindGroupIndex`. + pub material_bind_group_index: Option, + + /// The ID of the slab of GPU memory that contains vertex data. + /// + /// For non-mesh items, you can fill this with 0 if your items can be + /// multi-drawn, or with a unique value if they can't. + pub vertex_slab: SlabId, + + /// The ID of the slab of GPU memory that contains index data, if present. + /// + /// For non-mesh items, you can safely fill this with `None`. + pub index_slab: Option, +} + +impl PhaseItemBatchSetKey for OpaqueNoLightmap3dBatchSetKey { + fn indexed(&self) -> bool { + self.index_slab.is_some() + } +} + +// TODO: Try interning these. +/// The data used to bin each opaque 3D object in the prepass and deferred pass. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct OpaqueNoLightmap3dBinKey { + /// The ID of the asset. + pub asset_id: UntypedAssetId, +} + +impl PhaseItem for Opaque3dPrepass { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for Opaque3dPrepass { + type BatchSetKey = OpaqueNoLightmap3dBatchSetKey; + type BinKey = OpaqueNoLightmap3dBinKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, 
MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Opaque3dPrepass { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for Opaque3dPrepass { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +/// Alpha mask phase of the 3D prepass. +/// +/// Sorted by pipeline, then by mesh to improve batching. +/// +/// Used to render all meshes with a material with an alpha mask. +pub struct AlphaMask3dPrepass { + /// Determines which objects can be placed into a *batch set*. + /// + /// Objects in a single batch set can potentially be multi-drawn together, + /// if it's enabled and the current platform supports it. + pub batch_set_key: OpaqueNoLightmap3dBatchSetKey, + /// Information that separates items into bins. + pub bin_key: OpaqueNoLightmap3dBinKey, + pub representative_entity: (Entity, MainEntity), + pub batch_range: Range, + pub extra_index: PhaseItemExtraIndex, +} + +impl PhaseItem for AlphaMask3dPrepass { + #[inline] + fn entity(&self) -> Entity { + self.representative_entity.0 + } + + fn main_entity(&self) -> MainEntity { + self.representative_entity.1 + } + + #[inline] + fn draw_function(&self) -> DrawFunctionId { + self.batch_set_key.draw_function + } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index.clone() + } + + #[inline] + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) + } +} + +impl BinnedPhaseItem for AlphaMask3dPrepass { + type BatchSetKey = OpaqueNoLightmap3dBatchSetKey; + type BinKey = OpaqueNoLightmap3dBinKey; + + #[inline] + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + 
representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Self { + batch_set_key, + bin_key, + representative_entity, + batch_range, + extra_index, + } + } +} + +impl CachedRenderPipelinePhaseItem for AlphaMask3dPrepass { + #[inline] + fn cached_pipeline(&self) -> CachedRenderPipelineId { + self.batch_set_key.pipeline + } +} + +pub fn prepass_target_descriptors( + normal_prepass: bool, + motion_vector_prepass: bool, + deferred_prepass: bool, +) -> Vec> { + vec![ + normal_prepass.then_some(ColorTargetState { + format: NORMAL_PREPASS_FORMAT, + blend: None, + write_mask: ColorWrites::ALL, + }), + motion_vector_prepass.then_some(ColorTargetState { + format: MOTION_VECTOR_PREPASS_FORMAT, + blend: None, + write_mask: ColorWrites::ALL, + }), + deferred_prepass.then_some(ColorTargetState { + format: DEFERRED_PREPASS_FORMAT, + blend: None, + write_mask: ColorWrites::ALL, + }), + deferred_prepass.then_some(ColorTargetState { + format: DEFERRED_LIGHTING_PASS_ID_FORMAT, + blend: None, + write_mask: ColorWrites::ALL, + }), + ] +} diff --git a/crates/libmarathon/src/render/prepass/node.rs b/crates/libmarathon/src/render/prepass/node.rs new file mode 100644 index 0000000..662b271 --- /dev/null +++ b/crates/libmarathon/src/render/prepass/node.rs @@ -0,0 +1,255 @@ +use bevy_camera::{MainPassResolutionOverride, Viewport}; +use bevy_ecs::{prelude::*, query::QueryItem}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + experimental::occlusion_culling::OcclusionCulling, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, + render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp}, + renderer::RenderContext, + view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture, ViewUniformOffset}, +}; +use tracing::error; +#[cfg(feature = "trace")] +use tracing::info_span; + +use 
crate::render::skybox::prepass::{RenderSkyboxPrepassPipeline, SkyboxPrepassBindGroup}; + +use super::{ + AlphaMask3dPrepass, DeferredPrepass, Opaque3dPrepass, PreviousViewUniformOffset, + ViewPrepassTextures, +}; + +/// The phase of the prepass that draws meshes that were visible last frame. +/// +/// If occlusion culling isn't in use, this prepass simply draws all meshes. +/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. +#[derive(Default)] +pub struct EarlyPrepassNode; + +impl ViewNode for EarlyPrepassNode { + type ViewQuery = ::ViewQuery; + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + view_query: QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + run_prepass(graph, render_context, view_query, world, "early prepass") + } +} + +/// The phase of the prepass that runs after occlusion culling against the +/// meshes that were visible last frame. +/// +/// If occlusion culling isn't in use, this is a no-op. +/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. +#[derive(Default)] +pub struct LatePrepassNode; + +impl ViewNode for LatePrepassNode { + type ViewQuery = ( + ( + &'static ExtractedCamera, + &'static ExtractedView, + &'static ViewDepthTexture, + &'static ViewPrepassTextures, + &'static ViewUniformOffset, + ), + ( + Option<&'static DeferredPrepass>, + Option<&'static RenderSkyboxPrepassPipeline>, + Option<&'static SkyboxPrepassBindGroup>, + Option<&'static PreviousViewUniformOffset>, + Option<&'static MainPassResolutionOverride>, + ), + ( + Has, + Has, + Has, + ), + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + query: QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + // We only need a late prepass if we have occlusion culling and indirect + // drawing. 
+ let (_, _, (occlusion_culling, no_indirect_drawing, _)) = query; + if !occlusion_culling || no_indirect_drawing { + return Ok(()); + } + + run_prepass(graph, render_context, query, world, "late prepass") + } +} + +/// Runs a prepass that draws all meshes to the depth buffer, and possibly +/// normal and motion vector buffers as well. +/// +/// If occlusion culling isn't in use, and a prepass is enabled, then there's +/// only one prepass. If occlusion culling is in use, then any prepass is split +/// into two: an *early* prepass and a *late* prepass. The early prepass draws +/// what was visible last frame, and the last prepass performs occlusion culling +/// against a conservative hierarchical Z buffer before drawing unoccluded +/// meshes. +fn run_prepass<'w>( + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + (camera, extracted_view, view_depth_texture, view_prepass_textures, view_uniform_offset), + ( + deferred_prepass, + skybox_prepass_pipeline, + skybox_prepass_bind_group, + view_prev_uniform_offset, + resolution_override, + ), + (_, _, has_deferred), + ): QueryItem<'w, '_, ::ViewQuery>, + world: &'w World, + label: &'static str, +) -> Result<(), NodeRunError> { + // If we're using deferred rendering, there will be a deferred prepass + // instead of this one. Just bail out so we don't have to bother looking at + // the empty bins. 
+ if has_deferred { + return Ok(()); + } + + let (Some(opaque_prepass_phases), Some(alpha_mask_prepass_phases)) = ( + world.get_resource::>(), + world.get_resource::>(), + ) else { + return Ok(()); + }; + + let (Some(opaque_prepass_phase), Some(alpha_mask_prepass_phase)) = ( + opaque_prepass_phases.get(&extracted_view.retained_view_entity), + alpha_mask_prepass_phases.get(&extracted_view.retained_view_entity), + ) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let mut color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + // Use None in place of deferred attachments + None, + None, + ]; + + // If all color attachments are none: clear the color attachment list so that no fragment shader is required + if color_attachments.iter().all(Option::is_none) { + color_attachments.clear(); + } + + let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + + let view_entity = graph.view_entity(); + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _prepass_span = info_span!("prepass").entered(); + + // Command encoder setup + let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("prepass_command_encoder"), + }); + + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some(label), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, label); + + if let Some(viewport) = + 
Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override) + { + render_pass.set_camera_viewport(&viewport); + } + + // Opaque draws + if !opaque_prepass_phase.is_empty() { + #[cfg(feature = "trace")] + let _opaque_prepass_span = info_span!("opaque_prepass").entered(); + if let Err(err) = opaque_prepass_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the opaque prepass phase {err:?}"); + } + } + + // Alpha masked draws + if !alpha_mask_prepass_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered(); + if let Err(err) = alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity) + { + error!("Error encountered while rendering the alpha mask prepass phase {err:?}"); + } + } + + // Skybox draw using a fullscreen triangle + if let ( + Some(skybox_prepass_pipeline), + Some(skybox_prepass_bind_group), + Some(view_prev_uniform_offset), + ) = ( + skybox_prepass_pipeline, + skybox_prepass_bind_group, + view_prev_uniform_offset, + ) { + let pipeline_cache = world.resource::(); + if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_prepass_pipeline.0) { + render_pass.set_render_pipeline(pipeline); + render_pass.set_bind_group( + 0, + &skybox_prepass_bind_group.0, + &[view_uniform_offset.offset, view_prev_uniform_offset.offset], + ); + render_pass.draw(0..3, 0..1); + } + } + + pass_span.end(&mut render_pass); + drop(render_pass); + + // After rendering to the view depth texture, copy it to the prepass depth texture if deferred isn't going to + if deferred_prepass.is_none() + && let Some(prepass_depth_texture) = &view_prepass_textures.depth + { + command_encoder.copy_texture_to_texture( + view_depth_texture.texture.as_image_copy(), + prepass_depth_texture.texture.texture.as_image_copy(), + view_prepass_textures.size, + ); + } + + command_encoder.finish() + }); + + Ok(()) +} diff --git 
a/crates/libmarathon/src/render/render_asset.rs b/crates/libmarathon/src/render/render_asset.rs new file mode 100644 index 0000000..7e443f4 --- /dev/null +++ b/crates/libmarathon/src/render/render_asset.rs @@ -0,0 +1,516 @@ +use crate::render::{ + render_resource::AsBindGroupError, Extract, ExtractSchedule, MainWorld, Render, RenderApp, + RenderSystems, +}; +use bevy_app::{App, Plugin, SubApp}; +use bevy_asset::{Asset, AssetEvent, AssetId, Assets, RenderAssetUsages}; +use bevy_ecs::{ + prelude::{Commands, IntoScheduleConfigs, MessageReader, Res, ResMut, Resource}, + schedule::{ScheduleConfigs, SystemSet}, + system::{ScheduleSystem, StaticSystemParam, SystemParam, SystemParamItem, SystemState}, + world::{FromWorld, Mut}, +}; +use bevy_platform::collections::{HashMap, HashSet}; +use core::marker::PhantomData; +use core::sync::atomic::{AtomicUsize, Ordering}; +use thiserror::Error; +use tracing::{debug, error}; + +#[derive(Debug, Error)] +pub enum PrepareAssetError { + #[error("Failed to prepare asset")] + RetryNextUpdate(E), + #[error("Failed to build bind group: {0}")] + AsBindGroupError(AsBindGroupError), +} + +/// The system set during which we extract modified assets to the render world. +#[derive(SystemSet, Clone, PartialEq, Eq, Debug, Hash)] +pub struct AssetExtractionSystems; + +/// Deprecated alias for [`AssetExtractionSystems`]. +#[deprecated(since = "0.17.0", note = "Renamed to `AssetExtractionSystems`.")] +pub type ExtractAssetsSet = AssetExtractionSystems; + +/// Describes how an asset gets extracted and prepared for rendering. +/// +/// In the [`ExtractSchedule`] step the [`RenderAsset::SourceAsset`] is transferred +/// from the "main world" into the "render world". +/// +/// After that in the [`RenderSystems::PrepareAssets`] step the extracted asset +/// is transformed into its GPU-representation of type [`RenderAsset`]. +pub trait RenderAsset: Send + Sync + 'static + Sized { + /// The representation of the asset in the "main world". 
+ type SourceAsset: Asset + Clone; + + /// Specifies all ECS data required by [`RenderAsset::prepare_asset`]. + /// + /// For convenience use the [`lifetimeless`](bevy_ecs::system::lifetimeless) [`SystemParam`]. + type Param: SystemParam; + + /// Whether or not to unload the asset after extracting it to the render world. + #[inline] + fn asset_usage(_source_asset: &Self::SourceAsset) -> RenderAssetUsages { + RenderAssetUsages::default() + } + + /// Size of the data the asset will upload to the gpu. Specifying a return value + /// will allow the asset to be throttled via [`RenderAssetBytesPerFrame`]. + #[inline] + #[expect( + unused_variables, + reason = "The parameters here are intentionally unused by the default implementation; however, putting underscores here will result in the underscores being copied by rust-analyzer's tab completion." + )] + fn byte_len(source_asset: &Self::SourceAsset) -> Option { + None + } + + /// Prepares the [`RenderAsset::SourceAsset`] for the GPU by transforming it into a [`RenderAsset`]. + /// + /// ECS data may be accessed via `param`. + fn prepare_asset( + source_asset: Self::SourceAsset, + asset_id: AssetId, + param: &mut SystemParamItem, + previous_asset: Option<&Self>, + ) -> Result>; + + /// Called whenever the [`RenderAsset::SourceAsset`] has been removed. + /// + /// You can implement this method if you need to access ECS data (via + /// `_param`) in order to perform cleanup tasks when the asset is removed. + /// + /// The default implementation does nothing. + fn unload_asset( + _source_asset: AssetId, + _param: &mut SystemParamItem, + ) { + } +} + +/// This plugin extracts the changed assets from the "app world" into the "render world" +/// and prepares them for the GPU. They can then be accessed from the [`RenderAssets`] resource. +/// +/// Therefore it sets up the [`ExtractSchedule`] and +/// [`RenderSystems::PrepareAssets`] steps for the specified [`RenderAsset`]. 
+/// +/// The `AFTER` generic parameter can be used to specify that `A::prepare_asset` should not be run until +/// `prepare_assets::` has completed. This allows the `prepare_asset` function to depend on another +/// prepared [`RenderAsset`], for example `Mesh::prepare_asset` relies on `RenderAssets::` for morph +/// targets, so the plugin is created as `RenderAssetPlugin::::default()`. +pub struct RenderAssetPlugin { + phantom: PhantomData (A, AFTER)>, +} + +impl Default + for RenderAssetPlugin +{ + fn default() -> Self { + Self { + phantom: Default::default(), + } + } +} + +impl Plugin + for RenderAssetPlugin +{ + fn build(&self, app: &mut App) { + app.init_resource::>(); + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::>() + .init_resource::>() + .init_resource::>() + .add_systems( + ExtractSchedule, + extract_render_asset::.in_set(AssetExtractionSystems), + ); + AFTER::register_system( + render_app, + prepare_assets::.in_set(RenderSystems::PrepareAssets), + ); + } + } +} + +// helper to allow specifying dependencies between render assets +pub trait RenderAssetDependency { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs); +} + +impl RenderAssetDependency for () { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs) { + render_app.add_systems(Render, system); + } +} + +impl RenderAssetDependency for A { + fn register_system(render_app: &mut SubApp, system: ScheduleConfigs) { + render_app.add_systems(Render, system.after(prepare_assets::)); + } +} + +/// Temporarily stores the extracted and removed assets of the current frame. +#[derive(Resource)] +pub struct ExtractedAssets { + /// The assets extracted this frame. + /// + /// These are assets that were either added or modified this frame. + pub extracted: Vec<(AssetId, A::SourceAsset)>, + + /// IDs of the assets that were removed this frame. + /// + /// These assets will not be present in [`ExtractedAssets::extracted`]. 
+ pub removed: HashSet>, + + /// IDs of the assets that were modified this frame. + pub modified: HashSet>, + + /// IDs of the assets that were added this frame. + pub added: HashSet>, +} + +impl Default for ExtractedAssets { + fn default() -> Self { + Self { + extracted: Default::default(), + removed: Default::default(), + modified: Default::default(), + added: Default::default(), + } + } +} + +/// Stores all GPU representations ([`RenderAsset`]) +/// of [`RenderAsset::SourceAsset`] as long as they exist. +#[derive(Resource)] +pub struct RenderAssets(HashMap, A>); + +impl Default for RenderAssets { + fn default() -> Self { + Self(Default::default()) + } +} + +impl RenderAssets { + pub fn get(&self, id: impl Into>) -> Option<&A> { + self.0.get(&id.into()) + } + + pub fn get_mut(&mut self, id: impl Into>) -> Option<&mut A> { + self.0.get_mut(&id.into()) + } + + pub fn insert(&mut self, id: impl Into>, value: A) -> Option { + self.0.insert(id.into(), value) + } + + pub fn remove(&mut self, id: impl Into>) -> Option { + self.0.remove(&id.into()) + } + + pub fn iter(&self) -> impl Iterator, &A)> { + self.0.iter().map(|(k, v)| (*k, v)) + } + + pub fn iter_mut(&mut self) -> impl Iterator, &mut A)> { + self.0.iter_mut().map(|(k, v)| (*k, v)) + } +} + +#[derive(Resource)] +struct CachedExtractRenderAssetSystemState { + state: SystemState<( + MessageReader<'static, 'static, AssetEvent>, + ResMut<'static, Assets>, + )>, +} + +impl FromWorld for CachedExtractRenderAssetSystemState { + fn from_world(world: &mut bevy_ecs::world::World) -> Self { + Self { + state: SystemState::new(world), + } + } +} + +/// This system extracts all created or modified assets of the corresponding [`RenderAsset::SourceAsset`] type +/// into the "render world". 
+pub(crate) fn extract_render_asset( + mut commands: Commands, + mut main_world: ResMut, +) { + main_world.resource_scope( + |world, mut cached_state: Mut>| { + let (mut events, mut assets) = cached_state.state.get_mut(world); + + let mut needs_extracting = >::default(); + let mut removed = >::default(); + let mut modified = >::default(); + + for event in events.read() { + #[expect( + clippy::match_same_arms, + reason = "LoadedWithDependencies is marked as a TODO, so it's likely this will no longer lint soon." + )] + match event { + AssetEvent::Added { id } => { + needs_extracting.insert(*id); + } + AssetEvent::Modified { id } => { + needs_extracting.insert(*id); + modified.insert(*id); + } + AssetEvent::Removed { .. } => { + // We don't care that the asset was removed from Assets in the main world. + // An asset is only removed from RenderAssets when its last handle is dropped (AssetEvent::Unused). + } + AssetEvent::Unused { id } => { + needs_extracting.remove(id); + modified.remove(id); + removed.insert(*id); + } + AssetEvent::LoadedWithDependencies { .. } => { + // TODO: handle this + } + } + } + + let mut extracted_assets = Vec::new(); + let mut added = >::default(); + for id in needs_extracting.drain() { + if let Some(asset) = assets.get(id) { + let asset_usage = A::asset_usage(asset); + if asset_usage.contains(RenderAssetUsages::RENDER_WORLD) { + if asset_usage == RenderAssetUsages::RENDER_WORLD { + if let Some(asset) = assets.remove(id) { + extracted_assets.push((id, asset)); + added.insert(id); + } + } else { + extracted_assets.push((id, asset.clone())); + added.insert(id); + } + } + } + } + + commands.insert_resource(ExtractedAssets:: { + extracted: extracted_assets, + removed, + modified, + added, + }); + cached_state.state.apply(world); + }, + ); +} + +// TODO: consider storing inside system? +/// All assets that should be prepared next frame. 
+#[derive(Resource)] +pub struct PrepareNextFrameAssets { + assets: Vec<(AssetId, A::SourceAsset)>, +} + +impl Default for PrepareNextFrameAssets { + fn default() -> Self { + Self { + assets: Default::default(), + } + } +} + +/// This system prepares all assets of the corresponding [`RenderAsset::SourceAsset`] type +/// which where extracted this frame for the GPU. +pub fn prepare_assets( + mut extracted_assets: ResMut>, + mut render_assets: ResMut>, + mut prepare_next_frame: ResMut>, + param: StaticSystemParam<::Param>, + bpf: Res, +) { + let mut wrote_asset_count = 0; + + let mut param = param.into_inner(); + let queued_assets = core::mem::take(&mut prepare_next_frame.assets); + for (id, extracted_asset) in queued_assets { + if extracted_assets.removed.contains(&id) || extracted_assets.added.contains(&id) { + // skip previous frame's assets that have been removed or updated + continue; + } + + let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) { + // we could check if available bytes > byte_len here, but we want to make some + // forward progress even if the asset is larger than the max bytes per frame. + // this way we always write at least one (sized) asset per frame. + // in future we could also consider partial asset uploads. 
+ if bpf.exhausted() { + prepare_next_frame.assets.push((id, extracted_asset)); + continue; + } + size + } else { + 0 + }; + + let previous_asset = render_assets.get(id); + match A::prepare_asset(extracted_asset, id, &mut param, previous_asset) { + Ok(prepared_asset) => { + render_assets.insert(id, prepared_asset); + bpf.write_bytes(write_bytes); + wrote_asset_count += 1; + } + Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { + prepare_next_frame.assets.push((id, extracted_asset)); + } + Err(PrepareAssetError::AsBindGroupError(e)) => { + error!( + "{} Bind group construction failed: {e}", + core::any::type_name::() + ); + } + } + } + + for removed in extracted_assets.removed.drain() { + render_assets.remove(removed); + A::unload_asset(removed, &mut param); + } + + for (id, extracted_asset) in extracted_assets.extracted.drain(..) { + // we remove previous here to ensure that if we are updating the asset then + // any users will not see the old asset after a new asset is extracted, + // even if the new asset is not yet ready or we are out of bytes to write. 
+ let previous_asset = render_assets.remove(id); + + let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) { + if bpf.exhausted() { + prepare_next_frame.assets.push((id, extracted_asset)); + continue; + } + size + } else { + 0 + }; + + match A::prepare_asset(extracted_asset, id, &mut param, previous_asset.as_ref()) { + Ok(prepared_asset) => { + render_assets.insert(id, prepared_asset); + bpf.write_bytes(write_bytes); + wrote_asset_count += 1; + } + Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { + prepare_next_frame.assets.push((id, extracted_asset)); + } + Err(PrepareAssetError::AsBindGroupError(e)) => { + error!( + "{} Bind group construction failed: {e}", + core::any::type_name::() + ); + } + } + } + + if bpf.exhausted() && !prepare_next_frame.assets.is_empty() { + debug!( + "{} write budget exhausted with {} assets remaining (wrote {})", + core::any::type_name::(), + prepare_next_frame.assets.len(), + wrote_asset_count + ); + } +} + +pub fn reset_render_asset_bytes_per_frame( + mut bpf_limiter: ResMut, +) { + bpf_limiter.reset(); +} + +pub fn extract_render_asset_bytes_per_frame( + bpf: Extract>, + mut bpf_limiter: ResMut, +) { + bpf_limiter.max_bytes = bpf.max_bytes; +} + +/// A resource that defines the amount of data allowed to be transferred from CPU to GPU +/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets +/// to become available. +#[derive(Resource, Default)] +pub struct RenderAssetBytesPerFrame { + pub max_bytes: Option, +} + +impl RenderAssetBytesPerFrame { + /// `max_bytes`: the number of bytes to write per frame. + /// + /// This is a soft limit: only full assets are written currently, uploading stops + /// after the first asset that exceeds the limit. + /// + /// To participate, assets should implement [`RenderAsset::byte_len`]. If the default + /// is not overridden, the assets are assumed to be small enough to upload without restriction. 
+ pub fn new(max_bytes: usize) -> Self { + Self { + max_bytes: Some(max_bytes), + } + } +} + +/// A render-world resource that facilitates limiting the data transferred from CPU to GPU +/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets +/// to become available. +#[derive(Resource, Default)] +pub struct RenderAssetBytesPerFrameLimiter { + /// Populated by [`RenderAssetBytesPerFrame`] during extraction. + pub max_bytes: Option, + /// Bytes written this frame. + pub bytes_written: AtomicUsize, +} + +impl RenderAssetBytesPerFrameLimiter { + /// Reset the available bytes. Called once per frame during extraction by [`crate::RenderPlugin`]. + pub fn reset(&mut self) { + if self.max_bytes.is_none() { + return; + } + self.bytes_written.store(0, Ordering::Relaxed); + } + + /// Check how many bytes are available for writing. + pub fn available_bytes(&self, required_bytes: usize) -> usize { + if let Some(max_bytes) = self.max_bytes { + let total_bytes = self + .bytes_written + .fetch_add(required_bytes, Ordering::Relaxed); + + // The bytes available is the inverse of the amount we overshot max_bytes + if total_bytes >= max_bytes { + required_bytes.saturating_sub(total_bytes - max_bytes) + } else { + required_bytes + } + } else { + required_bytes + } + } + + /// Decreases the available bytes for the current frame. + pub(crate) fn write_bytes(&self, bytes: usize) { + if self.max_bytes.is_some() && bytes > 0 { + self.bytes_written.fetch_add(bytes, Ordering::Relaxed); + } + } + + /// Returns `true` if there are no remaining bytes available for writing this frame. 
+ pub(crate) fn exhausted(&self) -> bool { + if let Some(max_bytes) = self.max_bytes { + let bytes_written = self.bytes_written.load(Ordering::Relaxed); + bytes_written >= max_bytes + } else { + false + } + } +} diff --git a/crates/libmarathon/src/render/render_graph/app.rs b/crates/libmarathon/src/render/render_graph/app.rs new file mode 100644 index 0000000..879f28f --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/app.rs @@ -0,0 +1,174 @@ +use bevy_app::{App, SubApp}; +use bevy_ecs::world::{FromWorld, World}; +use tracing::warn; + +use super::{IntoRenderNodeArray, Node, RenderGraph, RenderLabel, RenderSubGraph}; + +/// Adds common [`RenderGraph`] operations to [`SubApp`] (and [`App`]). +pub trait RenderGraphExt { + // Add a sub graph to the [`RenderGraph`] + fn add_render_sub_graph(&mut self, sub_graph: impl RenderSubGraph) -> &mut Self; + /// Add a [`Node`] to the [`RenderGraph`]: + /// * Create the [`Node`] using the [`FromWorld`] implementation + /// * Add it to the graph + fn add_render_graph_node( + &mut self, + sub_graph: impl RenderSubGraph, + node_label: impl RenderLabel, + ) -> &mut Self; + /// Automatically add the required node edges based on the given ordering + fn add_render_graph_edges( + &mut self, + sub_graph: impl RenderSubGraph, + edges: impl IntoRenderNodeArray, + ) -> &mut Self; + + /// Add node edge to the specified graph + fn add_render_graph_edge( + &mut self, + sub_graph: impl RenderSubGraph, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> &mut Self; +} + +impl RenderGraphExt for World { + fn add_render_graph_node( + &mut self, + sub_graph: impl RenderSubGraph, + node_label: impl RenderLabel, + ) -> &mut Self { + let sub_graph = sub_graph.intern(); + let node = T::from_world(self); + let mut render_graph = self.get_resource_mut::().expect( + "RenderGraph not found. 
Make sure you are using add_render_graph_node on the RenderApp", + ); + if let Some(graph) = render_graph.get_sub_graph_mut(sub_graph) { + graph.add_node(node_label, node); + } else { + warn!( + "Tried adding a render graph node to {sub_graph:?} but the sub graph doesn't exist" + ); + } + self + } + + #[track_caller] + fn add_render_graph_edges( + &mut self, + sub_graph: impl RenderSubGraph, + edges: impl IntoRenderNodeArray, + ) -> &mut Self { + let sub_graph = sub_graph.intern(); + let mut render_graph = self.get_resource_mut::().expect( + "RenderGraph not found. Make sure you are using add_render_graph_edges on the RenderApp", + ); + if let Some(graph) = render_graph.get_sub_graph_mut(sub_graph) { + graph.add_node_edges(edges); + } else { + warn!( + "Tried adding render graph edges to {sub_graph:?} but the sub graph doesn't exist" + ); + } + self + } + + fn add_render_graph_edge( + &mut self, + sub_graph: impl RenderSubGraph, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> &mut Self { + let sub_graph = sub_graph.intern(); + let mut render_graph = self.get_resource_mut::().expect( + "RenderGraph not found. Make sure you are using add_render_graph_edge on the RenderApp", + ); + if let Some(graph) = render_graph.get_sub_graph_mut(sub_graph) { + graph.add_node_edge(output_node, input_node); + } else { + warn!( + "Tried adding a render graph edge to {sub_graph:?} but the sub graph doesn't exist" + ); + } + self + } + + fn add_render_sub_graph(&mut self, sub_graph: impl RenderSubGraph) -> &mut Self { + let mut render_graph = self.get_resource_mut::().expect( + "RenderGraph not found. 
Make sure you are using add_render_sub_graph on the RenderApp", + ); + render_graph.add_sub_graph(sub_graph, RenderGraph::default()); + self + } +} + +impl RenderGraphExt for SubApp { + fn add_render_graph_node( + &mut self, + sub_graph: impl RenderSubGraph, + node_label: impl RenderLabel, + ) -> &mut Self { + World::add_render_graph_node::(self.world_mut(), sub_graph, node_label); + self + } + + fn add_render_graph_edge( + &mut self, + sub_graph: impl RenderSubGraph, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> &mut Self { + World::add_render_graph_edge(self.world_mut(), sub_graph, output_node, input_node); + self + } + + #[track_caller] + fn add_render_graph_edges( + &mut self, + sub_graph: impl RenderSubGraph, + edges: impl IntoRenderNodeArray, + ) -> &mut Self { + World::add_render_graph_edges(self.world_mut(), sub_graph, edges); + self + } + + fn add_render_sub_graph(&mut self, sub_graph: impl RenderSubGraph) -> &mut Self { + World::add_render_sub_graph(self.world_mut(), sub_graph); + self + } +} + +impl RenderGraphExt for App { + fn add_render_graph_node( + &mut self, + sub_graph: impl RenderSubGraph, + node_label: impl RenderLabel, + ) -> &mut Self { + World::add_render_graph_node::(self.world_mut(), sub_graph, node_label); + self + } + + fn add_render_graph_edge( + &mut self, + sub_graph: impl RenderSubGraph, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> &mut Self { + World::add_render_graph_edge(self.world_mut(), sub_graph, output_node, input_node); + self + } + + fn add_render_graph_edges( + &mut self, + sub_graph: impl RenderSubGraph, + edges: impl IntoRenderNodeArray, + ) -> &mut Self { + World::add_render_graph_edges(self.world_mut(), sub_graph, edges); + self + } + + fn add_render_sub_graph(&mut self, sub_graph: impl RenderSubGraph) -> &mut Self { + World::add_render_sub_graph(self.world_mut(), sub_graph); + self + } +} diff --git a/crates/libmarathon/src/render/render_graph/camera_driver_node.rs 
b/crates/libmarathon/src/render/render_graph/camera_driver_node.rs new file mode 100644 index 0000000..0f52396 --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/camera_driver_node.rs @@ -0,0 +1,99 @@ +use crate::render::{ + camera::{ExtractedCamera, SortedCameras}, + render_graph::{Node, NodeRunError, RenderGraphContext}, + renderer::RenderContext, + view::ExtractedWindows, +}; +use bevy_camera::{ClearColor, NormalizedRenderTarget}; +use bevy_ecs::{entity::ContainsEntity, prelude::QueryState, world::World}; +use bevy_platform::collections::HashSet; +use wgpu::{LoadOp, Operations, RenderPassColorAttachment, RenderPassDescriptor, StoreOp}; + +pub struct CameraDriverNode { + cameras: QueryState<&'static ExtractedCamera>, +} + +impl CameraDriverNode { + pub fn new(world: &mut World) -> Self { + Self { + cameras: world.query(), + } + } +} + +impl Node for CameraDriverNode { + fn update(&mut self, world: &mut World) { + self.cameras.update_archetypes(world); + } + fn run( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + world: &World, + ) -> Result<(), NodeRunError> { + let sorted_cameras = world.resource::(); + let windows = world.resource::(); + let mut camera_windows = >::default(); + for sorted_camera in &sorted_cameras.0 { + let Ok(camera) = self.cameras.get_manual(world, sorted_camera.entity) else { + continue; + }; + + let mut run_graph = true; + if let Some(NormalizedRenderTarget::Window(window_ref)) = camera.target { + let window_entity = window_ref.entity(); + if windows + .windows + .get(&window_entity) + .is_some_and(|w| w.physical_width > 0 && w.physical_height > 0) + { + camera_windows.insert(window_entity); + } else { + // The window doesn't exist anymore or zero-sized so we don't need to run the graph + run_graph = false; + } + } + if run_graph { + graph.run_sub_graph(camera.render_graph, vec![], Some(sorted_camera.entity))?; + } + } + + let clear_color_global = world.resource::(); + + // wgpu (and some 
backends) require doing work for swap chains if you call `get_current_texture()` and `present()` + // This ensures that Bevy doesn't crash, even when there are no cameras (and therefore no work submitted). + for (id, window) in world.resource::().iter() { + if camera_windows.contains(id) && render_context.has_commands() { + continue; + } + + let Some(swap_chain_texture) = &window.swap_chain_texture_view else { + continue; + }; + + #[cfg(feature = "trace")] + let _span = tracing::info_span!("no_camera_clear_pass").entered(); + let pass_descriptor = RenderPassDescriptor { + label: Some("no_camera_clear_pass"), + color_attachments: &[Some(RenderPassColorAttachment { + view: swap_chain_texture, + depth_slice: None, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(clear_color_global.to_linear().into()), + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + render_context + .command_encoder() + .begin_render_pass(&pass_descriptor); + } + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/render_graph/context.rs b/crates/libmarathon/src/render/render_graph/context.rs new file mode 100644 index 0000000..0fc508e --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/context.rs @@ -0,0 +1,283 @@ +use crate::render::{ + render_graph::{NodeState, RenderGraph, SlotInfos, SlotLabel, SlotType, SlotValue}, + render_resource::{Buffer, Sampler, TextureView}, +}; +use std::borrow::Cow; +use bevy_ecs::{entity::Entity, intern::Interned}; +use thiserror::Error; + +use super::{InternedRenderSubGraph, RenderLabel, RenderSubGraph}; + +/// A command that signals the graph runner to run the sub graph corresponding to the `sub_graph` +/// with the specified `inputs` next. +pub struct RunSubGraph { + pub sub_graph: InternedRenderSubGraph, + pub inputs: Vec, + pub view_entity: Option, +} + +/// The context with all graph information required to run a [`Node`](super::Node). 
+/// This context is created for each node by the render graph runner. +/// +/// The slot input can be read from here and the outputs must be written back to the context for +/// passing them onto the next node. +/// +/// Sub graphs can be queued for running by adding a [`RunSubGraph`] command to the context. +/// After the node has finished running the graph runner is responsible for executing the sub graphs. +pub struct RenderGraphContext<'a> { + graph: &'a RenderGraph, + node: &'a NodeState, + inputs: &'a [SlotValue], + outputs: &'a mut [Option], + run_sub_graphs: Vec, + /// The `view_entity` associated with the render graph being executed + /// This is optional because you aren't required to have a `view_entity` for a node. + /// For example, compute shader nodes don't have one. + /// It should always be set when the [`RenderGraph`] is running on a View. + view_entity: Option, +} + +impl<'a> RenderGraphContext<'a> { + /// Creates a new render graph context for the `node`. + pub fn new( + graph: &'a RenderGraph, + node: &'a NodeState, + inputs: &'a [SlotValue], + outputs: &'a mut [Option], + ) -> Self { + Self { + graph, + node, + inputs, + outputs, + run_sub_graphs: Vec::new(), + view_entity: None, + } + } + + /// Returns the input slot values for the node. + #[inline] + pub fn inputs(&self) -> &[SlotValue] { + self.inputs + } + + /// Returns the [`SlotInfos`] of the inputs. + pub fn input_info(&self) -> &SlotInfos { + &self.node.input_slots + } + + /// Returns the [`SlotInfos`] of the outputs. + pub fn output_info(&self) -> &SlotInfos { + &self.node.output_slots + } + + /// Retrieves the input slot value referenced by the `label`. + pub fn get_input(&self, label: impl Into) -> Result<&SlotValue, InputSlotError> { + let label = label.into(); + let index = self + .input_info() + .get_slot_index(label.clone()) + .ok_or(InputSlotError::InvalidSlot(label))?; + Ok(&self.inputs[index]) + } + + // TODO: should this return an Arc or a reference? 
+ /// Retrieves the input slot value referenced by the `label` as a [`TextureView`]. + pub fn get_input_texture( + &self, + label: impl Into, + ) -> Result<&TextureView, InputSlotError> { + let label = label.into(); + match self.get_input(label.clone())? { + SlotValue::TextureView(value) => Ok(value), + value => Err(InputSlotError::MismatchedSlotType { + label, + actual: value.slot_type(), + expected: SlotType::TextureView, + }), + } + } + + /// Retrieves the input slot value referenced by the `label` as a [`Sampler`]. + pub fn get_input_sampler( + &self, + label: impl Into, + ) -> Result<&Sampler, InputSlotError> { + let label = label.into(); + match self.get_input(label.clone())? { + SlotValue::Sampler(value) => Ok(value), + value => Err(InputSlotError::MismatchedSlotType { + label, + actual: value.slot_type(), + expected: SlotType::Sampler, + }), + } + } + + /// Retrieves the input slot value referenced by the `label` as a [`Buffer`]. + pub fn get_input_buffer(&self, label: impl Into) -> Result<&Buffer, InputSlotError> { + let label = label.into(); + match self.get_input(label.clone())? { + SlotValue::Buffer(value) => Ok(value), + value => Err(InputSlotError::MismatchedSlotType { + label, + actual: value.slot_type(), + expected: SlotType::Buffer, + }), + } + } + + /// Retrieves the input slot value referenced by the `label` as an [`Entity`]. + pub fn get_input_entity(&self, label: impl Into) -> Result { + let label = label.into(); + match self.get_input(label.clone())? { + SlotValue::Entity(value) => Ok(*value), + value => Err(InputSlotError::MismatchedSlotType { + label, + actual: value.slot_type(), + expected: SlotType::Entity, + }), + } + } + + /// Sets the output slot value referenced by the `label`. 
+ pub fn set_output( + &mut self, + label: impl Into, + value: impl Into, + ) -> Result<(), OutputSlotError> { + let label = label.into(); + let value = value.into(); + let slot_index = self + .output_info() + .get_slot_index(label.clone()) + .ok_or_else(|| OutputSlotError::InvalidSlot(label.clone()))?; + let slot = self + .output_info() + .get_slot(slot_index) + .expect("slot is valid"); + if value.slot_type() != slot.slot_type { + return Err(OutputSlotError::MismatchedSlotType { + label, + actual: slot.slot_type, + expected: value.slot_type(), + }); + } + self.outputs[slot_index] = Some(value); + Ok(()) + } + + pub fn view_entity(&self) -> Entity { + self.view_entity.unwrap() + } + + pub fn get_view_entity(&self) -> Option { + self.view_entity + } + + pub fn set_view_entity(&mut self, view_entity: Entity) { + self.view_entity = Some(view_entity); + } + + /// Queues up a sub graph for execution after the node has finished running. + pub fn run_sub_graph( + &mut self, + name: impl RenderSubGraph, + inputs: Vec, + view_entity: Option, + ) -> Result<(), RunSubGraphError> { + let name = name.intern(); + let sub_graph = self + .graph + .get_sub_graph(name) + .ok_or(RunSubGraphError::MissingSubGraph(name))?; + if let Some(input_node) = sub_graph.get_input_node() { + for (i, input_slot) in input_node.input_slots.iter().enumerate() { + if let Some(input_value) = inputs.get(i) { + if input_slot.slot_type != input_value.slot_type() { + return Err(RunSubGraphError::MismatchedInputSlotType { + graph_name: name, + slot_index: i, + actual: input_value.slot_type(), + expected: input_slot.slot_type, + label: input_slot.name.clone().into(), + }); + } + } else { + return Err(RunSubGraphError::MissingInput { + slot_index: i, + slot_name: input_slot.name.clone(), + graph_name: name, + }); + } + } + } else if !inputs.is_empty() { + return Err(RunSubGraphError::SubGraphHasNoInputs(name)); + } + + self.run_sub_graphs.push(RunSubGraph { + sub_graph: name, + inputs, + view_entity, + }); + 
+ Ok(()) + } + + /// Returns a human-readable label for this node, for debugging purposes. + pub fn label(&self) -> Interned { + self.node.label + } + + /// Finishes the context for this [`Node`](super::Node) by + /// returning the sub graphs to run next. + pub fn finish(self) -> Vec { + self.run_sub_graphs + } +} + +#[derive(Error, Debug, Eq, PartialEq)] +pub enum RunSubGraphError { + #[error("attempted to run sub-graph `{0:?}`, but it does not exist")] + MissingSubGraph(InternedRenderSubGraph), + #[error("attempted to pass inputs to sub-graph `{0:?}`, which has no input slots")] + SubGraphHasNoInputs(InternedRenderSubGraph), + #[error("sub graph (name: `{graph_name:?}`) could not be run because slot `{slot_name}` at index {slot_index} has no value")] + MissingInput { + slot_index: usize, + slot_name: Cow<'static, str>, + graph_name: InternedRenderSubGraph, + }, + #[error("attempted to use the wrong type for input slot")] + MismatchedInputSlotType { + graph_name: InternedRenderSubGraph, + slot_index: usize, + label: SlotLabel, + expected: SlotType, + actual: SlotType, + }, +} + +#[derive(Error, Debug, Eq, PartialEq)] +pub enum OutputSlotError { + #[error("output slot `{0:?}` does not exist")] + InvalidSlot(SlotLabel), + #[error("attempted to output a value of type `{actual}` to output slot `{label:?}`, which has type `{expected}`")] + MismatchedSlotType { + label: SlotLabel, + expected: SlotType, + actual: SlotType, + }, +} + +#[derive(Error, Debug, Eq, PartialEq)] +pub enum InputSlotError { + #[error("input slot `{0:?}` does not exist")] + InvalidSlot(SlotLabel), + #[error("attempted to retrieve a value of type `{actual}` from input slot `{label:?}`, which has type `{expected}`")] + MismatchedSlotType { + label: SlotLabel, + expected: SlotType, + actual: SlotType, + }, +} diff --git a/crates/libmarathon/src/render/render_graph/edge.rs b/crates/libmarathon/src/render/render_graph/edge.rs new file mode 100644 index 0000000..199b7e8 --- /dev/null +++ 
b/crates/libmarathon/src/render/render_graph/edge.rs @@ -0,0 +1,57 @@ +use super::InternedRenderLabel; + +/// An edge, which connects two [`Nodes`](super::Node) in +/// a [`RenderGraph`](crate::render_graph::RenderGraph). +/// +/// They are used to describe the ordering (which node has to run first) +/// and may be of two kinds: [`NodeEdge`](Self::NodeEdge) and [`SlotEdge`](Self::SlotEdge). +/// +/// Edges are added via the [`RenderGraph::add_node_edge`] and the +/// [`RenderGraph::add_slot_edge`] methods. +/// +/// The former simply states that the `output_node` has to be run before the `input_node`, +/// while the later connects an output slot of the `output_node` +/// with an input slot of the `input_node` to pass additional data along. +/// For more information see [`SlotType`](super::SlotType). +/// +/// [`RenderGraph::add_node_edge`]: crate::render_graph::RenderGraph::add_node_edge +/// [`RenderGraph::add_slot_edge`]: crate::render_graph::RenderGraph::add_slot_edge +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Edge { + /// An edge describing to ordering of both nodes (`output_node` before `input_node`) + /// and connecting the output slot at the `output_index` of the `output_node` + /// with the slot at the `input_index` of the `input_node`. + SlotEdge { + input_node: InternedRenderLabel, + input_index: usize, + output_node: InternedRenderLabel, + output_index: usize, + }, + /// An edge describing to ordering of both nodes (`output_node` before `input_node`). + NodeEdge { + input_node: InternedRenderLabel, + output_node: InternedRenderLabel, + }, +} + +impl Edge { + /// Returns the id of the `input_node`. + pub fn get_input_node(&self) -> InternedRenderLabel { + match self { + Edge::SlotEdge { input_node, .. } | Edge::NodeEdge { input_node, .. } => *input_node, + } + } + + /// Returns the id of the `output_node`. + pub fn get_output_node(&self) -> InternedRenderLabel { + match self { + Edge::SlotEdge { output_node, .. } | Edge::NodeEdge { output_node, .. 
} => *output_node, + } + } +} + +#[derive(PartialEq, Eq)] +pub enum EdgeExistence { + Exists, + DoesNotExist, +} diff --git a/crates/libmarathon/src/render/render_graph/graph.rs b/crates/libmarathon/src/render/render_graph/graph.rs new file mode 100644 index 0000000..83e8288 --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/graph.rs @@ -0,0 +1,918 @@ +use crate::render::{ + render_graph::{ + Edge, Node, NodeRunError, NodeState, RenderGraphContext, RenderGraphError, RenderLabel, + SlotInfo, SlotLabel, + }, + renderer::RenderContext, +}; +use bevy_ecs::{define_label, intern::Interned, prelude::World, resource::Resource}; +use bevy_platform::collections::HashMap; +use core::fmt::Debug; + +use super::{EdgeExistence, InternedRenderLabel, IntoRenderNodeArray}; + +pub use macros::RenderSubGraph; + +define_label!( + #[diagnostic::on_unimplemented( + note = "consider annotating `{Self}` with `#[derive(RenderSubGraph)]`" + )] + /// A strongly-typed class of labels used to identify a [`SubGraph`] in a render graph. + RenderSubGraph, + RENDER_SUB_GRAPH_INTERNER +); + +/// A shorthand for `Interned`. +pub type InternedRenderSubGraph = Interned; + +/// The render graph configures the modular and re-usable render logic. +/// +/// It is a retained and stateless (nodes themselves may have their own internal state) structure, +/// which can not be modified while it is executed by the graph runner. +/// +/// The render graph runner is responsible for executing the entire graph each frame. +/// It will execute each node in the graph in the correct order, based on the edges between the nodes. +/// +/// It consists of three main components: [`Nodes`](Node), [`Edges`](Edge) +/// and [`Slots`](super::SlotType). +/// +/// Nodes are responsible for generating draw calls and operating on input and output slots. +/// Edges specify the order of execution for nodes and connect input and output slots together. +/// Slots describe the render resources created or used by the nodes. 
+/// +/// Additionally a render graph can contain multiple sub graphs, which are run by the +/// corresponding nodes. Every render graph can have its own optional input node. +/// +/// ## Example +/// Here is a simple render graph example with two nodes connected by a node edge. +/// ```ignore +/// # TODO: Remove when #10645 is fixed +/// # use bevy_app::prelude::*; +/// # use bevy_ecs::prelude::World; +/// # use crate::render::render_graph::{RenderGraph, RenderLabel, Node, RenderGraphContext, NodeRunError}; +/// # use crate::render::renderer::RenderContext; +/// # +/// #[derive(RenderLabel)] +/// enum Labels { +/// A, +/// B, +/// } +/// +/// # struct MyNode; +/// # +/// # impl Node for MyNode { +/// # fn run(&self, graph: &mut RenderGraphContext, render_context: &mut RenderContext, world: &World) -> Result<(), NodeRunError> { +/// # unimplemented!() +/// # } +/// # } +/// # +/// let mut graph = RenderGraph::default(); +/// graph.add_node(Labels::A, MyNode); +/// graph.add_node(Labels::B, MyNode); +/// graph.add_node_edge(Labels::B, Labels::A); +/// ``` +#[derive(Resource, Default)] +pub struct RenderGraph { + nodes: HashMap, + sub_graphs: HashMap, +} + +/// The label for the input node of a graph. Used to connect other nodes to it. +#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] +pub struct GraphInput; + +impl RenderGraph { + /// Updates all nodes and sub graphs of the render graph. Should be called before executing it. + pub fn update(&mut self, world: &mut World) { + for node in self.nodes.values_mut() { + node.node.update(world); + } + + for sub_graph in self.sub_graphs.values_mut() { + sub_graph.update(world); + } + } + + /// Creates an [`GraphInputNode`] with the specified slots if not already present. 
+ pub fn set_input(&mut self, inputs: Vec) { + assert!( + matches!( + self.get_node_state(GraphInput), + Err(RenderGraphError::InvalidNode(_)) + ), + "Graph already has an input node" + ); + + self.add_node(GraphInput, GraphInputNode { inputs }); + } + + /// Returns the [`NodeState`] of the input node of this graph. + /// + /// # See also + /// + /// - [`input_node`](Self::input_node) for an unchecked version. + #[inline] + pub fn get_input_node(&self) -> Option<&NodeState> { + self.get_node_state(GraphInput).ok() + } + + /// Returns the [`NodeState`] of the input node of this graph. + /// + /// # Panics + /// + /// Panics if there is no input node set. + /// + /// # See also + /// + /// - [`get_input_node`](Self::get_input_node) for a version which returns an [`Option`] instead. + #[inline] + pub fn input_node(&self) -> &NodeState { + self.get_input_node().unwrap() + } + + /// Adds the `node` with the `label` to the graph. + /// If the label is already present replaces it instead. + pub fn add_node(&mut self, label: impl RenderLabel, node: T) + where + T: Node, + { + let label = label.intern(); + let node_state = NodeState::new(label, node); + self.nodes.insert(label, node_state); + } + + /// Add `node_edge`s based on the order of the given `edges` array. + /// + /// Defining an edge that already exists is not considered an error with this api. + /// It simply won't create a new edge. + #[track_caller] + pub fn add_node_edges(&mut self, edges: impl IntoRenderNodeArray) { + for window in edges.into_array().windows(2) { + let [a, b] = window else { + break; + }; + if let Err(err) = self.try_add_node_edge(*a, *b) { + match err { + // Already existing edges are very easy to produce with this api + // and shouldn't cause a panic + RenderGraphError::EdgeAlreadyExists(_) => {} + _ => panic!("{err}"), + } + } + } + } + + /// Removes the `node` with the `label` from the graph. + /// If the label does not exist, nothing happens. 
+ pub fn remove_node(&mut self, label: impl RenderLabel) -> Result<(), RenderGraphError> { + let label = label.intern(); + if let Some(node_state) = self.nodes.remove(&label) { + // Remove all edges from other nodes to this one. Note that as we're removing this + // node, we don't need to remove its input edges + for input_edge in node_state.edges.input_edges() { + match input_edge { + Edge::SlotEdge { output_node, .. } + | Edge::NodeEdge { + input_node: _, + output_node, + } => { + if let Ok(output_node) = self.get_node_state_mut(*output_node) { + output_node.edges.remove_output_edge(input_edge.clone())?; + } + } + } + } + // Remove all edges from this node to other nodes. Note that as we're removing this + // node, we don't need to remove its output edges + for output_edge in node_state.edges.output_edges() { + match output_edge { + Edge::SlotEdge { + output_node: _, + output_index: _, + input_node, + input_index: _, + } + | Edge::NodeEdge { + output_node: _, + input_node, + } => { + if let Ok(input_node) = self.get_node_state_mut(*input_node) { + input_node.edges.remove_input_edge(output_edge.clone())?; + } + } + } + } + } + + Ok(()) + } + + /// Retrieves the [`NodeState`] referenced by the `label`. + pub fn get_node_state(&self, label: impl RenderLabel) -> Result<&NodeState, RenderGraphError> { + let label = label.intern(); + self.nodes + .get(&label) + .ok_or(RenderGraphError::InvalidNode(label)) + } + + /// Retrieves the [`NodeState`] referenced by the `label` mutably. + pub fn get_node_state_mut( + &mut self, + label: impl RenderLabel, + ) -> Result<&mut NodeState, RenderGraphError> { + let label = label.intern(); + self.nodes + .get_mut(&label) + .ok_or(RenderGraphError::InvalidNode(label)) + } + + /// Retrieves the [`Node`] referenced by the `label`. 
+ pub fn get_node(&self, label: impl RenderLabel) -> Result<&T, RenderGraphError> + where + T: Node, + { + self.get_node_state(label).and_then(|n| n.node()) + } + + /// Retrieves the [`Node`] referenced by the `label` mutably. + pub fn get_node_mut(&mut self, label: impl RenderLabel) -> Result<&mut T, RenderGraphError> + where + T: Node, + { + self.get_node_state_mut(label).and_then(|n| n.node_mut()) + } + + /// Adds the [`Edge::SlotEdge`] to the graph. This guarantees that the `output_node` + /// is run before the `input_node` and also connects the `output_slot` to the `input_slot`. + /// + /// Fails if any invalid [`RenderLabel`]s or [`SlotLabel`]s are given. + /// + /// # See also + /// + /// - [`add_slot_edge`](Self::add_slot_edge) for an infallible version. + pub fn try_add_slot_edge( + &mut self, + output_node: impl RenderLabel, + output_slot: impl Into, + input_node: impl RenderLabel, + input_slot: impl Into, + ) -> Result<(), RenderGraphError> { + let output_slot = output_slot.into(); + let input_slot = input_slot.into(); + + let output_node = output_node.intern(); + let input_node = input_node.intern(); + + let output_index = self + .get_node_state(output_node)? + .output_slots + .get_slot_index(output_slot.clone()) + .ok_or(RenderGraphError::InvalidOutputNodeSlot(output_slot))?; + let input_index = self + .get_node_state(input_node)? + .input_slots + .get_slot_index(input_slot.clone()) + .ok_or(RenderGraphError::InvalidInputNodeSlot(input_slot))?; + + let edge = Edge::SlotEdge { + output_node, + output_index, + input_node, + input_index, + }; + + self.validate_edge(&edge, EdgeExistence::DoesNotExist)?; + + { + let output_node = self.get_node_state_mut(output_node)?; + output_node.edges.add_output_edge(edge.clone())?; + } + let input_node = self.get_node_state_mut(input_node)?; + input_node.edges.add_input_edge(edge)?; + + Ok(()) + } + + /// Adds the [`Edge::SlotEdge`] to the graph. 
This guarantees that the `output_node` + /// is run before the `input_node` and also connects the `output_slot` to the `input_slot`. + /// + /// # Panics + /// + /// Any invalid [`RenderLabel`]s or [`SlotLabel`]s are given. + /// + /// # See also + /// + /// - [`try_add_slot_edge`](Self::try_add_slot_edge) for a fallible version. + pub fn add_slot_edge( + &mut self, + output_node: impl RenderLabel, + output_slot: impl Into, + input_node: impl RenderLabel, + input_slot: impl Into, + ) { + self.try_add_slot_edge(output_node, output_slot, input_node, input_slot) + .unwrap(); + } + + /// Removes the [`Edge::SlotEdge`] from the graph. If any nodes or slots do not exist then + /// nothing happens. + pub fn remove_slot_edge( + &mut self, + output_node: impl RenderLabel, + output_slot: impl Into, + input_node: impl RenderLabel, + input_slot: impl Into, + ) -> Result<(), RenderGraphError> { + let output_slot = output_slot.into(); + let input_slot = input_slot.into(); + + let output_node = output_node.intern(); + let input_node = input_node.intern(); + + let output_index = self + .get_node_state(output_node)? + .output_slots + .get_slot_index(output_slot.clone()) + .ok_or(RenderGraphError::InvalidOutputNodeSlot(output_slot))?; + let input_index = self + .get_node_state(input_node)? + .input_slots + .get_slot_index(input_slot.clone()) + .ok_or(RenderGraphError::InvalidInputNodeSlot(input_slot))?; + + let edge = Edge::SlotEdge { + output_node, + output_index, + input_node, + input_index, + }; + + self.validate_edge(&edge, EdgeExistence::Exists)?; + + { + let output_node = self.get_node_state_mut(output_node)?; + output_node.edges.remove_output_edge(edge.clone())?; + } + let input_node = self.get_node_state_mut(input_node)?; + input_node.edges.remove_input_edge(edge)?; + + Ok(()) + } + + /// Adds the [`Edge::NodeEdge`] to the graph. This guarantees that the `output_node` + /// is run before the `input_node`. + /// + /// Fails if any invalid [`RenderLabel`] is given. 
+ /// + /// # See also + /// + /// - [`add_node_edge`](Self::add_node_edge) for an infallible version. + pub fn try_add_node_edge( + &mut self, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> Result<(), RenderGraphError> { + let output_node = output_node.intern(); + let input_node = input_node.intern(); + + let edge = Edge::NodeEdge { + output_node, + input_node, + }; + + self.validate_edge(&edge, EdgeExistence::DoesNotExist)?; + + { + let output_node = self.get_node_state_mut(output_node)?; + output_node.edges.add_output_edge(edge.clone())?; + } + let input_node = self.get_node_state_mut(input_node)?; + input_node.edges.add_input_edge(edge)?; + + Ok(()) + } + + /// Adds the [`Edge::NodeEdge`] to the graph. This guarantees that the `output_node` + /// is run before the `input_node`. + /// + /// # Panics + /// + /// Panics if any invalid [`RenderLabel`] is given. + /// + /// # See also + /// + /// - [`try_add_node_edge`](Self::try_add_node_edge) for a fallible version. + pub fn add_node_edge(&mut self, output_node: impl RenderLabel, input_node: impl RenderLabel) { + self.try_add_node_edge(output_node, input_node).unwrap(); + } + + /// Removes the [`Edge::NodeEdge`] from the graph. If either node does not exist then nothing + /// happens. + pub fn remove_node_edge( + &mut self, + output_node: impl RenderLabel, + input_node: impl RenderLabel, + ) -> Result<(), RenderGraphError> { + let output_node = output_node.intern(); + let input_node = input_node.intern(); + + let edge = Edge::NodeEdge { + output_node, + input_node, + }; + + self.validate_edge(&edge, EdgeExistence::Exists)?; + + { + let output_node = self.get_node_state_mut(output_node)?; + output_node.edges.remove_output_edge(edge.clone())?; + } + let input_node = self.get_node_state_mut(input_node)?; + input_node.edges.remove_input_edge(edge)?; + + Ok(()) + } + + /// Verifies that the edge existence is as expected and + /// checks that slot edges are connected correctly. 
+ pub fn validate_edge( + &mut self, + edge: &Edge, + should_exist: EdgeExistence, + ) -> Result<(), RenderGraphError> { + if should_exist == EdgeExistence::Exists && !self.has_edge(edge) { + return Err(RenderGraphError::EdgeDoesNotExist(edge.clone())); + } else if should_exist == EdgeExistence::DoesNotExist && self.has_edge(edge) { + return Err(RenderGraphError::EdgeAlreadyExists(edge.clone())); + } + + match *edge { + Edge::SlotEdge { + output_node, + output_index, + input_node, + input_index, + } => { + let output_node_state = self.get_node_state(output_node)?; + let input_node_state = self.get_node_state(input_node)?; + + let output_slot = output_node_state + .output_slots + .get_slot(output_index) + .ok_or(RenderGraphError::InvalidOutputNodeSlot(SlotLabel::Index( + output_index, + )))?; + let input_slot = input_node_state.input_slots.get_slot(input_index).ok_or( + RenderGraphError::InvalidInputNodeSlot(SlotLabel::Index(input_index)), + )?; + + if let Some(Edge::SlotEdge { + output_node: current_output_node, + .. + }) = input_node_state.edges.input_edges().iter().find(|e| { + if let Edge::SlotEdge { + input_index: current_input_index, + .. + } = e + { + input_index == *current_input_index + } else { + false + } + }) && should_exist == EdgeExistence::DoesNotExist + { + return Err(RenderGraphError::NodeInputSlotAlreadyOccupied { + node: input_node, + input_slot: input_index, + occupied_by_node: *current_output_node, + }); + } + + if output_slot.slot_type != input_slot.slot_type { + return Err(RenderGraphError::MismatchedNodeSlots { + output_node, + output_slot: output_index, + input_node, + input_slot: input_index, + }); + } + } + Edge::NodeEdge { .. } => { /* nothing to validate here */ } + } + + Ok(()) + } + + /// Checks whether the `edge` already exists in the graph. 
+ pub fn has_edge(&self, edge: &Edge) -> bool { + let output_node_state = self.get_node_state(edge.get_output_node()); + let input_node_state = self.get_node_state(edge.get_input_node()); + if let Ok(output_node_state) = output_node_state + && output_node_state.edges.output_edges().contains(edge) + && let Ok(input_node_state) = input_node_state + && input_node_state.edges.input_edges().contains(edge) + { + return true; + } + + false + } + + /// Returns an iterator over the [`NodeStates`](NodeState). + pub fn iter_nodes(&self) -> impl Iterator { + self.nodes.values() + } + + /// Returns an iterator over the [`NodeStates`](NodeState), that allows modifying each value. + pub fn iter_nodes_mut(&mut self) -> impl Iterator { + self.nodes.values_mut() + } + + /// Returns an iterator over the sub graphs. + pub fn iter_sub_graphs(&self) -> impl Iterator { + self.sub_graphs.iter().map(|(name, graph)| (*name, graph)) + } + + /// Returns an iterator over the sub graphs, that allows modifying each value. + pub fn iter_sub_graphs_mut( + &mut self, + ) -> impl Iterator { + self.sub_graphs + .iter_mut() + .map(|(name, graph)| (*name, graph)) + } + + /// Returns an iterator over a tuple of the input edges and the corresponding output nodes + /// for the node referenced by the label. + pub fn iter_node_inputs( + &self, + label: impl RenderLabel, + ) -> Result, RenderGraphError> { + let node = self.get_node_state(label)?; + Ok(node + .edges + .input_edges() + .iter() + .map(|edge| (edge, edge.get_output_node())) + .map(move |(edge, output_node)| (edge, self.get_node_state(output_node).unwrap()))) + } + + /// Returns an iterator over a tuple of the output edges and the corresponding input nodes + /// for the node referenced by the label. 
+ pub fn iter_node_outputs( + &self, + label: impl RenderLabel, + ) -> Result, RenderGraphError> { + let node = self.get_node_state(label)?; + Ok(node + .edges + .output_edges() + .iter() + .map(|edge| (edge, edge.get_input_node())) + .map(move |(edge, input_node)| (edge, self.get_node_state(input_node).unwrap()))) + } + + /// Adds the `sub_graph` with the `label` to the graph. + /// If the label is already present replaces it instead. + pub fn add_sub_graph(&mut self, label: impl RenderSubGraph, sub_graph: RenderGraph) { + self.sub_graphs.insert(label.intern(), sub_graph); + } + + /// Removes the `sub_graph` with the `label` from the graph. + /// If the label does not exist then nothing happens. + pub fn remove_sub_graph(&mut self, label: impl RenderSubGraph) { + self.sub_graphs.remove(&label.intern()); + } + + /// Retrieves the sub graph corresponding to the `label`. + pub fn get_sub_graph(&self, label: impl RenderSubGraph) -> Option<&RenderGraph> { + self.sub_graphs.get(&label.intern()) + } + + /// Retrieves the sub graph corresponding to the `label` mutably. + pub fn get_sub_graph_mut(&mut self, label: impl RenderSubGraph) -> Option<&mut RenderGraph> { + self.sub_graphs.get_mut(&label.intern()) + } + + /// Retrieves the sub graph corresponding to the `label`. + /// + /// # Panics + /// + /// Panics if any invalid subgraph label is given. + /// + /// # See also + /// + /// - [`get_sub_graph`](Self::get_sub_graph) for a fallible version. + pub fn sub_graph(&self, label: impl RenderSubGraph) -> &RenderGraph { + let label = label.intern(); + self.sub_graphs + .get(&label) + .unwrap_or_else(|| panic!("Subgraph {label:?} not found")) + } + + /// Retrieves the sub graph corresponding to the `label` mutably. + /// + /// # Panics + /// + /// Panics if any invalid subgraph label is given. + /// + /// # See also + /// + /// - [`get_sub_graph_mut`](Self::get_sub_graph_mut) for a fallible version. 
+ pub fn sub_graph_mut(&mut self, label: impl RenderSubGraph) -> &mut RenderGraph { + let label = label.intern(); + self.sub_graphs + .get_mut(&label) + .unwrap_or_else(|| panic!("Subgraph {label:?} not found")) + } +} + +impl Debug for RenderGraph { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + for node in self.iter_nodes() { + writeln!(f, "{:?}", node.label)?; + writeln!(f, " in: {:?}", node.input_slots)?; + writeln!(f, " out: {:?}", node.output_slots)?; + } + + Ok(()) + } +} + +/// A [`Node`] which acts as an entry point for a [`RenderGraph`] with custom inputs. +/// It has the same input and output slots and simply copies them over when run. +pub struct GraphInputNode { + inputs: Vec, +} + +impl Node for GraphInputNode { + fn input(&self) -> Vec { + self.inputs.clone() + } + + fn output(&self) -> Vec { + self.inputs.clone() + } + + fn run( + &self, + graph: &mut RenderGraphContext, + _render_context: &mut RenderContext, + _world: &World, + ) -> Result<(), NodeRunError> { + for i in 0..graph.inputs().len() { + let input = graph.inputs()[i].clone(); + graph.set_output(i, input)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::render::{ + render_graph::{ + node::IntoRenderNodeArray, Edge, InternedRenderLabel, Node, NodeRunError, RenderGraph, + RenderGraphContext, RenderGraphError, RenderLabel, SlotInfo, SlotType, + }, + renderer::RenderContext, + }; + use bevy_ecs::world::{FromWorld, World}; + use bevy_platform::collections::HashSet; + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + enum TestLabel { + A, + B, + C, + D, + } + + #[derive(Debug)] + struct TestNode { + inputs: Vec, + outputs: Vec, + } + + impl TestNode { + pub fn new(inputs: usize, outputs: usize) -> Self { + TestNode { + inputs: (0..inputs) + .map(|i| SlotInfo::new(format!("in_{i}"), SlotType::TextureView)) + .collect(), + outputs: (0..outputs) + .map(|i| SlotInfo::new(format!("out_{i}"), SlotType::TextureView)) + .collect(), + } + } + } + + 
impl Node for TestNode { + fn input(&self) -> Vec { + self.inputs.clone() + } + + fn output(&self) -> Vec { + self.outputs.clone() + } + + fn run( + &self, + _: &mut RenderGraphContext, + _: &mut RenderContext, + _: &World, + ) -> Result<(), NodeRunError> { + Ok(()) + } + } + + fn input_nodes(label: impl RenderLabel, graph: &RenderGraph) -> HashSet { + graph + .iter_node_inputs(label) + .unwrap() + .map(|(_edge, node)| node.label) + .collect::>() + } + + fn output_nodes(label: impl RenderLabel, graph: &RenderGraph) -> HashSet { + graph + .iter_node_outputs(label) + .unwrap() + .map(|(_edge, node)| node.label) + .collect::>() + } + + #[test] + fn test_graph_edges() { + let mut graph = RenderGraph::default(); + graph.add_node(TestLabel::A, TestNode::new(0, 1)); + graph.add_node(TestLabel::B, TestNode::new(0, 1)); + graph.add_node(TestLabel::C, TestNode::new(1, 1)); + graph.add_node(TestLabel::D, TestNode::new(1, 0)); + + graph.add_slot_edge(TestLabel::A, "out_0", TestLabel::C, "in_0"); + graph.add_node_edge(TestLabel::B, TestLabel::C); + graph.add_slot_edge(TestLabel::C, 0, TestLabel::D, 0); + + assert!( + input_nodes(TestLabel::A, &graph).is_empty(), + "A has no inputs" + ); + assert_eq!( + output_nodes(TestLabel::A, &graph), + HashSet::from_iter((TestLabel::C,).into_array()), + "A outputs to C" + ); + + assert!( + input_nodes(TestLabel::B, &graph).is_empty(), + "B has no inputs" + ); + assert_eq!( + output_nodes(TestLabel::B, &graph), + HashSet::from_iter((TestLabel::C,).into_array()), + "B outputs to C" + ); + + assert_eq!( + input_nodes(TestLabel::C, &graph), + HashSet::from_iter((TestLabel::A, TestLabel::B).into_array()), + "A and B input to C" + ); + assert_eq!( + output_nodes(TestLabel::C, &graph), + HashSet::from_iter((TestLabel::D,).into_array()), + "C outputs to D" + ); + + assert_eq!( + input_nodes(TestLabel::D, &graph), + HashSet::from_iter((TestLabel::C,).into_array()), + "C inputs to D" + ); + assert!( + output_nodes(TestLabel::D, &graph).is_empty(), + 
"D has no outputs" + ); + } + + #[test] + fn test_get_node_typed() { + struct MyNode { + value: usize, + } + + impl Node for MyNode { + fn run( + &self, + _: &mut RenderGraphContext, + _: &mut RenderContext, + _: &World, + ) -> Result<(), NodeRunError> { + Ok(()) + } + } + + let mut graph = RenderGraph::default(); + + graph.add_node(TestLabel::A, MyNode { value: 42 }); + + let node: &MyNode = graph.get_node(TestLabel::A).unwrap(); + assert_eq!(node.value, 42, "node value matches"); + + let result: Result<&TestNode, RenderGraphError> = graph.get_node(TestLabel::A); + assert_eq!( + result.unwrap_err(), + RenderGraphError::WrongNodeType, + "expect a wrong node type error" + ); + } + + #[test] + fn test_slot_already_occupied() { + let mut graph = RenderGraph::default(); + + graph.add_node(TestLabel::A, TestNode::new(0, 1)); + graph.add_node(TestLabel::B, TestNode::new(0, 1)); + graph.add_node(TestLabel::C, TestNode::new(1, 1)); + + graph.add_slot_edge(TestLabel::A, 0, TestLabel::C, 0); + assert_eq!( + graph.try_add_slot_edge(TestLabel::B, 0, TestLabel::C, 0), + Err(RenderGraphError::NodeInputSlotAlreadyOccupied { + node: TestLabel::C.intern(), + input_slot: 0, + occupied_by_node: TestLabel::A.intern(), + }), + "Adding to a slot that is already occupied should return an error" + ); + } + + #[test] + fn test_edge_already_exists() { + let mut graph = RenderGraph::default(); + + graph.add_node(TestLabel::A, TestNode::new(0, 1)); + graph.add_node(TestLabel::B, TestNode::new(1, 0)); + + graph.add_slot_edge(TestLabel::A, 0, TestLabel::B, 0); + assert_eq!( + graph.try_add_slot_edge(TestLabel::A, 0, TestLabel::B, 0), + Err(RenderGraphError::EdgeAlreadyExists(Edge::SlotEdge { + output_node: TestLabel::A.intern(), + output_index: 0, + input_node: TestLabel::B.intern(), + input_index: 0, + })), + "Adding to a duplicate edge should return an error" + ); + } + + #[test] + fn test_add_node_edges() { + struct SimpleNode; + impl Node for SimpleNode { + fn run( + &self, + _graph: &mut 
RenderGraphContext, + _render_context: &mut RenderContext, + _world: &World, + ) -> Result<(), NodeRunError> { + Ok(()) + } + } + impl FromWorld for SimpleNode { + fn from_world(_world: &mut World) -> Self { + Self + } + } + + let mut graph = RenderGraph::default(); + graph.add_node(TestLabel::A, SimpleNode); + graph.add_node(TestLabel::B, SimpleNode); + graph.add_node(TestLabel::C, SimpleNode); + + graph.add_node_edges((TestLabel::A, TestLabel::B, TestLabel::C)); + + assert_eq!( + output_nodes(TestLabel::A, &graph), + HashSet::from_iter((TestLabel::B,).into_array()), + "A -> B" + ); + assert_eq!( + input_nodes(TestLabel::B, &graph), + HashSet::from_iter((TestLabel::A,).into_array()), + "A -> B" + ); + assert_eq!( + output_nodes(TestLabel::B, &graph), + HashSet::from_iter((TestLabel::C,).into_array()), + "B -> C" + ); + assert_eq!( + input_nodes(TestLabel::C, &graph), + HashSet::from_iter((TestLabel::B,).into_array()), + "B -> C" + ); + } +} diff --git a/crates/libmarathon/src/render/render_graph/mod.rs b/crates/libmarathon/src/render/render_graph/mod.rs new file mode 100644 index 0000000..6f98a30 --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/mod.rs @@ -0,0 +1,56 @@ +mod app; +mod camera_driver_node; +mod context; +mod edge; +mod graph; +mod node; +mod node_slot; + +pub use app::*; +pub use camera_driver_node::*; +pub use context::*; +pub use edge::*; +pub use graph::*; +pub use node::*; +pub use node_slot::*; + +use thiserror::Error; + +#[derive(Error, Debug, Eq, PartialEq)] +pub enum RenderGraphError { + #[error("node {0:?} does not exist")] + InvalidNode(InternedRenderLabel), + #[error("output node slot does not exist")] + InvalidOutputNodeSlot(SlotLabel), + #[error("input node slot does not exist")] + InvalidInputNodeSlot(SlotLabel), + #[error("node does not match the given type")] + WrongNodeType, + #[error("attempted to connect output slot {output_slot} from node {output_node:?} to incompatible input slot {input_slot} from node 
{input_node:?}")] + MismatchedNodeSlots { + output_node: InternedRenderLabel, + output_slot: usize, + input_node: InternedRenderLabel, + input_slot: usize, + }, + #[error("attempted to add an edge that already exists")] + EdgeAlreadyExists(Edge), + #[error("attempted to remove an edge that does not exist")] + EdgeDoesNotExist(Edge), + #[error("node {node:?} has an unconnected input slot {input_slot}")] + UnconnectedNodeInputSlot { + node: InternedRenderLabel, + input_slot: usize, + }, + #[error("node {node:?} has an unconnected output slot {output_slot}")] + UnconnectedNodeOutputSlot { + node: InternedRenderLabel, + output_slot: usize, + }, + #[error("node {node:?} input slot {input_slot} already occupied by {occupied_by_node:?}")] + NodeInputSlotAlreadyOccupied { + node: InternedRenderLabel, + input_slot: usize, + occupied_by_node: InternedRenderLabel, + }, +} diff --git a/crates/libmarathon/src/render/render_graph/node.rs b/crates/libmarathon/src/render/render_graph/node.rs new file mode 100644 index 0000000..df3ee5d --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/node.rs @@ -0,0 +1,420 @@ +use crate::render::{ + render_graph::{ + Edge, InputSlotError, OutputSlotError, RenderGraphContext, RenderGraphError, + RunSubGraphError, SlotInfo, SlotInfos, + }, + render_phase::DrawError, + renderer::RenderContext, +}; +pub use bevy_ecs::label::DynEq; +use bevy_ecs::{ + define_label, + intern::Interned, + query::{QueryItem, QueryState, ReadOnlyQueryData}, + world::{FromWorld, World}, +}; +use core::fmt::Debug; +use downcast_rs::{impl_downcast, Downcast}; +use thiserror::Error; +use variadics_please::all_tuples_with_size; + +pub use macros::RenderLabel; + +use super::{InternedRenderSubGraph, RenderSubGraph}; + +define_label!( + #[diagnostic::on_unimplemented( + note = "consider annotating `{Self}` with `#[derive(RenderLabel)]`" + )] + /// A strongly-typed class of labels used to identify a [`Node`] in a render graph. 
+ RenderLabel, + RENDER_LABEL_INTERNER +); + +/// A shorthand for `Interned`. +pub type InternedRenderLabel = Interned; + +pub trait IntoRenderNodeArray { + fn into_array(self) -> [InternedRenderLabel; N]; +} + +macro_rules! impl_render_label_tuples { + ($N: expr, $(#[$meta:meta])* $(($T: ident, $I: ident)),*) => { + $(#[$meta])* + impl<$($T: RenderLabel),*> IntoRenderNodeArray<$N> for ($($T,)*) { + #[inline] + fn into_array(self) -> [InternedRenderLabel; $N] { + let ($($I,)*) = self; + [$($I.intern(), )*] + } + } + } +} + +all_tuples_with_size!( + #[doc(fake_variadic)] + impl_render_label_tuples, + 1, + 32, + T, + l +); + +/// A render node that can be added to a [`RenderGraph`](super::RenderGraph). +/// +/// Nodes are the fundamental part of the graph and used to extend its functionality, by +/// generating draw calls and/or running subgraphs. +/// They are added via the `render_graph::add_node(my_node)` method. +/// +/// To determine their position in the graph and ensure that all required dependencies (inputs) +/// are already executed, [`Edges`](Edge) are used. +/// +/// A node can produce outputs used as dependencies by other nodes. +/// Those inputs and outputs are called slots and are the default way of passing render data +/// inside the graph. For more information see [`SlotType`](super::SlotType). +pub trait Node: Downcast + Send + Sync + 'static { + /// Specifies the required input slots for this node. + /// They will then be available during the run method inside the [`RenderGraphContext`]. + fn input(&self) -> Vec { + Vec::new() + } + + /// Specifies the produced output slots for this node. + /// They can then be passed one inside [`RenderGraphContext`] during the run method. + fn output(&self) -> Vec { + Vec::new() + } + + /// Updates internal node state using the current render [`World`] prior to the run method. 
+ fn update(&mut self, _world: &mut World) {} + + /// Runs the graph node logic, issues draw calls, updates the output slots and + /// optionally queues up subgraphs for execution. The graph data, input and output values are + /// passed via the [`RenderGraphContext`]. + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError>; +} + +impl_downcast!(Node); + +#[derive(Error, Debug, Eq, PartialEq)] +pub enum NodeRunError { + #[error("encountered an input slot error")] + InputSlotError(#[from] InputSlotError), + #[error("encountered an output slot error")] + OutputSlotError(#[from] OutputSlotError), + #[error("encountered an error when running a sub-graph")] + RunSubGraphError(#[from] RunSubGraphError), + #[error("encountered an error when executing draw command")] + DrawError(#[from] DrawError), +} + +/// A collection of input and output [`Edges`](Edge) for a [`Node`]. +#[derive(Debug)] +pub struct Edges { + label: InternedRenderLabel, + input_edges: Vec, + output_edges: Vec, +} + +impl Edges { + /// Returns all "input edges" (edges going "in") for this node . + #[inline] + pub fn input_edges(&self) -> &[Edge] { + &self.input_edges + } + + /// Returns all "output edges" (edges going "out") for this node . + #[inline] + pub fn output_edges(&self) -> &[Edge] { + &self.output_edges + } + + /// Returns this node's label. + #[inline] + pub fn label(&self) -> InternedRenderLabel { + self.label + } + + /// Adds an edge to the `input_edges` if it does not already exist. + pub(crate) fn add_input_edge(&mut self, edge: Edge) -> Result<(), RenderGraphError> { + if self.has_input_edge(&edge) { + return Err(RenderGraphError::EdgeAlreadyExists(edge)); + } + self.input_edges.push(edge); + Ok(()) + } + + /// Removes an edge from the `input_edges` if it exists. 
+ pub(crate) fn remove_input_edge(&mut self, edge: Edge) -> Result<(), RenderGraphError> { + if let Some(index) = self.input_edges.iter().position(|e| *e == edge) { + self.input_edges.swap_remove(index); + Ok(()) + } else { + Err(RenderGraphError::EdgeDoesNotExist(edge)) + } + } + + /// Adds an edge to the `output_edges` if it does not already exist. + pub(crate) fn add_output_edge(&mut self, edge: Edge) -> Result<(), RenderGraphError> { + if self.has_output_edge(&edge) { + return Err(RenderGraphError::EdgeAlreadyExists(edge)); + } + self.output_edges.push(edge); + Ok(()) + } + + /// Removes an edge from the `output_edges` if it exists. + pub(crate) fn remove_output_edge(&mut self, edge: Edge) -> Result<(), RenderGraphError> { + if let Some(index) = self.output_edges.iter().position(|e| *e == edge) { + self.output_edges.swap_remove(index); + Ok(()) + } else { + Err(RenderGraphError::EdgeDoesNotExist(edge)) + } + } + + /// Checks whether the input edge already exists. + pub fn has_input_edge(&self, edge: &Edge) -> bool { + self.input_edges.contains(edge) + } + + /// Checks whether the output edge already exists. + pub fn has_output_edge(&self, edge: &Edge) -> bool { + self.output_edges.contains(edge) + } + + /// Searches the `input_edges` for a [`Edge::SlotEdge`], + /// which `input_index` matches the `index`; + pub fn get_input_slot_edge(&self, index: usize) -> Result<&Edge, RenderGraphError> { + self.input_edges + .iter() + .find(|e| { + if let Edge::SlotEdge { input_index, .. } = e { + *input_index == index + } else { + false + } + }) + .ok_or(RenderGraphError::UnconnectedNodeInputSlot { + input_slot: index, + node: self.label, + }) + } + + /// Searches the `output_edges` for a [`Edge::SlotEdge`], + /// which `output_index` matches the `index`; + pub fn get_output_slot_edge(&self, index: usize) -> Result<&Edge, RenderGraphError> { + self.output_edges + .iter() + .find(|e| { + if let Edge::SlotEdge { output_index, .. 
} = e { + *output_index == index + } else { + false + } + }) + .ok_or(RenderGraphError::UnconnectedNodeOutputSlot { + output_slot: index, + node: self.label, + }) + } +} + +/// The internal representation of a [`Node`], with all data required +/// by the [`RenderGraph`](super::RenderGraph). +/// +/// The `input_slots` and `output_slots` are provided by the `node`. +pub struct NodeState { + pub label: InternedRenderLabel, + /// The name of the type that implements [`Node`]. + pub type_name: &'static str, + pub node: Box, + pub input_slots: SlotInfos, + pub output_slots: SlotInfos, + pub edges: Edges, +} + +impl Debug for NodeState { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + writeln!(f, "{:?} ({})", self.label, self.type_name) + } +} + +impl NodeState { + /// Creates an [`NodeState`] without edges, but the `input_slots` and `output_slots` + /// are provided by the `node`. + pub fn new(label: InternedRenderLabel, node: T) -> Self + where + T: Node, + { + NodeState { + label, + input_slots: node.input().into(), + output_slots: node.output().into(), + node: Box::new(node), + type_name: core::any::type_name::(), + edges: Edges { + label, + input_edges: Vec::new(), + output_edges: Vec::new(), + }, + } + } + + /// Retrieves the [`Node`]. + pub fn node(&self) -> Result<&T, RenderGraphError> + where + T: Node, + { + self.node + .downcast_ref::() + .ok_or(RenderGraphError::WrongNodeType) + } + + /// Retrieves the [`Node`] mutably. + pub fn node_mut(&mut self) -> Result<&mut T, RenderGraphError> + where + T: Node, + { + self.node + .downcast_mut::() + .ok_or(RenderGraphError::WrongNodeType) + } + + /// Validates that each input slot corresponds to an input edge. + pub fn validate_input_slots(&self) -> Result<(), RenderGraphError> { + for i in 0..self.input_slots.len() { + self.edges.get_input_slot_edge(i)?; + } + + Ok(()) + } + + /// Validates that each output slot corresponds to an output edge. 
+ pub fn validate_output_slots(&self) -> Result<(), RenderGraphError> { + for i in 0..self.output_slots.len() { + self.edges.get_output_slot_edge(i)?; + } + + Ok(()) + } +} + +/// A [`Node`] without any inputs, outputs and subgraphs, which does nothing when run. +/// Used (as a label) to bundle multiple dependencies into one inside +/// the [`RenderGraph`](super::RenderGraph). +#[derive(Default)] +pub struct EmptyNode; + +impl Node for EmptyNode { + fn run( + &self, + _graph: &mut RenderGraphContext, + _render_context: &mut RenderContext, + _world: &World, + ) -> Result<(), NodeRunError> { + Ok(()) + } +} + +/// A [`RenderGraph`](super::RenderGraph) [`Node`] that runs the configured subgraph once. +/// This makes it easier to insert sub-graph runs into a graph. +pub struct RunGraphOnViewNode { + sub_graph: InternedRenderSubGraph, +} + +impl RunGraphOnViewNode { + pub fn new(sub_graph: T) -> Self { + Self { + sub_graph: sub_graph.intern(), + } + } +} + +impl Node for RunGraphOnViewNode { + fn run( + &self, + graph: &mut RenderGraphContext, + _render_context: &mut RenderContext, + _world: &World, + ) -> Result<(), NodeRunError> { + graph.run_sub_graph(self.sub_graph, vec![], Some(graph.view_entity()))?; + Ok(()) + } +} + +/// This trait should be used instead of the [`Node`] trait when making a render node that runs on a view. +/// +/// It is intended to be used with [`ViewNodeRunner`] +pub trait ViewNode { + /// The query that will be used on the view entity. + /// It is guaranteed to run on the view entity, so there's no need for a filter + type ViewQuery: ReadOnlyQueryData; + + /// Updates internal node state using the current render [`World`] prior to the run method. + fn update(&mut self, _world: &mut World) {} + + /// Runs the graph node logic, issues draw calls, updates the output slots and + /// optionally queues up subgraphs for execution. The graph data, input and output values are + /// passed via the [`RenderGraphContext`]. 
+ fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + view_query: QueryItem<'w, '_, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError>; +} + +/// This [`Node`] can be used to run any [`ViewNode`]. +/// It will take care of updating the view query in `update()` and running the query in `run()`. +/// +/// This [`Node`] exists to help reduce boilerplate when making a render node that runs on a view. +pub struct ViewNodeRunner { + view_query: QueryState, + node: N, +} + +impl ViewNodeRunner { + pub fn new(node: N, world: &mut World) -> Self { + Self { + view_query: world.query_filtered(), + node, + } + } +} + +impl FromWorld for ViewNodeRunner { + fn from_world(world: &mut World) -> Self { + Self::new(N::from_world(world), world) + } +} + +impl Node for ViewNodeRunner +where + T: ViewNode + Send + Sync + 'static, +{ + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + self.node.update(world); + } + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let Ok(view) = self.view_query.get_manual(world, graph.view_entity()) else { + return Ok(()); + }; + + ViewNode::run(&self.node, graph, render_context, view, world)?; + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/render_graph/node_slot.rs b/crates/libmarathon/src/render/render_graph/node_slot.rs new file mode 100644 index 0000000..8f0bc6b --- /dev/null +++ b/crates/libmarathon/src/render/render_graph/node_slot.rs @@ -0,0 +1,165 @@ +use std::borrow::Cow; +use bevy_ecs::entity::Entity; +use core::fmt; +use derive_more::derive::From; + +use crate::render::render_resource::{Buffer, Sampler, TextureView}; + +/// A value passed between render [`Nodes`](super::Node). +/// Corresponds to the [`SlotType`] specified in the [`RenderGraph`](super::RenderGraph). 
+/// +/// Slots can have four different types of values: +/// [`Buffer`], [`TextureView`], [`Sampler`] and [`Entity`]. +/// +/// These values do not contain the actual render data, but only the ids to retrieve them. +#[derive(Debug, Clone, From)] +pub enum SlotValue { + /// A GPU-accessible [`Buffer`]. + Buffer(Buffer), + /// A [`TextureView`] describes a texture used in a pipeline. + TextureView(TextureView), + /// A texture [`Sampler`] defines how a pipeline will sample from a [`TextureView`]. + Sampler(Sampler), + /// An entity from the ECS. + Entity(Entity), +} + +impl SlotValue { + /// Returns the [`SlotType`] of this value. + pub fn slot_type(&self) -> SlotType { + match self { + SlotValue::Buffer(_) => SlotType::Buffer, + SlotValue::TextureView(_) => SlotType::TextureView, + SlotValue::Sampler(_) => SlotType::Sampler, + SlotValue::Entity(_) => SlotType::Entity, + } + } +} + +/// Describes the render resources created (output) or used (input) by +/// the render [`Nodes`](super::Node). +/// +/// This should not be confused with [`SlotValue`], which actually contains the passed data. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum SlotType { + /// A GPU-accessible [`Buffer`]. + Buffer, + /// A [`TextureView`] describes a texture used in a pipeline. + TextureView, + /// A texture [`Sampler`] defines how a pipeline will sample from a [`TextureView`]. + Sampler, + /// An entity from the ECS. + Entity, +} + +impl fmt::Display for SlotType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + SlotType::Buffer => "Buffer", + SlotType::TextureView => "TextureView", + SlotType::Sampler => "Sampler", + SlotType::Entity => "Entity", + }; + + f.write_str(s) + } +} + +/// A [`SlotLabel`] is used to reference a slot by either its name or index +/// inside the [`RenderGraph`](super::RenderGraph). 
+#[derive(Debug, Clone, Eq, PartialEq, From)] +pub enum SlotLabel { + Index(usize), + Name(Cow<'static, str>), +} + +impl From<&SlotLabel> for SlotLabel { + fn from(value: &SlotLabel) -> Self { + value.clone() + } +} + +impl From for SlotLabel { + fn from(value: String) -> Self { + SlotLabel::Name(value.into()) + } +} + +impl From<&'static str> for SlotLabel { + fn from(value: &'static str) -> Self { + SlotLabel::Name(value.into()) + } +} + +/// The internal representation of a slot, which specifies its [`SlotType`] and name. +#[derive(Clone, Debug)] +pub struct SlotInfo { + pub name: Cow<'static, str>, + pub slot_type: SlotType, +} + +impl SlotInfo { + pub fn new(name: impl Into>, slot_type: SlotType) -> Self { + SlotInfo { + name: name.into(), + slot_type, + } + } +} + +/// A collection of input or output [`SlotInfos`](SlotInfo) for +/// a [`NodeState`](super::NodeState). +#[derive(Default, Debug)] +pub struct SlotInfos { + slots: Vec, +} + +impl> From for SlotInfos { + fn from(slots: T) -> Self { + SlotInfos { + slots: slots.into_iter().collect(), + } + } +} + +impl SlotInfos { + /// Returns the count of slots. + #[inline] + pub fn len(&self) -> usize { + self.slots.len() + } + + /// Returns true if there are no slots. + #[inline] + pub fn is_empty(&self) -> bool { + self.slots.is_empty() + } + + /// Retrieves the [`SlotInfo`] for the provided label. + pub fn get_slot(&self, label: impl Into) -> Option<&SlotInfo> { + let label = label.into(); + let index = self.get_slot_index(label)?; + self.slots.get(index) + } + + /// Retrieves the [`SlotInfo`] for the provided label mutably. + pub fn get_slot_mut(&mut self, label: impl Into) -> Option<&mut SlotInfo> { + let label = label.into(); + let index = self.get_slot_index(label)?; + self.slots.get_mut(index) + } + + /// Retrieves the index (inside input or output slots) of the slot for the provided label. 
+ pub fn get_slot_index(&self, label: impl Into) -> Option { + let label = label.into(); + match label { + SlotLabel::Index(index) => Some(index), + SlotLabel::Name(ref name) => self.slots.iter().position(|s| s.name == *name), + } + } + + /// Returns an iterator over the slot infos. + pub fn iter(&self) -> impl Iterator { + self.slots.iter() + } +} diff --git a/crates/libmarathon/src/render/render_phase/draw.rs b/crates/libmarathon/src/render/render_phase/draw.rs new file mode 100644 index 0000000..5136c72 --- /dev/null +++ b/crates/libmarathon/src/render/render_phase/draw.rs @@ -0,0 +1,398 @@ +use crate::render::render_phase::{PhaseItem, TrackedRenderPass}; +use bevy_app::{App, SubApp}; +use bevy_ecs::{ + entity::Entity, + query::{QueryEntityError, QueryState, ROQueryItem, ReadOnlyQueryData}, + resource::Resource, + system::{ReadOnlySystemParam, SystemParam, SystemParamItem, SystemState}, + world::World, +}; +use bevy_utils::TypeIdMap; +use core::{any::TypeId, fmt::Debug, hash::Hash}; +use std::sync::{PoisonError, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use thiserror::Error; +use variadics_please::all_tuples; + +/// A draw function used to draw [`PhaseItem`]s. +/// +/// The draw function can retrieve and query the required ECS data from the render world. +/// +/// This trait can either be implemented directly or implicitly composed out of multiple modular +/// [`RenderCommand`]s. For more details and an example see the [`RenderCommand`] documentation. +pub trait Draw: Send + Sync + 'static { + /// Prepares the draw function to be used. This is called once and only once before the phase + /// begins. There may be zero or more [`draw`](Draw::draw) calls following a call to this function. + /// Implementing this is optional. + #[expect( + unused_variables, + reason = "The parameters here are intentionally unused by the default implementation; however, putting underscores here will result in the underscores being copied by rust-analyzer's tab completion." 
+ )] + fn prepare(&mut self, world: &'_ World) {} + + /// Draws a [`PhaseItem`] by issuing zero or more `draw` calls via the [`TrackedRenderPass`]. + fn draw<'w>( + &mut self, + world: &'w World, + pass: &mut TrackedRenderPass<'w>, + view: Entity, + item: &P, + ) -> Result<(), DrawError>; +} + +#[derive(Error, Debug, PartialEq, Eq)] +pub enum DrawError { + #[error("Failed to execute render command {0:?}")] + RenderCommandFailure(&'static str), + #[error("Failed to get execute view query")] + InvalidViewQuery, + #[error("View entity not found")] + ViewEntityNotFound, +} + +// TODO: make this generic? +/// An identifier for a [`Draw`] function stored in [`DrawFunctions`]. +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct DrawFunctionId(u32); + +/// Stores all [`Draw`] functions for the [`PhaseItem`] type. +/// +/// For retrieval, the [`Draw`] functions are mapped to their respective [`TypeId`]s. +pub struct DrawFunctionsInternal { + pub draw_functions: Vec>>, + pub indices: TypeIdMap, +} + +impl DrawFunctionsInternal

{ + /// Prepares all draw function. This is called once and only once before the phase begins. + pub fn prepare(&mut self, world: &World) { + for function in &mut self.draw_functions { + function.prepare(world); + } + } + + /// Adds the [`Draw`] function and maps it to its own type. + pub fn add>(&mut self, draw_function: T) -> DrawFunctionId { + self.add_with::(draw_function) + } + + /// Adds the [`Draw`] function and maps it to the type `T` + pub fn add_with>(&mut self, draw_function: D) -> DrawFunctionId { + let id = DrawFunctionId(self.draw_functions.len().try_into().unwrap()); + self.draw_functions.push(Box::new(draw_function)); + self.indices.insert(TypeId::of::(), id); + id + } + + /// Retrieves the [`Draw`] function corresponding to the `id` mutably. + pub fn get_mut(&mut self, id: DrawFunctionId) -> Option<&mut dyn Draw

> { + self.draw_functions.get_mut(id.0 as usize).map(|f| &mut **f) + } + + /// Retrieves the id of the [`Draw`] function corresponding to their associated type `T`. + pub fn get_id(&self) -> Option { + self.indices.get(&TypeId::of::()).copied() + } + + /// Retrieves the id of the [`Draw`] function corresponding to their associated type `T`. + /// + /// Fallible wrapper for [`Self::get_id()`] + /// + /// ## Panics + /// If the id doesn't exist, this function will panic. + pub fn id(&self) -> DrawFunctionId { + self.get_id::().unwrap_or_else(|| { + panic!( + "Draw function {} not found for {}", + core::any::type_name::(), + core::any::type_name::

() + ) + }) + } +} + +/// Stores all draw functions for the [`PhaseItem`] type hidden behind a reader-writer lock. +/// +/// To access them the [`DrawFunctions::read`] and [`DrawFunctions::write`] methods are used. +#[derive(Resource)] +pub struct DrawFunctions { + internal: RwLock>, +} + +impl Default for DrawFunctions

{ + fn default() -> Self { + Self { + internal: RwLock::new(DrawFunctionsInternal { + draw_functions: Vec::new(), + indices: Default::default(), + }), + } + } +} + +impl DrawFunctions

{ + /// Accesses the draw functions in read mode. + pub fn read(&self) -> RwLockReadGuard<'_, DrawFunctionsInternal

> { + self.internal.read().unwrap_or_else(PoisonError::into_inner) + } + + /// Accesses the draw functions in write mode. + pub fn write(&self) -> RwLockWriteGuard<'_, DrawFunctionsInternal

> { + self.internal + .write() + .unwrap_or_else(PoisonError::into_inner) + } +} + +/// [`RenderCommand`]s are modular standardized pieces of render logic that can be composed into +/// [`Draw`] functions. +/// +/// To turn a stateless render command into a usable draw function it has to be wrapped by a +/// [`RenderCommandState`]. +/// This is done automatically when registering a render command as a [`Draw`] function via the +/// [`AddRenderCommand::add_render_command`] method. +/// +/// Compared to the draw function the required ECS data is fetched automatically +/// (by the [`RenderCommandState`]) from the render world. +/// Therefore the three types [`Param`](RenderCommand::Param), +/// [`ViewQuery`](RenderCommand::ViewQuery) and +/// [`ItemQuery`](RenderCommand::ItemQuery) are used. +/// They specify which information is required to execute the render command. +/// +/// Multiple render commands can be combined together by wrapping them in a tuple. +/// +/// # Example +/// +/// The `DrawMaterial` draw function is created from the following render command +/// tuple. Const generics are used to set specific bind group locations: +/// +/// ``` +/// # use crate::render::render_phase::SetItemPipeline; +/// # struct SetMeshViewBindGroup; +/// # struct SetMeshViewBindingArrayBindGroup; +/// # struct SetMeshBindGroup; +/// # struct SetMaterialBindGroup(std::marker::PhantomData); +/// # struct DrawMesh; +/// pub type DrawMaterial = ( +/// SetItemPipeline, +/// SetMeshViewBindGroup<0>, +/// SetMeshViewBindingArrayBindGroup<1>, +/// SetMeshBindGroup<2>, +/// SetMaterialBindGroup, +/// DrawMesh, +/// ); +/// ``` +pub trait RenderCommand { + /// Specifies the general ECS data (e.g. resources) required by [`RenderCommand::render`]. 
+ /// + /// When fetching resources, note that, due to lifetime limitations of the `Deref` trait, + /// [`SRes::into_inner`] must be called on each [`SRes`] reference in the + /// [`RenderCommand::render`] method, instead of being automatically dereferenced as is the + /// case in normal `systems`. + /// + /// All parameters have to be read only. + /// + /// [`SRes`]: bevy_ecs::system::lifetimeless::SRes + /// [`SRes::into_inner`]: bevy_ecs::system::lifetimeless::SRes::into_inner + type Param: SystemParam + 'static; + /// Specifies the ECS data of the view entity required by [`RenderCommand::render`]. + /// + /// The view entity refers to the camera, or shadow-casting light, etc. from which the phase + /// item will be rendered from. + /// All components have to be accessed read only. + type ViewQuery: ReadOnlyQueryData; + /// Specifies the ECS data of the item entity required by [`RenderCommand::render`]. + /// + /// The item is the entity that will be rendered for the corresponding view. + /// All components have to be accessed read only. + /// + /// For efficiency reasons, Bevy doesn't always extract entities to the + /// render world; for instance, entities that simply consist of meshes are + /// often not extracted. If the entity doesn't exist in the render world, + /// the supplied query data will be `None`. + type ItemQuery: ReadOnlyQueryData; + + /// Renders a [`PhaseItem`] by recording commands (e.g. setting pipelines, binding bind groups, + /// issuing draw calls, etc.) via the [`TrackedRenderPass`]. + fn render<'w>( + item: &P, + view: ROQueryItem<'w, '_, Self::ViewQuery>, + entity: Option>, + param: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult; +} + +/// The result of a [`RenderCommand`]. +#[derive(Debug)] +pub enum RenderCommandResult { + Success, + Skip, + Failure(&'static str), +} + +macro_rules! 
render_command_tuple_impl { + ($(#[$meta:meta])* $(($name: ident, $view: ident, $entity: ident)),*) => { + $(#[$meta])* + impl),*> RenderCommand

for ($($name,)*) { + type Param = ($($name::Param,)*); + type ViewQuery = ($($name::ViewQuery,)*); + type ItemQuery = ($($name::ItemQuery,)*); + + #[expect( + clippy::allow_attributes, + reason = "We are in a macro; as such, `non_snake_case` may not always lint." + )] + #[allow( + non_snake_case, + reason = "Parameter and variable names are provided by the macro invocation, not by us." + )] + fn render<'w>( + _item: &P, + ($($view,)*): ROQueryItem<'w, '_, Self::ViewQuery>, + maybe_entities: Option>, + ($($name,)*): SystemParamItem<'w, '_, Self::Param>, + _pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + match maybe_entities { + None => { + $( + match $name::render(_item, $view, None, $name, _pass) { + RenderCommandResult::Skip => return RenderCommandResult::Skip, + RenderCommandResult::Failure(reason) => return RenderCommandResult::Failure(reason), + _ => {}, + } + )* + } + Some(($($entity,)*)) => { + $( + match $name::render(_item, $view, Some($entity), $name, _pass) { + RenderCommandResult::Skip => return RenderCommandResult::Skip, + RenderCommandResult::Failure(reason) => return RenderCommandResult::Failure(reason), + _ => {}, + } + )* + } + } + RenderCommandResult::Success + } + } + }; +} + +all_tuples!( + #[doc(fake_variadic)] + render_command_tuple_impl, + 0, + 15, + C, + V, + E +); + +/// Wraps a [`RenderCommand`] into a state so that it can be used as a [`Draw`] function. +/// +/// The [`RenderCommand::Param`], [`RenderCommand::ViewQuery`] and +/// [`RenderCommand::ItemQuery`] are fetched from the ECS and passed to the command. +pub struct RenderCommandState> { + state: SystemState, + view: QueryState, + entity: QueryState, +} + +impl> RenderCommandState { + /// Creates a new [`RenderCommandState`] for the [`RenderCommand`]. + pub fn new(world: &mut World) -> Self { + Self { + state: SystemState::new(world), + view: world.query(), + entity: world.query(), + } + } +} + +impl + Send + Sync + 'static> Draw

for RenderCommandState +where + C::Param: ReadOnlySystemParam, +{ + /// Prepares the render command to be used. This is called once and only once before the phase + /// begins. There may be zero or more [`draw`](RenderCommandState::draw) calls following a call to this function. + fn prepare(&mut self, world: &'_ World) { + self.view.update_archetypes(world); + self.entity.update_archetypes(world); + } + + /// Fetches the ECS parameters for the wrapped [`RenderCommand`] and then renders it. + fn draw<'w>( + &mut self, + world: &'w World, + pass: &mut TrackedRenderPass<'w>, + view: Entity, + item: &P, + ) -> Result<(), DrawError> { + let param = self.state.get(world); + let view = match self.view.get_manual(world, view) { + Ok(view) => view, + Err(err) => match err { + QueryEntityError::EntityDoesNotExist(_) => { + return Err(DrawError::ViewEntityNotFound) + } + QueryEntityError::QueryDoesNotMatch(_, _) + | QueryEntityError::AliasedMutability(_) => { + return Err(DrawError::InvalidViewQuery) + } + }, + }; + + let entity = self.entity.get_manual(world, item.entity()).ok(); + match C::render(item, view, entity, param, pass) { + RenderCommandResult::Success | RenderCommandResult::Skip => Ok(()), + RenderCommandResult::Failure(reason) => Err(DrawError::RenderCommandFailure(reason)), + } + } +} + +/// Registers a [`RenderCommand`] as a [`Draw`] function. +/// They are stored inside the [`DrawFunctions`] resource of the app. +pub trait AddRenderCommand { + /// Adds the [`RenderCommand`] for the specified render phase to the app. 
+ fn add_render_command + Send + Sync + 'static>( + &mut self, + ) -> &mut Self + where + C::Param: ReadOnlySystemParam; +} + +impl AddRenderCommand for SubApp { + fn add_render_command + Send + Sync + 'static>( + &mut self, + ) -> &mut Self + where + C::Param: ReadOnlySystemParam, + { + let draw_function = RenderCommandState::::new(self.world_mut()); + let draw_functions = self + .world() + .get_resource::>() + .unwrap_or_else(|| { + panic!( + "DrawFunctions<{}> must be added to the world as a resource \ + before adding render commands to it", + core::any::type_name::

(), + ); + }); + draw_functions.write().add_with::(draw_function); + self + } +} + +impl AddRenderCommand for App { + fn add_render_command + Send + Sync + 'static>( + &mut self, + ) -> &mut Self + where + C::Param: ReadOnlySystemParam, + { + SubApp::add_render_command::(self.main_mut()); + self + } +} diff --git a/crates/libmarathon/src/render/render_phase/draw_state.rs b/crates/libmarathon/src/render/render_phase/draw_state.rs new file mode 100644 index 0000000..96919db --- /dev/null +++ b/crates/libmarathon/src/render/render_phase/draw_state.rs @@ -0,0 +1,682 @@ +use crate::render::{ + diagnostic::internal::{Pass, PassKind, WritePipelineStatistics, WriteTimestamp}, + render_resource::{ + BindGroup, BindGroupId, Buffer, BufferId, BufferSlice, RenderPipeline, RenderPipelineId, + ShaderStages, + }, + renderer::RenderDevice, +}; +use bevy_camera::Viewport; +use bevy_color::LinearRgba; +use bevy_utils::default; +use core::ops::Range; +use wgpu::{IndexFormat, QuerySet, RenderPass}; + +#[cfg(feature = "detailed_trace")] +use tracing::trace; + +/// Tracks the state of a [`TrackedRenderPass`]. +/// +/// This is used to skip redundant operations on the [`TrackedRenderPass`] (e.g. setting an already +/// set pipeline, binding an already bound bind group). These operations can otherwise be fairly +/// costly due to IO to the GPU, so deduplicating these calls results in a speedup. +#[derive(Debug, Default)] +struct DrawState { + pipeline: Option, + bind_groups: Vec<(Option, Vec)>, + /// List of vertex buffers by [`BufferId`], offset, and size. See [`DrawState::buffer_slice_key`] + vertex_buffers: Vec>, + index_buffer: Option<(BufferId, u64, IndexFormat)>, + + /// Stores whether this state is populated or empty for quick state invalidation + stores_state: bool, +} + +impl DrawState { + /// Marks the `pipeline` as bound. + fn set_pipeline(&mut self, pipeline: RenderPipelineId) { + // TODO: do these need to be cleared? 
+ // self.bind_groups.clear(); + // self.vertex_buffers.clear(); + // self.index_buffer = None; + self.pipeline = Some(pipeline); + self.stores_state = true; + } + + /// Checks, whether the `pipeline` is already bound. + fn is_pipeline_set(&self, pipeline: RenderPipelineId) -> bool { + self.pipeline == Some(pipeline) + } + + /// Marks the `bind_group` as bound to the `index`. + fn set_bind_group(&mut self, index: usize, bind_group: BindGroupId, dynamic_indices: &[u32]) { + let group = &mut self.bind_groups[index]; + group.0 = Some(bind_group); + group.1.clear(); + group.1.extend(dynamic_indices); + self.stores_state = true; + } + + /// Checks, whether the `bind_group` is already bound to the `index`. + fn is_bind_group_set( + &self, + index: usize, + bind_group: BindGroupId, + dynamic_indices: &[u32], + ) -> bool { + if let Some(current_bind_group) = self.bind_groups.get(index) { + current_bind_group.0 == Some(bind_group) && dynamic_indices == current_bind_group.1 + } else { + false + } + } + + /// Marks the vertex `buffer` as bound to the `index`. + fn set_vertex_buffer(&mut self, index: usize, buffer_slice: BufferSlice) { + self.vertex_buffers[index] = Some(self.buffer_slice_key(&buffer_slice)); + self.stores_state = true; + } + + /// Checks, whether the vertex `buffer` is already bound to the `index`. + fn is_vertex_buffer_set(&self, index: usize, buffer_slice: &BufferSlice) -> bool { + if let Some(current) = self.vertex_buffers.get(index) { + *current == Some(self.buffer_slice_key(buffer_slice)) + } else { + false + } + } + + /// Returns the value used for checking whether `BufferSlice`s are equivalent. + fn buffer_slice_key(&self, buffer_slice: &BufferSlice) -> (BufferId, u64, u64) { + ( + buffer_slice.id(), + buffer_slice.offset(), + buffer_slice.size(), + ) + } + + /// Marks the index `buffer` as bound. 
+ fn set_index_buffer(&mut self, buffer: BufferId, offset: u64, index_format: IndexFormat) { + self.index_buffer = Some((buffer, offset, index_format)); + self.stores_state = true; + } + + /// Checks, whether the index `buffer` is already bound. + fn is_index_buffer_set( + &self, + buffer: BufferId, + offset: u64, + index_format: IndexFormat, + ) -> bool { + self.index_buffer == Some((buffer, offset, index_format)) + } + + /// Resets tracking state + pub fn reset_tracking(&mut self) { + if !self.stores_state { + return; + } + self.pipeline = None; + self.bind_groups.iter_mut().for_each(|val| { + val.0 = None; + val.1.clear(); + }); + self.vertex_buffers.iter_mut().for_each(|val| { + *val = None; + }); + self.index_buffer = None; + self.stores_state = false; + } +} + +/// A [`RenderPass`], which tracks the current pipeline state to skip redundant operations. +/// +/// It is used to set the current [`RenderPipeline`], [`BindGroup`]s and [`Buffer`]s. +/// After all requirements are specified, draw calls can be issued. +pub struct TrackedRenderPass<'a> { + pass: RenderPass<'a>, + state: DrawState, +} + +impl<'a> TrackedRenderPass<'a> { + /// Tracks the supplied render pass. + pub fn new(device: &RenderDevice, pass: RenderPass<'a>) -> Self { + let limits = device.limits(); + let max_bind_groups = limits.max_bind_groups as usize; + let max_vertex_buffers = limits.max_vertex_buffers as usize; + Self { + state: DrawState { + bind_groups: vec![(None, Vec::new()); max_bind_groups], + vertex_buffers: vec![None; max_vertex_buffers], + ..default() + }, + pass, + } + } + + /// Returns the wgpu [`RenderPass`]. + /// + /// Function invalidates internal tracking state, + /// some redundant pipeline operations may not be skipped. + pub fn wgpu_pass(&mut self) -> &mut RenderPass<'a> { + self.state.reset_tracking(); + &mut self.pass + } + + /// Sets the active [`RenderPipeline`]. + /// + /// Subsequent draw calls will exhibit the behavior defined by the `pipeline`. 
+ pub fn set_render_pipeline(&mut self, pipeline: &'a RenderPipeline) { + #[cfg(feature = "detailed_trace")] + trace!("set pipeline: {:?}", pipeline); + if self.state.is_pipeline_set(pipeline.id()) { + return; + } + self.pass.set_pipeline(pipeline); + self.state.set_pipeline(pipeline.id()); + } + + /// Sets the active bind group for a given bind group index. The bind group layout + /// in the active pipeline when any `draw()` function is called must match the layout of + /// this bind group. + /// + /// If the bind group have dynamic offsets, provide them in binding order. + /// These offsets have to be aligned to [`WgpuLimits::min_uniform_buffer_offset_alignment`](crate::settings::WgpuLimits::min_uniform_buffer_offset_alignment) + /// or [`WgpuLimits::min_storage_buffer_offset_alignment`](crate::settings::WgpuLimits::min_storage_buffer_offset_alignment) appropriately. + pub fn set_bind_group( + &mut self, + index: usize, + bind_group: &'a BindGroup, + dynamic_uniform_indices: &[u32], + ) { + if self + .state + .is_bind_group_set(index, bind_group.id(), dynamic_uniform_indices) + { + #[cfg(feature = "detailed_trace")] + trace!( + "set bind_group {} (already set): {:?} ({:?})", + index, + bind_group, + dynamic_uniform_indices + ); + return; + } + #[cfg(feature = "detailed_trace")] + trace!( + "set bind_group {}: {:?} ({:?})", + index, + bind_group, + dynamic_uniform_indices + ); + + self.pass + .set_bind_group(index as u32, bind_group, dynamic_uniform_indices); + self.state + .set_bind_group(index, bind_group.id(), dynamic_uniform_indices); + } + + /// Assign a vertex buffer to a slot. + /// + /// Subsequent calls to [`draw`] and [`draw_indexed`] on this + /// [`TrackedRenderPass`] will use `buffer` as one of the source vertex buffers. + /// + /// The `slot_index` refers to the index of the matching descriptor in + /// [`VertexState::buffers`](crate::render_resource::VertexState::buffers). 
+ /// + /// [`draw`]: TrackedRenderPass::draw + /// [`draw_indexed`]: TrackedRenderPass::draw_indexed + pub fn set_vertex_buffer(&mut self, slot_index: usize, buffer_slice: BufferSlice<'a>) { + if self.state.is_vertex_buffer_set(slot_index, &buffer_slice) { + #[cfg(feature = "detailed_trace")] + trace!( + "set vertex buffer {} (already set): {:?} (offset = {}, size = {})", + slot_index, + buffer_slice.id(), + buffer_slice.offset(), + buffer_slice.size(), + ); + return; + } + #[cfg(feature = "detailed_trace")] + trace!( + "set vertex buffer {}: {:?} (offset = {}, size = {})", + slot_index, + buffer_slice.id(), + buffer_slice.offset(), + buffer_slice.size(), + ); + + self.pass + .set_vertex_buffer(slot_index as u32, *buffer_slice); + self.state.set_vertex_buffer(slot_index, buffer_slice); + } + + /// Sets the active index buffer. + /// + /// Subsequent calls to [`TrackedRenderPass::draw_indexed`] will use the buffer referenced by + /// `buffer_slice` as the source index buffer. + pub fn set_index_buffer( + &mut self, + buffer_slice: BufferSlice<'a>, + offset: u64, + index_format: IndexFormat, + ) { + if self + .state + .is_index_buffer_set(buffer_slice.id(), offset, index_format) + { + #[cfg(feature = "detailed_trace")] + trace!( + "set index buffer (already set): {:?} ({})", + buffer_slice.id(), + offset + ); + return; + } + #[cfg(feature = "detailed_trace")] + trace!("set index buffer: {:?} ({})", buffer_slice.id(), offset); + self.pass.set_index_buffer(*buffer_slice, index_format); + self.state + .set_index_buffer(buffer_slice.id(), offset, index_format); + } + + /// Draws primitives from the active vertex buffer(s). + /// + /// The active vertex buffer(s) can be set with [`TrackedRenderPass::set_vertex_buffer`]. 
+ pub fn draw(&mut self, vertices: Range, instances: Range) { + #[cfg(feature = "detailed_trace")] + trace!("draw: {:?} {:?}", vertices, instances); + self.pass.draw(vertices, instances); + } + + /// Draws indexed primitives using the active index buffer and the active vertex buffer(s). + /// + /// The active index buffer can be set with [`TrackedRenderPass::set_index_buffer`], while the + /// active vertex buffer(s) can be set with [`TrackedRenderPass::set_vertex_buffer`]. + pub fn draw_indexed(&mut self, indices: Range, base_vertex: i32, instances: Range) { + #[cfg(feature = "detailed_trace")] + trace!( + "draw indexed: {:?} {} {:?}", + indices, + base_vertex, + instances + ); + self.pass.draw_indexed(indices, base_vertex, instances); + } + + /// Draws primitives from the active vertex buffer(s) based on the contents of the + /// `indirect_buffer`. + /// + /// The active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// The structure expected in `indirect_buffer` is the following: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_vertex: u32, // The Index of the first vertex to draw. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. + /// } + /// ``` + pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: u64) { + #[cfg(feature = "detailed_trace")] + trace!("draw indirect: {:?} {}", indirect_buffer, indirect_offset); + self.pass.draw_indirect(indirect_buffer, indirect_offset); + } + + /// Draws indexed primitives using the active index buffer and the active vertex buffers, + /// based on the contents of the `indirect_buffer`. 
+ /// + /// The active index buffer can be set with [`TrackedRenderPass::set_index_buffer`], while the + /// active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// The structure expected in `indirect_buffer` is the following: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndexedIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_index: u32, // The base index within the index buffer. + /// vertex_offset: i32, // The value added to the vertex index before indexing into the vertex buffer. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. + /// } + /// ``` + pub fn draw_indexed_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: u64) { + #[cfg(feature = "detailed_trace")] + trace!( + "draw indexed indirect: {:?} {}", + indirect_buffer, + indirect_offset + ); + self.pass + .draw_indexed_indirect(indirect_buffer, indirect_offset); + } + + /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the + /// `indirect_buffer`.`count` draw calls are issued. + /// + /// The active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// `indirect_buffer` should contain `count` tightly packed elements of the following structure: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_vertex: u32, // The Index of the first vertex to draw. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. 
+ /// } + /// ``` + pub fn multi_draw_indirect( + &mut self, + indirect_buffer: &'a Buffer, + indirect_offset: u64, + count: u32, + ) { + #[cfg(feature = "detailed_trace")] + trace!( + "multi draw indirect: {:?} {}, {}x", + indirect_buffer, + indirect_offset, + count + ); + self.pass + .multi_draw_indirect(indirect_buffer, indirect_offset, count); + } + + /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of + /// the `indirect_buffer`. + /// The count buffer is read to determine how many draws to issue. + /// + /// The indirect buffer must be long enough to account for `max_count` draws, however only + /// `count` elements will be read, where `count` is the value read from `count_buffer` capped + /// at `max_count`. + /// + /// The active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// `indirect_buffer` should contain `count` tightly packed elements of the following structure: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_vertex: u32, // The Index of the first vertex to draw. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. 
+ /// } + /// ``` + pub fn multi_draw_indirect_count( + &mut self, + indirect_buffer: &'a Buffer, + indirect_offset: u64, + count_buffer: &'a Buffer, + count_offset: u64, + max_count: u32, + ) { + #[cfg(feature = "detailed_trace")] + trace!( + "multi draw indirect count: {:?} {}, ({:?} {})x, max {}x", + indirect_buffer, + indirect_offset, + count_buffer, + count_offset, + max_count + ); + self.pass.multi_draw_indirect_count( + indirect_buffer, + indirect_offset, + count_buffer, + count_offset, + max_count, + ); + } + + /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers, + /// based on the contents of the `indirect_buffer`. `count` draw calls are issued. + /// + /// The active index buffer can be set with [`TrackedRenderPass::set_index_buffer`], while the + /// active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// `indirect_buffer` should contain `count` tightly packed elements of the following structure: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndexedIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_index: u32, // The base index within the index buffer. + /// vertex_offset: i32, // The value added to the vertex index before indexing into the vertex buffer. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. 
+ /// } + /// ``` + pub fn multi_draw_indexed_indirect( + &mut self, + indirect_buffer: &'a Buffer, + indirect_offset: u64, + count: u32, + ) { + #[cfg(feature = "detailed_trace")] + trace!( + "multi draw indexed indirect: {:?} {}, {}x", + indirect_buffer, + indirect_offset, + count + ); + self.pass + .multi_draw_indexed_indirect(indirect_buffer, indirect_offset, count); + } + + /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers, + /// based on the contents of the `indirect_buffer`. + /// The count buffer is read to determine how many draws to issue. + /// + /// The indirect buffer must be long enough to account for `max_count` draws, however only + /// `count` elements will be read, where `count` is the value read from `count_buffer` capped + /// at `max_count`. + /// + /// The active index buffer can be set with [`TrackedRenderPass::set_index_buffer`], while the + /// active vertex buffers can be set with [`TrackedRenderPass::set_vertex_buffer`]. + /// + /// `indirect_buffer` should contain `count` tightly packed elements of the following structure: + /// + /// ``` + /// #[repr(C)] + /// struct DrawIndexedIndirect { + /// vertex_count: u32, // The number of vertices to draw. + /// instance_count: u32, // The number of instances to draw. + /// first_index: u32, // The base index within the index buffer. + /// vertex_offset: i32, // The value added to the vertex index before indexing into the vertex buffer. + /// first_instance: u32, // The instance ID of the first instance to draw. + /// // has to be 0, unless [`Features::INDIRECT_FIRST_INSTANCE`] is enabled. 
+ /// } + /// ``` + pub fn multi_draw_indexed_indirect_count( + &mut self, + indirect_buffer: &'a Buffer, + indirect_offset: u64, + count_buffer: &'a Buffer, + count_offset: u64, + max_count: u32, + ) { + #[cfg(feature = "detailed_trace")] + trace!( + "multi draw indexed indirect count: {:?} {}, ({:?} {})x, max {}x", + indirect_buffer, + indirect_offset, + count_buffer, + count_offset, + max_count + ); + self.pass.multi_draw_indexed_indirect_count( + indirect_buffer, + indirect_offset, + count_buffer, + count_offset, + max_count, + ); + } + + /// Sets the stencil reference. + /// + /// Subsequent stencil tests will test against this value. + pub fn set_stencil_reference(&mut self, reference: u32) { + #[cfg(feature = "detailed_trace")] + trace!("set stencil reference: {}", reference); + self.pass.set_stencil_reference(reference); + } + + /// Sets the scissor region. + /// + /// Subsequent draw calls will discard any fragments that fall outside this region. + pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) { + #[cfg(feature = "detailed_trace")] + trace!("set_scissor_rect: {} {} {} {}", x, y, width, height); + self.pass.set_scissor_rect(x, y, width, height); + } + + /// Set push constant data. + /// + /// `Features::PUSH_CONSTANTS` must be enabled on the device in order to call these functions. + pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) { + #[cfg(feature = "detailed_trace")] + trace!( + "set push constants: {:?} offset: {} data.len: {}", + stages, + offset, + data.len() + ); + self.pass.set_push_constants(stages, offset, data); + } + + /// Set the rendering viewport. + /// + /// Subsequent draw calls will be projected into that viewport. 
+ pub fn set_viewport( + &mut self, + x: f32, + y: f32, + width: f32, + height: f32, + min_depth: f32, + max_depth: f32, + ) { + #[cfg(feature = "detailed_trace")] + trace!( + "set viewport: {} {} {} {} {} {}", + x, + y, + width, + height, + min_depth, + max_depth + ); + self.pass + .set_viewport(x, y, width, height, min_depth, max_depth); + } + + /// Set the rendering viewport to the given camera [`Viewport`]. + /// + /// Subsequent draw calls will be projected into that viewport. + pub fn set_camera_viewport(&mut self, viewport: &Viewport) { + self.set_viewport( + viewport.physical_position.x as f32, + viewport.physical_position.y as f32, + viewport.physical_size.x as f32, + viewport.physical_size.y as f32, + viewport.depth.start, + viewport.depth.end, + ); + } + + /// Insert a single debug marker. + /// + /// This is a GPU debugging feature. This has no effect on the rendering itself. + pub fn insert_debug_marker(&mut self, label: &str) { + #[cfg(feature = "detailed_trace")] + trace!("insert debug marker: {}", label); + self.pass.insert_debug_marker(label); + } + + /// Start a new debug group. + /// + /// Push a new debug group over the internal stack. Subsequent render commands and debug + /// markers are grouped into this new group, until [`pop_debug_group`] is called. + /// + /// ``` + /// # fn example(mut pass: bevy_render::render_phase::TrackedRenderPass<'static>) { + /// pass.push_debug_group("Render the car"); + /// // [setup pipeline etc...] + /// pass.draw(0..64, 0..1); + /// pass.pop_debug_group(); + /// # } + /// ``` + /// + /// Note that [`push_debug_group`] and [`pop_debug_group`] must always be called in pairs. + /// + /// This is a GPU debugging feature. This has no effect on the rendering itself. 
+ /// + /// [`push_debug_group`]: TrackedRenderPass::push_debug_group + /// [`pop_debug_group`]: TrackedRenderPass::pop_debug_group + pub fn push_debug_group(&mut self, label: &str) { + #[cfg(feature = "detailed_trace")] + trace!("push_debug_group marker: {}", label); + self.pass.push_debug_group(label); + } + + /// End the current debug group. + /// + /// Subsequent render commands and debug markers are not grouped anymore in + /// this group, but in the previous one (if any) or the default top-level one + /// if the debug group was the last one on the stack. + /// + /// Note that [`push_debug_group`] and [`pop_debug_group`] must always be called in pairs. + /// + /// This is a GPU debugging feature. This has no effect on the rendering itself. + /// + /// [`push_debug_group`]: TrackedRenderPass::push_debug_group + /// [`pop_debug_group`]: TrackedRenderPass::pop_debug_group + pub fn pop_debug_group(&mut self) { + #[cfg(feature = "detailed_trace")] + trace!("pop_debug_group"); + self.pass.pop_debug_group(); + } + + /// Sets the blend color as used by some of the blending modes. + /// + /// Subsequent blending tests will test against this value. 
+ pub fn set_blend_constant(&mut self, color: LinearRgba) { + #[cfg(feature = "detailed_trace")] + trace!("set blend constant: {:?}", color); + self.pass.set_blend_constant(wgpu::Color::from(color)); + } +} + +impl WriteTimestamp for TrackedRenderPass<'_> { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + self.pass.write_timestamp(query_set, index); + } +} + +impl WritePipelineStatistics for TrackedRenderPass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + self.pass.begin_pipeline_statistics_query(query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + self.pass.end_pipeline_statistics_query(); + } +} + +impl Pass for TrackedRenderPass<'_> { + const KIND: PassKind = PassKind::Render; +} diff --git a/crates/libmarathon/src/render/render_phase/mod.rs b/crates/libmarathon/src/render/render_phase/mod.rs new file mode 100644 index 0000000..9a89fa9 --- /dev/null +++ b/crates/libmarathon/src/render/render_phase/mod.rs @@ -0,0 +1,1911 @@ +//! The modular rendering abstraction responsible for queuing, preparing, sorting and drawing +//! entities as part of separate render phases. +//! +//! In Bevy each view (camera, or shadow-casting light, etc.) has one or multiple render phases +//! (e.g. opaque, transparent, shadow, etc). +//! They are used to queue entities for rendering. +//! Multiple phases might be required due to different sorting/batching behaviors +//! (e.g. opaque: front to back, transparent: back to front) or because one phase depends on +//! the rendered texture of the previous phase (e.g. for screen-space reflections). +//! +//! To draw an entity, a corresponding [`PhaseItem`] has to be added to one or multiple of these +//! render phases for each view that it is visible in. +//! This must be done in the [`RenderSystems::Queue`]. +//! After that the render phase sorts them in the [`RenderSystems::PhaseSort`]. +//! 
Finally the items are rendered using a single [`TrackedRenderPass`], during +//! the [`RenderSystems::Render`]. +//! +//! Therefore each phase item is assigned a [`Draw`] function. +//! These set up the state of the [`TrackedRenderPass`] (i.e. select the +//! [`RenderPipeline`](crate::render_resource::RenderPipeline), configure the +//! [`BindGroup`](crate::render_resource::BindGroup)s, etc.) and then issue a draw call, +//! for the corresponding item. +//! +//! The [`Draw`] function trait can either be implemented directly or such a function can be +//! created by composing multiple [`RenderCommand`]s. + +mod draw; +mod draw_state; +mod rangefinder; + +use bevy_app::{App, Plugin}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::component::Tick; +use bevy_ecs::entity::EntityHash; +use bevy_platform::collections::{hash_map::Entry, HashMap}; +use bevy_utils::default; +pub use draw::*; +pub use draw_state::*; +use encase::{internal::WriteInto, ShaderSize}; +use fixedbitset::{Block, FixedBitSet}; +use indexmap::IndexMap; +use nonmax::NonMaxU32; +pub use rangefinder::*; +use wgpu::Features; + +use crate::render::batching::gpu_preprocessing::{ + GpuPreprocessingMode, GpuPreprocessingSupport, PhaseBatchedInstanceBuffers, + PhaseIndirectParametersBuffers, +}; +use crate::render::renderer::RenderDevice; +use crate::render::sync_world::{MainEntity, MainEntityHashMap}; +use crate::render::view::RetainedViewEntity; +use crate::render::RenderDebugFlags; +use crate::render::{ + batching::{ + self, + gpu_preprocessing::{self, BatchedInstanceBuffers}, + no_gpu_preprocessing::{self, BatchedInstanceBuffer}, + GetFullBatchData, + }, + render_resource::{CachedRenderPipelineId, GpuArrayBufferIndex, PipelineCache}, + Render, RenderApp, RenderSystems, +}; +use bevy_ecs::intern::Interned; +use bevy_ecs::{ + define_label, + prelude::*, + system::{lifetimeless::SRes, SystemParamItem}, +}; +use crate::render::renderer::RenderAdapterInfo; +pub use macros::ShaderLabel; +use 
core::{fmt::Debug, hash::Hash, iter, marker::PhantomData, ops::Range, slice::SliceIndex}; +use smallvec::SmallVec; +use tracing::warn; + +define_label!( + #[diagnostic::on_unimplemented( + note = "consider annotating `{Self}` with `#[derive(ShaderLabel)]`" + )] + /// Labels used to uniquely identify types of material shaders + ShaderLabel, + SHADER_LABEL_INTERNER +); + +/// A shorthand for `Interned`. +pub type InternedShaderLabel = Interned; + +pub use macros::DrawFunctionLabel; + +define_label!( + #[diagnostic::on_unimplemented( + note = "consider annotating `{Self}` with `#[derive(DrawFunctionLabel)]`" + )] + /// Labels used to uniquely identify types of material shaders + DrawFunctionLabel, + DRAW_FUNCTION_LABEL_INTERNER +); + +pub type InternedDrawFunctionLabel = Interned; + +/// Stores the rendering instructions for a single phase that uses bins in all +/// views. +/// +/// They're cleared out every frame, but storing them in a resource like this +/// allows us to reuse allocations. +#[derive(Resource, Deref, DerefMut)] +pub struct ViewBinnedRenderPhases(pub HashMap>) +where + BPI: BinnedPhaseItem; + +/// A collection of all rendering instructions, that will be executed by the GPU, for a +/// single render phase for a single view. +/// +/// Each view (camera, or shadow-casting light, etc.) can have one or multiple render phases. +/// They are used to queue entities for rendering. +/// Multiple phases might be required due to different sorting/batching behaviors +/// (e.g. opaque: front to back, transparent: back to front) or because one phase depends on +/// the rendered texture of the previous phase (e.g. for screen-space reflections). +/// All [`PhaseItem`]s are then rendered using a single [`TrackedRenderPass`]. +/// The render pass might be reused for multiple phases to reduce GPU overhead. +/// +/// This flavor of render phase is used for phases in which the ordering is less +/// critical: for example, `Opaque3d`. 
It's generally faster than the +/// alternative [`SortedRenderPhase`]. +pub struct BinnedRenderPhase +where + BPI: BinnedPhaseItem, +{ + /// The multidrawable bins. + /// + /// Each batch set key maps to a *batch set*, which in this case is a set of + /// meshes that can be drawn together in one multidraw call. Each batch set + /// is subdivided into *bins*, each of which represents a particular mesh. + /// Each bin contains the entity IDs of instances of that mesh. + /// + /// So, for example, if there are two cubes and a sphere present in the + /// scene, we would generally have one batch set containing two bins, + /// assuming that the cubes and sphere meshes are allocated together and use + /// the same pipeline. The first bin, corresponding to the cubes, will have + /// two entities in it. The second bin, corresponding to the sphere, will + /// have one entity in it. + pub multidrawable_meshes: IndexMap>, + + /// The bins corresponding to batchable items that aren't multidrawable. + /// + /// For multidrawable entities, use `multidrawable_meshes`; for + /// unbatchable entities, use `unbatchable_values`. + pub batchable_meshes: IndexMap<(BPI::BatchSetKey, BPI::BinKey), RenderBin>, + + /// The unbatchable bins. + /// + /// Each entity here is rendered in a separate drawcall. + pub unbatchable_meshes: IndexMap<(BPI::BatchSetKey, BPI::BinKey), UnbatchableBinnedEntities>, + + /// Items in the bin that aren't meshes at all. + /// + /// Bevy itself doesn't place anything in this list, but plugins or your app + /// can in order to execute custom drawing commands. Draw functions for each + /// entity are simply called in order at rendering time. + /// + /// See the `custom_phase_item` example for an example of how to use this. + pub non_mesh_items: IndexMap<(BPI::BatchSetKey, BPI::BinKey), NonMeshEntities>, + + /// Information on each batch set. 
+ /// + /// A *batch set* is a set of entities that will be batched together unless + /// we're on a platform that doesn't support storage buffers (e.g. WebGL 2) + /// and differing dynamic uniform indices force us to break batches. On + /// platforms that support storage buffers, a batch set always consists of + /// at most one batch. + /// + /// Multidrawable entities come first, then batchable entities, then + /// unbatchable entities. + pub(crate) batch_sets: BinnedRenderPhaseBatchSets, + + /// The batch and bin key for each entity. + /// + /// We retain these so that, when the entity changes, + /// [`Self::sweep_old_entities`] can quickly find the bin it was located in + /// and remove it. + cached_entity_bin_keys: IndexMap, EntityHash>, + + /// The set of indices in [`Self::cached_entity_bin_keys`] that are + /// confirmed to be up to date. + /// + /// Note that each bit in this bit set refers to an *index* in the + /// [`IndexMap`] (i.e. a bucket in the hash table). They aren't entity IDs. + valid_cached_entity_bin_keys: FixedBitSet, + + /// The set of entities that changed bins this frame. + /// + /// An entity will only be present in this list if it was in one bin on the + /// previous frame and is in a new bin on this frame. Each list entry + /// specifies the bin the entity used to be in. We use this in order to + /// remove the entity from the old bin during + /// [`BinnedRenderPhase::sweep_old_entities`]. + entities_that_changed_bins: Vec>, + /// The gpu preprocessing mode configured for the view this phase is associated + /// with. + gpu_preprocessing_mode: GpuPreprocessingMode, +} + +/// All entities that share a mesh and a material and can be batched as part of +/// a [`BinnedRenderPhase`]. +#[derive(Default)] +pub struct RenderBin { + /// A list of the entities in each bin, along with their cached + /// [`InputUniformIndex`]. 
+ entities: IndexMap, +} + +/// Information that we track about an entity that was in one bin on the +/// previous frame and is in a different bin this frame. +struct EntityThatChangedBins +where + BPI: BinnedPhaseItem, +{ + /// The entity. + main_entity: MainEntity, + /// The key that identifies the bin that this entity used to be in. + old_cached_binned_entity: CachedBinnedEntity, +} + +/// Information that we keep about an entity currently within a bin. +pub struct CachedBinnedEntity +where + BPI: BinnedPhaseItem, +{ + /// Information that we use to identify a cached entity in a bin. + pub cached_bin_key: Option>, + /// The last modified tick of the entity. + /// + /// We use this to detect when the entity needs to be invalidated. + pub change_tick: Tick, +} + +/// Information that we use to identify a cached entity in a bin. +pub struct CachedBinKey +where + BPI: BinnedPhaseItem, +{ + /// The key of the batch set containing the entity. + pub batch_set_key: BPI::BatchSetKey, + /// The key of the bin containing the entity. + pub bin_key: BPI::BinKey, + /// The type of render phase that we use to render the entity: multidraw, + /// plain batch, etc. + pub phase_type: BinnedRenderPhaseType, +} + +impl Clone for CachedBinnedEntity +where + BPI: BinnedPhaseItem, +{ + fn clone(&self) -> Self { + CachedBinnedEntity { + cached_bin_key: self.cached_bin_key.clone(), + change_tick: self.change_tick, + } + } +} + +impl Clone for CachedBinKey +where + BPI: BinnedPhaseItem, +{ + fn clone(&self) -> Self { + CachedBinKey { + batch_set_key: self.batch_set_key.clone(), + bin_key: self.bin_key.clone(), + phase_type: self.phase_type, + } + } +} + +impl PartialEq for CachedBinKey +where + BPI: BinnedPhaseItem, +{ + fn eq(&self, other: &Self) -> bool { + self.batch_set_key == other.batch_set_key + && self.bin_key == other.bin_key + && self.phase_type == other.phase_type + } +} + +/// How we store and render the batch sets. 
+/// +/// Each one of these corresponds to a [`GpuPreprocessingMode`]. +pub enum BinnedRenderPhaseBatchSets { + /// Batches are grouped into batch sets based on dynamic uniforms. + /// + /// This corresponds to [`GpuPreprocessingMode::None`]. + DynamicUniforms(Vec>), + + /// Batches are never grouped into batch sets. + /// + /// This corresponds to [`GpuPreprocessingMode::PreprocessingOnly`]. + Direct(Vec), + + /// Batches are grouped together into batch sets based on their ability to + /// be multi-drawn together. + /// + /// This corresponds to [`GpuPreprocessingMode::Culling`]. + MultidrawIndirect(Vec>), +} + +/// A group of entities that will be batched together into a single multi-draw +/// call. +pub struct BinnedRenderPhaseBatchSet { + /// The first batch in this batch set. + pub(crate) first_batch: BinnedRenderPhaseBatch, + /// The key of the bin that the first batch corresponds to. + pub(crate) bin_key: BK, + /// The number of batches. + pub(crate) batch_count: u32, + /// The index of the batch set in the GPU buffer. + pub(crate) index: u32, +} + +impl BinnedRenderPhaseBatchSets { + fn clear(&mut self) { + match *self { + BinnedRenderPhaseBatchSets::DynamicUniforms(ref mut vec) => vec.clear(), + BinnedRenderPhaseBatchSets::Direct(ref mut vec) => vec.clear(), + BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => vec.clear(), + } + } +} + +/// Information about a single batch of entities rendered using binned phase +/// items. +#[derive(Debug)] +pub struct BinnedRenderPhaseBatch { + /// An entity that's *representative* of this batch. + /// + /// Bevy uses this to fetch the mesh. It can be any entity in the batch. + pub representative_entity: (Entity, MainEntity), + /// The range of instance indices in this batch. + pub instance_range: Range, + + /// The dynamic offset of the batch. + /// + /// Note that dynamic offsets are only used on platforms that don't support + /// storage buffers. 
+ pub extra_index: PhaseItemExtraIndex, +} + +/// Information about the unbatchable entities in a bin. +pub struct UnbatchableBinnedEntities { + /// The entities. + pub entities: MainEntityHashMap, + + /// The GPU array buffer indices of each unbatchable binned entity. + pub(crate) buffer_indices: UnbatchableBinnedEntityIndexSet, +} + +/// Information about [`BinnedRenderPhaseType::NonMesh`] entities. +pub struct NonMeshEntities { + /// The entities. + pub entities: MainEntityHashMap, +} + +/// Stores instance indices and dynamic offsets for unbatchable entities in a +/// binned render phase. +/// +/// This is conceptually `Vec`, but it +/// avoids the overhead of storing dynamic offsets on platforms that support +/// them. In other words, this allows a fast path that avoids allocation on +/// platforms that aren't WebGL 2. +#[derive(Default)] + +pub(crate) enum UnbatchableBinnedEntityIndexSet { + /// There are no unbatchable entities in this bin (yet). + #[default] + NoEntities, + + /// The instances for all unbatchable entities in this bin are contiguous, + /// and there are no dynamic uniforms. + /// + /// This is the typical case on platforms other than WebGL 2. We special + /// case this to avoid allocation on those platforms. + Sparse { + /// The range of indices. + instance_range: Range, + /// The index of the first indirect instance parameters. + /// + /// The other indices immediately follow these. + first_indirect_parameters_index: Option, + }, + + /// Dynamic uniforms are present for unbatchable entities in this bin. + /// + /// We fall back to this on WebGL 2. + Dense(Vec), +} + +/// The instance index and dynamic offset (if present) for an unbatchable entity. +/// +/// This is only useful on platforms that don't support storage buffers. +#[derive(Clone)] +pub(crate) struct UnbatchableBinnedEntityIndices { + /// The instance index. + pub(crate) instance_index: u32, + /// The [`PhaseItemExtraIndex`], if present. 
+ pub(crate) extra_index: PhaseItemExtraIndex, +} + +/// Identifies the list within [`BinnedRenderPhase`] that a phase item is to be +/// placed in. +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum BinnedRenderPhaseType { + /// The item is a mesh that's eligible for multi-draw indirect rendering and + /// can be batched with other meshes of the same type. + MultidrawableMesh, + + /// The item is a mesh that can be batched with other meshes of the same type and + /// drawn in a single draw call. + BatchableMesh, + + /// The item is a mesh that's eligible for indirect rendering, but can't be + /// batched with other meshes of the same type. + UnbatchableMesh, + + /// The item isn't a mesh at all. + /// + /// Bevy will simply invoke the drawing commands for such items one after + /// another, with no further processing. + /// + /// The engine itself doesn't enqueue any items of this type, but it's + /// available for use in your application and/or plugins. + NonMesh, +} + +impl From> for UnbatchableBinnedEntityIndices +where + T: Clone + ShaderSize + WriteInto, +{ + fn from(value: GpuArrayBufferIndex) -> Self { + UnbatchableBinnedEntityIndices { + instance_index: value.index, + extra_index: PhaseItemExtraIndex::maybe_dynamic_offset(value.dynamic_offset), + } + } +} + +impl Default for ViewBinnedRenderPhases +where + BPI: BinnedPhaseItem, +{ + fn default() -> Self { + Self(default()) + } +} + +impl ViewBinnedRenderPhases +where + BPI: BinnedPhaseItem, +{ + pub fn prepare_for_new_frame( + &mut self, + retained_view_entity: RetainedViewEntity, + gpu_preprocessing: GpuPreprocessingMode, + ) { + match self.entry(retained_view_entity) { + Entry::Occupied(mut entry) => entry.get_mut().prepare_for_new_frame(), + Entry::Vacant(entry) => { + entry.insert(BinnedRenderPhase::::new(gpu_preprocessing)); + } + } + } +} + +/// The index of the uniform describing this object in the GPU buffer, when GPU +/// preprocessing is enabled. 
+/// +/// For example, for 3D meshes, this is the index of the `MeshInputUniform` in +/// the buffer. +/// +/// This field is ignored if GPU preprocessing isn't in use, such as (currently) +/// in the case of 2D meshes. In that case, it can be safely set to +/// [`core::default::Default::default`]. +#[derive(Clone, Copy, PartialEq, Default, Deref, DerefMut)] +#[repr(transparent)] +pub struct InputUniformIndex(pub u32); + +impl BinnedRenderPhase +where + BPI: BinnedPhaseItem, +{ + /// Bins a new entity. + /// + /// The `phase_type` parameter specifies whether the entity is a + /// preprocessable mesh and whether it can be binned with meshes of the same + /// type. + pub fn add( + &mut self, + batch_set_key: BPI::BatchSetKey, + bin_key: BPI::BinKey, + (entity, main_entity): (Entity, MainEntity), + input_uniform_index: InputUniformIndex, + mut phase_type: BinnedRenderPhaseType, + change_tick: Tick, + ) { + // If the user has overridden indirect drawing for this view, we need to + // force the phase type to be batchable instead. 
+ if self.gpu_preprocessing_mode == GpuPreprocessingMode::PreprocessingOnly + && phase_type == BinnedRenderPhaseType::MultidrawableMesh + { + phase_type = BinnedRenderPhaseType::BatchableMesh; + } + + match phase_type { + BinnedRenderPhaseType::MultidrawableMesh => { + match self.multidrawable_meshes.entry(batch_set_key.clone()) { + indexmap::map::Entry::Occupied(mut entry) => { + entry + .get_mut() + .entry(bin_key.clone()) + .or_default() + .insert(main_entity, input_uniform_index); + } + indexmap::map::Entry::Vacant(entry) => { + let mut new_batch_set = IndexMap::default(); + new_batch_set.insert( + bin_key.clone(), + RenderBin::from_entity(main_entity, input_uniform_index), + ); + entry.insert(new_batch_set); + } + } + } + + BinnedRenderPhaseType::BatchableMesh => { + match self + .batchable_meshes + .entry((batch_set_key.clone(), bin_key.clone()).clone()) + { + indexmap::map::Entry::Occupied(mut entry) => { + entry.get_mut().insert(main_entity, input_uniform_index); + } + indexmap::map::Entry::Vacant(entry) => { + entry.insert(RenderBin::from_entity(main_entity, input_uniform_index)); + } + } + } + + BinnedRenderPhaseType::UnbatchableMesh => { + match self + .unbatchable_meshes + .entry((batch_set_key.clone(), bin_key.clone())) + { + indexmap::map::Entry::Occupied(mut entry) => { + entry.get_mut().entities.insert(main_entity, entity); + } + indexmap::map::Entry::Vacant(entry) => { + let mut entities = MainEntityHashMap::default(); + entities.insert(main_entity, entity); + entry.insert(UnbatchableBinnedEntities { + entities, + buffer_indices: default(), + }); + } + } + } + + BinnedRenderPhaseType::NonMesh => { + // We don't process these items further. 
+ match self + .non_mesh_items + .entry((batch_set_key.clone(), bin_key.clone()).clone()) + { + indexmap::map::Entry::Occupied(mut entry) => { + entry.get_mut().entities.insert(main_entity, entity); + } + indexmap::map::Entry::Vacant(entry) => { + let mut entities = MainEntityHashMap::default(); + entities.insert(main_entity, entity); + entry.insert(NonMeshEntities { entities }); + } + } + } + } + + // Update the cache. + self.update_cache( + main_entity, + Some(CachedBinKey { + batch_set_key, + bin_key, + phase_type, + }), + change_tick, + ); + } + + /// Inserts an entity into the cache with the given change tick. + pub fn update_cache( + &mut self, + main_entity: MainEntity, + cached_bin_key: Option>, + change_tick: Tick, + ) { + let new_cached_binned_entity = CachedBinnedEntity { + cached_bin_key, + change_tick, + }; + + let (index, old_cached_binned_entity) = self + .cached_entity_bin_keys + .insert_full(main_entity, new_cached_binned_entity.clone()); + + // If the entity changed bins, record its old bin so that we can remove + // the entity from it. + if let Some(old_cached_binned_entity) = old_cached_binned_entity + && old_cached_binned_entity.cached_bin_key != new_cached_binned_entity.cached_bin_key + { + self.entities_that_changed_bins.push(EntityThatChangedBins { + main_entity, + old_cached_binned_entity, + }); + } + + // Mark the entity as valid. + self.valid_cached_entity_bin_keys.grow_and_insert(index); + } + + /// Encodes the GPU commands needed to render all entities in this phase. + pub fn render<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + ) -> Result<(), DrawError> { + { + let draw_functions = world.resource::>(); + let mut draw_functions = draw_functions.write(); + draw_functions.prepare(world); + // Make sure to drop the reader-writer lock here to avoid recursive + // locks. 
+ } + + self.render_batchable_meshes(render_pass, world, view)?; + self.render_unbatchable_meshes(render_pass, world, view)?; + self.render_non_meshes(render_pass, world, view)?; + + Ok(()) + } + + /// Renders all batchable meshes queued in this phase. + fn render_batchable_meshes<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + ) -> Result<(), DrawError> { + let draw_functions = world.resource::>(); + let mut draw_functions = draw_functions.write(); + + let render_device = world.resource::(); + let render_adapter_info = world.resource::(); + let multi_draw_indirect_count_supported = render_device + .features() + .contains(Features::MULTI_DRAW_INDIRECT_COUNT) + // TODO: https://github.com/gfx-rs/wgpu/issues/7974 + && !matches!(render_adapter_info.backend, wgpu::Backend::Dx12); + + match self.batch_sets { + BinnedRenderPhaseBatchSets::DynamicUniforms(ref batch_sets) => { + debug_assert_eq!(self.batchable_meshes.len(), batch_sets.len()); + + for ((batch_set_key, bin_key), batch_set) in + self.batchable_meshes.keys().zip(batch_sets.iter()) + { + for batch in batch_set { + let binned_phase_item = BPI::new( + batch_set_key.clone(), + bin_key.clone(), + batch.representative_entity, + batch.instance_range.clone(), + batch.extra_index.clone(), + ); + + // Fetch the draw function. + let Some(draw_function) = + draw_functions.get_mut(binned_phase_item.draw_function()) + else { + continue; + }; + + draw_function.draw(world, render_pass, view, &binned_phase_item)?; + } + } + } + + BinnedRenderPhaseBatchSets::Direct(ref batch_set) => { + for (batch, (batch_set_key, bin_key)) in + batch_set.iter().zip(self.batchable_meshes.keys()) + { + let binned_phase_item = BPI::new( + batch_set_key.clone(), + bin_key.clone(), + batch.representative_entity, + batch.instance_range.clone(), + batch.extra_index.clone(), + ); + + // Fetch the draw function. 
+ let Some(draw_function) = + draw_functions.get_mut(binned_phase_item.draw_function()) + else { + continue; + }; + + draw_function.draw(world, render_pass, view, &binned_phase_item)?; + } + } + + BinnedRenderPhaseBatchSets::MultidrawIndirect(ref batch_sets) => { + for (batch_set_key, batch_set) in self + .multidrawable_meshes + .keys() + .chain( + self.batchable_meshes + .keys() + .map(|(batch_set_key, _)| batch_set_key), + ) + .zip(batch_sets.iter()) + { + let batch = &batch_set.first_batch; + + let batch_set_index = if multi_draw_indirect_count_supported { + NonMaxU32::new(batch_set.index) + } else { + None + }; + + let binned_phase_item = BPI::new( + batch_set_key.clone(), + batch_set.bin_key.clone(), + batch.representative_entity, + batch.instance_range.clone(), + match batch.extra_index { + PhaseItemExtraIndex::None => PhaseItemExtraIndex::None, + PhaseItemExtraIndex::DynamicOffset(ref dynamic_offset) => { + PhaseItemExtraIndex::DynamicOffset(*dynamic_offset) + } + PhaseItemExtraIndex::IndirectParametersIndex { ref range, .. } => { + PhaseItemExtraIndex::IndirectParametersIndex { + range: range.start..(range.start + batch_set.batch_count), + batch_set_index, + } + } + }, + ); + + // Fetch the draw function. + let Some(draw_function) = + draw_functions.get_mut(binned_phase_item.draw_function()) + else { + continue; + }; + + draw_function.draw(world, render_pass, view, &binned_phase_item)?; + } + } + } + + Ok(()) + } + + /// Renders all unbatchable meshes queued in this phase. 
+ fn render_unbatchable_meshes<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + ) -> Result<(), DrawError> { + let draw_functions = world.resource::>(); + let mut draw_functions = draw_functions.write(); + + for (batch_set_key, bin_key) in self.unbatchable_meshes.keys() { + let unbatchable_entities = + &self.unbatchable_meshes[&(batch_set_key.clone(), bin_key.clone())]; + for (entity_index, entity) in unbatchable_entities.entities.iter().enumerate() { + let unbatchable_dynamic_offset = match &unbatchable_entities.buffer_indices { + UnbatchableBinnedEntityIndexSet::NoEntities => { + // Shouldn't happen… + continue; + } + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index, + } => UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index as u32, + extra_index: match first_indirect_parameters_index { + None => PhaseItemExtraIndex::None, + Some(first_indirect_parameters_index) => { + let first_indirect_parameters_index_for_entity = + u32::from(*first_indirect_parameters_index) + + entity_index as u32; + PhaseItemExtraIndex::IndirectParametersIndex { + range: first_indirect_parameters_index_for_entity + ..(first_indirect_parameters_index_for_entity + 1), + batch_set_index: None, + } + } + }, + }, + UnbatchableBinnedEntityIndexSet::Dense(dynamic_offsets) => { + dynamic_offsets[entity_index].clone() + } + }; + + let binned_phase_item = BPI::new( + batch_set_key.clone(), + bin_key.clone(), + (*entity.1, *entity.0), + unbatchable_dynamic_offset.instance_index + ..(unbatchable_dynamic_offset.instance_index + 1), + unbatchable_dynamic_offset.extra_index, + ); + + // Fetch the draw function. + let Some(draw_function) = draw_functions.get_mut(binned_phase_item.draw_function()) + else { + continue; + }; + + draw_function.draw(world, render_pass, view, &binned_phase_item)?; + } + } + Ok(()) + } + + /// Renders all objects of type [`BinnedRenderPhaseType::NonMesh`]. 
+ /// + /// These will have been added by plugins or the application. + fn render_non_meshes<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + ) -> Result<(), DrawError> { + let draw_functions = world.resource::>(); + let mut draw_functions = draw_functions.write(); + + for ((batch_set_key, bin_key), non_mesh_entities) in &self.non_mesh_items { + for (main_entity, entity) in non_mesh_entities.entities.iter() { + // Come up with a fake batch range and extra index. The draw + // function is expected to manage any sort of batching logic itself. + let binned_phase_item = BPI::new( + batch_set_key.clone(), + bin_key.clone(), + (*entity, *main_entity), + 0..1, + PhaseItemExtraIndex::None, + ); + + let Some(draw_function) = draw_functions.get_mut(binned_phase_item.draw_function()) + else { + continue; + }; + + draw_function.draw(world, render_pass, view, &binned_phase_item)?; + } + } + + Ok(()) + } + + pub fn is_empty(&self) -> bool { + self.multidrawable_meshes.is_empty() + && self.batchable_meshes.is_empty() + && self.unbatchable_meshes.is_empty() + && self.non_mesh_items.is_empty() + } + + pub fn prepare_for_new_frame(&mut self) { + self.batch_sets.clear(); + + self.valid_cached_entity_bin_keys.clear(); + self.valid_cached_entity_bin_keys + .grow(self.cached_entity_bin_keys.len()); + self.valid_cached_entity_bin_keys + .set_range(self.cached_entity_bin_keys.len().., true); + + self.entities_that_changed_bins.clear(); + + for unbatchable_bin in self.unbatchable_meshes.values_mut() { + unbatchable_bin.buffer_indices.clear(); + } + } + + /// Checks to see whether the entity is in a bin and returns true if it's + /// both in a bin and up to date. + /// + /// If this function returns true, we also add the entry to the + /// `valid_cached_entity_bin_keys` list. 
+ pub fn validate_cached_entity( + &mut self, + visible_entity: MainEntity, + current_change_tick: Tick, + ) -> bool { + if let indexmap::map::Entry::Occupied(entry) = + self.cached_entity_bin_keys.entry(visible_entity) + && entry.get().change_tick == current_change_tick + { + self.valid_cached_entity_bin_keys.insert(entry.index()); + return true; + } + + false + } + + /// Removes all entities not marked as clean from the bins. + /// + /// During `queue_material_meshes`, we process all visible entities and mark + /// each as clean as we come to it. Then, in [`sweep_old_entities`], we call + /// this method, which removes entities that aren't marked as clean from the + /// bins. + pub fn sweep_old_entities(&mut self) { + // Search for entities not marked as valid. We have to do this in + // reverse order because `swap_remove_index` will potentially invalidate + // all indices after the one we remove. + for index in ReverseFixedBitSetZeroesIterator::new(&self.valid_cached_entity_bin_keys) { + let Some((entity, cached_binned_entity)) = + self.cached_entity_bin_keys.swap_remove_index(index) + else { + continue; + }; + + if let Some(ref cached_bin_key) = cached_binned_entity.cached_bin_key { + remove_entity_from_bin( + entity, + cached_bin_key, + &mut self.multidrawable_meshes, + &mut self.batchable_meshes, + &mut self.unbatchable_meshes, + &mut self.non_mesh_items, + ); + } + } + + // If an entity changed bins, we need to remove it from its old bin. + for entity_that_changed_bins in self.entities_that_changed_bins.drain(..) { + let Some(ref old_cached_bin_key) = entity_that_changed_bins + .old_cached_binned_entity + .cached_bin_key + else { + continue; + }; + remove_entity_from_bin( + entity_that_changed_bins.main_entity, + old_cached_bin_key, + &mut self.multidrawable_meshes, + &mut self.batchable_meshes, + &mut self.unbatchable_meshes, + &mut self.non_mesh_items, + ); + } + } +} + +/// Removes an entity from a bin. 
+/// +/// If this makes the bin empty, this function removes the bin as well. +/// +/// This is a standalone function instead of a method on [`BinnedRenderPhase`] +/// for borrow check reasons. +fn remove_entity_from_bin( + entity: MainEntity, + entity_bin_key: &CachedBinKey, + multidrawable_meshes: &mut IndexMap>, + batchable_meshes: &mut IndexMap<(BPI::BatchSetKey, BPI::BinKey), RenderBin>, + unbatchable_meshes: &mut IndexMap<(BPI::BatchSetKey, BPI::BinKey), UnbatchableBinnedEntities>, + non_mesh_items: &mut IndexMap<(BPI::BatchSetKey, BPI::BinKey), NonMeshEntities>, +) where + BPI: BinnedPhaseItem, +{ + match entity_bin_key.phase_type { + BinnedRenderPhaseType::MultidrawableMesh => { + if let indexmap::map::Entry::Occupied(mut batch_set_entry) = + multidrawable_meshes.entry(entity_bin_key.batch_set_key.clone()) + { + if let indexmap::map::Entry::Occupied(mut bin_entry) = batch_set_entry + .get_mut() + .entry(entity_bin_key.bin_key.clone()) + { + bin_entry.get_mut().remove(entity); + + // If the bin is now empty, remove the bin. + if bin_entry.get_mut().is_empty() { + bin_entry.swap_remove(); + } + } + + // If the batch set is now empty, remove it. This will perturb + // the order, but that's OK because we're going to sort the bin + // afterwards. + if batch_set_entry.get_mut().is_empty() { + batch_set_entry.swap_remove(); + } + } + } + + BinnedRenderPhaseType::BatchableMesh => { + if let indexmap::map::Entry::Occupied(mut bin_entry) = batchable_meshes.entry(( + entity_bin_key.batch_set_key.clone(), + entity_bin_key.bin_key.clone(), + )) { + bin_entry.get_mut().remove(entity); + + // If the bin is now empty, remove the bin. 
+ if bin_entry.get_mut().is_empty() { + bin_entry.swap_remove(); + } + } + } + + BinnedRenderPhaseType::UnbatchableMesh => { + if let indexmap::map::Entry::Occupied(mut bin_entry) = unbatchable_meshes.entry(( + entity_bin_key.batch_set_key.clone(), + entity_bin_key.bin_key.clone(), + )) { + bin_entry.get_mut().entities.remove(&entity); + + // If the bin is now empty, remove the bin. + if bin_entry.get_mut().entities.is_empty() { + bin_entry.swap_remove(); + } + } + } + + BinnedRenderPhaseType::NonMesh => { + if let indexmap::map::Entry::Occupied(mut bin_entry) = non_mesh_items.entry(( + entity_bin_key.batch_set_key.clone(), + entity_bin_key.bin_key.clone(), + )) { + bin_entry.get_mut().entities.remove(&entity); + + // If the bin is now empty, remove the bin. + if bin_entry.get_mut().entities.is_empty() { + bin_entry.swap_remove(); + } + } + } + } +} + +impl BinnedRenderPhase +where + BPI: BinnedPhaseItem, +{ + fn new(gpu_preprocessing: GpuPreprocessingMode) -> Self { + Self { + multidrawable_meshes: IndexMap::default(), + batchable_meshes: IndexMap::default(), + unbatchable_meshes: IndexMap::default(), + non_mesh_items: IndexMap::default(), + batch_sets: match gpu_preprocessing { + GpuPreprocessingMode::Culling => { + BinnedRenderPhaseBatchSets::MultidrawIndirect(vec![]) + } + GpuPreprocessingMode::PreprocessingOnly => { + BinnedRenderPhaseBatchSets::Direct(vec![]) + } + GpuPreprocessingMode::None => BinnedRenderPhaseBatchSets::DynamicUniforms(vec![]), + }, + cached_entity_bin_keys: IndexMap::default(), + valid_cached_entity_bin_keys: FixedBitSet::new(), + entities_that_changed_bins: vec![], + gpu_preprocessing_mode: gpu_preprocessing, + } + } +} + +impl UnbatchableBinnedEntityIndexSet { + /// Returns the [`UnbatchableBinnedEntityIndices`] for the given entity. 
+ fn indices_for_entity_index( + &self, + entity_index: u32, + ) -> Option { + match self { + UnbatchableBinnedEntityIndexSet::NoEntities => None, + UnbatchableBinnedEntityIndexSet::Sparse { instance_range, .. } + if entity_index >= instance_range.len() as u32 => + { + None + } + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index: None, + } => Some(UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index, + extra_index: PhaseItemExtraIndex::None, + }), + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index: Some(first_indirect_parameters_index), + } => { + let first_indirect_parameters_index_for_this_batch = + u32::from(*first_indirect_parameters_index) + entity_index; + Some(UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index, + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: first_indirect_parameters_index_for_this_batch + ..(first_indirect_parameters_index_for_this_batch + 1), + batch_set_index: None, + }, + }) + } + UnbatchableBinnedEntityIndexSet::Dense(indices) => { + indices.get(entity_index as usize).cloned() + } + } + } +} + +/// A convenient abstraction for adding all the systems necessary for a binned +/// render phase to the render app. +/// +/// This is the version used when the pipeline supports GPU preprocessing: e.g. +/// 3D PBR meshes. +pub struct BinnedRenderPhasePlugin +where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. 
+ pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(BPI, GFBD)>, +} + +impl BinnedRenderPhasePlugin +where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData, +{ + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } + } +} + +impl Plugin for BinnedRenderPhasePlugin +where + BPI: BinnedPhaseItem, + GFBD: GetFullBatchData + Sync + Send + 'static, +{ + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) + .add_systems( + Render, + ( + batching::sort_binned_render_phase::.in_set(RenderSystems::PhaseSort), + ( + no_gpu_preprocessing::batch_and_prepare_binned_render_phase:: + .run_if(resource_exists::>), + gpu_preprocessing::batch_and_prepare_binned_render_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ), + ) + .in_set(RenderSystems::PrepareResources), + sweep_old_entities::.in_set(RenderSystems::QueueSweep), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSystems::PrepareResourcesCollectPhaseBuffers), + ), + ); + } +} + +/// Stores the rendering instructions for a single phase that sorts items in all +/// views. +/// +/// They're cleared out every frame, but storing them in a resource like this +/// allows us to reuse allocations. 
+#[derive(Resource, Deref, DerefMut)] +pub struct ViewSortedRenderPhases(pub HashMap>) +where + SPI: SortedPhaseItem; + +impl Default for ViewSortedRenderPhases +where + SPI: SortedPhaseItem, +{ + fn default() -> Self { + Self(default()) + } +} + +impl ViewSortedRenderPhases +where + SPI: SortedPhaseItem, +{ + pub fn insert_or_clear(&mut self, retained_view_entity: RetainedViewEntity) { + match self.entry(retained_view_entity) { + Entry::Occupied(mut entry) => entry.get_mut().clear(), + Entry::Vacant(entry) => { + entry.insert(default()); + } + } + } +} + +/// A convenient abstraction for adding all the systems necessary for a sorted +/// render phase to the render app. +/// +/// This is the version used when the pipeline supports GPU preprocessing: e.g. +/// 3D PBR meshes. +pub struct SortedRenderPhasePlugin +where + SPI: SortedPhaseItem, + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(SPI, GFBD)>, +} + +impl SortedRenderPhasePlugin +where + SPI: SortedPhaseItem, + GFBD: GetFullBatchData, +{ + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } + } +} + +impl Plugin for SortedRenderPhasePlugin +where + SPI: SortedPhaseItem + CachedRenderPipelinePhaseItem, + GFBD: GetFullBatchData + Sync + Send + 'static, +{ + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) + .add_systems( + Render, + ( + ( + no_gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if(resource_exists::>), + gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ), + ) + 
.in_set(RenderSystems::PrepareResources), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSystems::PrepareResourcesCollectPhaseBuffers), + ), + ); + } +} + +impl UnbatchableBinnedEntityIndexSet { + /// Adds a new entity to the list of unbatchable binned entities. + pub fn add(&mut self, indices: UnbatchableBinnedEntityIndices) { + match self { + UnbatchableBinnedEntityIndexSet::NoEntities => { + match indices.extra_index { + PhaseItemExtraIndex::DynamicOffset(_) => { + // This is the first entity we've seen, and we don't have + // compute shaders. Initialize an array. + *self = UnbatchableBinnedEntityIndexSet::Dense(vec![indices]); + } + PhaseItemExtraIndex::None => { + // This is the first entity we've seen, and we have compute + // shaders. Initialize the fast path. + *self = UnbatchableBinnedEntityIndexSet::Sparse { + instance_range: indices.instance_index..indices.instance_index + 1, + first_indirect_parameters_index: None, + } + } + PhaseItemExtraIndex::IndirectParametersIndex { + range: ref indirect_parameters_index, + .. + } => { + // This is the first entity we've seen, and we have compute + // shaders. Initialize the fast path. + *self = UnbatchableBinnedEntityIndexSet::Sparse { + instance_range: indices.instance_index..indices.instance_index + 1, + first_indirect_parameters_index: NonMaxU32::new( + indirect_parameters_index.start, + ), + } + } + } + } + + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index, + } if instance_range.end == indices.instance_index + && ((first_indirect_parameters_index.is_none() + && indices.extra_index == PhaseItemExtraIndex::None) + || first_indirect_parameters_index.is_some_and( + |first_indirect_parameters_index| match indices.extra_index { + PhaseItemExtraIndex::IndirectParametersIndex { + range: ref this_range, + .. 
+ } => { + u32::from(first_indirect_parameters_index) + instance_range.end + - instance_range.start + == this_range.start + } + PhaseItemExtraIndex::DynamicOffset(_) | PhaseItemExtraIndex::None => { + false + } + }, + )) => + { + // This is the normal case on non-WebGL 2. + instance_range.end += 1; + } + + UnbatchableBinnedEntityIndexSet::Sparse { instance_range, .. } => { + // We thought we were in non-WebGL 2 mode, but we got a dynamic + // offset or non-contiguous index anyway. This shouldn't happen, + // but let's go ahead and do the sensible thing anyhow: demote + // the compressed `NoDynamicOffsets` field to the full + // `DynamicOffsets` array. + warn!( + "Unbatchable binned entity index set was demoted from sparse to dense. \ + This is a bug in the renderer. Please report it.", + ); + let new_dynamic_offsets = (0..instance_range.len() as u32) + .flat_map(|entity_index| self.indices_for_entity_index(entity_index)) + .chain(iter::once(indices)) + .collect(); + *self = UnbatchableBinnedEntityIndexSet::Dense(new_dynamic_offsets); + } + + UnbatchableBinnedEntityIndexSet::Dense(dense_indices) => { + dense_indices.push(indices); + } + } + } + + /// Clears the unbatchable binned entity index set. + fn clear(&mut self) { + match self { + UnbatchableBinnedEntityIndexSet::Dense(dense_indices) => dense_indices.clear(), + UnbatchableBinnedEntityIndexSet::Sparse { .. } => { + *self = UnbatchableBinnedEntityIndexSet::NoEntities; + } + _ => {} + } + } +} + +/// A collection of all items to be rendered that will be encoded to GPU +/// commands for a single render phase for a single view. +/// +/// Each view (camera, or shadow-casting light, etc.) can have one or multiple render phases. +/// They are used to queue entities for rendering. +/// Multiple phases might be required due to different sorting/batching behaviors +/// (e.g. opaque: front to back, transparent: back to front) or because one phase depends on +/// the rendered texture of the previous phase (e.g. 
for screen-space reflections). +/// All [`PhaseItem`]s are then rendered using a single [`TrackedRenderPass`]. +/// The render pass might be reused for multiple phases to reduce GPU overhead. +/// +/// This flavor of render phase is used only for meshes that need to be sorted +/// back-to-front, such as transparent meshes. For items that don't need strict +/// sorting, [`BinnedRenderPhase`] is preferred, for performance. +pub struct SortedRenderPhase +where + I: SortedPhaseItem, +{ + /// The items within this [`SortedRenderPhase`]. + pub items: Vec, +} + +impl Default for SortedRenderPhase +where + I: SortedPhaseItem, +{ + fn default() -> Self { + Self { items: Vec::new() } + } +} + +impl SortedRenderPhase +where + I: SortedPhaseItem, +{ + /// Adds a [`PhaseItem`] to this render phase. + #[inline] + pub fn add(&mut self, item: I) { + self.items.push(item); + } + + /// Removes all [`PhaseItem`]s from this render phase. + #[inline] + pub fn clear(&mut self) { + self.items.clear(); + } + + /// Sorts all of its [`PhaseItem`]s. + pub fn sort(&mut self) { + I::sort(&mut self.items); + } + + /// An [`Iterator`] through the associated [`Entity`] for each [`PhaseItem`] in order. + #[inline] + pub fn iter_entities(&'_ self) -> impl Iterator + '_ { + self.items.iter().map(PhaseItem::entity) + } + + /// Renders all of its [`PhaseItem`]s using their corresponding draw functions. + pub fn render<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + ) -> Result<(), DrawError> { + self.render_range(render_pass, world, view, ..) + } + + /// Renders all [`PhaseItem`]s in the provided `range` (based on their index in `self.items`) using their corresponding draw functions. 
+ pub fn render_range<'w>( + &self, + render_pass: &mut TrackedRenderPass<'w>, + world: &'w World, + view: Entity, + range: impl SliceIndex<[I], Output = [I]>, + ) -> Result<(), DrawError> { + let items = self + .items + .get(range) + .expect("`Range` provided to `render_range()` is out of bounds"); + + let draw_functions = world.resource::>(); + let mut draw_functions = draw_functions.write(); + draw_functions.prepare(world); + + let mut index = 0; + while index < items.len() { + let item = &items[index]; + let batch_range = item.batch_range(); + if batch_range.is_empty() { + index += 1; + } else { + let draw_function = draw_functions.get_mut(item.draw_function()).unwrap(); + draw_function.draw(world, render_pass, view, item)?; + index += batch_range.len(); + } + } + Ok(()) + } +} + +/// An item (entity of the render world) which will be drawn to a texture or the screen, +/// as part of a render phase. +/// +/// The data required for rendering an entity is extracted from the main world in the +/// [`ExtractSchedule`](crate::ExtractSchedule). +/// Then it has to be queued up for rendering during the [`RenderSystems::Queue`], +/// by adding a corresponding phase item to a render phase. +/// Afterwards it will be possibly sorted and rendered automatically in the +/// [`RenderSystems::PhaseSort`] and [`RenderSystems::Render`], respectively. +/// +/// `PhaseItem`s come in two flavors: [`BinnedPhaseItem`]s and +/// [`SortedPhaseItem`]s. +/// +/// * Binned phase items have a `BinKey` which specifies what bin they're to be +/// placed in. All items in the same bin are eligible to be batched together. +/// The `BinKey`s are sorted, but the individual bin items aren't. Binned phase +/// items are good for opaque meshes, in which the order of rendering isn't +/// important. Generally, binned phase items are faster than sorted phase items. +/// +/// * Sorted phase items, on the other hand, are placed into one large buffer +/// and then sorted all at once. 
This is needed for transparent meshes, which +/// have to be sorted back-to-front to render with the painter's algorithm. +/// These types of phase items are generally slower than binned phase items. +pub trait PhaseItem: Sized + Send + Sync + 'static { + /// Whether or not this `PhaseItem` should be subjected to automatic batching. (Default: `true`) + const AUTOMATIC_BATCHING: bool = true; + + /// The corresponding entity that will be drawn. + /// + /// This is used to fetch the render data of the entity, required by the draw function, + /// from the render world . + fn entity(&self) -> Entity; + + /// The main world entity represented by this `PhaseItem`. + fn main_entity(&self) -> MainEntity; + + /// Specifies the [`Draw`] function used to render the item. + fn draw_function(&self) -> DrawFunctionId; + + /// The range of instances that the batch covers. After doing a batched draw, batch range + /// length phase items will be skipped. This design is to avoid having to restructure the + /// render phase unnecessarily. + fn batch_range(&self) -> &Range; + fn batch_range_mut(&mut self) -> &mut Range; + + /// Returns the [`PhaseItemExtraIndex`]. + /// + /// If present, this is either a dynamic offset or an indirect parameters + /// index. + fn extra_index(&self) -> PhaseItemExtraIndex; + + /// Returns a pair of mutable references to both the batch range and extra + /// index. + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex); +} + +/// The "extra index" associated with some [`PhaseItem`]s, alongside the +/// indirect instance index. +/// +/// Sometimes phase items require another index in addition to the range of +/// instances they already have. These can be: +/// +/// * The *dynamic offset*: a `wgpu` dynamic offset into the uniform buffer of +/// instance data. This is used on platforms that don't support storage +/// buffers, to work around uniform buffer size limitations. 
+/// +/// * The *indirect parameters index*: an index into the buffer that specifies +/// the indirect parameters for this [`PhaseItem`]'s drawcall. This is used when +/// indirect mode is on (as used for GPU culling). +/// +/// Note that our indirect draw functionality requires storage buffers, so it's +/// impossible to have both a dynamic offset and an indirect parameters index. +/// This convenient fact allows us to pack both indices into a single `u32`. +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub enum PhaseItemExtraIndex { + /// No extra index is present. + None, + /// A `wgpu` dynamic offset into the uniform buffer of instance data. This + /// is used on platforms that don't support storage buffers, to work around + /// uniform buffer size limitations. + DynamicOffset(u32), + /// An index into the buffer that specifies the indirect parameters for this + /// [`PhaseItem`]'s drawcall. This is used when indirect mode is on (as used + /// for GPU culling). + IndirectParametersIndex { + /// The range of indirect parameters within the indirect parameters array. + /// + /// If we're using `multi_draw_indirect_count`, this specifies the + /// maximum range of indirect parameters within that array. If batches + /// are ultimately culled out on the GPU, the actual number of draw + /// commands might be lower than the length of this range. + range: Range, + /// If `multi_draw_indirect_count` is in use, and this phase item is + /// part of a batch set, specifies the index of the batch set that this + /// phase item is a part of. + /// + /// If `multi_draw_indirect_count` isn't in use, or this phase item + /// isn't part of a batch set, this is `None`. + batch_set_index: Option, + }, +} + +impl PhaseItemExtraIndex { + /// Returns either an indirect parameters index or + /// [`PhaseItemExtraIndex::None`], as appropriate. 
+ pub fn maybe_indirect_parameters_index( + indirect_parameters_index: Option, + ) -> PhaseItemExtraIndex { + match indirect_parameters_index { + Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex { + range: u32::from(indirect_parameters_index) + ..(u32::from(indirect_parameters_index) + 1), + batch_set_index: None, + }, + None => PhaseItemExtraIndex::None, + } + } + + /// Returns either a dynamic offset index or [`PhaseItemExtraIndex::None`], + /// as appropriate. + pub fn maybe_dynamic_offset(dynamic_offset: Option) -> PhaseItemExtraIndex { + match dynamic_offset { + Some(dynamic_offset) => PhaseItemExtraIndex::DynamicOffset(dynamic_offset.into()), + None => PhaseItemExtraIndex::None, + } + } +} + +/// Represents phase items that are placed into bins. The `BinKey` specifies +/// which bin they're to be placed in. Bin keys are sorted, and items within the +/// same bin are eligible to be batched together. The elements within the bins +/// aren't themselves sorted. +/// +/// An example of a binned phase item is `Opaque3d`, for which the rendering +/// order isn't critical. +pub trait BinnedPhaseItem: PhaseItem { + /// The key used for binning [`PhaseItem`]s into bins. Order the members of + /// [`BinnedPhaseItem::BinKey`] by the order of binding for best + /// performance. For example, pipeline id, draw function id, mesh asset id, + /// lowest variable bind group id such as the material bind group id, and + /// its dynamic offsets if any, next bind group and offsets, etc. This + /// reduces the need for rebinding between bins and improves performance. + type BinKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash; + + /// The key used to combine batches into batch sets. + /// + /// A *batch set* is a set of meshes that can potentially be multi-drawn + /// together. + type BatchSetKey: PhaseItemBatchSetKey; + + /// Creates a new binned phase item from the key and per-entity data. 
+ /// + /// Unlike [`SortedPhaseItem`]s, this is generally called "just in time" + /// before rendering. The resulting phase item isn't stored in any data + /// structures, resulting in significant memory savings. + fn new( + batch_set_key: Self::BatchSetKey, + bin_key: Self::BinKey, + representative_entity: (Entity, MainEntity), + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self; +} + +/// A key used to combine batches into batch sets. +/// +/// A *batch set* is a set of meshes that can potentially be multi-drawn +/// together. +pub trait PhaseItemBatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash { + /// Returns true if this batch set key describes indexed meshes or false if + /// it describes non-indexed meshes. + /// + /// Bevy uses this in order to determine which kind of indirect draw + /// parameters to use, if indirect drawing is enabled. + fn indexed(&self) -> bool; +} + +/// Represents phase items that must be sorted. The `SortKey` specifies the +/// order that these items are drawn in. These are placed into a single array, +/// and the array as a whole is then sorted. +/// +/// An example of a sorted phase item is `Transparent3d`, which must be sorted +/// back to front in order to correctly render with the painter's algorithm. +pub trait SortedPhaseItem: PhaseItem { + /// The type used for ordering the items. The smallest values are drawn first. + /// This order can be calculated using the [`ViewRangefinder3d`], + /// based on the view-space `Z` value of the corresponding view matrix. + type SortKey: Ord; + + /// Determines the order in which the items are drawn. + fn sort_key(&self) -> Self::SortKey; + + /// Sorts a slice of phase items into render order. Generally if the same type + /// is batched this should use a stable sort like [`slice::sort_by_key`]. 
+ /// In almost all other cases, this should not be altered from the default, + /// which uses an unstable sort, as this provides the best balance of CPU and GPU + /// performance. + /// + /// Implementers can optionally not sort the list at all. This is generally advisable if and + /// only if the renderer supports a depth prepass, which is by default not supported by + /// the rest of Bevy's first party rendering crates. Even then, this may have a negative + /// impact on GPU-side performance due to overdraw. + /// + /// It's advised to always profile for performance changes when changing this implementation. + #[inline] + fn sort(items: &mut [Self]) { + items.sort_unstable_by_key(Self::sort_key); + } + + /// Whether this phase item targets indexed meshes (those with both vertex + /// and index buffers as opposed to just vertex buffers). + /// + /// Bevy needs this information in order to properly group phase items + /// together for multi-draw indirect, because the GPU layout of indirect + /// commands differs between indexed and non-indexed meshes. + /// + /// If you're implementing a custom phase item that doesn't describe a mesh, + /// you can safely return false here. + fn indexed(&self) -> bool; +} + +/// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline, +/// cached in the [`PipelineCache`]. +/// +/// You can use the [`SetItemPipeline`] render command to set the pipeline for this item. +pub trait CachedRenderPipelinePhaseItem: PhaseItem { + /// The id of the render pipeline, cached in the [`PipelineCache`], that will be used to draw + /// this phase item. + fn cached_pipeline(&self) -> CachedRenderPipelineId; +} + +/// A [`RenderCommand`] that sets the pipeline for the [`CachedRenderPipelinePhaseItem`]. +pub struct SetItemPipeline; + +impl RenderCommand

for SetItemPipeline { + type Param = SRes; + type ViewQuery = (); + type ItemQuery = (); + #[inline] + fn render<'w>( + item: &P, + _view: (), + _entity: Option<()>, + pipeline_cache: SystemParamItem<'w, '_, Self::Param>, + pass: &mut TrackedRenderPass<'w>, + ) -> RenderCommandResult { + if let Some(pipeline) = pipeline_cache + .into_inner() + .get_render_pipeline(item.cached_pipeline()) + { + pass.set_render_pipeline(pipeline); + RenderCommandResult::Success + } else { + RenderCommandResult::Skip + } + } +} + +/// This system sorts the [`PhaseItem`]s of all [`SortedRenderPhase`]s of this +/// type. +pub fn sort_phase_system(mut render_phases: ResMut>) +where + I: SortedPhaseItem, +{ + for phase in render_phases.values_mut() { + phase.sort(); + } +} + +/// Removes entities that became invisible or changed phases from the bins. +/// +/// This must run after queuing. +pub fn sweep_old_entities(mut render_phases: ResMut>) +where + BPI: BinnedPhaseItem, +{ + for phase in render_phases.0.values_mut() { + phase.sweep_old_entities(); + } +} + +impl BinnedRenderPhaseType { + pub fn mesh( + batchable: bool, + gpu_preprocessing_support: &GpuPreprocessingSupport, + ) -> BinnedRenderPhaseType { + match (batchable, gpu_preprocessing_support.max_supported_mode) { + (true, GpuPreprocessingMode::Culling) => BinnedRenderPhaseType::MultidrawableMesh, + (true, _) => BinnedRenderPhaseType::BatchableMesh, + (false, _) => BinnedRenderPhaseType::UnbatchableMesh, + } + } +} + +impl RenderBin { + /// Creates a [`RenderBin`] containing a single entity. + fn from_entity(entity: MainEntity, uniform_index: InputUniformIndex) -> RenderBin { + let mut entities = IndexMap::default(); + entities.insert(entity, uniform_index); + RenderBin { entities } + } + + /// Inserts an entity into the bin. + fn insert(&mut self, entity: MainEntity, uniform_index: InputUniformIndex) { + self.entities.insert(entity, uniform_index); + } + + /// Removes an entity from the bin. 
+ fn remove(&mut self, entity_to_remove: MainEntity) { + self.entities.swap_remove(&entity_to_remove); + } + + /// Returns true if the bin contains no entities. + fn is_empty(&self) -> bool { + self.entities.is_empty() + } + + /// Returns the [`IndexMap`] containing all the entities in the bin, along + /// with the cached [`InputUniformIndex`] of each. + #[inline] + pub fn entities(&self) -> &IndexMap { + &self.entities + } +} + +/// An iterator that efficiently finds the indices of all zero bits in a +/// [`FixedBitSet`] and returns them in reverse order. +/// +/// [`FixedBitSet`] doesn't natively offer this functionality, so we have to +/// implement it ourselves. +#[derive(Debug)] +struct ReverseFixedBitSetZeroesIterator<'a> { + /// The bit set. + bitset: &'a FixedBitSet, + /// The next bit index we're going to scan when [`Iterator::next`] is + /// called. + bit_index: isize, +} + +impl<'a> ReverseFixedBitSetZeroesIterator<'a> { + fn new(bitset: &'a FixedBitSet) -> ReverseFixedBitSetZeroesIterator<'a> { + ReverseFixedBitSetZeroesIterator { + bitset, + bit_index: (bitset.len() as isize) - 1, + } + } +} + +impl<'a> Iterator for ReverseFixedBitSetZeroesIterator<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + while self.bit_index >= 0 { + // Unpack the bit index into block and bit. + let block_index = self.bit_index / (Block::BITS as isize); + let bit_pos = self.bit_index % (Block::BITS as isize); + + // Grab the block. Mask off all bits above the one we're scanning + // from by setting them all to 1. + let mut block = self.bitset.as_slice()[block_index as usize]; + if bit_pos + 1 < (Block::BITS as isize) { + block |= (!0) << (bit_pos + 1); + } + + // Search for the next unset bit. Note that the `leading_ones` + // function counts from the MSB to the LSB, so we need to flip it to + // get the bit number. + let pos = (Block::BITS as isize) - (block.leading_ones() as isize) - 1; + + // If we found an unset bit, return it. 
+ if pos != -1 { + let result = block_index * (Block::BITS as isize) + pos; + self.bit_index = result - 1; + return Some(result as usize); + } + + // Otherwise, go to the previous block. + self.bit_index = block_index * (Block::BITS as isize) - 1; + } + + None + } +} + +#[cfg(test)] +mod test { + use super::ReverseFixedBitSetZeroesIterator; + use fixedbitset::FixedBitSet; + use proptest::{collection::vec, prop_assert_eq, proptest}; + + proptest! { + #[test] + fn reverse_fixed_bit_set_zeroes_iterator( + bits in vec(0usize..1024usize, 0usize..1024usize), + size in 0usize..1024usize, + ) { + // Build a random bit set. + let mut bitset = FixedBitSet::new(); + bitset.grow(size); + for bit in bits { + if bit < size { + bitset.set(bit, true); + } + } + + // Iterate over the bit set backwards in a naive way, and check that + // that iteration sequence corresponds to the optimized one. + let mut iter = ReverseFixedBitSetZeroesIterator::new(&bitset); + for bit_index in (0..size).rev() { + if !bitset.contains(bit_index) { + prop_assert_eq!(iter.next(), Some(bit_index)); + } + } + + prop_assert_eq!(iter.next(), None); + } + } +} diff --git a/crates/libmarathon/src/render/render_phase/rangefinder.rs b/crates/libmarathon/src/render/render_phase/rangefinder.rs new file mode 100644 index 0000000..0a93651 --- /dev/null +++ b/crates/libmarathon/src/render/render_phase/rangefinder.rs @@ -0,0 +1,50 @@ +use bevy_math::{Affine3A, Mat4, Vec3, Vec4}; + +/// A distance calculator for the draw order of [`PhaseItem`](crate::render_phase::PhaseItem)s. +pub struct ViewRangefinder3d { + view_from_world_row_2: Vec4, +} + +impl ViewRangefinder3d { + /// Creates a 3D rangefinder for a view matrix. 
+ pub fn from_world_from_view(world_from_view: &Affine3A) -> ViewRangefinder3d { + let view_from_world = world_from_view.inverse(); + + ViewRangefinder3d { + view_from_world_row_2: Mat4::from(view_from_world).row(2), + } + } + + /// Calculates the distance, or view-space `Z` value, for the given `translation`. + #[inline] + pub fn distance_translation(&self, translation: &Vec3) -> f32 { + // NOTE: row 2 of the inverse view matrix dotted with the translation from the model matrix + // gives the z component of translation of the mesh in view-space + self.view_from_world_row_2.dot(translation.extend(1.0)) + } + + /// Calculates the distance, or view-space `Z` value, for the given `transform`. + #[inline] + pub fn distance(&self, transform: &Mat4) -> f32 { + // NOTE: row 2 of the inverse view matrix dotted with column 3 of the model matrix + // gives the z component of translation of the mesh in view-space + self.view_from_world_row_2.dot(transform.col(3)) + } +} + +#[cfg(test)] +mod tests { + use super::ViewRangefinder3d; + use bevy_math::{Affine3A, Mat4, Vec3}; + + #[test] + fn distance() { + let view_matrix = Affine3A::from_translation(Vec3::new(0.0, 0.0, -1.0)); + let rangefinder = ViewRangefinder3d::from_world_from_view(&view_matrix); + assert_eq!(rangefinder.distance(&Mat4::IDENTITY), 1.0); + assert_eq!( + rangefinder.distance(&Mat4::from_translation(Vec3::new(0.0, 0.0, 1.0))), + 2.0 + ); + } +} diff --git a/crates/libmarathon/src/render/render_resource/batched_uniform_buffer.rs b/crates/libmarathon/src/render/render_resource/batched_uniform_buffer.rs new file mode 100644 index 0000000..a644a8b --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/batched_uniform_buffer.rs @@ -0,0 +1,157 @@ +use super::{GpuArrayBufferIndex, GpuArrayBufferable}; +use crate::render::{ + render_resource::DynamicUniformBuffer, + renderer::{RenderDevice, RenderQueue}, +}; +use core::{marker::PhantomData, num::NonZero}; +use encase::{ + private::{ArrayMetadata, BufferMut, 
Metadata, RuntimeSizedArray, WriteInto, Writer}, + ShaderType, +}; +use nonmax::NonMaxU32; +use wgpu::{BindingResource, Limits}; + +// 1MB else we will make really large arrays on macOS which reports very large +// `max_uniform_buffer_binding_size`. On macOS this ends up being the minimum +// size of the uniform buffer as well as the size of each chunk of data at a +// dynamic offset. +#[cfg(any( + not(feature = "webgl"), + not(target_arch = "wasm32"), + feature = "webgpu" +))] +const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 20; + +// WebGL2 quirk: using uniform buffers larger than 4KB will cause extremely +// long shader compilation times, so the limit needs to be lower on WebGL2. +// This is due to older shader compilers/GPUs that don't support dynamically +// indexing uniform buffers, and instead emulate it with large switch statements +// over buffer indices that take a long time to compile. +#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] +const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 12; + +/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size) +/// are grouped into a batch as an `array` in WGSL. +/// +/// This reduces the number of rebindings required due to having to pass dynamic +/// offsets to bind group commands, and if indices into the array can be passed +/// in via other means, it enables batching of draw commands. +pub struct BatchedUniformBuffer { + // Batches of fixed-size arrays of T are written to this buffer so that + // each batch in a fixed-size array can be bound at a dynamic offset. + uniforms: DynamicUniformBuffer>>, + // A batch of T are gathered into this `MaxCapacityArray` until it is full, + // then it is written into the `DynamicUniformBuffer`, cleared, and new T + // are gathered here, and so on for each batch. 
+ temp: MaxCapacityArray>, + current_offset: u32, + dynamic_offset_alignment: u32, +} + +impl BatchedUniformBuffer { + pub fn batch_size(limits: &Limits) -> usize { + (limits + .max_uniform_buffer_binding_size + .min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE) as u64 + / T::min_size().get()) as usize + } + + pub fn new(limits: &Limits) -> Self { + let capacity = Self::batch_size(limits); + let alignment = limits.min_uniform_buffer_offset_alignment; + + Self { + uniforms: DynamicUniformBuffer::new_with_alignment(alignment as u64), + temp: MaxCapacityArray(Vec::with_capacity(capacity), capacity), + current_offset: 0, + dynamic_offset_alignment: alignment, + } + } + + #[inline] + pub fn size(&self) -> NonZero { + self.temp.size() + } + + pub fn clear(&mut self) { + self.uniforms.clear(); + self.current_offset = 0; + self.temp.0.clear(); + } + + pub fn push(&mut self, component: T) -> GpuArrayBufferIndex { + let result = GpuArrayBufferIndex { + index: self.temp.0.len() as u32, + dynamic_offset: NonMaxU32::new(self.current_offset), + element_type: PhantomData, + }; + self.temp.0.push(component); + if self.temp.0.len() == self.temp.1 { + self.flush(); + } + result + } + + pub fn flush(&mut self) { + self.uniforms.push(&self.temp); + + self.current_offset += + align_to_next(self.temp.size().get(), self.dynamic_offset_alignment as u64) as u32; + + self.temp.0.clear(); + } + + pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + if !self.temp.0.is_empty() { + self.flush(); + } + self.uniforms.write_buffer(device, queue); + } + + #[inline] + pub fn binding(&self) -> Option> { + let mut binding = self.uniforms.binding(); + if let Some(BindingResource::Buffer(binding)) = &mut binding { + // MaxCapacityArray is runtime-sized so can't use T::min_size() + binding.size = Some(self.size()); + } + binding + } +} + +#[inline] +fn align_to_next(value: u64, alignment: u64) -> u64 { + debug_assert!(alignment.is_power_of_two()); + ((value - 1) | (alignment - 1)) 
+ 1 +} + +// ---------------------------------------------------------------------------- +// MaxCapacityArray was implemented by Teodor Tanasoaia for encase. It was +// copied here as it was not yet included in an encase release and it is +// unclear if it is the correct long-term solution for encase. + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] +struct MaxCapacityArray(T, usize); + +impl ShaderType for MaxCapacityArray +where + T: ShaderType, +{ + type ExtraMetadata = ArrayMetadata; + + const METADATA: Metadata = T::METADATA; + + fn size(&self) -> NonZero { + Self::METADATA.stride().mul(self.1.max(1) as u64).0 + } +} + +impl WriteInto for MaxCapacityArray +where + T: WriteInto + RuntimeSizedArray, +{ + fn write_into(&self, writer: &mut Writer) { + debug_assert!(self.0.len() <= self.1); + self.0.write_into(writer); + } +} diff --git a/crates/libmarathon/src/render/render_resource/bind_group.rs b/crates/libmarathon/src/render/render_resource/bind_group.rs new file mode 100644 index 0000000..c7b6f7e --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/bind_group.rs @@ -0,0 +1,725 @@ +use crate::render::{ + define_atomic_id, + render_asset::RenderAssets, + render_resource::{BindGroupLayout, Buffer, Sampler, TextureView}, + renderer::{RenderDevice, WgpuWrapper}, + texture::GpuImage, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::system::{SystemParam, SystemParamItem}; +pub use macros::AsBindGroup; +use core::ops::Deref; +use encase::ShaderType; +use thiserror::Error; +use wgpu::{ + BindGroupEntry, BindGroupLayoutEntry, BindingResource, SamplerBindingType, TextureViewDimension, +}; + +use super::{BindlessDescriptor, BindlessSlabResourceLimit}; + +define_atomic_id!(BindGroupId); + +/// Bind groups are responsible for binding render resources (e.g. buffers, textures, samplers) +/// to a [`TrackedRenderPass`](crate::render_phase::TrackedRenderPass). +/// This makes them accessible in the pipeline (shaders) as uniforms. 
+/// +/// This is a lightweight thread-safe wrapper around wgpu's own [`BindGroup`](wgpu::BindGroup), +/// which can be cloned as needed to workaround lifetime management issues. It may be converted +/// from and dereferences to wgpu's [`BindGroup`](wgpu::BindGroup). +/// +/// Can be created via [`RenderDevice::create_bind_group`](RenderDevice::create_bind_group). +#[derive(Clone, Debug)] +pub struct BindGroup { + id: BindGroupId, + value: WgpuWrapper, +} + +impl BindGroup { + /// Returns the [`BindGroupId`] representing the unique ID of the bind group. + #[inline] + pub fn id(&self) -> BindGroupId { + self.id + } +} + +impl PartialEq for BindGroup { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Eq for BindGroup {} + +impl core::hash::Hash for BindGroup { + fn hash(&self, state: &mut H) { + self.id.0.hash(state); + } +} + +impl From for BindGroup { + fn from(value: wgpu::BindGroup) -> Self { + BindGroup { + id: BindGroupId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl<'a> From<&'a BindGroup> for Option<&'a wgpu::BindGroup> { + fn from(value: &'a BindGroup) -> Self { + Some(value.deref()) + } +} + +impl<'a> From<&'a mut BindGroup> for Option<&'a wgpu::BindGroup> { + fn from(value: &'a mut BindGroup) -> Self { + Some(&*value) + } +} + +impl Deref for BindGroup { + type Target = wgpu::BindGroup; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +/// Converts a value to a [`BindGroup`] with a given [`BindGroupLayout`], which can then be used in Bevy shaders. +/// This trait can be derived (and generally should be). Read on for details and examples. +/// +/// This is an opinionated trait that is intended to make it easy to generically +/// convert a type into a [`BindGroup`]. It provides access to specific render resources, +/// such as [`RenderAssets`] and [`crate::texture::FallbackImage`]. 
If a type has a [`Handle`](bevy_asset::Handle), +/// these can be used to retrieve the corresponding [`Texture`](crate::render_resource::Texture) resource. +/// +/// [`AsBindGroup::as_bind_group`] is intended to be called once, then the result cached somewhere. It is generally +/// ok to do "expensive" work here, such as creating a [`Buffer`] for a uniform. +/// +/// If for some reason a [`BindGroup`] cannot be created yet (for example, the [`Texture`](crate::render_resource::Texture) +/// for an [`Image`](bevy_image::Image) hasn't loaded yet), just return [`AsBindGroupError::RetryNextUpdate`], which signals that the caller +/// should retry again later. +/// +/// # Deriving +/// +/// This trait can be derived. Field attributes like `uniform` and `texture` are used to define which fields should be bindings, +/// what their binding type is, and what index they should be bound at: +/// +/// ``` +/// # use crate::render::render_resource::*; +/// # use bevy_image::Image; +/// # use bevy_color::LinearRgba; +/// # use bevy_asset::Handle; +/// # use crate::render::storage::ShaderStorageBuffer; +/// +/// #[derive(AsBindGroup)] +/// struct CoolMaterial { +/// #[uniform(0)] +/// color: LinearRgba, +/// #[texture(1)] +/// #[sampler(2)] +/// color_texture: Handle, +/// #[storage(3, read_only)] +/// storage_buffer: Handle, +/// #[storage(4, read_only, buffer)] +/// raw_buffer: Buffer, +/// #[storage_texture(5)] +/// storage_texture: Handle, +/// } +/// ``` +/// +/// In WGSL shaders, the binding would look like this: +/// +/// ```wgsl +/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var color: vec4; +/// @group(#{MATERIAL_BIND_GROUP}) @binding(1) var color_texture: texture_2d; +/// @group(#{MATERIAL_BIND_GROUP}) @binding(2) var color_sampler: sampler; +/// @group(#{MATERIAL_BIND_GROUP}) @binding(3) var storage_buffer: array; +/// @group(#{MATERIAL_BIND_GROUP}) @binding(4) var raw_buffer: array; +/// @group(#{MATERIAL_BIND_GROUP}) @binding(5) var storage_texture: 
texture_storage_2d; +/// ``` +/// Note that the "group" index is determined by the usage context. It is not defined in [`AsBindGroup`]. For example, in Bevy material bind groups +/// are generally bound to group 2. +/// +/// The following field-level attributes are supported: +/// +/// ## `uniform(BINDING_INDEX)` +/// +/// * The field will be converted to a shader-compatible type using the [`ShaderType`] trait, written to a [`Buffer`], and bound as a uniform. +/// [`ShaderType`] is implemented for most math types already, such as [`f32`], [`Vec4`](bevy_math::Vec4), and +/// [`LinearRgba`](bevy_color::LinearRgba). It can also be derived for custom structs. +/// +/// ## `texture(BINDING_INDEX, arguments)` +/// +/// * This field's [`Handle`](bevy_asset::Handle) will be used to look up the matching [`Texture`](crate::render_resource::Texture) +/// GPU resource, which will be bound as a texture in shaders. The field will be assumed to implement [`Into>>`]. In practice, +/// most fields should be a [`Handle`](bevy_asset::Handle) or [`Option>`]. If the value of an [`Option>`] is +/// [`None`], the [`crate::texture::FallbackImage`] resource will be used instead. This attribute can be used in conjunction with a `sampler` binding attribute +/// (with a different binding index) if a binding of the sampler for the [`Image`](bevy_image::Image) is also required. +/// +/// | Arguments | Values | Default | +/// |-----------------------|-------------------------------------------------------------------------|----------------------| +/// | `dimension` = "..." | `"1d"`, `"2d"`, `"2d_array"`, `"3d"`, `"cube"`, `"cube_array"` | `"2d"` | +/// | `sample_type` = "..." | `"float"`, `"depth"`, `"s_int"` or `"u_int"` | `"float"` | +/// | `filterable` = ... | `true`, `false` | `true` | +/// | `multisampled` = ... 
| `true`, `false` | `false` | +/// | `visibility(...)` | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `vertex`, `fragment` | +/// +/// ## `storage_texture(BINDING_INDEX, arguments)` +/// +/// * This field's [`Handle`](bevy_asset::Handle) will be used to look up the matching [`Texture`](crate::render_resource::Texture) +/// GPU resource, which will be bound as a storage texture in shaders. The field will be assumed to implement [`Into>>`]. In practice, +/// most fields should be a [`Handle`](bevy_asset::Handle) or [`Option>`]. If the value of an [`Option>`] is +/// [`None`], the [`crate::texture::FallbackImage`] resource will be used instead. +/// +/// | Arguments | Values | Default | +/// |------------------------|--------------------------------------------------------------------------------------------|---------------| +/// | `dimension` = "..." | `"1d"`, `"2d"`, `"2d_array"`, `"3d"`, `"cube"`, `"cube_array"` | `"2d"` | +/// | `image_format` = ... | any member of [`TextureFormat`](crate::render_resource::TextureFormat) | `Rgba8Unorm` | +/// | `access` = ... | any member of [`StorageTextureAccess`](crate::render_resource::StorageTextureAccess) | `ReadWrite` | +/// | `visibility(...)` | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `compute` | +/// +/// ## `sampler(BINDING_INDEX, arguments)` +/// +/// * This field's [`Handle`](bevy_asset::Handle) will be used to look up the matching [`Sampler`] GPU +/// resource, which will be bound as a sampler in shaders. The field will be assumed to implement [`Into>>`]. In practice, +/// most fields should be a [`Handle`](bevy_asset::Handle) or [`Option>`]. If the value of an [`Option>`] is +/// [`None`], the [`crate::texture::FallbackImage`] resource will be used instead. This attribute can be used in conjunction with a `texture` binding attribute +/// (with a different binding index) if a binding of the texture for the [`Image`](bevy_image::Image) is also required. 
+/// +/// | Arguments | Values | Default | +/// |------------------------|-------------------------------------------------------------------------|------------------------| +/// | `sampler_type` = "..." | `"filtering"`, `"non_filtering"`, `"comparison"`. | `"filtering"` | +/// | `visibility(...)` | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `vertex`, `fragment` | +/// +/// ## `storage(BINDING_INDEX, arguments)` +/// +/// * The field's [`Handle`](bevy_asset::Handle) will be used to look +/// up the matching [`Buffer`] GPU resource, which will be bound as a storage +/// buffer in shaders. If the `storage` attribute is used, the field is expected +/// a raw buffer, and the buffer will be bound as a storage buffer in shaders. +/// In bindless mode, `binding_array()` argument that specifies the binding +/// number of the resulting storage buffer binding array must be present. +/// +/// | Arguments | Values | Default | +/// |------------------------|-------------------------------------------------------------------------|------------------------| +/// | `visibility(...)` | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `vertex`, `fragment` | +/// | `read_only` | if present then value is true, otherwise false | `false` | +/// | `buffer` | if present then the field will be assumed to be a raw wgpu buffer | | +/// | `binding_array(...)` | the binding number of the binding array, for bindless mode | bindless mode disabled | +/// +/// Note that fields without field-level binding attributes will be ignored. 
+/// ``` +/// # use crate::render::{render_resource::AsBindGroup}; +/// # use bevy_color::LinearRgba; +/// # use bevy_asset::Handle; +/// #[derive(AsBindGroup)] +/// struct CoolMaterial { +/// #[uniform(0)] +/// color: LinearRgba, +/// this_field_is_ignored: String, +/// } +/// ``` +/// +/// As mentioned above, [`Option>`] is also supported: +/// ``` +/// # use bevy_asset::Handle; +/// # use bevy_color::LinearRgba; +/// # use bevy_image::Image; +/// # use crate::render::render_resource::AsBindGroup; +/// #[derive(AsBindGroup)] +/// struct CoolMaterial { +/// #[uniform(0)] +/// color: LinearRgba, +/// #[texture(1)] +/// #[sampler(2)] +/// color_texture: Option>, +/// } +/// ``` +/// This is useful if you want a texture to be optional. When the value is [`None`], the [`crate::texture::FallbackImage`] will be used for the binding instead, which defaults +/// to "pure white". +/// +/// Field uniforms with the same index will be combined into a single binding: +/// ``` +/// # use crate::render::{render_resource::AsBindGroup}; +/// # use bevy_color::LinearRgba; +/// #[derive(AsBindGroup)] +/// struct CoolMaterial { +/// #[uniform(0)] +/// color: LinearRgba, +/// #[uniform(0)] +/// roughness: f32, +/// } +/// ``` +/// +/// In WGSL shaders, the binding would look like this: +/// ```wgsl +/// struct CoolMaterial { +/// color: vec4, +/// roughness: f32, +/// }; +/// +/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var material: CoolMaterial; +/// ``` +/// +/// Some less common scenarios will require "struct-level" attributes. These are the currently supported struct-level attributes: +/// ## `uniform(BINDING_INDEX, ConvertedShaderType)` +/// +/// * This also creates a [`Buffer`] using [`ShaderType`] and binds it as a +/// uniform, much like the field-level `uniform` attribute. 
The difference is +/// that the entire [`AsBindGroup`] value is converted to `ConvertedShaderType`, +/// which must implement [`ShaderType`], instead of a specific field +/// implementing [`ShaderType`]. This is useful if more complicated conversion +/// logic is required, or when using bindless mode (see below). The conversion +/// is done using the [`AsBindGroupShaderType`] trait, +/// which is automatically implemented if `&Self` implements +/// [`Into`]. Outside of bindless mode, only use +/// [`AsBindGroupShaderType`] if access to resources like +/// [`RenderAssets`] is required. +/// +/// * In bindless mode (see `bindless(COUNT)`), this attribute becomes +/// `uniform(BINDLESS_INDEX, ConvertedShaderType, +/// binding_array(BINDING_INDEX))`. The resulting uniform buffers will be +/// available in the shader as a binding array at the given `BINDING_INDEX`. The +/// `BINDLESS_INDEX` specifies the offset of the buffer in the bindless index +/// table. +/// +/// For example, suppose that the material slot is stored in a variable named +/// `slot`, the bindless index table is named `material_indices`, and that the +/// first field (index 0) of the bindless index table type is named +/// `material`. Then specifying `#[uniform(0, StandardMaterialUniform, +/// binding_array(10)]` will create a binding array buffer declared in the +/// shader as `var material_array: +/// binding_array` and accessible as +/// `material_array[material_indices[slot].material]`. +/// +/// ## `data(BINDING_INDEX, ConvertedShaderType, binding_array(BINDING_INDEX))` +/// +/// * This is very similar to `uniform(BINDING_INDEX, ConvertedShaderType, +/// binding_array(BINDING_INDEX)` and in fact is identical if bindless mode +/// isn't being used. The difference is that, in bindless mode, the `data` +/// attribute produces a single buffer containing an array, not an array of +/// buffers. 
For example, suppose you had the following declaration: +/// +/// ```ignore +/// #[uniform(0, StandardMaterialUniform, binding_array(10))] +/// struct StandardMaterial { ... } +/// ``` +/// +/// In bindless mode, this will produce a binding matching the following WGSL +/// declaration: +/// +/// ```wgsl +/// @group(#{MATERIAL_BIND_GROUP}) @binding(10) var material_array: binding_array; +/// ``` +/// +/// On the other hand, if you write this declaration: +/// +/// ```ignore +/// #[data(0, StandardMaterialUniform, binding_array(10))] +/// struct StandardMaterial { ... } +/// ``` +/// +/// Then Bevy produces a binding that matches this WGSL declaration instead: +/// +/// ```wgsl +/// @group(#{MATERIAL_BIND_GROUP}) @binding(10) var material_array: array; +/// ``` +/// +/// * Just as with the structure-level `uniform` attribute, Bevy converts the +/// entire [`AsBindGroup`] to `ConvertedShaderType`, using the +/// [`AsBindGroupShaderType`] trait. +/// +/// * In non-bindless mode, the structure-level `data` attribute is the same as +/// the structure-level `uniform` attribute and produces a single uniform buffer +/// in the shader. The above example would result in a binding that looks like +/// this in WGSL in non-bindless mode: +/// +/// ```wgsl +/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var material: StandardMaterial; +/// ``` +/// +/// * For efficiency reasons, `data` is generally preferred over `uniform` +/// unless you need to place your data in individual buffers. +/// +/// ## `bind_group_data(DataType)` +/// +/// * The [`AsBindGroup`] type will be converted to some `DataType` using [`Into`] and stored +/// as [`AsBindGroup::Data`] as part of the [`AsBindGroup::as_bind_group`] call. This is useful if data needs to be stored alongside +/// the generated bind group, such as a unique identifier for a material's bind group. The most common use case for this attribute +/// is "shader pipeline specialization". 
See [`SpecializedRenderPipeline`](crate::render_resource::SpecializedRenderPipeline). +/// +/// ## `bindless` +/// +/// * This switch enables *bindless resources*, which changes the way Bevy +/// supplies resources (textures, and samplers) to the shader. When bindless +/// resources are enabled, and the current platform supports them, Bevy will +/// allocate textures, and samplers into *binding arrays*, separated based on +/// type and will supply your shader with indices into those arrays. +/// * Bindless textures and samplers are placed into the appropriate global +/// array defined in `bevy_render::bindless` (`bindless.wgsl`). +/// * Bevy doesn't currently support bindless buffers, except for those created +/// with the `uniform(BINDLESS_INDEX, ConvertedShaderType, +/// binding_array(BINDING_INDEX))` attribute. If you need to include a buffer in +/// your object, and you can't create the data in that buffer with the `uniform` +/// attribute, consider a non-bindless object instead. +/// * If bindless mode is enabled, the `BINDLESS` definition will be +/// available. Because not all platforms support bindless resources, you +/// should check for the presence of this definition via `#ifdef` and fall +/// back to standard bindings if it isn't present. +/// * By default, in bindless mode, binding 0 becomes the *bindless index +/// table*, which is an array of structures, each of which contains as many +/// fields of type `u32` as the highest binding number in the structure +/// annotated with `#[derive(AsBindGroup)]`. Again by default, the *i*th field +/// of the bindless index table contains the index of the resource with binding +/// *i* within the appropriate binding array. +/// * In the case of materials, the index of the applicable table within the +/// bindless index table list corresponding to the mesh currently being drawn +/// can be retrieved with +/// `mesh[in.instance_index].material_and_lightmap_bind_group_slot & 0xffffu`. 
+/// * You can limit the size of the bindless slabs to N resources with the +/// `limit(N)` declaration. For example, `#[bindless(limit(16))]` ensures that +/// each slab will have no more than 16 total resources in it. If you don't +/// specify a limit, Bevy automatically picks a reasonable one for the current +/// platform. +/// * The `index_table(range(M..N), binding(B))` declaration allows you to +/// customize the layout of the bindless index table. This is useful for +/// materials that are composed of multiple bind groups, such as +/// `ExtendedMaterial`. In such cases, there will be multiple bindless index +/// tables, so they can't both be assigned to binding 0 or their bindings will +/// conflict. +/// - The `binding(B)` attribute of the `index_table` attribute allows you to +/// customize the binding (`@binding(B)`, in the shader) at which the index +/// table will be bound. +/// - The `range(M, N)` attribute of the `index_table` attribute allows you to +/// change the mapping from the field index in the bindless index table to the +/// bindless index. Instead of the field at index $i$ being mapped to the +/// bindless index $i$, with the `range(M, N)` attribute the field at index +/// $i$ in the bindless index table is mapped to the bindless index $i$ + M. +/// The size of the index table will be set to N - M. Note that this may +/// result in the table being too small to contain all the bindless bindings. +/// * The purpose of bindless mode is to improve performance by reducing +/// state changes. By grouping resources together into binding arrays, Bevy +/// doesn't have to modify GPU state as often, decreasing API and driver +/// overhead. +/// * See the `shaders/shader_material_bindless` example for an example of how +/// to use bindless mode. See the `shaders/extended_material_bindless` example +/// for a more exotic example of bindless mode that demonstrates the +/// `index_table` attribute. 
+/// * The following diagram illustrates how bindless mode works using a subset +/// of `StandardMaterial`: +/// +/// ```text +/// Shader Bindings Sampler Binding Array +/// +----+-----------------------------+ +-----------+-----------+-----+ +/// +---| 0 | material_indices | +->| sampler 0 | sampler 1 | ... | +/// | +----+-----------------------------+ | +-----------+-----------+-----+ +/// | | 1 | bindless_samplers_filtering +--+ ^ +/// | +----+-----------------------------+ +-------------------------------+ +/// | | .. | ... | | +/// | +----+-----------------------------+ Texture Binding Array | +/// | | 5 | bindless_textures_2d +--+ +-----------+-----------+-----+ | +/// | +----+-----------------------------+ +->| texture 0 | texture 1 | ... | | +/// | | .. | ... | +-----------+-----------+-----+ | +/// | +----+-----------------------------+ ^ | +/// | + 10 | material_array +--+ +---------------------------+ | +/// | +----+-----------------------------+ | | | +/// | | Buffer Binding Array | | +/// | | +----------+----------+-----+ | | +/// | +->| buffer 0 | buffer 1 | ... | | | +/// | Material Bindless Indices +----------+----------+-----+ | | +/// | +----+-----------------------------+ ^ | | +/// +-->| 0 | material +----------+ | | +/// +----+-----------------------------+ | | +/// | 1 | base_color_texture +---------------------------------------+ | +/// +----+-----------------------------+ | +/// | 2 | base_color_sampler +-------------------------------------------+ +/// +----+-----------------------------+ +/// | .. | ... 
| +/// +----+-----------------------------+ +/// ``` +/// +/// The previous `CoolMaterial` example illustrating "combining multiple field-level uniform attributes with the same binding index" can +/// also be equivalently represented with a single struct-level uniform attribute: +/// ``` +/// # use crate::render::{render_resource::{AsBindGroup, ShaderType}}; +/// # use bevy_color::LinearRgba; +/// #[derive(AsBindGroup)] +/// #[uniform(0, CoolMaterialUniform)] +/// struct CoolMaterial { +/// color: LinearRgba, +/// roughness: f32, +/// } +/// +/// #[derive(ShaderType)] +/// struct CoolMaterialUniform { +/// color: LinearRgba, +/// roughness: f32, +/// } +/// +/// impl From<&CoolMaterial> for CoolMaterialUniform { +/// fn from(material: &CoolMaterial) -> CoolMaterialUniform { +/// CoolMaterialUniform { +/// color: material.color, +/// roughness: material.roughness, +/// } +/// } +/// } +/// ``` +/// +/// Setting `bind_group_data` looks like this: +/// ``` +/// # use crate::render::{render_resource::AsBindGroup}; +/// # use bevy_color::LinearRgba; +/// #[derive(AsBindGroup)] +/// #[bind_group_data(CoolMaterialKey)] +/// struct CoolMaterial { +/// #[uniform(0)] +/// color: LinearRgba, +/// is_shaded: bool, +/// } +/// +/// // Materials keys are intended to be small, cheap to hash, and +/// // uniquely identify a specific material permutation. +/// #[repr(C)] +/// #[derive(Copy, Clone, Hash, Eq, PartialEq)] +/// struct CoolMaterialKey { +/// is_shaded: bool, +/// } +/// +/// impl From<&CoolMaterial> for CoolMaterialKey { +/// fn from(material: &CoolMaterial) -> CoolMaterialKey { +/// CoolMaterialKey { +/// is_shaded: material.is_shaded, +/// } +/// } +/// } +/// ``` +pub trait AsBindGroup { + /// Data that will be stored alongside the "prepared" bind group. + type Data: Send + Sync; + + type Param: SystemParam + 'static; + + /// The number of slots per bind group, if bindless mode is enabled. + /// + /// If this bind group doesn't use bindless, then this will be `None`. 
+ /// + /// Note that the *actual* slot count may be different from this value, due + /// to platform limitations. For example, if bindless resources aren't + /// supported on this platform, the actual slot count will be 1. + fn bindless_slot_count() -> Option { + None + } + + /// True if the hardware *actually* supports bindless textures for this + /// type, taking the device and driver capabilities into account. + /// + /// If this type doesn't use bindless textures, then the return value from + /// this function is meaningless. + fn bindless_supported(_: &RenderDevice) -> bool { + true + } + + /// label + fn label() -> Option<&'static str> { + None + } + + /// Creates a bind group for `self` matching the layout defined in [`AsBindGroup::bind_group_layout`]. + fn as_bind_group( + &self, + layout: &BindGroupLayout, + render_device: &RenderDevice, + param: &mut SystemParamItem<'_, '_, Self::Param>, + ) -> Result { + let UnpreparedBindGroup { bindings } = + Self::unprepared_bind_group(self, layout, render_device, param, false)?; + + let entries = bindings + .iter() + .map(|(index, binding)| BindGroupEntry { + binding: *index, + resource: binding.get_binding(), + }) + .collect::>(); + + let bind_group = render_device.create_bind_group(Self::label(), layout, &entries); + + Ok(PreparedBindGroup { + bindings, + bind_group, + }) + } + + fn bind_group_data(&self) -> Self::Data; + + /// Returns a vec of (binding index, `OwnedBindingResource`). + /// + /// In cases where `OwnedBindingResource` is not available (as for bindless + /// texture arrays currently), an implementor may return + /// `AsBindGroupError::CreateBindGroupDirectly` from this function and + /// instead define `as_bind_group` directly. This may prevent certain + /// features, such as bindless mode, from working correctly. + /// + /// Set `force_no_bindless` to true to require that bindless textures *not* + /// be used. 
`ExtendedMaterial` uses this in order to ensure that the base + /// material doesn't use bindless mode if the extension doesn't. + fn unprepared_bind_group( + &self, + layout: &BindGroupLayout, + render_device: &RenderDevice, + param: &mut SystemParamItem<'_, '_, Self::Param>, + force_no_bindless: bool, + ) -> Result; + + /// Creates the bind group layout matching all bind groups returned by + /// [`AsBindGroup::as_bind_group`] + fn bind_group_layout(render_device: &RenderDevice) -> BindGroupLayout + where + Self: Sized, + { + render_device.create_bind_group_layout( + Self::label(), + &Self::bind_group_layout_entries(render_device, false), + ) + } + + /// Returns a vec of bind group layout entries. + /// + /// Set `force_no_bindless` to true to require that bindless textures *not* + /// be used. `ExtendedMaterial` uses this in order to ensure that the base + /// material doesn't use bindless mode if the extension doesn't. + fn bind_group_layout_entries( + render_device: &RenderDevice, + force_no_bindless: bool, + ) -> Vec + where + Self: Sized; + + fn bindless_descriptor() -> Option { + None + } +} + +/// An error that occurs during [`AsBindGroup::as_bind_group`] calls. +#[derive(Debug, Error)] +pub enum AsBindGroupError { + /// The bind group could not be generated. Try again next frame. + #[error("The bind group could not be generated")] + RetryNextUpdate, + #[error("Create the bind group via `as_bind_group()` instead")] + CreateBindGroupDirectly, + #[error("At binding index {0}, the provided image sampler `{1}` does not match the required sampler type(s) `{2}`.")] + InvalidSamplerType(u32, String, String), +} + +/// A prepared bind group returned as a result of [`AsBindGroup::as_bind_group`]. 
+pub struct PreparedBindGroup { + pub bindings: BindingResources, + pub bind_group: BindGroup, +} + +/// a map containing `OwnedBindingResource`s, keyed by the target binding index +pub struct UnpreparedBindGroup { + pub bindings: BindingResources, +} + +/// A pair of binding index and binding resource, used as part of +/// [`PreparedBindGroup`] and [`UnpreparedBindGroup`]. +#[derive(Deref, DerefMut)] +pub struct BindingResources(pub Vec<(u32, OwnedBindingResource)>); + +/// An owned binding resource of any type (ex: a [`Buffer`], [`TextureView`], etc). +/// This is used by types like [`PreparedBindGroup`] to hold a single list of all +/// render resources used by bindings. +#[derive(Debug)] +pub enum OwnedBindingResource { + Buffer(Buffer), + TextureView(TextureViewDimension, TextureView), + Sampler(SamplerBindingType, Sampler), + Data(OwnedData), +} + +/// Data that will be copied into a GPU buffer. +/// +/// This corresponds to the `#[data]` attribute in `AsBindGroup`. +#[derive(Debug, Deref, DerefMut)] +pub struct OwnedData(pub Vec); + +impl OwnedBindingResource { + /// Creates a [`BindingResource`] reference to this + /// [`OwnedBindingResource`]. + /// + /// Note that this operation panics if passed a + /// [`OwnedBindingResource::Data`], because [`OwnedData`] doesn't itself + /// correspond to any binding and instead requires the + /// `MaterialBindGroupAllocator` to pack it into a buffer. + pub fn get_binding(&self) -> BindingResource<'_> { + match self { + OwnedBindingResource::Buffer(buffer) => buffer.as_entire_binding(), + OwnedBindingResource::TextureView(_, view) => BindingResource::TextureView(view), + OwnedBindingResource::Sampler(_, sampler) => BindingResource::Sampler(sampler), + OwnedBindingResource::Data(_) => panic!("`OwnedData` has no binding resource"), + } + } +} + +/// Converts a value to a [`ShaderType`] for use in a bind group. +/// +/// This is automatically implemented for references that implement [`Into`]. 
+/// Generally normal [`Into`] / [`From`] impls should be preferred, but +/// sometimes additional runtime metadata is required. +/// This exists largely to make some [`AsBindGroup`] use cases easier. +pub trait AsBindGroupShaderType { + /// Return the `T` [`ShaderType`] for `self`. When used in [`AsBindGroup`] + /// derives, it is safe to assume that all images in `self` exist. + fn as_bind_group_shader_type(&self, images: &RenderAssets) -> T; +} + +impl AsBindGroupShaderType for T +where + for<'a> &'a T: Into, +{ + #[inline] + fn as_bind_group_shader_type(&self, _images: &RenderAssets) -> U { + self.into() + } +} + +#[cfg(test)] +mod test { + use super::*; + use bevy_asset::Handle; + use bevy_image::Image; + + #[test] + fn texture_visibility() { + #[expect( + dead_code, + reason = "This is a derive macro compilation test. It will not be constructed." + )] + #[derive(AsBindGroup)] + pub struct TextureVisibilityTest { + #[texture(0, visibility(all))] + pub all: Handle, + #[texture(1, visibility(none))] + pub none: Handle, + #[texture(2, visibility(fragment))] + pub fragment: Handle, + #[texture(3, visibility(vertex))] + pub vertex: Handle, + #[texture(4, visibility(compute))] + pub compute: Handle, + #[texture(5, visibility(vertex, fragment))] + pub vertex_fragment: Handle, + #[texture(6, visibility(vertex, compute))] + pub vertex_compute: Handle, + #[texture(7, visibility(fragment, compute))] + pub fragment_compute: Handle, + #[texture(8, visibility(vertex, fragment, compute))] + pub vertex_fragment_compute: Handle, + } + } +} diff --git a/crates/libmarathon/src/render/render_resource/bind_group_entries.rs b/crates/libmarathon/src/render/render_resource/bind_group_entries.rs new file mode 100644 index 0000000..274aa11 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/bind_group_entries.rs @@ -0,0 +1,322 @@ +use variadics_please::all_tuples_with_size; +use wgpu::{BindGroupEntry, BindingResource}; + +use super::{Sampler, TextureView}; + +/// Helper 
for constructing bindgroups. +/// +/// Allows constructing the descriptor's entries as: +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &BindGroupEntries::with_indices(( +/// (2, &my_sampler), +/// (3, my_uniform), +/// )), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &[ +/// BindGroupEntry { +/// binding: 2, +/// resource: BindingResource::Sampler(&my_sampler), +/// }, +/// BindGroupEntry { +/// binding: 3, +/// resource: my_uniform, +/// }, +/// ], +/// ); +/// ``` +/// +/// or +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &BindGroupEntries::sequential(( +/// &my_sampler, +/// my_uniform, +/// )), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &[ +/// BindGroupEntry { +/// binding: 0, +/// resource: BindingResource::Sampler(&my_sampler), +/// }, +/// BindGroupEntry { +/// binding: 1, +/// resource: my_uniform, +/// }, +/// ], +/// ); +/// ``` +/// +/// or +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &BindGroupEntries::single(my_uniform), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group( +/// "my_bind_group", +/// &my_layout, +/// &[ +/// BindGroupEntry { +/// binding: 0, +/// resource: my_uniform, +/// }, +/// ], +/// ); +/// ``` +pub struct BindGroupEntries<'b, const N: usize = 1> { + entries: [BindGroupEntry<'b>; N], +} + +impl<'b, const N: usize> BindGroupEntries<'b, N> { + #[inline] + pub fn sequential(resources: impl 
IntoBindingArray<'b, N>) -> Self { + let mut i = 0; + Self { + entries: resources.into_array().map(|resource| { + let binding = i; + i += 1; + BindGroupEntry { binding, resource } + }), + } + } + + #[inline] + pub fn with_indices(indexed_resources: impl IntoIndexedBindingArray<'b, N>) -> Self { + Self { + entries: indexed_resources + .into_array() + .map(|(binding, resource)| BindGroupEntry { binding, resource }), + } + } +} + +impl<'b> BindGroupEntries<'b, 1> { + pub fn single(resource: impl IntoBinding<'b>) -> [BindGroupEntry<'b>; 1] { + [BindGroupEntry { + binding: 0, + resource: resource.into_binding(), + }] + } +} + +impl<'b, const N: usize> core::ops::Deref for BindGroupEntries<'b, N> { + type Target = [BindGroupEntry<'b>]; + + fn deref(&self) -> &[BindGroupEntry<'b>] { + &self.entries + } +} + +pub trait IntoBinding<'a> { + fn into_binding(self) -> BindingResource<'a>; +} + +impl<'a> IntoBinding<'a> for &'a TextureView { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::TextureView(self) + } +} + +impl<'a> IntoBinding<'a> for &'a wgpu::TextureView { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::TextureView(self) + } +} + +impl<'a> IntoBinding<'a> for &'a [&'a wgpu::TextureView] { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::TextureViewArray(self) + } +} + +impl<'a> IntoBinding<'a> for &'a Sampler { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::Sampler(self) + } +} + +impl<'a> IntoBinding<'a> for &'a [&'a wgpu::Sampler] { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::SamplerArray(self) + } +} + +impl<'a> IntoBinding<'a> for BindingResource<'a> { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + self + } +} + +impl<'a> IntoBinding<'a> for wgpu::BufferBinding<'a> { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::Buffer(self) + } +} + +impl<'a> 
IntoBinding<'a> for &'a [wgpu::BufferBinding<'a>] { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + BindingResource::BufferArray(self) + } +} + +pub trait IntoBindingArray<'b, const N: usize> { + fn into_array(self) -> [BindingResource<'b>; N]; +} + +macro_rules! impl_to_binding_slice { + ($N: expr, $(#[$meta:meta])* $(($T: ident, $I: ident)),*) => { + $(#[$meta])* + impl<'b, $($T: IntoBinding<'b>),*> IntoBindingArray<'b, $N> for ($($T,)*) { + #[inline] + fn into_array(self) -> [BindingResource<'b>; $N] { + let ($($I,)*) = self; + [$($I.into_binding(), )*] + } + } + } +} + +all_tuples_with_size!( + #[doc(fake_variadic)] + impl_to_binding_slice, + 1, + 32, + T, + s +); + +pub trait IntoIndexedBindingArray<'b, const N: usize> { + fn into_array(self) -> [(u32, BindingResource<'b>); N]; +} + +macro_rules! impl_to_indexed_binding_slice { + ($N: expr, $(($T: ident, $S: ident, $I: ident)),*) => { + impl<'b, $($T: IntoBinding<'b>),*> IntoIndexedBindingArray<'b, $N> for ($((u32, $T),)*) { + #[inline] + fn into_array(self) -> [(u32, BindingResource<'b>); $N] { + let ($(($S, $I),)*) = self; + [$(($S, $I.into_binding())), *] + } + } + } +} + +all_tuples_with_size!(impl_to_indexed_binding_slice, 1, 32, T, n, s); + +pub struct DynamicBindGroupEntries<'b> { + entries: Vec>, +} + +impl<'b> Default for DynamicBindGroupEntries<'b> { + fn default() -> Self { + Self::new() + } +} + +impl<'b> DynamicBindGroupEntries<'b> { + pub fn sequential(entries: impl IntoBindingArray<'b, N>) -> Self { + Self { + entries: entries + .into_array() + .into_iter() + .enumerate() + .map(|(ix, resource)| BindGroupEntry { + binding: ix as u32, + resource, + }) + .collect(), + } + } + + pub fn extend_sequential( + mut self, + entries: impl IntoBindingArray<'b, N>, + ) -> Self { + let start = self.entries.last().unwrap().binding + 1; + self.entries.extend( + entries + .into_array() + .into_iter() + .enumerate() + .map(|(ix, resource)| BindGroupEntry { + binding: start + ix as u32, + resource, 
+ }), + ); + self + } + + pub fn new_with_indices(entries: impl IntoIndexedBindingArray<'b, N>) -> Self { + Self { + entries: entries + .into_array() + .into_iter() + .map(|(binding, resource)| BindGroupEntry { binding, resource }) + .collect(), + } + } + + pub fn new() -> Self { + Self { + entries: Vec::new(), + } + } + + pub fn extend_with_indices( + mut self, + entries: impl IntoIndexedBindingArray<'b, N>, + ) -> Self { + self.entries.extend( + entries + .into_array() + .into_iter() + .map(|(binding, resource)| BindGroupEntry { binding, resource }), + ); + self + } +} + +impl<'b> core::ops::Deref for DynamicBindGroupEntries<'b> { + type Target = [BindGroupEntry<'b>]; + + fn deref(&self) -> &[BindGroupEntry<'b>] { + &self.entries + } +} diff --git a/crates/libmarathon/src/render/render_resource/bind_group_layout.rs b/crates/libmarathon/src/render/render_resource/bind_group_layout.rs new file mode 100644 index 0000000..dfc2b0a --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/bind_group_layout.rs @@ -0,0 +1,81 @@ +use crate::render::{define_atomic_id, renderer::RenderDevice, renderer::WgpuWrapper}; +use bevy_ecs::system::Res; +use bevy_platform::sync::OnceLock; +use core::ops::Deref; + +define_atomic_id!(BindGroupLayoutId); + +/// Bind group layouts define the interface of resources (e.g. buffers, textures, samplers) +/// for a shader. The actual resource binding is done via a [`BindGroup`](super::BindGroup). +/// +/// This is a lightweight thread-safe wrapper around wgpu's own [`BindGroupLayout`](wgpu::BindGroupLayout), +/// which can be cloned as needed to workaround lifetime management issues. It may be converted +/// from and dereferences to wgpu's [`BindGroupLayout`](wgpu::BindGroupLayout). +/// +/// Can be created via [`RenderDevice::create_bind_group_layout`](crate::renderer::RenderDevice::create_bind_group_layout). 
+#[derive(Clone, Debug)] +pub struct BindGroupLayout { + id: BindGroupLayoutId, + value: WgpuWrapper, +} + +impl PartialEq for BindGroupLayout { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Eq for BindGroupLayout {} + +impl core::hash::Hash for BindGroupLayout { + fn hash(&self, state: &mut H) { + self.id.0.hash(state); + } +} + +impl BindGroupLayout { + /// Returns the [`BindGroupLayoutId`] representing the unique ID of the bind group layout. + #[inline] + pub fn id(&self) -> BindGroupLayoutId { + self.id + } + + #[inline] + pub fn value(&self) -> &wgpu::BindGroupLayout { + &self.value + } +} + +impl From for BindGroupLayout { + fn from(value: wgpu::BindGroupLayout) -> Self { + BindGroupLayout { + id: BindGroupLayoutId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl Deref for BindGroupLayout { + type Target = wgpu::BindGroupLayout; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +static EMPTY_BIND_GROUP_LAYOUT: OnceLock = OnceLock::new(); + +pub(crate) fn init_empty_bind_group_layout(render_device: Res) { + let layout = render_device.create_bind_group_layout(Some("empty_bind_group_layout"), &[]); + EMPTY_BIND_GROUP_LAYOUT + .set(layout) + .expect("init_empty_bind_group_layout was called more than once"); +} + +pub fn empty_bind_group_layout() -> BindGroupLayout { + EMPTY_BIND_GROUP_LAYOUT + .get() + .expect("init_empty_bind_group_layout was not called") + .clone() +} diff --git a/crates/libmarathon/src/render/render_resource/bind_group_layout_entries.rs b/crates/libmarathon/src/render/render_resource/bind_group_layout_entries.rs new file mode 100644 index 0000000..99f2662 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/bind_group_layout_entries.rs @@ -0,0 +1,592 @@ +use core::num::NonZero; +use variadics_please::all_tuples_with_size; +use wgpu::{BindGroupLayoutEntry, BindingType, ShaderStages}; + +/// Helper for constructing bind group layouts. 
+/// +/// Allows constructing the layout's entries as: +/// ```ignore (render_device cannot be easily accessed) +/// let layout = render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &BindGroupLayoutEntries::with_indices( +/// // The layout entries will only be visible in the fragment stage +/// ShaderStages::FRAGMENT, +/// ( +/// // Screen texture +/// (2, texture_2d(TextureSampleType::Float { filterable: true })), +/// // Sampler +/// (3, sampler(SamplerBindingType::Filtering)), +/// ), +/// ), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// let layout = render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &[ +/// // Screen texture +/// BindGroupLayoutEntry { +/// binding: 2, +/// visibility: ShaderStages::FRAGMENT, +/// ty: BindingType::Texture { +/// sample_type: TextureSampleType::Float { filterable: true }, +/// view_dimension: TextureViewDimension::D2, +/// multisampled: false, +/// }, +/// count: None, +/// }, +/// // Sampler +/// BindGroupLayoutEntry { +/// binding: 3, +/// visibility: ShaderStages::FRAGMENT, +/// ty: BindingType::Sampler(SamplerBindingType::Filtering), +/// count: None, +/// }, +/// ], +/// ); +/// ``` +/// +/// or +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &BindGroupLayoutEntries::sequential( +/// ShaderStages::FRAGMENT, +/// ( +/// // Screen texture +/// texture_2d(TextureSampleType::Float { filterable: true }), +/// // Sampler +/// sampler(SamplerBindingType::Filtering), +/// ), +/// ), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// let layout = render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &[ +/// // Screen texture +/// BindGroupLayoutEntry { +/// binding: 0, +/// visibility: ShaderStages::FRAGMENT, +/// ty: BindingType::Texture { +/// sample_type: 
TextureSampleType::Float { filterable: true }, +/// view_dimension: TextureViewDimension::D2, +/// multisampled: false, +/// }, +/// count: None, +/// }, +/// // Sampler +/// BindGroupLayoutEntry { +/// binding: 1, +/// visibility: ShaderStages::FRAGMENT, +/// ty: BindingType::Sampler(SamplerBindingType::Filtering), +/// count: None, +/// }, +/// ], +/// ); +/// ``` +/// +/// or +/// +/// ```ignore (render_device cannot be easily accessed) +/// render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &BindGroupLayoutEntries::single( +/// ShaderStages::FRAGMENT, +/// texture_2d(TextureSampleType::Float { filterable: true }), +/// ), +/// ); +/// ``` +/// +/// instead of +/// +/// ```ignore (render_device cannot be easily accessed) +/// let layout = render_device.create_bind_group_layout( +/// "my_bind_group_layout", +/// &[ +/// BindGroupLayoutEntry { +/// binding: 0, +/// visibility: ShaderStages::FRAGMENT, +/// ty: BindingType::Texture { +/// sample_type: TextureSampleType::Float { filterable: true }, +/// view_dimension: TextureViewDimension::D2, +/// multisampled: false, +/// }, +/// count: None, +/// }, +/// ], +/// ); +/// ``` + +#[derive(Clone, Copy)] +pub struct BindGroupLayoutEntryBuilder { + ty: BindingType, + visibility: Option, + count: Option>, +} + +impl BindGroupLayoutEntryBuilder { + pub fn visibility(mut self, visibility: ShaderStages) -> Self { + self.visibility = Some(visibility); + self + } + + pub fn count(mut self, count: NonZero) -> Self { + self.count = Some(count); + self + } + + pub fn build(&self, binding: u32, default_visibility: ShaderStages) -> BindGroupLayoutEntry { + BindGroupLayoutEntry { + binding, + ty: self.ty, + visibility: self.visibility.unwrap_or(default_visibility), + count: self.count, + } + } +} + +pub struct BindGroupLayoutEntries { + entries: [BindGroupLayoutEntry; N], +} + +impl BindGroupLayoutEntries { + #[inline] + pub fn sequential( + default_visibility: ShaderStages, + entries_ext: impl 
IntoBindGroupLayoutEntryBuilderArray, + ) -> Self { + let mut i = 0; + Self { + entries: entries_ext.into_array().map(|entry| { + let binding = i; + i += 1; + entry.build(binding, default_visibility) + }), + } + } + + #[inline] + pub fn with_indices( + default_visibility: ShaderStages, + indexed_entries: impl IntoIndexedBindGroupLayoutEntryBuilderArray, + ) -> Self { + Self { + entries: indexed_entries + .into_array() + .map(|(binding, entry)| entry.build(binding, default_visibility)), + } + } +} + +impl BindGroupLayoutEntries<1> { + pub fn single( + visibility: ShaderStages, + resource: impl IntoBindGroupLayoutEntryBuilder, + ) -> [BindGroupLayoutEntry; 1] { + [resource + .into_bind_group_layout_entry_builder() + .build(0, visibility)] + } +} + +impl core::ops::Deref for BindGroupLayoutEntries { + type Target = [BindGroupLayoutEntry]; + fn deref(&self) -> &[BindGroupLayoutEntry] { + &self.entries + } +} + +pub trait IntoBindGroupLayoutEntryBuilder { + fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder; +} + +impl IntoBindGroupLayoutEntryBuilder for BindingType { + fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder { + BindGroupLayoutEntryBuilder { + ty: self, + visibility: None, + count: None, + } + } +} + +impl IntoBindGroupLayoutEntryBuilder for BindGroupLayoutEntry { + fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder { + if self.binding != u32::MAX { + tracing::warn!("The BindGroupLayoutEntries api ignores the binding index when converting a raw wgpu::BindGroupLayoutEntry. 
You can ignore this warning by setting it to u32::MAX."); + } + BindGroupLayoutEntryBuilder { + ty: self.ty, + visibility: Some(self.visibility), + count: self.count, + } + } +} + +impl IntoBindGroupLayoutEntryBuilder for BindGroupLayoutEntryBuilder { + fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder { + self + } +} + +pub trait IntoBindGroupLayoutEntryBuilderArray { + fn into_array(self) -> [BindGroupLayoutEntryBuilder; N]; +} +macro_rules! impl_to_binding_type_slice { + ($N: expr, $(#[$meta:meta])* $(($T: ident, $I: ident)),*) => { + $(#[$meta])* + impl<$($T: IntoBindGroupLayoutEntryBuilder),*> IntoBindGroupLayoutEntryBuilderArray<$N> for ($($T,)*) { + #[inline] + fn into_array(self) -> [BindGroupLayoutEntryBuilder; $N] { + let ($($I,)*) = self; + [$($I.into_bind_group_layout_entry_builder(), )*] + } + } + } +} +all_tuples_with_size!( + #[doc(fake_variadic)] + impl_to_binding_type_slice, + 1, + 32, + T, + s +); + +pub trait IntoIndexedBindGroupLayoutEntryBuilderArray { + fn into_array(self) -> [(u32, BindGroupLayoutEntryBuilder); N]; +} +macro_rules! 
impl_to_indexed_binding_type_slice { + ($N: expr, $(($T: ident, $S: ident, $I: ident)),*) => { + impl<$($T: IntoBindGroupLayoutEntryBuilder),*> IntoIndexedBindGroupLayoutEntryBuilderArray<$N> for ($((u32, $T),)*) { + #[inline] + fn into_array(self) -> [(u32, BindGroupLayoutEntryBuilder); $N] { + let ($(($S, $I),)*) = self; + [$(($S, $I.into_bind_group_layout_entry_builder())), *] + } + } + } +} +all_tuples_with_size!(impl_to_indexed_binding_type_slice, 1, 32, T, n, s); + +impl IntoBindGroupLayoutEntryBuilderArray for [BindGroupLayoutEntry; N] { + fn into_array(self) -> [BindGroupLayoutEntryBuilder; N] { + self.map(IntoBindGroupLayoutEntryBuilder::into_bind_group_layout_entry_builder) + } +} + +pub struct DynamicBindGroupLayoutEntries { + default_visibility: ShaderStages, + entries: Vec, +} + +impl DynamicBindGroupLayoutEntries { + pub fn sequential( + default_visibility: ShaderStages, + entries: impl IntoBindGroupLayoutEntryBuilderArray, + ) -> Self { + Self { + default_visibility, + entries: entries + .into_array() + .into_iter() + .enumerate() + .map(|(ix, resource)| resource.build(ix as u32, default_visibility)) + .collect(), + } + } + + pub fn extend_sequential( + mut self, + entries: impl IntoBindGroupLayoutEntryBuilderArray, + ) -> Self { + let start = self.entries.last().unwrap().binding + 1; + self.entries.extend( + entries + .into_array() + .into_iter() + .enumerate() + .map(|(ix, resource)| resource.build(start + ix as u32, self.default_visibility)), + ); + self + } + + pub fn new_with_indices( + default_visibility: ShaderStages, + entries: impl IntoIndexedBindGroupLayoutEntryBuilderArray, + ) -> Self { + Self { + default_visibility, + entries: entries + .into_array() + .into_iter() + .map(|(binding, resource)| resource.build(binding, default_visibility)) + .collect(), + } + } + + pub fn new(default_visibility: ShaderStages) -> Self { + Self { + default_visibility, + entries: Vec::new(), + } + } + + pub fn extend_with_indices( + mut self, + entries: impl 
IntoIndexedBindGroupLayoutEntryBuilderArray, + ) -> Self { + self.entries.extend( + entries + .into_array() + .into_iter() + .map(|(binding, resource)| resource.build(binding, self.default_visibility)), + ); + self + } +} + +impl core::ops::Deref for DynamicBindGroupLayoutEntries { + type Target = [BindGroupLayoutEntry]; + + fn deref(&self) -> &[BindGroupLayoutEntry] { + &self.entries + } +} + +pub mod binding_types { + use crate::render::render_resource::{ + BufferBindingType, SamplerBindingType, TextureSampleType, TextureViewDimension, + }; + use core::num::NonZero; + use encase::ShaderType; + use wgpu::{StorageTextureAccess, TextureFormat}; + + use super::*; + + pub fn storage_buffer(has_dynamic_offset: bool) -> BindGroupLayoutEntryBuilder { + storage_buffer_sized(has_dynamic_offset, Some(T::min_size())) + } + + pub fn storage_buffer_sized( + has_dynamic_offset: bool, + min_binding_size: Option>, + ) -> BindGroupLayoutEntryBuilder { + BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset, + min_binding_size, + } + .into_bind_group_layout_entry_builder() + } + + pub fn storage_buffer_read_only( + has_dynamic_offset: bool, + ) -> BindGroupLayoutEntryBuilder { + storage_buffer_read_only_sized(has_dynamic_offset, Some(T::min_size())) + } + + pub fn storage_buffer_read_only_sized( + has_dynamic_offset: bool, + min_binding_size: Option>, + ) -> BindGroupLayoutEntryBuilder { + BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: true }, + has_dynamic_offset, + min_binding_size, + } + .into_bind_group_layout_entry_builder() + } + + pub fn uniform_buffer(has_dynamic_offset: bool) -> BindGroupLayoutEntryBuilder { + uniform_buffer_sized(has_dynamic_offset, Some(T::min_size())) + } + + pub fn uniform_buffer_sized( + has_dynamic_offset: bool, + min_binding_size: Option>, + ) -> BindGroupLayoutEntryBuilder { + BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset, + min_binding_size, + } + 
.into_bind_group_layout_entry_builder() + } + + pub fn texture_1d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D1, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_2d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D2, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_2d_multisampled(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D2, + multisampled: true, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_2d_array(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D2Array, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_2d_array_multisampled( + sample_type: TextureSampleType, + ) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D2Array, + multisampled: true, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_depth_2d() -> BindGroupLayoutEntryBuilder { + texture_2d(TextureSampleType::Depth).into_bind_group_layout_entry_builder() + } + + pub fn texture_depth_2d_multisampled() -> BindGroupLayoutEntryBuilder { + texture_2d_multisampled(TextureSampleType::Depth).into_bind_group_layout_entry_builder() + } + + pub fn texture_cube(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::Cube, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_cube_multisampled( + sample_type: TextureSampleType, + ) -> BindGroupLayoutEntryBuilder { + 
BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::Cube, + multisampled: true, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_cube_array(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::CubeArray, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_cube_array_multisampled( + sample_type: TextureSampleType, + ) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::CubeArray, + multisampled: true, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_3d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D3, + multisampled: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_3d_multisampled(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder { + BindingType::Texture { + sample_type, + view_dimension: TextureViewDimension::D3, + multisampled: true, + } + .into_bind_group_layout_entry_builder() + } + + pub fn sampler(sampler_binding_type: SamplerBindingType) -> BindGroupLayoutEntryBuilder { + BindingType::Sampler(sampler_binding_type).into_bind_group_layout_entry_builder() + } + + pub fn texture_storage_2d( + format: TextureFormat, + access: StorageTextureAccess, + ) -> BindGroupLayoutEntryBuilder { + BindingType::StorageTexture { + access, + format, + view_dimension: TextureViewDimension::D2, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_storage_2d_array( + format: TextureFormat, + access: StorageTextureAccess, + ) -> BindGroupLayoutEntryBuilder { + BindingType::StorageTexture { + access, + format, + view_dimension: TextureViewDimension::D2Array, + } + .into_bind_group_layout_entry_builder() + } + + pub fn texture_storage_3d( + format: TextureFormat, + access: 
StorageTextureAccess, + ) -> BindGroupLayoutEntryBuilder { + BindingType::StorageTexture { + access, + format, + view_dimension: TextureViewDimension::D3, + } + .into_bind_group_layout_entry_builder() + } + + pub fn acceleration_structure() -> BindGroupLayoutEntryBuilder { + BindingType::AccelerationStructure { + vertex_return: false, + } + .into_bind_group_layout_entry_builder() + } + + pub fn acceleration_structure_vertex_return() -> BindGroupLayoutEntryBuilder { + BindingType::AccelerationStructure { + vertex_return: true, + } + .into_bind_group_layout_entry_builder() + } +} diff --git a/crates/libmarathon/src/render/render_resource/bindless.rs b/crates/libmarathon/src/render/render_resource/bindless.rs new file mode 100644 index 0000000..0d819dd --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/bindless.rs @@ -0,0 +1,374 @@ +//! Types and functions relating to bindless resources. + +use std::borrow::Cow; +use core::{ + num::{NonZeroU32, NonZeroU64}, + ops::Range, +}; + +use bevy_derive::{Deref, DerefMut}; +use wgpu::{ + BindGroupLayoutEntry, SamplerBindingType, ShaderStages, TextureSampleType, TextureViewDimension, +}; + +use crate::render::render_resource::binding_types::storage_buffer_read_only_sized; + +use super::binding_types::{ + sampler, texture_1d, texture_2d, texture_2d_array, texture_3d, texture_cube, texture_cube_array, +}; + +/// The default value for the number of resources that can be stored in a slab +/// on this platform. +/// +/// See the documentation for [`BindlessSlabResourceLimit`] for more +/// information. +#[cfg(any(target_os = "macos", target_os = "ios"))] +pub const AUTO_BINDLESS_SLAB_RESOURCE_LIMIT: u32 = 64; +/// The default value for the number of resources that can be stored in a slab +/// on this platform. +/// +/// See the documentation for [`BindlessSlabResourceLimit`] for more +/// information. 
+#[cfg(not(any(target_os = "macos", target_os = "ios")))] +pub const AUTO_BINDLESS_SLAB_RESOURCE_LIMIT: u32 = 2048; + +/// The binding numbers for the built-in binding arrays of each bindless +/// resource type. +/// +/// In the case of materials, the material allocator manages these binding +/// arrays. +/// +/// `bindless.wgsl` contains declarations of these arrays for use in your +/// shaders. If you change these, make sure to update that file as well. +pub static BINDING_NUMBERS: [(BindlessResourceType, BindingNumber); 9] = [ + (BindlessResourceType::SamplerFiltering, BindingNumber(1)), + (BindlessResourceType::SamplerNonFiltering, BindingNumber(2)), + (BindlessResourceType::SamplerComparison, BindingNumber(3)), + (BindlessResourceType::Texture1d, BindingNumber(4)), + (BindlessResourceType::Texture2d, BindingNumber(5)), + (BindlessResourceType::Texture2dArray, BindingNumber(6)), + (BindlessResourceType::Texture3d, BindingNumber(7)), + (BindlessResourceType::TextureCube, BindingNumber(8)), + (BindlessResourceType::TextureCubeArray, BindingNumber(9)), +]; + +/// The maximum number of resources that can be stored in a slab. +/// +/// This limit primarily exists in order to work around `wgpu` performance +/// problems involving large numbers of bindless resources. Also, some +/// platforms, such as Metal, currently enforce limits on the number of +/// resources in use. +/// +/// This corresponds to `LIMIT` in the `#[bindless(LIMIT)]` attribute when +/// deriving [`crate::render_resource::AsBindGroup`]. +#[derive(Clone, Copy, Default, PartialEq, Debug)] +pub enum BindlessSlabResourceLimit { + /// Allows the renderer to choose a reasonable value for the resource limit + /// based on the platform. + /// + /// This value has been tuned, so you should default to this value unless + /// you have special platform-specific considerations that prevent you from + /// using it. + #[default] + Auto, + + /// A custom value for the resource limit. 
+ /// + /// Bevy will allocate no more than this number of resources in a slab, + /// unless exceeding this value is necessary in order to allocate at all + /// (i.e. unless the number of bindless resources in your bind group exceeds + /// this value), in which case Bevy can exceed it. + Custom(u32), +} + +/// Information about the bindless resources in this object. +/// +/// The material bind group allocator uses this descriptor in order to create +/// and maintain bind groups. The fields within this bindless descriptor are +/// [`Cow`]s in order to support both the common case in which the fields are +/// simply `static` constants and the more unusual case in which the fields are +/// dynamically generated efficiently. An example of the latter case is +/// `ExtendedMaterial`, which needs to assemble a bindless descriptor from those +/// of the base material and the material extension at runtime. +/// +/// This structure will only be present if this object is bindless. +pub struct BindlessDescriptor { + /// The bindless resource types that this object uses, in order of bindless + /// index. + /// + /// The resource assigned to binding index 0 will be at index 0, the + /// resource assigned to binding index will be at index 1 in this array, and + /// so on. Unused binding indices are set to [`BindlessResourceType::None`]. + pub resources: Cow<'static, [BindlessResourceType]>, + /// The [`BindlessBufferDescriptor`] for each bindless buffer that this + /// object uses. + /// + /// The order of this array is irrelevant. + pub buffers: Cow<'static, [BindlessBufferDescriptor]>, + /// The [`BindlessIndexTableDescriptor`]s describing each bindless index + /// table. + /// + /// This list must be sorted by the first bindless index. + pub index_tables: Cow<'static, [BindlessIndexTableDescriptor]>, +} + +/// The type of potentially-bindless resource. 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub enum BindlessResourceType { + /// No bindless resource. + /// + /// This is used as a placeholder to fill holes in the + /// [`BindlessDescriptor::resources`] list. + None, + /// A storage buffer. + Buffer, + /// A filtering sampler. + SamplerFiltering, + /// A non-filtering sampler (nearest neighbor). + SamplerNonFiltering, + /// A comparison sampler (typically used for shadow maps). + SamplerComparison, + /// A 1D texture. + Texture1d, + /// A 2D texture. + Texture2d, + /// A 2D texture array. + /// + /// Note that this differs from a binding array. 2D texture arrays must all + /// have the same size and format. + Texture2dArray, + /// A 3D texture. + Texture3d, + /// A cubemap texture. + TextureCube, + /// A cubemap texture array. + /// + /// Note that this differs from a binding array. Cubemap texture arrays must + /// all have the same size and format. + TextureCubeArray, + /// Multiple instances of plain old data concatenated into a single buffer. + /// + /// This corresponds to the `#[data]` declaration in + /// [`crate::render_resource::AsBindGroup`]. + /// + /// Note that this resource doesn't itself map to a GPU-level binding + /// resource and instead depends on the `MaterialBindGroupAllocator` to + /// create a binding resource for it. + DataBuffer, +} + +/// Describes a bindless buffer. +/// +/// Unlike samplers and textures, each buffer in a bind group gets its own +/// unique bind group entry. That is, there isn't any `bindless_buffers` binding +/// array to go along with `bindless_textures_2d`, +/// `bindless_samplers_filtering`, etc. Therefore, this descriptor contains two +/// indices: the *binding number* and the *bindless index*. The binding number +/// is the `@binding` number used in the shader, while the bindless index is the +/// index of the buffer in the bindless index table (which is itself +/// conventionally bound to binding number 0). 
+/// +/// When declaring the buffer in a derived implementation +/// [`crate::render_resource::AsBindGroup`] with syntax like +/// `#[uniform(BINDLESS_INDEX, StandardMaterialUniform, +/// bindless(BINDING_NUMBER)]`, the bindless index is `BINDLESS_INDEX`, and the +/// binding number is `BINDING_NUMBER`. Note the order. +#[derive(Clone, Copy, Debug)] +pub struct BindlessBufferDescriptor { + /// The actual binding number of the buffer. + /// + /// This is declared with `@binding` in WGSL. When deriving + /// [`crate::render_resource::AsBindGroup`], this is the `BINDING_NUMBER` in + /// `#[uniform(BINDLESS_INDEX, StandardMaterialUniform, + /// bindless(BINDING_NUMBER)]`. + pub binding_number: BindingNumber, + /// The index of the buffer in the bindless index table. + /// + /// In the shader, this is the index into the table bound to binding 0. When + /// deriving [`crate::render_resource::AsBindGroup`], this is the + /// `BINDLESS_INDEX` in `#[uniform(BINDLESS_INDEX, StandardMaterialUniform, + /// bindless(BINDING_NUMBER)]`. + pub bindless_index: BindlessIndex, + /// The size of the buffer in bytes, if known. + pub size: Option, +} + +/// Describes the layout of the bindless index table, which maps bindless +/// indices to indices within the binding arrays. +#[derive(Clone)] +pub struct BindlessIndexTableDescriptor { + /// The range of bindless indices that this descriptor covers. + pub indices: Range, + /// The binding at which the index table itself will be bound. + /// + /// By default, this is binding 0, but it can be changed with the + /// `#[bindless(index_table(binding(B)))]` attribute. + pub binding_number: BindingNumber, +} + +/// The index of the actual binding in the bind group. +/// +/// This is the value specified in WGSL as `@binding`. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Deref, DerefMut)] +pub struct BindingNumber(pub u32); + +/// The index in the bindless index table. +/// +/// This table is conventionally bound to binding number 0. 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Hash, Debug, Deref, DerefMut)] +pub struct BindlessIndex(pub u32); + +/// Creates the bind group layout entries common to all shaders that use +/// bindless bind groups. +/// +/// `bindless_resource_count` specifies the total number of bindless resources. +/// `bindless_slab_resource_limit` specifies the resolved +/// [`BindlessSlabResourceLimit`] value. +pub fn create_bindless_bind_group_layout_entries( + bindless_index_table_length: u32, + bindless_slab_resource_limit: u32, + bindless_index_table_binding_number: BindingNumber, +) -> Vec { + let bindless_slab_resource_limit = + NonZeroU32::new(bindless_slab_resource_limit).expect("Bindless slot count must be nonzero"); + + // The maximum size of a binding array is the + // `bindless_slab_resource_limit`, which would occur if all of the bindless + // resources were of the same type. So we create our binding arrays with + // that size. + + vec![ + // Start with the bindless index table, bound to binding number 0. + storage_buffer_read_only_sized( + false, + NonZeroU64::new(bindless_index_table_length as u64 * size_of::() as u64), + ) + .build( + *bindless_index_table_binding_number, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + // Continue with the common bindless resource arrays. 
+ sampler(SamplerBindingType::Filtering) + .count(bindless_slab_resource_limit) + .build( + 1, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + sampler(SamplerBindingType::NonFiltering) + .count(bindless_slab_resource_limit) + .build( + 2, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + sampler(SamplerBindingType::Comparison) + .count(bindless_slab_resource_limit) + .build( + 3, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_1d(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 4, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_2d(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 5, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_2d_array(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 6, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_3d(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 7, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_cube(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 8, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + texture_cube_array(TextureSampleType::Float { filterable: true }) + .count(bindless_slab_resource_limit) + .build( + 9, + ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE, + ), + ] +} + +impl BindlessSlabResourceLimit { + /// Determines the actual bindless slab resource limit on this platform. 
+ pub fn resolve(&self) -> u32 { + match *self { + BindlessSlabResourceLimit::Auto => AUTO_BINDLESS_SLAB_RESOURCE_LIMIT, + BindlessSlabResourceLimit::Custom(limit) => limit, + } + } +} + +impl BindlessResourceType { + /// Returns the binding number for the common array of this resource type. + /// + /// For example, if you pass `BindlessResourceType::Texture2d`, this will + /// return 5, in order to match the `@group(2) @binding(5) var + /// bindless_textures_2d: binding_array>` declaration in + /// `bindless.wgsl`. + /// + /// Not all resource types have fixed binding numbers. If you call + /// [`Self::binding_number`] on such a resource type, it returns `None`. + /// + /// Note that this returns a static reference to the binding number, not the + /// binding number itself. This is to conform to an idiosyncratic API in + /// `wgpu` whereby binding numbers for binding arrays are taken by `&u32` + /// *reference*, not by `u32` value. + pub fn binding_number(&self) -> Option<&'static BindingNumber> { + match BINDING_NUMBERS.binary_search_by_key(self, |(key, _)| *key) { + Ok(binding_number) => Some(&BINDING_NUMBERS[binding_number].1), + Err(_) => None, + } + } +} + +impl From for BindlessResourceType { + fn from(texture_view_dimension: TextureViewDimension) -> Self { + match texture_view_dimension { + TextureViewDimension::D1 => BindlessResourceType::Texture1d, + TextureViewDimension::D2 => BindlessResourceType::Texture2d, + TextureViewDimension::D2Array => BindlessResourceType::Texture2dArray, + TextureViewDimension::Cube => BindlessResourceType::TextureCube, + TextureViewDimension::CubeArray => BindlessResourceType::TextureCubeArray, + TextureViewDimension::D3 => BindlessResourceType::Texture3d, + } + } +} + +impl From for BindlessResourceType { + fn from(sampler_binding_type: SamplerBindingType) -> Self { + match sampler_binding_type { + SamplerBindingType::Filtering => BindlessResourceType::SamplerFiltering, + SamplerBindingType::NonFiltering => 
BindlessResourceType::SamplerNonFiltering, + SamplerBindingType::Comparison => BindlessResourceType::SamplerComparison, + } + } +} + +impl From for BindlessIndex { + fn from(value: u32) -> Self { + Self(value) + } +} + +impl From for BindingNumber { + fn from(value: u32) -> Self { + Self(value) + } +} diff --git a/crates/libmarathon/src/render/render_resource/buffer.rs b/crates/libmarathon/src/render/render_resource/buffer.rs new file mode 100644 index 0000000..734bbc2 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/buffer.rs @@ -0,0 +1,95 @@ +use crate::render::define_atomic_id; +use crate::render::renderer::WgpuWrapper; +use core::ops::{Bound, Deref, RangeBounds}; + +define_atomic_id!(BufferId); + +#[derive(Clone, Debug)] +pub struct Buffer { + id: BufferId, + value: WgpuWrapper, +} + +impl Buffer { + #[inline] + pub fn id(&self) -> BufferId { + self.id + } + + pub fn slice(&self, bounds: impl RangeBounds) -> BufferSlice<'_> { + // need to compute and store this manually because wgpu doesn't export offset and size on wgpu::BufferSlice + let offset = match bounds.start_bound() { + Bound::Included(&bound) => bound, + Bound::Excluded(&bound) => bound + 1, + Bound::Unbounded => 0, + }; + let size = match bounds.end_bound() { + Bound::Included(&bound) => bound + 1, + Bound::Excluded(&bound) => bound, + Bound::Unbounded => self.value.size(), + } - offset; + BufferSlice { + id: self.id, + offset, + size, + value: self.value.slice(bounds), + } + } + + #[inline] + pub fn unmap(&self) { + self.value.unmap(); + } +} + +impl From for Buffer { + fn from(value: wgpu::Buffer) -> Self { + Buffer { + id: BufferId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl Deref for Buffer { + type Target = wgpu::Buffer; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +#[derive(Clone, Debug)] +pub struct BufferSlice<'a> { + id: BufferId, + offset: wgpu::BufferAddress, + value: wgpu::BufferSlice<'a>, + size: wgpu::BufferAddress, +} + 
+impl<'a> BufferSlice<'a> { + #[inline] + pub fn id(&self) -> BufferId { + self.id + } + + #[inline] + pub fn offset(&self) -> wgpu::BufferAddress { + self.offset + } + + #[inline] + pub fn size(&self) -> wgpu::BufferAddress { + self.size + } +} + +impl<'a> Deref for BufferSlice<'a> { + type Target = wgpu::BufferSlice<'a>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} diff --git a/crates/libmarathon/src/render/render_resource/buffer_vec.rs b/crates/libmarathon/src/render/render_resource/buffer_vec.rs new file mode 100644 index 0000000..99811c0 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/buffer_vec.rs @@ -0,0 +1,587 @@ +use core::{iter, marker::PhantomData}; + +use crate::render::{ + render_resource::Buffer, + renderer::{RenderDevice, RenderQueue}, +}; +use bytemuck::{must_cast_slice, NoUninit}; +use encase::{ + internal::{WriteInto, Writer}, + ShaderType, +}; +use thiserror::Error; +use wgpu::{BindingResource, BufferAddress, BufferUsages}; + +use super::GpuArrayBufferable; + +/// A structure for storing raw bytes that have already been properly formatted +/// for use by the GPU. +/// +/// "Properly formatted" means that item data already meets the alignment and padding +/// requirements for how it will be used on the GPU. The item type must implement [`NoUninit`] +/// for its data representation to be directly copyable. +/// +/// Index, vertex, and instance-rate vertex buffers have no alignment nor padding requirements and +/// so this helper type is a good choice for them. +/// +/// The contained data is stored in system RAM. Calling [`reserve`](RawBufferVec::reserve) +/// allocates VRAM from the [`RenderDevice`]. +/// [`write_buffer`](RawBufferVec::write_buffer) queues copying of the data +/// from system RAM to VRAM. 
+/// +/// Other options for storing GPU-accessible data are: +/// * [`BufferVec`] +/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer) +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer) +/// * [`StorageBuffer`](crate::render_resource::StorageBuffer) +/// * [`Texture`](crate::render_resource::Texture) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +pub struct RawBufferVec { + values: Vec, + buffer: Option, + capacity: usize, + item_size: usize, + buffer_usage: BufferUsages, + label: Option, + changed: bool, +} + +impl RawBufferVec { + /// Creates a new [`RawBufferVec`] with the given [`BufferUsages`]. + pub const fn new(buffer_usage: BufferUsages) -> Self { + Self { + values: Vec::new(), + buffer: None, + capacity: 0, + item_size: size_of::(), + buffer_usage, + label: None, + changed: false, + } + } + + /// Returns a handle to the buffer, if the data has been uploaded. + #[inline] + pub fn buffer(&self) -> Option<&Buffer> { + self.buffer.as_ref() + } + + /// Returns the binding for the buffer if the data has been uploaded. + #[inline] + pub fn binding(&self) -> Option> { + Some(BindingResource::Buffer( + self.buffer()?.as_entire_buffer_binding(), + )) + } + + /// Returns the amount of space that the GPU will use before reallocating. + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Returns the number of items that have been pushed to this buffer. + #[inline] + pub fn len(&self) -> usize { + self.values.len() + } + + /// Returns true if the buffer is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } + + /// Adds a new value and returns its index. 
+ pub fn push(&mut self, value: T) -> usize { + let index = self.values.len(); + self.values.push(value); + index + } + + pub fn append(&mut self, other: &mut RawBufferVec) { + self.values.append(&mut other.values); + } + + /// Returns the value at the given index. + pub fn get(&self, index: u32) -> Option<&T> { + self.values.get(index as usize) + } + + /// Sets the value at the given index. + /// + /// The index must be less than [`RawBufferVec::len`]. + pub fn set(&mut self, index: u32, value: T) { + self.values[index as usize] = value; + } + + /// Preallocates space for `count` elements in the internal CPU-side buffer. + /// + /// Unlike [`RawBufferVec::reserve`], this doesn't have any effect on the GPU buffer. + pub fn reserve_internal(&mut self, count: usize) { + self.values.reserve(count); + } + + /// Changes the debugging label of the buffer. + /// + /// The next time the buffer is updated (via [`reserve`](Self::reserve)), Bevy will inform + /// the driver of the new label. + pub fn set_label(&mut self, label: Option<&str>) { + let label = label.map(str::to_string); + + if label != self.label { + self.changed = true; + } + + self.label = label; + } + + /// Returns the label + pub fn get_label(&self) -> Option<&str> { + self.label.as_deref() + } + + /// Creates a [`Buffer`] on the [`RenderDevice`] with size + /// at least `size_of::() * capacity`, unless a such a buffer already exists. + /// + /// If a [`Buffer`] exists, but is too small, references to it will be discarded, + /// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s + /// that are no longer referenced will be deleted by the [`RenderDevice`] + /// once it is done using them (typically 1-2 frames). + /// + /// In addition to any [`BufferUsages`] provided when + /// the `RawBufferVec` was created, the buffer on the [`RenderDevice`] + /// is marked as [`BufferUsages::COPY_DST`](BufferUsages). 
+ pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) { + let size = self.item_size * capacity; + if capacity > self.capacity || (self.changed && size > 0) { + self.capacity = capacity; + self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor { + label: self.label.as_deref(), + size: size as BufferAddress, + usage: BufferUsages::COPY_DST | self.buffer_usage, + mapped_at_creation: false, + })); + self.changed = false; + } + } + + /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`] + /// and the provided [`RenderQueue`]. + /// + /// Before queuing the write, a [`reserve`](RawBufferVec::reserve) operation + /// is executed. + pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + if self.values.is_empty() { + return; + } + self.reserve(self.values.len(), device); + if let Some(buffer) = &self.buffer { + let range = 0..self.item_size * self.values.len(); + let bytes: &[u8] = must_cast_slice(&self.values); + queue.write_buffer(buffer, 0, &bytes[range]); + } + } + + /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`] + /// and the provided [`RenderQueue`]. + /// + /// If the buffer is not initialized on the GPU or the range is bigger than the capacity it will + /// return an error. You'll need to either reserve a new buffer which will lose data on the GPU + /// or create a new buffer and copy the old data to it. + /// + /// This will only write the data contained in the given range. It is useful if you only want + /// to update a part of the buffer. 
+ pub fn write_buffer_range( + &mut self, + render_queue: &RenderQueue, + range: core::ops::Range, + ) -> Result<(), WriteBufferRangeError> { + if self.values.is_empty() { + return Err(WriteBufferRangeError::NoValuesToUpload); + } + if range.end > self.item_size * self.capacity { + return Err(WriteBufferRangeError::RangeBiggerThanBuffer); + } + if let Some(buffer) = &self.buffer { + // Cast only the bytes we need to write + let bytes: &[u8] = must_cast_slice(&self.values[range.start..range.end]); + render_queue.write_buffer(buffer, (range.start * self.item_size) as u64, bytes); + Ok(()) + } else { + Err(WriteBufferRangeError::BufferNotInitialized) + } + } + + /// Reduces the length of the buffer. + pub fn truncate(&mut self, len: usize) { + self.values.truncate(len); + } + + /// Removes all elements from the buffer. + pub fn clear(&mut self) { + self.values.clear(); + } + + /// Removes and returns the last element in the buffer. + pub fn pop(&mut self) -> Option { + self.values.pop() + } + + pub fn values(&self) -> &Vec { + &self.values + } + + pub fn values_mut(&mut self) -> &mut Vec { + &mut self.values + } +} + +impl RawBufferVec +where + T: NoUninit + Default, +{ + pub fn grow_set(&mut self, index: u32, value: T) { + while index as usize + 1 > self.len() { + self.values.push(T::default()); + } + self.values[index as usize] = value; + } +} + +impl Extend for RawBufferVec { + #[inline] + fn extend>(&mut self, iter: I) { + self.values.extend(iter); + } +} + +/// Like [`RawBufferVec`], but doesn't require that the data type `T` be +/// [`NoUninit`]. +/// +/// This is a high-performance data structure that you should use whenever +/// possible if your data is more complex than is suitable for [`RawBufferVec`]. +/// The [`ShaderType`] trait from the `encase` library is used to ensure that +/// the data is correctly aligned for use by the GPU. 
+/// +/// For performance reasons, unlike [`RawBufferVec`], this type doesn't allow +/// CPU access to the data after it's been added via [`BufferVec::push`]. If you +/// need CPU access to the data, consider another type, such as +/// [`StorageBuffer`][super::StorageBuffer]. +/// +/// Other options for storing GPU-accessible data are: +/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer) +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer) +/// * [`RawBufferVec`] +/// * [`StorageBuffer`](crate::render_resource::StorageBuffer) +/// * [`Texture`](crate::render_resource::Texture) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +pub struct BufferVec +where + T: ShaderType + WriteInto, +{ + data: Vec, + buffer: Option, + capacity: usize, + buffer_usage: BufferUsages, + label: Option, + label_changed: bool, + phantom: PhantomData, +} + +impl BufferVec +where + T: ShaderType + WriteInto, +{ + /// Creates a new [`BufferVec`] with the given [`BufferUsages`]. + pub const fn new(buffer_usage: BufferUsages) -> Self { + Self { + data: vec![], + buffer: None, + capacity: 0, + buffer_usage, + label: None, + label_changed: false, + phantom: PhantomData, + } + } + + /// Returns a handle to the buffer, if the data has been uploaded. + #[inline] + pub fn buffer(&self) -> Option<&Buffer> { + self.buffer.as_ref() + } + + /// Returns the binding for the buffer if the data has been uploaded. + #[inline] + pub fn binding(&self) -> Option> { + Some(BindingResource::Buffer( + self.buffer()?.as_entire_buffer_binding(), + )) + } + + /// Returns the amount of space that the GPU will use before reallocating. + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Returns the number of items that have been pushed to this buffer. 
+ #[inline] + pub fn len(&self) -> usize { + self.data.len() / u64::from(T::min_size()) as usize + } + + /// Returns true if the buffer is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Adds a new value and returns its index. + pub fn push(&mut self, value: T) -> usize { + let element_size = u64::from(T::min_size()) as usize; + let offset = self.data.len(); + + // TODO: Consider using unsafe code to push uninitialized, to prevent + // the zeroing. It shows up in profiles. + self.data.extend(iter::repeat_n(0, element_size)); + + // Take a slice of the new data for `write_into` to use. This is + // important: it hoists the bounds check up here so that the compiler + // can eliminate all the bounds checks that `write_into` will emit. + let mut dest = &mut self.data[offset..(offset + element_size)]; + value.write_into(&mut Writer::new(&value, &mut dest, 0).unwrap()); + + offset / u64::from(T::min_size()) as usize + } + + /// Changes the debugging label of the buffer. + /// + /// The next time the buffer is updated (via [`Self::reserve`]), Bevy will inform + /// the driver of the new label. + pub fn set_label(&mut self, label: Option<&str>) { + let label = label.map(str::to_string); + + if label != self.label { + self.label_changed = true; + } + + self.label = label; + } + + /// Returns the label + pub fn get_label(&self) -> Option<&str> { + self.label.as_deref() + } + + /// Creates a [`Buffer`] on the [`RenderDevice`] with size + /// at least `size_of::() * capacity`, unless such a buffer already exists. + /// + /// If a [`Buffer`] exists, but is too small, references to it will be discarded, + /// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s + /// that are no longer referenced will be deleted by the [`RenderDevice`] + /// once it is done using them (typically 1-2 frames). 
+ /// + /// In addition to any [`BufferUsages`] provided when + /// the `BufferVec` was created, the buffer on the [`RenderDevice`] + /// is marked as [`BufferUsages::COPY_DST`](BufferUsages). + pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) { + if capacity <= self.capacity && !self.label_changed { + return; + } + + self.capacity = capacity; + let size = u64::from(T::min_size()) as usize * capacity; + self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor { + label: self.label.as_deref(), + size: size as BufferAddress, + usage: BufferUsages::COPY_DST | self.buffer_usage, + mapped_at_creation: false, + })); + self.label_changed = false; + } + + /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`] + /// and the provided [`RenderQueue`]. + /// + /// Before queuing the write, a [`reserve`](BufferVec::reserve) operation is + /// executed. + pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + if self.data.is_empty() { + return; + } + + self.reserve(self.data.len() / u64::from(T::min_size()) as usize, device); + + let Some(buffer) = &self.buffer else { return }; + queue.write_buffer(buffer, 0, &self.data); + } + + /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`] + /// and the provided [`RenderQueue`]. + /// + /// If the buffer is not initialized on the GPU or the range is bigger than the capacity it will + /// return an error. You'll need to either reserve a new buffer which will lose data on the GPU + /// or create a new buffer and copy the old data to it. + /// + /// This will only write the data contained in the given range. It is useful if you only want + /// to update a part of the buffer. 
+ pub fn write_buffer_range( + &mut self, + render_queue: &RenderQueue, + range: core::ops::Range, + ) -> Result<(), WriteBufferRangeError> { + if self.data.is_empty() { + return Err(WriteBufferRangeError::NoValuesToUpload); + } + let item_size = u64::from(T::min_size()) as usize; + if range.end > item_size * self.capacity { + return Err(WriteBufferRangeError::RangeBiggerThanBuffer); + } + if let Some(buffer) = &self.buffer { + let bytes = &self.data[range.start..range.end]; + render_queue.write_buffer(buffer, (range.start * item_size) as u64, bytes); + Ok(()) + } else { + Err(WriteBufferRangeError::BufferNotInitialized) + } + } + + /// Reduces the length of the buffer. + pub fn truncate(&mut self, len: usize) { + self.data.truncate(u64::from(T::min_size()) as usize * len); + } + + /// Removes all elements from the buffer. + pub fn clear(&mut self) { + self.data.clear(); + } +} + +/// Like a [`BufferVec`], but only reserves space on the GPU for elements +/// instead of initializing them CPU-side. +/// +/// This type is useful when you're accumulating "output slots" for a GPU +/// compute shader to write into. +/// +/// The type `T` need not be [`NoUninit`], unlike [`RawBufferVec`]; it only has to +/// be [`GpuArrayBufferable`]. +pub struct UninitBufferVec +where + T: GpuArrayBufferable, +{ + buffer: Option, + len: usize, + capacity: usize, + item_size: usize, + buffer_usage: BufferUsages, + label: Option, + label_changed: bool, + phantom: PhantomData, +} + +impl UninitBufferVec +where + T: GpuArrayBufferable, +{ + /// Creates a new [`UninitBufferVec`] with the given [`BufferUsages`]. + pub const fn new(buffer_usage: BufferUsages) -> Self { + Self { + len: 0, + buffer: None, + capacity: 0, + item_size: size_of::(), + buffer_usage, + label: None, + label_changed: false, + phantom: PhantomData, + } + } + + /// Returns the buffer, if allocated. 
+ #[inline] + pub fn buffer(&self) -> Option<&Buffer> { + self.buffer.as_ref() + } + + /// Returns the binding for the buffer if the data has been uploaded. + #[inline] + pub fn binding(&self) -> Option> { + Some(BindingResource::Buffer( + self.buffer()?.as_entire_buffer_binding(), + )) + } + + /// Reserves space for one more element in the buffer and returns its index. + pub fn add(&mut self) -> usize { + self.add_multiple(1) + } + + /// Reserves space for the given number of elements in the buffer and + /// returns the index of the first one. + pub fn add_multiple(&mut self, count: usize) -> usize { + let index = self.len; + self.len += count; + index + } + + /// Returns true if no elements have been added to this [`UninitBufferVec`]. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Removes all elements from the buffer. + pub fn clear(&mut self) { + self.len = 0; + } + + /// Returns the length of the buffer. + pub fn len(&self) -> usize { + self.len + } + + /// Materializes the buffer on the GPU with space for `capacity` elements. + /// + /// If the buffer is already big enough, this function doesn't reallocate + /// the buffer. + pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) { + if capacity <= self.capacity && !self.label_changed { + return; + } + + self.capacity = capacity; + let size = self.item_size * capacity; + self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor { + label: self.label.as_deref(), + size: size as BufferAddress, + usage: BufferUsages::COPY_DST | self.buffer_usage, + mapped_at_creation: false, + })); + + self.label_changed = false; + } + + /// Materializes the buffer on the GPU, with an appropriate size for the + /// elements that have been pushed so far. 
+ pub fn write_buffer(&mut self, device: &RenderDevice) { + if !self.is_empty() { + self.reserve(self.len, device); + } + } +} + +/// Error returned when `write_buffer_range` fails +/// +/// See [`RawBufferVec::write_buffer_range`] [`BufferVec::write_buffer_range`] +#[derive(Debug, Eq, PartialEq, Copy, Clone, Error)] +pub enum WriteBufferRangeError { + #[error("the range is bigger than the capacity of the buffer")] + RangeBiggerThanBuffer, + #[error("the gpu buffer is not initialized")] + BufferNotInitialized, + #[error("there are no values to upload")] + NoValuesToUpload, +} diff --git a/crates/libmarathon/src/render/render_resource/gpu_array_buffer.rs b/crates/libmarathon/src/render/render_resource/gpu_array_buffer.rs new file mode 100644 index 0000000..59fb3c6 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/gpu_array_buffer.rs @@ -0,0 +1,118 @@ +use super::{ + binding_types::{storage_buffer_read_only, uniform_buffer_sized}, + BindGroupLayoutEntryBuilder, BufferVec, +}; +use crate::render::{ + render_resource::batched_uniform_buffer::BatchedUniformBuffer, + renderer::{RenderDevice, RenderQueue}, +}; +use bevy_ecs::{prelude::Component, resource::Resource}; +use core::marker::PhantomData; +use encase::{private::WriteInto, ShaderSize, ShaderType}; +use nonmax::NonMaxU32; +use wgpu::{BindingResource, BufferUsages}; + +/// Trait for types able to go in a [`GpuArrayBuffer`]. +pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {} + +impl GpuArrayBufferable for T {} + +/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array. +/// +/// On platforms that support storage buffers, this is equivalent to +/// [`BufferVec`]. Otherwise, this falls back to a dynamic offset +/// uniform buffer with the largest array of T that fits within a uniform buffer +/// binding (within reasonable limits). 
+/// +/// Other options for storing GPU-accessible data are: +/// * [`BufferVec`] +/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer) +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`RawBufferVec`](crate::render_resource::RawBufferVec) +/// * [`StorageBuffer`](crate::render_resource::StorageBuffer) +/// * [`Texture`](crate::render_resource::Texture) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +#[derive(Resource)] +pub enum GpuArrayBuffer { + Uniform(BatchedUniformBuffer), + Storage(BufferVec), +} + +impl GpuArrayBuffer { + pub fn new(device: &RenderDevice) -> Self { + let limits = device.limits(); + if limits.max_storage_buffers_per_shader_stage == 0 { + GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits)) + } else { + GpuArrayBuffer::Storage(BufferVec::new(BufferUsages::STORAGE)) + } + } + + pub fn clear(&mut self) { + match self { + GpuArrayBuffer::Uniform(buffer) => buffer.clear(), + GpuArrayBuffer::Storage(buffer) => buffer.clear(), + } + } + + pub fn push(&mut self, value: T) -> GpuArrayBufferIndex { + match self { + GpuArrayBuffer::Uniform(buffer) => buffer.push(value), + GpuArrayBuffer::Storage(buffer) => { + let index = buffer.push(value) as u32; + GpuArrayBufferIndex { + index, + dynamic_offset: None, + element_type: PhantomData, + } + } + } + } + + pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + match self { + GpuArrayBuffer::Uniform(buffer) => buffer.write_buffer(device, queue), + GpuArrayBuffer::Storage(buffer) => buffer.write_buffer(device, queue), + } + } + + pub fn binding_layout(device: &RenderDevice) -> BindGroupLayoutEntryBuilder { + if device.limits().max_storage_buffers_per_shader_stage == 0 { + uniform_buffer_sized( + true, + // BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use + // None here and let wgpu figure out the size. 
+ None, + ) + } else { + storage_buffer_read_only::(false) + } + } + + pub fn binding(&self) -> Option> { + match self { + GpuArrayBuffer::Uniform(buffer) => buffer.binding(), + GpuArrayBuffer::Storage(buffer) => buffer.binding(), + } + } + + pub fn batch_size(device: &RenderDevice) -> Option { + let limits = device.limits(); + if limits.max_storage_buffers_per_shader_stage == 0 { + Some(BatchedUniformBuffer::::batch_size(&limits) as u32) + } else { + None + } + } +} + +/// An index into a [`GpuArrayBuffer`] for a given element. +#[derive(Component, Clone)] +pub struct GpuArrayBufferIndex { + /// The index to use in a shader into the array. + pub index: u32, + /// The dynamic offset to use when setting the bind group in a pass. + /// Only used on platforms that don't support storage buffers. + pub dynamic_offset: Option, + pub element_type: PhantomData, +} diff --git a/crates/libmarathon/src/render/render_resource/mod.rs b/crates/libmarathon/src/render/render_resource/mod.rs new file mode 100644 index 0000000..0a41dfd --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/mod.rs @@ -0,0 +1,75 @@ +mod batched_uniform_buffer; +mod bind_group; +mod bind_group_entries; +mod bind_group_layout; +mod bind_group_layout_entries; +mod bindless; +mod buffer; +mod buffer_vec; +mod gpu_array_buffer; +mod pipeline; +mod pipeline_cache; +mod pipeline_specializer; +pub mod resource_macros; +mod specializer; +mod storage_buffer; +mod texture; +mod uniform_buffer; + +pub use bind_group::*; +pub use bind_group_entries::*; +pub use bind_group_layout::*; +pub use bind_group_layout_entries::*; +pub use bindless::*; +pub use buffer::*; +pub use buffer_vec::*; +pub use gpu_array_buffer::*; +pub use pipeline::*; +pub use pipeline_cache::*; +pub use pipeline_specializer::*; +pub use specializer::*; +pub use storage_buffer::*; +pub use texture::*; +pub use uniform_buffer::*; + +// TODO: decide where re-exports should go +pub use wgpu::{ + util::{ + BufferInitDescriptor, 
DispatchIndirectArgs, DrawIndexedIndirectArgs, DrawIndirectArgs, + TextureDataOrder, + }, + AccelerationStructureFlags, AccelerationStructureGeometryFlags, + AccelerationStructureUpdateMode, AdapterInfo as WgpuAdapterInfo, AddressMode, AstcBlock, + AstcChannel, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, BindingResource, BindingType, Blas, BlasBuildEntry, BlasGeometries, + BlasGeometrySizeDescriptors, BlasTriangleGeometry, BlasTriangleGeometrySizeDescriptor, + BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferAsyncError, + BufferBinding, BufferBindingType, BufferDescriptor, BufferSize, BufferUsages, ColorTargetState, + ColorWrites, CommandEncoder, CommandEncoderDescriptor, CompareFunction, ComputePass, + ComputePassDescriptor, ComputePipelineDescriptor as RawComputePipelineDescriptor, + CreateBlasDescriptor, CreateTlasDescriptor, DepthBiasState, DepthStencilState, DownlevelFlags, + Extent3d, Face, Features as WgpuFeatures, FilterMode, FragmentState as RawFragmentState, + FrontFace, ImageSubresourceRange, IndexFormat, Limits as WgpuLimits, LoadOp, MapMode, + MultisampleState, Operations, Origin3d, PipelineCompilationOptions, PipelineLayout, + PipelineLayoutDescriptor, PollType, PolygonMode, PrimitiveState, PrimitiveTopology, + PushConstantRange, RenderPassColorAttachment, RenderPassDepthStencilAttachment, + RenderPassDescriptor, RenderPipelineDescriptor as RawRenderPipelineDescriptor, + Sampler as WgpuSampler, SamplerBindingType, SamplerBindingType as WgpuSamplerBindingType, + SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource, ShaderStages, + StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp, + TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect, + TextureDescriptor, TextureDimension, TextureFormat, TextureFormatFeatureFlags, + TextureFormatFeatures, TextureSampleType, TextureUsages, TextureView as WgpuTextureView, + 
TextureViewDescriptor, TextureViewDimension, Tlas, TlasInstance, VertexAttribute, + VertexBufferLayout as RawVertexBufferLayout, VertexFormat, VertexState as RawVertexState, + VertexStepMode, COPY_BUFFER_ALIGNMENT, +}; + +pub mod encase { + pub use bevy_encase_derive::ShaderType; + pub use encase::*; +} + +pub use self::encase::{ShaderSize, ShaderType}; + +pub use naga::ShaderStage; diff --git a/crates/libmarathon/src/render/render_resource/pipeline.rs b/crates/libmarathon/src/render/render_resource/pipeline.rs new file mode 100644 index 0000000..6ff2226 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/pipeline.rs @@ -0,0 +1,183 @@ +use super::empty_bind_group_layout; +use crate::render::renderer::WgpuWrapper; +use crate::render::{define_atomic_id, render_resource::BindGroupLayout}; +use std::borrow::Cow; +use bevy_asset::Handle; +use bevy_mesh::VertexBufferLayout; +use bevy_shader::{Shader, ShaderDefVal}; +use core::iter; +use core::ops::Deref; +use thiserror::Error; +use wgpu::{ + ColorTargetState, DepthStencilState, MultisampleState, PrimitiveState, PushConstantRange, +}; + +define_atomic_id!(RenderPipelineId); + +/// A [`RenderPipeline`] represents a graphics pipeline and its stages (shaders), bindings and vertex buffers. +/// +/// May be converted from and dereferences to a wgpu [`RenderPipeline`](wgpu::RenderPipeline). +/// Can be created via [`RenderDevice::create_render_pipeline`](crate::renderer::RenderDevice::create_render_pipeline). 
+#[derive(Clone, Debug)]
+pub struct RenderPipeline {
+    id: RenderPipelineId,
+    value: WgpuWrapper<wgpu::RenderPipeline>,
+}
+
+impl RenderPipeline {
+    #[inline]
+    pub fn id(&self) -> RenderPipelineId {
+        self.id
+    }
+}
+
+impl From<wgpu::RenderPipeline> for RenderPipeline {
+    fn from(value: wgpu::RenderPipeline) -> Self {
+        RenderPipeline {
+            id: RenderPipelineId::new(),
+            value: WgpuWrapper::new(value),
+        }
+    }
+}
+
+impl Deref for RenderPipeline {
+    type Target = wgpu::RenderPipeline;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+define_atomic_id!(ComputePipelineId);
+
+/// A [`ComputePipeline`] represents a compute pipeline and its single shader stage.
+///
+/// May be converted from and dereferences to a wgpu [`ComputePipeline`](wgpu::ComputePipeline).
+/// Can be created via [`RenderDevice::create_compute_pipeline`](crate::renderer::RenderDevice::create_compute_pipeline).
+#[derive(Clone, Debug)]
+pub struct ComputePipeline {
+    id: ComputePipelineId,
+    value: WgpuWrapper<wgpu::ComputePipeline>,
+}
+
+impl ComputePipeline {
+    /// Returns the [`ComputePipelineId`].
+    #[inline]
+    pub fn id(&self) -> ComputePipelineId {
+        self.id
+    }
+}
+
+impl From<wgpu::ComputePipeline> for ComputePipeline {
+    fn from(value: wgpu::ComputePipeline) -> Self {
+        ComputePipeline {
+            id: ComputePipelineId::new(),
+            value: WgpuWrapper::new(value),
+        }
+    }
+}
+
+impl Deref for ComputePipeline {
+    type Target = wgpu::ComputePipeline;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+/// Describes a render (graphics) pipeline.
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct RenderPipelineDescriptor {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Option<Cow<'static, str>>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Vec<BindGroupLayout>,
+    /// The push constant ranges for this pipeline.
+    /// Supply an empty vector if the pipeline doesn't use push constants.
+    pub push_constant_ranges: Vec<PushConstantRange>,
+    /// The compiled vertex stage, its entry point, and the input buffers layout.
+    pub vertex: VertexState,
+    /// The properties of the pipeline at the primitive assembly and rasterization level.
+    pub primitive: PrimitiveState,
+    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
+    pub depth_stencil: Option<DepthStencilState>,
+    /// The multi-sampling properties of the pipeline.
+    pub multisample: MultisampleState,
+    /// The compiled fragment stage, its entry point, and the color targets.
+    pub fragment: Option<FragmentState>,
+    /// Whether to zero-initialize workgroup memory by default. If you're not sure, set this to true.
+    /// If this is false, reading from workgroup variables before writing to them will result in garbage values.
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+#[derive(Copy, Clone, Debug, Error)]
+#[error("RenderPipelineDescriptor has no FragmentState configured")]
+pub struct NoFragmentStateError;
+
+impl RenderPipelineDescriptor {
+    pub fn fragment_mut(&mut self) -> Result<&mut FragmentState, NoFragmentStateError> {
+        self.fragment.as_mut().ok_or(NoFragmentStateError)
+    }
+
+    pub fn set_layout(&mut self, index: usize, layout: BindGroupLayout) {
+        filling_set_at(&mut self.layout, index, empty_bind_group_layout(), layout);
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Default)]
+pub struct VertexState {
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// The format of any vertex buffers used with this pipeline.
+    pub buffers: Vec<VertexBufferLayout>,
+}
+
+/// Describes the fragment process in a render pipeline.
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+pub struct FragmentState {
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// The color state of the render targets.
+    pub targets: Vec<Option<ColorTargetState>>,
+}
+
+impl FragmentState {
+    pub fn set_target(&mut self, index: usize, target: ColorTargetState) {
+        filling_set_at(&mut self.targets, index, None, Some(target));
+    }
+}
+
+/// Describes a compute pipeline.
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+pub struct ComputePipelineDescriptor {
+    pub label: Option<Cow<'static, str>>,
+    pub layout: Vec<BindGroupLayout>,
+    pub push_constant_ranges: Vec<PushConstantRange>,
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// Whether to zero-initialize workgroup memory by default. If you're not sure, set this to true.
+    /// If this is false, reading from workgroup variables before writing to them will result in garbage values.
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+// utility function to set a value at the specified index, extending with
+// a filler value if the index is out of bounds.
+fn filling_set_at(vec: &mut Vec, index: usize, filler: T, value: T) { + let num_to_fill = (index + 1).saturating_sub(vec.len()); + vec.extend(iter::repeat_n(filler, num_to_fill)); + vec[index] = value; +} diff --git a/crates/libmarathon/src/render/render_resource/pipeline_cache.rs b/crates/libmarathon/src/render/render_resource/pipeline_cache.rs new file mode 100644 index 0000000..33399b6 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/pipeline_cache.rs @@ -0,0 +1,831 @@ +use crate::render::{ + render_resource::*, + renderer::{RenderAdapter, RenderDevice, WgpuWrapper}, + Extract, +}; +use std::{borrow::Cow, sync::Arc}; +use bevy_asset::{AssetEvent, AssetId, Assets, Handle}; +use bevy_ecs::{ + message::MessageReader, + resource::Resource, + system::{Res, ResMut}, +}; +use bevy_platform::collections::{HashMap, HashSet}; +use bevy_shader::{ + CachedPipelineId, PipelineCacheError, Shader, ShaderCache, ShaderCacheSource, ShaderDefVal, + ValidateShader, +}; +use bevy_tasks::Task; +use bevy_utils::default; +use core::{future::Future, hash::Hash, mem}; +use std::sync::{Mutex, PoisonError}; +use tracing::error; +use wgpu::{PipelineCompilationOptions, VertexBufferLayout as RawVertexBufferLayout}; + +/// A descriptor for a [`Pipeline`]. +/// +/// Used to store a heterogenous collection of render and compute pipeline descriptors together. +#[derive(Debug)] +pub enum PipelineDescriptor { + RenderPipelineDescriptor(Box), + ComputePipelineDescriptor(Box), +} + +/// A pipeline defining the data layout and shader logic for a specific GPU task. +/// +/// Used to store a heterogenous collection of render and compute pipelines together. +#[derive(Debug)] +pub enum Pipeline { + RenderPipeline(RenderPipeline), + ComputePipeline(ComputePipeline), +} + +/// Index of a cached render pipeline in a [`PipelineCache`]. 
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +pub struct CachedRenderPipelineId(CachedPipelineId); + +impl CachedRenderPipelineId { + /// An invalid cached render pipeline index, often used to initialize a variable. + pub const INVALID: Self = CachedRenderPipelineId(usize::MAX); + + #[inline] + pub fn id(&self) -> usize { + self.0 + } +} + +/// Index of a cached compute pipeline in a [`PipelineCache`]. +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +pub struct CachedComputePipelineId(CachedPipelineId); + +impl CachedComputePipelineId { + /// An invalid cached compute pipeline index, often used to initialize a variable. + pub const INVALID: Self = CachedComputePipelineId(usize::MAX); + + #[inline] + pub fn id(&self) -> usize { + self.0 + } +} + +pub struct CachedPipeline { + pub descriptor: PipelineDescriptor, + pub state: CachedPipelineState, +} + +/// State of a cached pipeline inserted into a [`PipelineCache`]. +#[cfg_attr( + not(target_arch = "wasm32"), + expect( + clippy::large_enum_variant, + reason = "See https://github.com/bevyengine/bevy/issues/19220" + ) +)] +#[derive(Debug)] +pub enum CachedPipelineState { + /// The pipeline GPU object is queued for creation. + Queued, + /// The pipeline GPU object is being created. + Creating(Task>), + /// The pipeline GPU object was created successfully and is available (allocated on the GPU). + Ok(Pipeline), + /// An error occurred while trying to create the pipeline GPU object. + Err(PipelineCacheError), +} + +impl CachedPipelineState { + /// Convenience method to "unwrap" a pipeline state into its underlying GPU object. + /// + /// # Returns + /// + /// The method returns the allocated pipeline GPU object. + /// + /// # Panics + /// + /// This method panics if the pipeline GPU object is not available, either because it is + /// pending creation or because an error occurred while attempting to create GPU object. 
+ pub fn unwrap(&self) -> &Pipeline { + match self { + CachedPipelineState::Ok(pipeline) => pipeline, + CachedPipelineState::Queued => { + panic!("Pipeline has not been compiled yet. It is still in the 'Queued' state.") + } + CachedPipelineState::Creating(..) => { + panic!("Pipeline has not been compiled yet. It is still in the 'Creating' state.") + } + CachedPipelineState::Err(err) => panic!("{}", err), + } + } +} + +type LayoutCacheKey = (Vec, Vec); +#[derive(Default)] +struct LayoutCache { + layouts: HashMap>>, +} + +impl LayoutCache { + fn get( + &mut self, + render_device: &RenderDevice, + bind_group_layouts: &[BindGroupLayout], + push_constant_ranges: Vec, + ) -> Arc> { + let bind_group_ids = bind_group_layouts.iter().map(BindGroupLayout::id).collect(); + self.layouts + .entry((bind_group_ids, push_constant_ranges)) + .or_insert_with_key(|(_, push_constant_ranges)| { + let bind_group_layouts = bind_group_layouts + .iter() + .map(BindGroupLayout::value) + .collect::>(); + Arc::new(WgpuWrapper::new(render_device.create_pipeline_layout( + &PipelineLayoutDescriptor { + bind_group_layouts: &bind_group_layouts, + push_constant_ranges, + ..default() + }, + ))) + }) + .clone() + } +} + +#[expect( + clippy::result_large_err, + reason = "See https://github.com/bevyengine/bevy/issues/19220" +)] +fn load_module( + render_device: &RenderDevice, + shader_source: ShaderCacheSource, + validate_shader: &ValidateShader, +) -> Result, PipelineCacheError> { + let shader_source = match shader_source { + #[cfg(feature = "shader_format_spirv")] + ShaderCacheSource::SpirV(data) => wgpu::util::make_spirv(data), + #[cfg(not(feature = "shader_format_spirv"))] + ShaderCacheSource::SpirV(_) => { + unimplemented!("Enable feature \"shader_format_spirv\" to use SPIR-V shaders") + } + ShaderCacheSource::Wgsl(src) => ShaderSource::Wgsl(Cow::Owned(src)), + #[cfg(not(feature = "decoupled_naga"))] + ShaderCacheSource::Naga(src) => ShaderSource::Naga(Cow::Owned(src)), + }; + let module_descriptor 
= ShaderModuleDescriptor { + label: None, + source: shader_source, + }; + + render_device + .wgpu_device() + .push_error_scope(wgpu::ErrorFilter::Validation); + + let shader_module = WgpuWrapper::new(match validate_shader { + ValidateShader::Enabled => { + render_device.create_and_validate_shader_module(module_descriptor) + } + // SAFETY: we are interfacing with shader code, which may contain undefined behavior, + // such as indexing out of bounds. + // The checks required are prohibitively expensive and a poor default for game engines. + ValidateShader::Disabled => unsafe { + render_device.create_shader_module(module_descriptor) + }, + }); + + let error = render_device.wgpu_device().pop_error_scope(); + + // `now_or_never` will return Some if the future is ready and None otherwise. + // On native platforms, wgpu will yield the error immediately while on wasm it may take longer since the browser APIs are asynchronous. + // So to keep the complexity of the ShaderCache low, we will only catch this error early on native platforms, + // and on wasm the error will be handled by wgpu and crash the application. + if let Some(Some(wgpu::Error::Validation { description, .. })) = + bevy_tasks::futures::now_or_never(error) + { + return Err(PipelineCacheError::CreateShaderModule(description)); + } + + Ok(shader_module) +} + +/// Cache for render and compute pipelines. +/// +/// The cache stores existing render and compute pipelines allocated on the GPU, as well as +/// pending creation. Pipelines inserted into the cache are identified by a unique ID, which +/// can be used to retrieve the actual GPU object once it's ready. The creation of the GPU +/// pipeline object is deferred to the [`RenderSystems::Render`] step, just before the render +/// graph starts being processed, as this requires access to the GPU. +/// +/// Note that the cache does not perform automatic deduplication of identical pipelines. 
It is +/// up to the user not to insert the same pipeline twice to avoid wasting GPU resources. +/// +/// [`RenderSystems::Render`]: crate::RenderSystems::Render +#[derive(Resource)] +pub struct PipelineCache { + layout_cache: Arc>, + shader_cache: Arc, RenderDevice>>>, + device: RenderDevice, + pipelines: Vec, + waiting_pipelines: HashSet, + new_pipelines: Mutex>, + global_shader_defs: Vec, + /// If `true`, disables asynchronous pipeline compilation. + /// This has no effect on macOS, wasm, or without the `multi_threaded` feature. + synchronous_pipeline_compilation: bool, +} + +impl PipelineCache { + /// Returns an iterator over the pipelines in the pipeline cache. + pub fn pipelines(&self) -> impl Iterator { + self.pipelines.iter() + } + + /// Returns a iterator of the IDs of all currently waiting pipelines. + pub fn waiting_pipelines(&self) -> impl Iterator + '_ { + self.waiting_pipelines.iter().copied() + } + + /// Create a new pipeline cache associated with the given render device. 
+ pub fn new( + device: RenderDevice, + render_adapter: RenderAdapter, + synchronous_pipeline_compilation: bool, + ) -> Self { + let mut global_shader_defs = Vec::new(); + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + { + global_shader_defs.push("NO_ARRAY_TEXTURES_SUPPORT".into()); + global_shader_defs.push("NO_CUBE_ARRAY_TEXTURES_SUPPORT".into()); + global_shader_defs.push("SIXTEEN_BYTE_ALIGNMENT".into()); + } + + if cfg!(target_abi = "sim") { + global_shader_defs.push("NO_CUBE_ARRAY_TEXTURES_SUPPORT".into()); + } + + global_shader_defs.push(ShaderDefVal::UInt( + String::from("AVAILABLE_STORAGE_BUFFER_BINDINGS"), + device.limits().max_storage_buffers_per_shader_stage, + )); + + Self { + shader_cache: Arc::new(Mutex::new(ShaderCache::new( + device.features(), + render_adapter.get_downlevel_capabilities().flags, + load_module, + ))), + device, + layout_cache: default(), + waiting_pipelines: default(), + new_pipelines: default(), + pipelines: default(), + global_shader_defs, + synchronous_pipeline_compilation, + } + } + + /// Get the state of a cached render pipeline. + /// + /// See [`PipelineCache::queue_render_pipeline()`]. + #[inline] + pub fn get_render_pipeline_state(&self, id: CachedRenderPipelineId) -> &CachedPipelineState { + // If the pipeline id isn't in `pipelines`, it's queued in `new_pipelines` + self.pipelines + .get(id.0) + .map_or(&CachedPipelineState::Queued, |pipeline| &pipeline.state) + } + + /// Get the state of a cached compute pipeline. + /// + /// See [`PipelineCache::queue_compute_pipeline()`]. + #[inline] + pub fn get_compute_pipeline_state(&self, id: CachedComputePipelineId) -> &CachedPipelineState { + // If the pipeline id isn't in `pipelines`, it's queued in `new_pipelines` + self.pipelines + .get(id.0) + .map_or(&CachedPipelineState::Queued, |pipeline| &pipeline.state) + } + + /// Get the render pipeline descriptor a cached render pipeline was inserted from. 
+ /// + /// See [`PipelineCache::queue_render_pipeline()`]. + /// + /// **Note**: Be careful calling this method. It will panic if called with a pipeline that + /// has been queued but has not yet been processed by [`PipelineCache::process_queue()`]. + #[inline] + pub fn get_render_pipeline_descriptor( + &self, + id: CachedRenderPipelineId, + ) -> &RenderPipelineDescriptor { + match &self.pipelines[id.0].descriptor { + PipelineDescriptor::RenderPipelineDescriptor(descriptor) => descriptor, + PipelineDescriptor::ComputePipelineDescriptor(_) => unreachable!(), + } + } + + /// Get the compute pipeline descriptor a cached render pipeline was inserted from. + /// + /// See [`PipelineCache::queue_compute_pipeline()`]. + /// + /// **Note**: Be careful calling this method. It will panic if called with a pipeline that + /// has been queued but has not yet been processed by [`PipelineCache::process_queue()`]. + #[inline] + pub fn get_compute_pipeline_descriptor( + &self, + id: CachedComputePipelineId, + ) -> &ComputePipelineDescriptor { + match &self.pipelines[id.0].descriptor { + PipelineDescriptor::RenderPipelineDescriptor(_) => unreachable!(), + PipelineDescriptor::ComputePipelineDescriptor(descriptor) => descriptor, + } + } + + /// Try to retrieve a render pipeline GPU object from a cached ID. + /// + /// # Returns + /// + /// This method returns a successfully created render pipeline if any, or `None` if the pipeline + /// was not created yet or if there was an error during creation. You can check the actual creation + /// state with [`PipelineCache::get_render_pipeline_state()`]. + #[inline] + pub fn get_render_pipeline(&self, id: CachedRenderPipelineId) -> Option<&RenderPipeline> { + if let CachedPipelineState::Ok(Pipeline::RenderPipeline(pipeline)) = + &self.pipelines.get(id.0)?.state + { + Some(pipeline) + } else { + None + } + } + + /// Wait for a render pipeline to finish compiling. 
+ #[inline] + pub fn block_on_render_pipeline(&mut self, id: CachedRenderPipelineId) { + if self.pipelines.len() <= id.0 { + self.process_queue(); + } + + let state = &mut self.pipelines[id.0].state; + if let CachedPipelineState::Creating(task) = state { + *state = match bevy_tasks::block_on(task) { + Ok(p) => CachedPipelineState::Ok(p), + Err(e) => CachedPipelineState::Err(e), + }; + } + } + + /// Try to retrieve a compute pipeline GPU object from a cached ID. + /// + /// # Returns + /// + /// This method returns a successfully created compute pipeline if any, or `None` if the pipeline + /// was not created yet or if there was an error during creation. You can check the actual creation + /// state with [`PipelineCache::get_compute_pipeline_state()`]. + #[inline] + pub fn get_compute_pipeline(&self, id: CachedComputePipelineId) -> Option<&ComputePipeline> { + if let CachedPipelineState::Ok(Pipeline::ComputePipeline(pipeline)) = + &self.pipelines.get(id.0)?.state + { + Some(pipeline) + } else { + None + } + } + + /// Insert a render pipeline into the cache, and queue its creation. + /// + /// The pipeline is always inserted and queued for creation. There is no attempt to deduplicate it with + /// an already cached pipeline. + /// + /// # Returns + /// + /// This method returns the unique render shader ID of the cached pipeline, which can be used to query + /// the caching state with [`get_render_pipeline_state()`] and to retrieve the created GPU pipeline once + /// it's ready with [`get_render_pipeline()`]. 
+ /// + /// [`get_render_pipeline_state()`]: PipelineCache::get_render_pipeline_state + /// [`get_render_pipeline()`]: PipelineCache::get_render_pipeline + pub fn queue_render_pipeline( + &self, + descriptor: RenderPipelineDescriptor, + ) -> CachedRenderPipelineId { + let mut new_pipelines = self + .new_pipelines + .lock() + .unwrap_or_else(PoisonError::into_inner); + let id = CachedRenderPipelineId(self.pipelines.len() + new_pipelines.len()); + new_pipelines.push(CachedPipeline { + descriptor: PipelineDescriptor::RenderPipelineDescriptor(Box::new(descriptor)), + state: CachedPipelineState::Queued, + }); + id + } + + /// Insert a compute pipeline into the cache, and queue its creation. + /// + /// The pipeline is always inserted and queued for creation. There is no attempt to deduplicate it with + /// an already cached pipeline. + /// + /// # Returns + /// + /// This method returns the unique compute shader ID of the cached pipeline, which can be used to query + /// the caching state with [`get_compute_pipeline_state()`] and to retrieve the created GPU pipeline once + /// it's ready with [`get_compute_pipeline()`]. 
+ /// + /// [`get_compute_pipeline_state()`]: PipelineCache::get_compute_pipeline_state + /// [`get_compute_pipeline()`]: PipelineCache::get_compute_pipeline + pub fn queue_compute_pipeline( + &self, + descriptor: ComputePipelineDescriptor, + ) -> CachedComputePipelineId { + let mut new_pipelines = self + .new_pipelines + .lock() + .unwrap_or_else(PoisonError::into_inner); + let id = CachedComputePipelineId(self.pipelines.len() + new_pipelines.len()); + new_pipelines.push(CachedPipeline { + descriptor: PipelineDescriptor::ComputePipelineDescriptor(Box::new(descriptor)), + state: CachedPipelineState::Queued, + }); + id + } + + fn set_shader(&mut self, id: AssetId, shader: Shader) { + let mut shader_cache = self.shader_cache.lock().unwrap(); + let pipelines_to_queue = shader_cache.set_shader(id, shader); + for cached_pipeline in pipelines_to_queue { + self.pipelines[cached_pipeline].state = CachedPipelineState::Queued; + self.waiting_pipelines.insert(cached_pipeline); + } + } + + fn remove_shader(&mut self, shader: AssetId) { + let mut shader_cache = self.shader_cache.lock().unwrap(); + let pipelines_to_queue = shader_cache.remove(shader); + for cached_pipeline in pipelines_to_queue { + self.pipelines[cached_pipeline].state = CachedPipelineState::Queued; + self.waiting_pipelines.insert(cached_pipeline); + } + } + + fn start_create_render_pipeline( + &mut self, + id: CachedPipelineId, + descriptor: RenderPipelineDescriptor, + ) -> CachedPipelineState { + let device = self.device.clone(); + let shader_cache = self.shader_cache.clone(); + let layout_cache = self.layout_cache.clone(); + + create_pipeline_task( + async move { + let mut shader_cache = shader_cache.lock().unwrap(); + let mut layout_cache = layout_cache.lock().unwrap(); + + let vertex_module = match shader_cache.get( + &device, + id, + descriptor.vertex.shader.id(), + &descriptor.vertex.shader_defs, + ) { + Ok(module) => module, + Err(err) => return Err(err), + }; + + let fragment_module = match 
&descriptor.fragment { + Some(fragment) => { + match shader_cache.get( + &device, + id, + fragment.shader.id(), + &fragment.shader_defs, + ) { + Ok(module) => Some(module), + Err(err) => return Err(err), + } + } + None => None, + }; + + let layout = + if descriptor.layout.is_empty() && descriptor.push_constant_ranges.is_empty() { + None + } else { + Some(layout_cache.get( + &device, + &descriptor.layout, + descriptor.push_constant_ranges.to_vec(), + )) + }; + + drop((shader_cache, layout_cache)); + + let vertex_buffer_layouts = descriptor + .vertex + .buffers + .iter() + .map(|layout| RawVertexBufferLayout { + array_stride: layout.array_stride, + attributes: &layout.attributes, + step_mode: layout.step_mode, + }) + .collect::>(); + + let fragment_data = descriptor.fragment.as_ref().map(|fragment| { + ( + fragment_module.unwrap(), + fragment.entry_point.as_deref(), + fragment.targets.as_slice(), + ) + }); + + // TODO: Expose the rest of this somehow + let compilation_options = PipelineCompilationOptions { + constants: &[], + zero_initialize_workgroup_memory: descriptor.zero_initialize_workgroup_memory, + }; + + let descriptor = RawRenderPipelineDescriptor { + multiview: None, + depth_stencil: descriptor.depth_stencil.clone(), + label: descriptor.label.as_deref(), + layout: layout.as_ref().map(|layout| -> &PipelineLayout { layout }), + multisample: descriptor.multisample, + primitive: descriptor.primitive, + vertex: RawVertexState { + buffers: &vertex_buffer_layouts, + entry_point: descriptor.vertex.entry_point.as_deref(), + module: &vertex_module, + // TODO: Should this be the same as the fragment compilation options? + compilation_options: compilation_options.clone(), + }, + fragment: fragment_data + .as_ref() + .map(|(module, entry_point, targets)| RawFragmentState { + entry_point: entry_point.as_deref(), + module, + targets, + // TODO: Should this be the same as the vertex compilation options? 
+ compilation_options, + }), + cache: None, + }; + + Ok(Pipeline::RenderPipeline( + device.create_render_pipeline(&descriptor), + )) + }, + self.synchronous_pipeline_compilation, + ) + } + + fn start_create_compute_pipeline( + &mut self, + id: CachedPipelineId, + descriptor: ComputePipelineDescriptor, + ) -> CachedPipelineState { + let device = self.device.clone(); + let shader_cache = self.shader_cache.clone(); + let layout_cache = self.layout_cache.clone(); + + create_pipeline_task( + async move { + let mut shader_cache = shader_cache.lock().unwrap(); + let mut layout_cache = layout_cache.lock().unwrap(); + + let compute_module = match shader_cache.get( + &device, + id, + descriptor.shader.id(), + &descriptor.shader_defs, + ) { + Ok(module) => module, + Err(err) => return Err(err), + }; + + let layout = + if descriptor.layout.is_empty() && descriptor.push_constant_ranges.is_empty() { + None + } else { + Some(layout_cache.get( + &device, + &descriptor.layout, + descriptor.push_constant_ranges.to_vec(), + )) + }; + + drop((shader_cache, layout_cache)); + + let descriptor = RawComputePipelineDescriptor { + label: descriptor.label.as_deref(), + layout: layout.as_ref().map(|layout| -> &PipelineLayout { layout }), + module: &compute_module, + entry_point: descriptor.entry_point.as_deref(), + // TODO: Expose the rest of this somehow + compilation_options: PipelineCompilationOptions { + constants: &[], + zero_initialize_workgroup_memory: descriptor + .zero_initialize_workgroup_memory, + }, + cache: None, + }; + + Ok(Pipeline::ComputePipeline( + device.create_compute_pipeline(&descriptor), + )) + }, + self.synchronous_pipeline_compilation, + ) + } + + /// Process the pipeline queue and create all pending pipelines if possible. + /// + /// This is generally called automatically during the [`RenderSystems::Render`] step, but can + /// be called manually to force creation at a different time. 
+ /// + /// [`RenderSystems::Render`]: crate::RenderSystems::Render + pub fn process_queue(&mut self) { + let mut waiting_pipelines = mem::take(&mut self.waiting_pipelines); + let mut pipelines = mem::take(&mut self.pipelines); + + { + let mut new_pipelines = self + .new_pipelines + .lock() + .unwrap_or_else(PoisonError::into_inner); + for new_pipeline in new_pipelines.drain(..) { + let id = pipelines.len(); + pipelines.push(new_pipeline); + waiting_pipelines.insert(id); + } + } + + for id in waiting_pipelines { + self.process_pipeline(&mut pipelines[id], id); + } + + self.pipelines = pipelines; + } + + fn process_pipeline(&mut self, cached_pipeline: &mut CachedPipeline, id: usize) { + match &mut cached_pipeline.state { + CachedPipelineState::Queued => { + cached_pipeline.state = match &cached_pipeline.descriptor { + PipelineDescriptor::RenderPipelineDescriptor(descriptor) => { + self.start_create_render_pipeline(id, *descriptor.clone()) + } + PipelineDescriptor::ComputePipelineDescriptor(descriptor) => { + self.start_create_compute_pipeline(id, *descriptor.clone()) + } + }; + } + + CachedPipelineState::Creating(task) => match bevy_tasks::futures::check_ready(task) { + Some(Ok(pipeline)) => { + cached_pipeline.state = CachedPipelineState::Ok(pipeline); + return; + } + Some(Err(err)) => cached_pipeline.state = CachedPipelineState::Err(err), + _ => (), + }, + + CachedPipelineState::Err(err) => match err { + // Retry + PipelineCacheError::ShaderNotLoaded(_) + | PipelineCacheError::ShaderImportNotYetAvailable => { + cached_pipeline.state = CachedPipelineState::Queued; + } + + // Shader could not be processed ... 
retrying won't help + PipelineCacheError::ProcessShaderError(err) => { + let error_detail = + err.emit_to_string(&self.shader_cache.lock().unwrap().composer); + if std::env::var("VERBOSE_SHADER_ERROR") + .is_ok_and(|v| !(v.is_empty() || v == "0" || v == "false")) + { + error!("{}", pipeline_error_context(cached_pipeline)); + } + error!("failed to process shader error:\n{}", error_detail); + return; + } + PipelineCacheError::CreateShaderModule(description) => { + error!("failed to create shader module: {}", description); + return; + } + }, + + CachedPipelineState::Ok(_) => return, + } + + // Retry + self.waiting_pipelines.insert(id); + } + + pub(crate) fn process_pipeline_queue_system(mut cache: ResMut) { + cache.process_queue(); + } + + pub(crate) fn extract_shaders( + mut cache: ResMut, + shaders: Extract>>, + mut events: Extract>>, + ) { + for event in events.read() { + #[expect( + clippy::match_same_arms, + reason = "LoadedWithDependencies is marked as a TODO, so it's likely this will no longer lint soon." + )] + match event { + // PERF: Instead of blocking waiting for the shader cache lock, try again next frame if the lock is currently held + AssetEvent::Added { id } | AssetEvent::Modified { id } => { + if let Some(shader) = shaders.get(*id) { + let mut shader = shader.clone(); + shader.shader_defs.extend(cache.global_shader_defs.clone()); + + cache.set_shader(*id, shader); + } + } + AssetEvent::Removed { id } => cache.remove_shader(*id), + AssetEvent::Unused { .. } => {} + AssetEvent::LoadedWithDependencies { .. 
} => { + // TODO: handle this + } + } + } + } +} + +fn pipeline_error_context(cached_pipeline: &CachedPipeline) -> String { + fn format( + shader: &Handle, + entry: &Option>, + shader_defs: &[ShaderDefVal], + ) -> String { + let source = match shader.path() { + Some(path) => path.path().to_string_lossy().to_string(), + None => String::new(), + }; + let entry = match entry { + Some(entry) => entry.to_string(), + None => String::new(), + }; + let shader_defs = shader_defs + .iter() + .flat_map(|def| match def { + ShaderDefVal::Bool(k, v) if *v => Some(k.to_string()), + ShaderDefVal::Int(k, v) => Some(format!("{k} = {v}")), + ShaderDefVal::UInt(k, v) => Some(format!("{k} = {v}")), + _ => None, + }) + .collect::>() + .join(", "); + format!("{source}:{entry}\nshader defs: {shader_defs}") + } + match &cached_pipeline.descriptor { + PipelineDescriptor::RenderPipelineDescriptor(desc) => { + let vert = &desc.vertex; + let vert_str = format(&vert.shader, &vert.entry_point, &vert.shader_defs); + let Some(frag) = desc.fragment.as_ref() else { + return vert_str; + }; + let frag_str = format(&frag.shader, &frag.entry_point, &frag.shader_defs); + format!("vertex {vert_str}\nfragment {frag_str}") + } + PipelineDescriptor::ComputePipelineDescriptor(desc) => { + format(&desc.shader, &desc.entry_point, &desc.shader_defs) + } + } +} + +#[cfg(all( + not(target_arch = "wasm32"), + not(target_os = "macos"), + feature = "multi_threaded" +))] +fn create_pipeline_task( + task: impl Future> + Send + 'static, + sync: bool, +) -> CachedPipelineState { + if !sync { + return CachedPipelineState::Creating(bevy_tasks::AsyncComputeTaskPool::get().spawn(task)); + } + + match bevy_tasks::block_on(task) { + Ok(pipeline) => CachedPipelineState::Ok(pipeline), + Err(err) => CachedPipelineState::Err(err), + } +} + +#[cfg(any( + target_arch = "wasm32", + target_os = "macos", + not(feature = "multi_threaded") +))] +fn create_pipeline_task( + task: impl Future> + Send + 'static, + _sync: bool, +) -> 
CachedPipelineState { + match bevy_tasks::block_on(task) { + Ok(pipeline) => CachedPipelineState::Ok(pipeline), + Err(err) => CachedPipelineState::Err(err), + } +} diff --git a/crates/libmarathon/src/render/render_resource/pipeline_specializer.rs b/crates/libmarathon/src/render/render_resource/pipeline_specializer.rs new file mode 100644 index 0000000..137ab95 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/pipeline_specializer.rs @@ -0,0 +1,259 @@ +use crate::render::render_resource::{ + CachedComputePipelineId, CachedRenderPipelineId, ComputePipelineDescriptor, PipelineCache, + RenderPipelineDescriptor, +}; +use bevy_ecs::resource::Resource; +use bevy_mesh::{MeshVertexBufferLayoutRef, MissingVertexAttributeError, VertexBufferLayout}; +use bevy_platform::{ + collections::{ + hash_map::{Entry, RawEntryMut, VacantEntry}, + HashMap, + }, + hash::FixedHasher, +}; +use bevy_utils::default; +use core::{fmt::Debug, hash::Hash}; +use thiserror::Error; +use tracing::error; + +/// A trait that allows constructing different variants of a render pipeline from a key. +/// +/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key +/// contains no data then you don't need to specialize. For example, if you are using the +/// [`AsBindGroup`](crate::render_resource::AsBindGroup) without the `#[bind_group_data]` attribute, +/// you don't need to specialize. Instead, create the pipeline directly from [`PipelineCache`] and +/// store its ID. +/// +/// See [`SpecializedRenderPipelines`] for more info. +pub trait SpecializedRenderPipeline { + /// The key that defines each "variant" of the render pipeline. + type Key: Clone + Hash + PartialEq + Eq; + + /// Construct a new render pipeline based on the provided key. + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor; +} + +/// A convenience cache for creating different variants of a render pipeline based on some key. 
+/// +/// Some render pipelines may need to be configured differently depending on the exact situation. +/// This cache allows constructing different render pipelines for each situation based on a key, +/// making it easy to A) construct the necessary pipelines, and B) reuse already constructed +/// pipelines. +/// +/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key +/// contains no data then you don't need to specialize. For example, if you are using the +/// [`AsBindGroup`](crate::render_resource::AsBindGroup) without the `#[bind_group_data]` attribute, +/// you don't need to specialize. Instead, create the pipeline directly from [`PipelineCache`] and +/// store its ID. +#[derive(Resource)] +pub struct SpecializedRenderPipelines { + cache: HashMap, +} + +impl Default for SpecializedRenderPipelines { + fn default() -> Self { + Self { cache: default() } + } +} + +impl SpecializedRenderPipelines { + /// Get or create a specialized instance of the pipeline corresponding to `key`. + pub fn specialize( + &mut self, + cache: &PipelineCache, + pipeline_specializer: &S, + key: S::Key, + ) -> CachedRenderPipelineId { + *self.cache.entry(key.clone()).or_insert_with(|| { + let descriptor = pipeline_specializer.specialize(key); + cache.queue_render_pipeline(descriptor) + }) + } +} + +/// A trait that allows constructing different variants of a compute pipeline from a key. +/// +/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key +/// contains no data then you don't need to specialize. For example, if you are using the +/// [`AsBindGroup`](crate::render_resource::AsBindGroup) without the `#[bind_group_data]` attribute, +/// you don't need to specialize. Instead, create the pipeline directly from [`PipelineCache`] and +/// store its ID. +/// +/// See [`SpecializedComputePipelines`] for more info. 
+pub trait SpecializedComputePipeline { + /// The key that defines each "variant" of the compute pipeline. + type Key: Clone + Hash + PartialEq + Eq; + + /// Construct a new compute pipeline based on the provided key. + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor; +} + +/// A convenience cache for creating different variants of a compute pipeline based on some key. +/// +/// Some compute pipelines may need to be configured differently depending on the exact situation. +/// This cache allows constructing different compute pipelines for each situation based on a key, +/// making it easy to A) construct the necessary pipelines, and B) reuse already constructed +/// pipelines. +/// +/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key +/// contains no data then you don't need to specialize. For example, if you are using the +/// [`AsBindGroup`](crate::render_resource::AsBindGroup) without the `#[bind_group_data]` attribute, +/// you don't need to specialize. Instead, create the pipeline directly from [`PipelineCache`] and +/// store its ID. +#[derive(Resource)] +pub struct SpecializedComputePipelines { + cache: HashMap, +} + +impl Default for SpecializedComputePipelines { + fn default() -> Self { + Self { cache: default() } + } +} + +impl SpecializedComputePipelines { + /// Get or create a specialized instance of the pipeline corresponding to `key`. + pub fn specialize( + &mut self, + cache: &PipelineCache, + specialize_pipeline: &S, + key: S::Key, + ) -> CachedComputePipelineId { + *self.cache.entry(key.clone()).or_insert_with(|| { + let descriptor = specialize_pipeline.specialize(key); + cache.queue_compute_pipeline(descriptor) + }) + } +} + +/// A trait that allows constructing different variants of a render pipeline from a key and the +/// particular mesh's vertex buffer layout. +/// +/// See [`SpecializedMeshPipelines`] for more info. 
+pub trait SpecializedMeshPipeline { + /// The key that defines each "variant" of the render pipeline. + type Key: Clone + Hash + PartialEq + Eq; + + /// Construct a new render pipeline based on the provided key and vertex layout. + /// + /// The returned pipeline descriptor should have a single vertex buffer, which is derived from + /// `layout`. + fn specialize( + &self, + key: Self::Key, + layout: &MeshVertexBufferLayoutRef, + ) -> Result; +} + +/// A cache of different variants of a render pipeline based on a key and the particular mesh's +/// vertex buffer layout. +#[derive(Resource)] +pub struct SpecializedMeshPipelines { + mesh_layout_cache: HashMap<(MeshVertexBufferLayoutRef, S::Key), CachedRenderPipelineId>, + vertex_layout_cache: VertexLayoutCache, +} + +type VertexLayoutCache = HashMap< + VertexBufferLayout, + HashMap<::Key, CachedRenderPipelineId>, +>; + +impl Default for SpecializedMeshPipelines { + fn default() -> Self { + Self { + mesh_layout_cache: Default::default(), + vertex_layout_cache: Default::default(), + } + } +} + +impl SpecializedMeshPipelines { + /// Construct a new render pipeline based on the provided key and the mesh's vertex buffer + /// layout. 
+ #[inline] + pub fn specialize( + &mut self, + cache: &PipelineCache, + pipeline_specializer: &S, + key: S::Key, + layout: &MeshVertexBufferLayoutRef, + ) -> Result { + return match self.mesh_layout_cache.entry((layout.clone(), key.clone())) { + Entry::Occupied(entry) => Ok(*entry.into_mut()), + Entry::Vacant(entry) => specialize_slow( + &mut self.vertex_layout_cache, + cache, + pipeline_specializer, + key, + layout, + entry, + ), + }; + + #[cold] + fn specialize_slow( + vertex_layout_cache: &mut VertexLayoutCache, + cache: &PipelineCache, + specialize_pipeline: &S, + key: S::Key, + layout: &MeshVertexBufferLayoutRef, + entry: VacantEntry< + (MeshVertexBufferLayoutRef, S::Key), + CachedRenderPipelineId, + FixedHasher, + >, + ) -> Result + where + S: SpecializedMeshPipeline, + { + let descriptor = specialize_pipeline + .specialize(key.clone(), layout) + .map_err(|mut err| { + { + let SpecializedMeshPipelineError::MissingVertexAttribute(err) = &mut err; + err.pipeline_type = Some(core::any::type_name::()); + } + err + })?; + // Different MeshVertexBufferLayouts can produce the same final VertexBufferLayout + // We want compatible vertex buffer layouts to use the same pipelines, so we must "deduplicate" them + let layout_map = match vertex_layout_cache + .raw_entry_mut() + .from_key(&descriptor.vertex.buffers[0]) + { + RawEntryMut::Occupied(entry) => entry.into_mut(), + RawEntryMut::Vacant(entry) => { + entry + .insert(descriptor.vertex.buffers[0].clone(), Default::default()) + .1 + } + }; + Ok(*entry.insert(match layout_map.entry(key) { + Entry::Occupied(entry) => { + if cfg!(debug_assertions) { + let stored_descriptor = cache.get_render_pipeline_descriptor(*entry.get()); + if stored_descriptor != &descriptor { + error!( + "The cached pipeline descriptor for {} is not \ + equal to the generated descriptor for the given key. \ + This means the SpecializePipeline implementation uses \ + unused' MeshVertexBufferLayout information to specialize \ + the pipeline. 
This is not allowed because it would invalidate \ + the pipeline cache.", + core::any::type_name::() + ); + } + } + *entry.into_mut() + } + Entry::Vacant(entry) => *entry.insert(cache.queue_render_pipeline(descriptor)), + })) + } + } +} + +#[derive(Error, Debug)] +pub enum SpecializedMeshPipelineError { + #[error(transparent)] + MissingVertexAttribute(#[from] MissingVertexAttributeError), +} diff --git a/crates/libmarathon/src/render/render_resource/resource_macros.rs b/crates/libmarathon/src/render/render_resource/resource_macros.rs new file mode 100644 index 0000000..6cdf3b6 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/resource_macros.rs @@ -0,0 +1,39 @@ +#[macro_export] +macro_rules! define_atomic_id { + ($atomic_id_type:ident) => { + #[derive(Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord, Debug)] + pub struct $atomic_id_type(core::num::NonZero); + + impl $atomic_id_type { + #[expect( + clippy::new_without_default, + reason = "Implementing the `Default` trait on atomic IDs would imply that two `::default()` equal each other. By only implementing `new()`, we indicate that each atomic ID created will be unique." 
+ )] + pub fn new() -> Self { + use core::sync::atomic::{AtomicU32, Ordering}; + + static COUNTER: AtomicU32 = AtomicU32::new(1); + + let counter = COUNTER.fetch_add(1, Ordering::Relaxed); + Self(core::num::NonZero::::new(counter).unwrap_or_else(|| { + panic!( + "The system ran out of unique `{}`s.", + stringify!($atomic_id_type) + ); + })) + } + } + + impl From<$atomic_id_type> for core::num::NonZero { + fn from(value: $atomic_id_type) -> Self { + value.0 + } + } + + impl From> for $atomic_id_type { + fn from(value: core::num::NonZero) -> Self { + Self(value) + } + } + }; +} diff --git a/crates/libmarathon/src/render/render_resource/specializer.rs b/crates/libmarathon/src/render/render_resource/specializer.rs new file mode 100644 index 0000000..31edf62 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/specializer.rs @@ -0,0 +1,353 @@ +use super::{ + CachedComputePipelineId, CachedRenderPipelineId, ComputePipeline, ComputePipelineDescriptor, + PipelineCache, RenderPipeline, RenderPipelineDescriptor, +}; +use bevy_ecs::error::BevyError; +use bevy_platform::{ + collections::{ + hash_map::{Entry, VacantEntry}, + HashMap, + }, + hash::FixedHasher, +}; +use core::{hash::Hash, marker::PhantomData}; +use tracing::error; +use variadics_please::all_tuples; + +pub use macros::{Specializer, SpecializerKey}; + +/// Defines a type that is able to be "specialized" and cached by creating and transforming +/// its descriptor type. This is implemented for [`RenderPipeline`] and [`ComputePipeline`], and +/// likely will not have much utility for other types. +/// +/// See docs on [`Specializer`] for more info. 
+pub trait Specializable { + type Descriptor: PartialEq + Clone + Send + Sync; + type CachedId: Clone + Send + Sync; + fn queue(pipeline_cache: &PipelineCache, descriptor: Self::Descriptor) -> Self::CachedId; + fn get_descriptor(pipeline_cache: &PipelineCache, id: Self::CachedId) -> &Self::Descriptor; +} + +impl Specializable for RenderPipeline { + type Descriptor = RenderPipelineDescriptor; + type CachedId = CachedRenderPipelineId; + + fn queue(pipeline_cache: &PipelineCache, descriptor: Self::Descriptor) -> Self::CachedId { + pipeline_cache.queue_render_pipeline(descriptor) + } + + fn get_descriptor( + pipeline_cache: &PipelineCache, + id: CachedRenderPipelineId, + ) -> &Self::Descriptor { + pipeline_cache.get_render_pipeline_descriptor(id) + } +} + +impl Specializable for ComputePipeline { + type Descriptor = ComputePipelineDescriptor; + + type CachedId = CachedComputePipelineId; + + fn queue(pipeline_cache: &PipelineCache, descriptor: Self::Descriptor) -> Self::CachedId { + pipeline_cache.queue_compute_pipeline(descriptor) + } + + fn get_descriptor( + pipeline_cache: &PipelineCache, + id: CachedComputePipelineId, + ) -> &Self::Descriptor { + pipeline_cache.get_compute_pipeline_descriptor(id) + } +} + +/// Defines a type capable of "specializing" values of a type T. +/// +/// Specialization is the process of generating variants of a type T +/// from small hashable keys, and specializers themselves can be +/// thought of as [pure functions] from the key type to `T`, that +/// [memoize] their results based on the key. +/// +///

+/// +/// Since compiling render and compute pipelines can be so slow, +/// specialization allows a Bevy app to detect when it would compile +/// a duplicate pipeline and reuse what's already in the cache. While +/// pipelines could all be memoized hashing each whole descriptor, this +/// would be much slower and could still create duplicates. In contrast, +/// memoizing groups of *related* pipelines based on a small hashable +/// key is much faster. See the docs on [`SpecializerKey`] for more info. +/// +/// ## Composing Specializers +/// +/// This trait can be derived with `#[derive(Specializer)]` for structs whose +/// fields all implement [`Specializer`]. This allows for composing multiple +/// specializers together, and makes encapsulation and separating concerns +/// between specializers much nicer. One could make individual specializers +/// for common operations and place them in entirely separate modules, then +/// compose them together with a single `#[derive]` +/// +/// ```rust +/// # use bevy_ecs::error::BevyError; +/// # use crate::render::render_resource::Specializer; +/// # use crate::render::render_resource::SpecializerKey; +/// # use crate::render::render_resource::RenderPipeline; +/// # use crate::render::render_resource::RenderPipelineDescriptor; +/// struct A; +/// struct B; +/// #[derive(Copy, Clone, PartialEq, Eq, Hash, SpecializerKey)] +/// struct BKey { contrived_number: u32 }; +/// +/// impl Specializer for A { +/// type Key = (); +/// +/// fn specialize( +/// &self, +/// key: (), +/// descriptor: &mut RenderPipelineDescriptor +/// ) -> Result<(), BevyError> { +/// # let _ = descriptor; +/// // mutate the descriptor here +/// Ok(key) +/// } +/// } +/// +/// impl Specializer for B { +/// type Key = BKey; +/// +/// fn specialize( +/// &self, +/// key: BKey, +/// descriptor: &mut RenderPipelineDescriptor +/// ) -> Result { +/// # let _ = descriptor; +/// // mutate the descriptor here +/// Ok(key) +/// } +/// } +/// +/// #[derive(Specializer)] 
+/// #[specialize(RenderPipeline)] +/// struct C { +/// #[key(default)] +/// a: A, +/// b: B, +/// } +/// +/// /* +/// The generated implementation: +/// impl Specializer for C { +/// type Key = BKey; +/// fn specialize( +/// &self, +/// key: Self::Key, +/// descriptor: &mut RenderPipelineDescriptor +/// ) -> Result, BevyError> { +/// let _ = self.a.specialize((), descriptor); +/// let key = self.b.specialize(key, descriptor); +/// Ok(key) +/// } +/// } +/// */ +/// ``` +/// +/// The key type for a composed specializer will be a tuple of the keys +/// of each field, and their specialization logic will be applied in field +/// order. Since derive macros can't have generic parameters, the derive macro +/// requires an additional `#[specialize(..targets)]` attribute to specify a +/// list of types to target for the implementation. `#[specialize(all)]` is +/// also allowed, and will generate a fully generic implementation at the cost +/// of slightly worse error messages. +/// +/// Additionally, each field can optionally take a `#[key]` attribute to +/// specify a "key override". This will hide that field's key from being +/// exposed by the wrapper, and always use the value given by the attribute. +/// Values for this attribute may either be `default` which will use the key's +/// [`Default`] implementation, or a valid rust expression of the key type. +/// +/// [pure functions]: https://en.wikipedia.org/wiki/Pure_function +/// [memoize]: https://en.wikipedia.org/wiki/Memoization +pub trait Specializer: Send + Sync + 'static { + type Key: SpecializerKey; + fn specialize( + &self, + key: Self::Key, + descriptor: &mut T::Descriptor, + ) -> Result, BevyError>; +} + +// TODO: update docs for `SpecializerKey` with a more concrete example +// once we've migrated mesh layout specialization + +/// Defines a type that is able to be used as a key for [`Specializer`]s +/// +///
+/// Most types should implement this trait with the included derive macro.
+/// This generates a "canonical" key type, with IS_CANONICAL = true, and Canonical = Self +///
+/// +/// ## What's a "canonical" key? +/// +/// The specialization API memoizes pipelines based on the hash of each key, but this +/// can still produce duplicates. For example, if one used a list of vertex attributes +/// as a key, even if all the same attributes were present they could be in any order. +/// In each case, though the keys would be "different" they would produce the same +/// pipeline. +/// +/// To address this, during specialization keys are processed into a [canonical] +/// (or "standard") form that represents the actual descriptor that was produced. +/// In the previous example, that would be the final `VertexBufferLayout` contained +/// by the pipeline descriptor. This new key is used by [`Variants`] to +/// perform additional checks for duplicates, but only if required. If a key is +/// canonical from the start, then there's no need. +/// +/// For implementors: the main property of a canonical key is that if two keys hash +/// differently, they should nearly always produce different descriptors. +/// +/// [canonical]: https://en.wikipedia.org/wiki/Canonicalization +pub trait SpecializerKey: Clone + Hash + Eq { + /// Denotes whether this key is canonical or not. This should only be `true` + /// if and only if `Canonical = Self`. + const IS_CANONICAL: bool; + + /// The canonical key type to convert this into during specialization. + type Canonical: Hash + Eq; +} + +pub type Canonical = ::Canonical; + +impl Specializer for () { + type Key = (); + + fn specialize( + &self, + _key: Self::Key, + _descriptor: &mut T::Descriptor, + ) -> Result<(), BevyError> { + Ok(()) + } +} + +impl Specializer for PhantomData { + type Key = (); + + fn specialize( + &self, + _key: Self::Key, + _descriptor: &mut T::Descriptor, + ) -> Result<(), BevyError> { + Ok(()) + } +} + +macro_rules! 
impl_specialization_key_tuple { + ($(#[$meta:meta])* $($T:ident),*) => { + $(#[$meta])* + impl <$($T: SpecializerKey),*> SpecializerKey for ($($T,)*) { + const IS_CANONICAL: bool = true $(&& <$T as SpecializerKey>::IS_CANONICAL)*; + type Canonical = ($(Canonical<$T>,)*); + } + }; +} + +all_tuples!( + #[doc(fake_variadic)] + impl_specialization_key_tuple, + 0, + 12, + T +); + +/// A cache for variants of a resource type created by a specializer. +/// At most one resource will be created for each key. +pub struct Variants> { + specializer: S, + base_descriptor: T::Descriptor, + primary_cache: HashMap, + secondary_cache: HashMap, T::CachedId>, +} + +impl> Variants { + /// Creates a new [`Variants`] from a [`Specializer`] and a base descriptor. + #[inline] + pub fn new(specializer: S, base_descriptor: T::Descriptor) -> Self { + Self { + specializer, + base_descriptor, + primary_cache: Default::default(), + secondary_cache: Default::default(), + } + } + + /// Specializes a resource given the [`Specializer`]'s key type. + #[inline] + pub fn specialize( + &mut self, + pipeline_cache: &PipelineCache, + key: S::Key, + ) -> Result { + let entry = self.primary_cache.entry(key.clone()); + match entry { + Entry::Occupied(entry) => Ok(entry.get().clone()), + Entry::Vacant(entry) => Self::specialize_slow( + &self.specializer, + self.base_descriptor.clone(), + pipeline_cache, + key, + entry, + &mut self.secondary_cache, + ), + } + } + + #[cold] + fn specialize_slow( + specializer: &S, + base_descriptor: T::Descriptor, + pipeline_cache: &PipelineCache, + key: S::Key, + primary_entry: VacantEntry, + secondary_cache: &mut HashMap, T::CachedId>, + ) -> Result { + let mut descriptor = base_descriptor.clone(); + let canonical_key = specializer.specialize(key.clone(), &mut descriptor)?; + + // if the whole key is canonical, the secondary cache isn't needed. 
+ if ::IS_CANONICAL { + return Ok(primary_entry + .insert(::queue(pipeline_cache, descriptor)) + .clone()); + } + + let id = match secondary_cache.entry(canonical_key) { + Entry::Occupied(entry) => { + if cfg!(debug_assertions) { + let stored_descriptor = + ::get_descriptor(pipeline_cache, entry.get().clone()); + if &descriptor != stored_descriptor { + error!( + "Invalid Specializer<{}> impl for {}: the cached descriptor \ + is not equal to the generated descriptor for the given key. \ + This means the Specializer implementation uses unused information \ + from the key to specialize the pipeline. This is not allowed \ + because it would invalidate the cache.", + core::any::type_name::(), + core::any::type_name::() + ); + } + } + entry.into_mut().clone() + } + Entry::Vacant(entry) => entry + .insert(::queue(pipeline_cache, descriptor)) + .clone(), + }; + + primary_entry.insert(id.clone()); + Ok(id) + } +} diff --git a/crates/libmarathon/src/render/render_resource/storage_buffer.rs b/crates/libmarathon/src/render/render_resource/storage_buffer.rs new file mode 100644 index 0000000..b1eeeb9 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/storage_buffer.rs @@ -0,0 +1,285 @@ +use core::marker::PhantomData; + +use super::Buffer; +use crate::render::renderer::{RenderDevice, RenderQueue}; +use encase::{ + internal::WriteInto, DynamicStorageBuffer as DynamicStorageBufferWrapper, ShaderType, + StorageBuffer as StorageBufferWrapper, +}; +use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferSize, BufferUsages}; + +use super::IntoBinding; + +/// Stores data to be transferred to the GPU and made accessible to shaders as a storage buffer. +/// +/// Storage buffers can be made available to shaders in some combination of read/write mode, and can store large amounts of data. +/// Note however that WebGL2 does not support storage buffers, so consider alternative options in this case. 
+/// +/// Storage buffers can store runtime-sized arrays, but only if they are the last field in a structure. +/// +/// The contained data is stored in system RAM. [`write_buffer`](StorageBuffer::write_buffer) queues +/// copying of the data from system RAM to VRAM. Storage buffers must conform to [std430 alignment/padding requirements], which +/// is automatically enforced by this structure. +/// +/// Other options for storing GPU-accessible data are: +/// * [`BufferVec`](crate::render_resource::BufferVec) +/// * [`DynamicStorageBuffer`] +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer) +/// * [`RawBufferVec`](crate::render_resource::RawBufferVec) +/// * [`Texture`](crate::render_resource::Texture) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +/// +/// [std430 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-storage +pub struct StorageBuffer { + value: T, + scratch: StorageBufferWrapper>, + buffer: Option, + label: Option, + changed: bool, + buffer_usage: BufferUsages, + last_written_size: Option, +} + +impl From for StorageBuffer { + fn from(value: T) -> Self { + Self { + value, + scratch: StorageBufferWrapper::new(Vec::new()), + buffer: None, + label: None, + changed: false, + buffer_usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, + last_written_size: None, + } + } +} + +impl Default for StorageBuffer { + fn default() -> Self { + Self { + value: T::default(), + scratch: StorageBufferWrapper::new(Vec::new()), + buffer: None, + label: None, + changed: false, + buffer_usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, + last_written_size: None, + } + } +} + +impl StorageBuffer { + #[inline] + pub fn buffer(&self) -> Option<&Buffer> { + self.buffer.as_ref() + } + + #[inline] + pub fn binding(&self) -> Option> { + Some(BindingResource::Buffer(BufferBinding { + buffer: self.buffer()?, + offset: 0, + size: 
self.last_written_size, + })) + } + + pub fn set(&mut self, value: T) { + self.value = value; + } + + pub fn get(&self) -> &T { + &self.value + } + + pub fn get_mut(&mut self) -> &mut T { + &mut self.value + } + + pub fn set_label(&mut self, label: Option<&str>) { + let label = label.map(str::to_string); + + if label != self.label { + self.changed = true; + } + + self.label = label; + } + + pub fn get_label(&self) -> Option<&str> { + self.label.as_deref() + } + + /// Add more [`BufferUsages`] to the buffer. + /// + /// This method only allows addition of flags to the default usage flags. + /// + /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::STORAGE`. + pub fn add_usages(&mut self, usage: BufferUsages) { + self.buffer_usage |= usage; + self.changed = true; + } + + /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`] + /// and the provided [`RenderQueue`]. + /// + /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously + /// allocated does not have enough capacity, a new GPU-side buffer is created. 
+ pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + self.scratch.write(&self.value).unwrap(); + + let capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0); + let size = self.scratch.as_ref().len() as u64; + + if capacity < size || self.changed { + self.buffer = Some(device.create_buffer_with_data(&BufferInitDescriptor { + label: self.label.as_deref(), + usage: self.buffer_usage, + contents: self.scratch.as_ref(), + })); + self.changed = false; + } else if let Some(buffer) = &self.buffer { + queue.write_buffer(buffer, 0, self.scratch.as_ref()); + } + + self.last_written_size = BufferSize::new(size); + } +} + +impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a StorageBuffer { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + self.binding().expect("Failed to get buffer") + } +} + +/// Stores data to be transferred to the GPU and made accessible to shaders as a dynamic storage buffer. +/// +/// This is just a [`StorageBuffer`], but also allows you to set dynamic offsets. +/// +/// Dynamic storage buffers can be made available to shaders in some combination of read/write mode, and can store large amounts +/// of data. Note however that WebGL2 does not support storage buffers, so consider alternative options in this case. Dynamic +/// storage buffers support multiple separate bindings at dynamic byte offsets and so have a +/// [`push`](DynamicStorageBuffer::push) method. +/// +/// The contained data is stored in system RAM. [`write_buffer`](DynamicStorageBuffer::write_buffer) +/// queues copying of the data from system RAM to VRAM. The data within a storage buffer binding must conform to +/// [std430 alignment/padding requirements]. `DynamicStorageBuffer` takes care of serializing the inner type to conform to +/// these requirements. Each item [`push`](DynamicStorageBuffer::push)ed into this structure +/// will additionally be aligned to meet dynamic offset alignment requirements. 
+/// +/// Other options for storing GPU-accessible data are: +/// * [`BufferVec`](crate::render_resource::BufferVec) +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer) +/// * [`RawBufferVec`](crate::render_resource::RawBufferVec) +/// * [`StorageBuffer`] +/// * [`Texture`](crate::render_resource::Texture) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +/// +/// [std430 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-storage +pub struct DynamicStorageBuffer { + scratch: DynamicStorageBufferWrapper>, + buffer: Option, + label: Option, + changed: bool, + buffer_usage: BufferUsages, + last_written_size: Option, + _marker: PhantomData T>, +} + +impl Default for DynamicStorageBuffer { + fn default() -> Self { + Self { + scratch: DynamicStorageBufferWrapper::new(Vec::new()), + buffer: None, + label: None, + changed: false, + buffer_usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, + last_written_size: None, + _marker: PhantomData, + } + } +} + +impl DynamicStorageBuffer { + #[inline] + pub fn buffer(&self) -> Option<&Buffer> { + self.buffer.as_ref() + } + + #[inline] + pub fn binding(&self) -> Option> { + Some(BindingResource::Buffer(BufferBinding { + buffer: self.buffer()?, + offset: 0, + size: self.last_written_size, + })) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.scratch.as_ref().is_empty() + } + + #[inline] + pub fn push(&mut self, value: T) -> u32 { + self.scratch.write(&value).unwrap() as u32 + } + + pub fn set_label(&mut self, label: Option<&str>) { + let label = label.map(str::to_string); + + if label != self.label { + self.changed = true; + } + + self.label = label; + } + + pub fn get_label(&self) -> Option<&str> { + self.label.as_deref() + } + + /// Add more [`BufferUsages`] to the buffer. + /// + /// This method only allows addition of flags to the default usage flags. 
+ /// + /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::STORAGE`. + pub fn add_usages(&mut self, usage: BufferUsages) { + self.buffer_usage |= usage; + self.changed = true; + } + + #[inline] + pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { + let capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0); + let size = self.scratch.as_ref().len() as u64; + + if capacity < size || (self.changed && size > 0) { + self.buffer = Some(device.create_buffer_with_data(&BufferInitDescriptor { + label: self.label.as_deref(), + usage: self.buffer_usage, + contents: self.scratch.as_ref(), + })); + self.changed = false; + } else if let Some(buffer) = &self.buffer { + queue.write_buffer(buffer, 0, self.scratch.as_ref()); + } + + self.last_written_size = BufferSize::new(size); + } + + #[inline] + pub fn clear(&mut self) { + self.scratch.as_mut().clear(); + self.scratch.set_offset(0); + } +} + +impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a DynamicStorageBuffer { + #[inline] + fn into_binding(self) -> BindingResource<'a> { + self.binding().expect("Failed to get buffer") + } +} diff --git a/crates/libmarathon/src/render/render_resource/texture.rs b/crates/libmarathon/src/render/render_resource/texture.rs new file mode 100644 index 0000000..25aad67 --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/texture.rs @@ -0,0 +1,166 @@ +use crate::render::define_atomic_id; +use crate::render::renderer::WgpuWrapper; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::resource::Resource; +use core::ops::Deref; + +define_atomic_id!(TextureId); + +/// A GPU-accessible texture. +/// +/// May be converted from and dereferences to a wgpu [`Texture`](wgpu::Texture). +/// Can be created via [`RenderDevice::create_texture`](crate::renderer::RenderDevice::create_texture). 
+/// +/// Other options for storing GPU-accessible data are: +/// * [`BufferVec`](crate::render_resource::BufferVec) +/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer) +/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) +/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer) +/// * [`RawBufferVec`](crate::render_resource::RawBufferVec) +/// * [`StorageBuffer`](crate::render_resource::StorageBuffer) +/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) +#[derive(Clone, Debug)] +pub struct Texture { + id: TextureId, + value: WgpuWrapper, +} + +impl Texture { + /// Returns the [`TextureId`]. + #[inline] + pub fn id(&self) -> TextureId { + self.id + } + + /// Creates a view of this texture. + pub fn create_view(&self, desc: &wgpu::TextureViewDescriptor) -> TextureView { + TextureView::from(self.value.create_view(desc)) + } +} + +impl From for Texture { + fn from(value: wgpu::Texture) -> Self { + Texture { + id: TextureId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl Deref for Texture { + type Target = wgpu::Texture; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +define_atomic_id!(TextureViewId); + +/// Describes a [`Texture`] with its associated metadata required by a pipeline or [`BindGroup`](super::BindGroup). +#[derive(Clone, Debug)] +pub struct TextureView { + id: TextureViewId, + value: WgpuWrapper, +} + +pub struct SurfaceTexture { + value: WgpuWrapper, +} + +impl SurfaceTexture { + pub fn present(self) { + self.value.into_inner().present(); + } +} + +impl TextureView { + /// Returns the [`TextureViewId`]. 
+ #[inline] + pub fn id(&self) -> TextureViewId { + self.id + } +} + +impl From for TextureView { + fn from(value: wgpu::TextureView) -> Self { + TextureView { + id: TextureViewId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl From for SurfaceTexture { + fn from(value: wgpu::SurfaceTexture) -> Self { + SurfaceTexture { + value: WgpuWrapper::new(value), + } + } +} + +impl Deref for TextureView { + type Target = wgpu::TextureView; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl Deref for SurfaceTexture { + type Target = wgpu::SurfaceTexture; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +define_atomic_id!(SamplerId); + +/// A Sampler defines how a pipeline will sample from a [`TextureView`]. +/// They define image filters (including anisotropy) and address (wrapping) modes, among other things. +/// +/// May be converted from and dereferences to a wgpu [`Sampler`](wgpu::Sampler). +/// Can be created via [`RenderDevice::create_sampler`](crate::renderer::RenderDevice::create_sampler). +#[derive(Clone, Debug)] +pub struct Sampler { + id: SamplerId, + value: WgpuWrapper, +} + +impl Sampler { + /// Returns the [`SamplerId`]. + #[inline] + pub fn id(&self) -> SamplerId { + self.id + } +} + +impl From for Sampler { + fn from(value: wgpu::Sampler) -> Self { + Sampler { + id: SamplerId::new(), + value: WgpuWrapper::new(value), + } + } +} + +impl Deref for Sampler { + type Target = wgpu::Sampler; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +/// A rendering resource for the default image sampler which is set during renderer +/// initialization. +/// +/// The [`ImagePlugin`](bevy_image::ImagePlugin) can be set during app initialization to change the default +/// image sampler. 
+#[derive(Resource, Debug, Clone, Deref, DerefMut)] +pub struct DefaultImageSampler(pub(crate) Sampler); diff --git a/crates/libmarathon/src/render/render_resource/uniform_buffer.rs b/crates/libmarathon/src/render/render_resource/uniform_buffer.rs new file mode 100644 index 0000000..41efeef --- /dev/null +++ b/crates/libmarathon/src/render/render_resource/uniform_buffer.rs @@ -0,0 +1,402 @@ +use core::{marker::PhantomData, num::NonZero}; + +use crate::render::{ + render_resource::Buffer, + renderer::{RenderDevice, RenderQueue}, +}; +use encase::{ + internal::{AlignmentValue, BufferMut, WriteInto}, + DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType, + UniformBuffer as UniformBufferWrapper, +}; +use wgpu::{ + util::BufferInitDescriptor, BindingResource, BufferBinding, BufferDescriptor, BufferUsages, +}; + +use super::IntoBinding; + +/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer. +/// +/// Uniform buffers are available to shaders on a read-only basis. Uniform buffers are commonly used to make available to shaders +/// parameters that are constant during shader execution, and are best used for data that is relatively small in size as they are +/// only guaranteed to support up to 16kB per binding. +/// +/// The contained data is stored in system RAM. [`write_buffer`](UniformBuffer::write_buffer) queues +/// copying of the data from system RAM to VRAM. Data in uniform buffers must follow [std140 alignment/padding requirements], +/// which is automatically enforced by this structure. Per the WGPU spec, uniform buffers cannot store runtime-sized array +/// (vectors), or structures with fields that are vectors. 
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`]
+/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render_resource::Texture)
+///
+/// [std140 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-uniform
+pub struct UniformBuffer<T: ShaderType> {
+    value: T,
+    scratch: UniformBufferWrapper<Vec<u8>>,
+    buffer: Option<Buffer>,
+    label: Option<String>,
+    changed: bool,
+    buffer_usage: BufferUsages,
+}
+
+impl<T: ShaderType> From<T> for UniformBuffer<T> {
+    fn from(value: T) -> Self {
+        Self {
+            value,
+            scratch: UniformBufferWrapper::new(Vec::new()),
+            buffer: None,
+            label: None,
+            changed: false,
+            buffer_usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
+        }
+    }
+}
+
+impl<T: ShaderType + Default> Default for UniformBuffer<T> {
+    fn default() -> Self {
+        Self {
+            value: T::default(),
+            scratch: UniformBufferWrapper::new(Vec::new()),
+            buffer: None,
+            label: None,
+            changed: false,
+            buffer_usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
+        }
+    }
+}
+
+impl<T: ShaderType + WriteInto> UniformBuffer<T> {
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource> {
+        Some(BindingResource::Buffer(
+            self.buffer()?.as_entire_buffer_binding(),
+        ))
+    }
+
+    /// Set the data the buffer stores.
+    pub fn set(&mut self, value: T) {
+        self.value = value;
+    }
+
+    pub fn get(&self) -> &T {
+        &self.value
+    }
+
+    pub fn get_mut(&mut self) -> &mut T {
+        &mut self.value
+    }
+
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let label = label.map(str::to_string);
+
+        if label != self.label {
+            self.changed = true;
+        }
+
+        self.label = label;
+    }
+
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::UNIFORM`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.buffer_usage |= usage;
+        self.changed = true;
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`], if a GPU-side backing buffer already exists.
+    ///
+    /// If a GPU-side buffer does not already exist for this data, such a buffer is initialized with currently
+    /// available data.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        self.scratch.write(&self.value).unwrap();
+
+        if self.changed || self.buffer.is_none() {
+            self.buffer = Some(device.create_buffer_with_data(&BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            }));
+            self.changed = false;
+        } else if let Some(buffer) = &self.buffer {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a UniformBuffer<T> {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        self.buffer()
+            .expect("Failed to get buffer")
+            .as_entire_buffer_binding()
+            .into_binding()
+    }
+}
+
+/// Stores data to be transferred to the GPU and made accessible to shaders as a dynamic uniform buffer.
+///
+/// Dynamic uniform buffers are available to shaders on a read-only basis.
+/// Dynamic uniform buffers are commonly used to make
+/// available to shaders runtime-sized arrays of parameters that are otherwise constant during shader execution, and are best
+/// suited to data that is relatively small in size as they are only guaranteed to support up to 16kB per binding.
+///
+/// The contained data is stored in system RAM. [`write_buffer`](DynamicUniformBuffer::write_buffer) queues
+/// copying of the data from system RAM to VRAM. Data in uniform buffers must follow [std140 alignment/padding requirements],
+/// which is automatically enforced by this structure. Per the WGPU spec, uniform buffers cannot store runtime-sized array
+/// (vectors), or structures with fields that are vectors.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
+/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render_resource::Texture)
+/// * [`UniformBuffer`]
+///
+/// [std140 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-uniform
+pub struct DynamicUniformBuffer<T: ShaderType> {
+    scratch: DynamicUniformBufferWrapper<Vec<u8>>,
+    buffer: Option<Buffer>,
+    label: Option<String>,
+    changed: bool,
+    buffer_usage: BufferUsages,
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<T: ShaderType> Default for DynamicUniformBuffer<T> {
+    fn default() -> Self {
+        Self {
+            scratch: DynamicUniformBufferWrapper::new(Vec::new()),
+            buffer: None,
+            label: None,
+            changed: false,
+            buffer_usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
+    pub fn new_with_alignment(alignment: u64) -> Self {
+        Self {
+            scratch: DynamicUniformBufferWrapper::new_with_alignment(Vec::new(), alignment),
+            buffer: None,
+            label: None,
+            changed: false,
+            buffer_usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
+            _marker: PhantomData,
+        }
+    }
+
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource> {
+        Some(BindingResource::Buffer(BufferBinding {
+            buffer: self.buffer()?,
+            offset: 0,
+            size: Some(T::min_size()),
+        }))
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.scratch.as_ref().is_empty()
+    }
+
+    /// Push data into the `DynamicUniformBuffer`'s internal vector (residing on system RAM).
+    #[inline]
+    pub fn push(&mut self, value: &T) -> u32 {
+        self.scratch.write(value).unwrap() as u32
+    }
+
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let label = label.map(str::to_string);
+
+        if label != self.label {
+            self.changed = true;
+        }
+
+        self.label = label;
+    }
+
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::UNIFORM`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.buffer_usage |= usage;
+        self.changed = true;
+    }
+
+    /// Creates a writer that can be used to directly write elements into the target buffer.
+    ///
+    /// This method uses less memory and performs fewer memory copies using over [`push`] and [`write_buffer`].
+    ///
+    /// `max_count` *must* be greater than or equal to the number of elements that are to be written to the buffer, or
+    /// the writer will panic while writing. Dropping the writer will schedule the buffer write into the provided
+    /// [`RenderQueue`].
+    ///
+    /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
+    /// allocated does not have enough capacity to hold `max_count` elements, a new GPU-side buffer is created.
+    ///
+    /// Returns `None` if there is no allocated GPU-side buffer, and `max_count` is 0.
+    ///
+    /// [`push`]: Self::push
+    /// [`write_buffer`]: Self::write_buffer
+    #[inline]
+    pub fn get_writer<'a>(
+        &'a mut self,
+        max_count: usize,
+        device: &RenderDevice,
+        queue: &'a RenderQueue,
+    ) -> Option<DynamicUniformBufferWriter<'a, T>> {
+        let alignment = if cfg!(target_abi = "sim") {
+            // On iOS simulator on silicon macs, metal validation check that the host OS alignment
+            // is respected, but the device reports the correct value for iOS, which is smaller.
+            // Use the larger value.
+            // See https://github.com/gfx-rs/wgpu/issues/7057 - remove if it's not needed anymore.
+            AlignmentValue::new(256)
+        } else {
+            AlignmentValue::new(device.limits().min_uniform_buffer_offset_alignment as u64)
+        };
+
+        let mut capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0);
+        let size = alignment
+            .round_up(T::min_size().get())
+            .checked_mul(max_count as u64)
+            .unwrap();
+
+        if capacity < size || (self.changed && size > 0) {
+            let buffer = device.create_buffer(&BufferDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                size,
+                mapped_at_creation: false,
+            });
+            capacity = buffer.size();
+            self.buffer = Some(buffer);
+            self.changed = false;
+        }
+
+        if let Some(buffer) = self.buffer.as_deref() {
+            let buffer_view = queue
+                .write_buffer_with(buffer, 0, NonZero::<u64>::new(buffer.size())?)
+                .unwrap();
+            Some(DynamicUniformBufferWriter {
+                buffer: encase::DynamicUniformBuffer::new_with_alignment(
+                    QueueWriteBufferViewWrapper {
+                        capacity: capacity as usize,
+                        buffer_view,
+                    },
+                    alignment.get(),
+                ),
+                _marker: PhantomData,
+            })
+        } else {
+            None
+        }
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
+    /// allocated does not have enough capacity, a new GPU-side buffer is created.
+    #[inline]
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        let capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0);
+        let size = self.scratch.as_ref().len() as u64;
+
+        if capacity < size || (self.changed && size > 0) {
+            self.buffer = Some(device.create_buffer_with_data(&BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            }));
+            self.changed = false;
+        } else if let Some(buffer) = &self.buffer {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+    }
+
+    #[inline]
+    pub fn clear(&mut self) {
+        self.scratch.as_mut().clear();
+        self.scratch.set_offset(0);
+    }
+}
+
+/// A writer that can be used to directly write elements into the target buffer.
+///
+/// For more information, see [`DynamicUniformBuffer::get_writer`].
+pub struct DynamicUniformBufferWriter<'a, T> {
+    buffer: encase::DynamicUniformBuffer<QueueWriteBufferViewWrapper<'a>>,
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<'a, T: ShaderType + WriteInto> DynamicUniformBufferWriter<'a, T> {
+    pub fn write(&mut self, value: &T) -> u32 {
+        self.buffer.write(value).unwrap() as u32
+    }
+}
+
+/// A wrapper to work around the orphan rule so that [`wgpu::QueueWriteBufferView`] can implement
+/// [`BufferMut`].
+struct QueueWriteBufferViewWrapper<'a> {
+    buffer_view: wgpu::QueueWriteBufferView<'a>,
+    // Must be kept separately and cannot be retrieved from buffer_view, as the read-only access will
+    // invoke a panic.
+    capacity: usize,
+}
+
+impl<'a> BufferMut for QueueWriteBufferViewWrapper<'a> {
+    #[inline]
+    fn capacity(&self) -> usize {
+        self.capacity
+    }
+
+    #[inline]
+    fn write<const N: usize>(&mut self, offset: usize, val: &[u8; N]) {
+        self.buffer_view.write(offset, val);
+    }
+
+    #[inline]
+    fn write_slice(&mut self, offset: usize, val: &[u8]) {
+        self.buffer_view.write_slice(offset, val);
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a DynamicUniformBuffer<T> {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        self.binding().unwrap()
+    }
+}
diff --git a/crates/libmarathon/src/render/renderer/graph_runner.rs b/crates/libmarathon/src/render/renderer/graph_runner.rs
new file mode 100644
index 0000000..3c4ffda
--- /dev/null
+++ b/crates/libmarathon/src/render/renderer/graph_runner.rs
@@ -0,0 +1,267 @@
+use bevy_ecs::{prelude::Entity, world::World};
+use bevy_platform::collections::HashMap;
+#[cfg(feature = "trace")]
+use tracing::info_span;
+
+use std::{borrow::Cow, collections::VecDeque};
+use smallvec::{smallvec, SmallVec};
+use thiserror::Error;
+
+use crate::render::{
+    diagnostic::internal::{DiagnosticsRecorder, RenderDiagnosticsMutex},
+    render_graph::{
+        Edge, InternedRenderLabel, InternedRenderSubGraph, NodeRunError, NodeState, RenderGraph,
+        RenderGraphContext, SlotLabel, SlotType, SlotValue,
+    },
+    renderer::{RenderContext, RenderDevice},
+};
+
+/// The [`RenderGraphRunner`] is responsible for executing a [`RenderGraph`].
+///
+/// It will run all nodes in the graph sequentially in the correct order (defined by the edges).
+/// Each [`Node`](crate::render_graph::Node) can run any arbitrary code, but will generally
+/// either send directly a [`CommandBuffer`] or a task that will asynchronously generate a [`CommandBuffer`]
+///
+/// After running the graph, the [`RenderGraphRunner`] will execute in parallel all the tasks to get
+/// an ordered list of [`CommandBuffer`]s to execute.
These [`CommandBuffer`] will be submitted to the GPU +/// sequentially in the order that the tasks were submitted. (which is the order of the [`RenderGraph`]) +/// +/// [`CommandBuffer`]: wgpu::CommandBuffer +pub(crate) struct RenderGraphRunner; + +#[derive(Error, Debug)] +pub enum RenderGraphRunnerError { + #[error(transparent)] + NodeRunError(#[from] NodeRunError), + #[error("node output slot not set (index {slot_index}, name {slot_name})")] + EmptyNodeOutputSlot { + type_name: &'static str, + slot_index: usize, + slot_name: Cow<'static, str>, + }, + #[error("graph '{sub_graph:?}' could not be run because slot '{slot_name}' at index {slot_index} has no value")] + MissingInput { + slot_index: usize, + slot_name: Cow<'static, str>, + sub_graph: Option, + }, + #[error("attempted to use the wrong type for input slot")] + MismatchedInputSlotType { + slot_index: usize, + label: SlotLabel, + expected: SlotType, + actual: SlotType, + }, + #[error( + "node (name: '{node_name:?}') has {slot_count} input slots, but was provided {value_count} values" + )] + MismatchedInputCount { + node_name: InternedRenderLabel, + slot_count: usize, + value_count: usize, + }, +} + +impl RenderGraphRunner { + pub fn run( + graph: &RenderGraph, + render_device: RenderDevice, + mut diagnostics_recorder: Option, + queue: &wgpu::Queue, + world: &World, + finalizer: impl FnOnce(&mut wgpu::CommandEncoder), + ) -> Result, RenderGraphRunnerError> { + if let Some(recorder) = &mut diagnostics_recorder { + recorder.begin_frame(); + } + + let mut render_context = RenderContext::new(render_device, diagnostics_recorder); + Self::run_graph(graph, None, &mut render_context, world, &[], None)?; + finalizer(render_context.command_encoder()); + + let (render_device, mut diagnostics_recorder) = { + let (commands, render_device, diagnostics_recorder) = render_context.finish(); + + #[cfg(feature = "trace")] + let _span = info_span!("submit_graph_commands").entered(); + queue.submit(commands); + + (render_device, 
diagnostics_recorder) + }; + + if let Some(recorder) = &mut diagnostics_recorder { + let render_diagnostics_mutex = world.resource::().0.clone(); + recorder.finish_frame(&render_device, move |diagnostics| { + *render_diagnostics_mutex.lock().expect("lock poisoned") = Some(diagnostics); + }); + } + + Ok(diagnostics_recorder) + } + + /// Runs the [`RenderGraph`] and all its sub-graphs sequentially, making sure that all nodes are + /// run in the correct order. (a node only runs when all its dependencies have finished running) + fn run_graph<'w>( + graph: &RenderGraph, + sub_graph: Option, + render_context: &mut RenderContext<'w>, + world: &'w World, + inputs: &[SlotValue], + view_entity: Option, + ) -> Result<(), RenderGraphRunnerError> { + let mut node_outputs: HashMap> = + HashMap::default(); + #[cfg(feature = "trace")] + let span = if let Some(label) = &sub_graph { + info_span!("run_graph", name = format!("{label:?}")) + } else { + info_span!("run_graph", name = "main_graph") + }; + #[cfg(feature = "trace")] + let _guard = span.enter(); + + // Queue up nodes without inputs, which can be run immediately + let mut node_queue: VecDeque<&NodeState> = graph + .iter_nodes() + .filter(|node| node.input_slots.is_empty()) + .collect(); + + // pass inputs into the graph + if let Some(input_node) = graph.get_input_node() { + let mut input_values: SmallVec<[SlotValue; 4]> = SmallVec::new(); + for (i, input_slot) in input_node.input_slots.iter().enumerate() { + if let Some(input_value) = inputs.get(i) { + if input_slot.slot_type != input_value.slot_type() { + return Err(RenderGraphRunnerError::MismatchedInputSlotType { + slot_index: i, + actual: input_value.slot_type(), + expected: input_slot.slot_type, + label: input_slot.name.clone().into(), + }); + } + input_values.push(input_value.clone()); + } else { + return Err(RenderGraphRunnerError::MissingInput { + slot_index: i, + slot_name: input_slot.name.clone(), + sub_graph, + }); + } + } + + 
node_outputs.insert(input_node.label, input_values); + + for (_, node_state) in graph + .iter_node_outputs(input_node.label) + .expect("node exists") + { + node_queue.push_front(node_state); + } + } + + 'handle_node: while let Some(node_state) = node_queue.pop_back() { + // skip nodes that are already processed + if node_outputs.contains_key(&node_state.label) { + continue; + } + + let mut slot_indices_and_inputs: SmallVec<[(usize, SlotValue); 4]> = SmallVec::new(); + // check if all dependencies have finished running + for (edge, input_node) in graph + .iter_node_inputs(node_state.label) + .expect("node is in graph") + { + match edge { + Edge::SlotEdge { + output_index, + input_index, + .. + } => { + if let Some(outputs) = node_outputs.get(&input_node.label) { + slot_indices_and_inputs + .push((*input_index, outputs[*output_index].clone())); + } else { + node_queue.push_front(node_state); + continue 'handle_node; + } + } + Edge::NodeEdge { .. } => { + if !node_outputs.contains_key(&input_node.label) { + node_queue.push_front(node_state); + continue 'handle_node; + } + } + } + } + + // construct final sorted input list + slot_indices_and_inputs.sort_by_key(|(index, _)| *index); + let inputs: SmallVec<[SlotValue; 4]> = slot_indices_and_inputs + .into_iter() + .map(|(_, value)| value) + .collect(); + + if inputs.len() != node_state.input_slots.len() { + return Err(RenderGraphRunnerError::MismatchedInputCount { + node_name: node_state.label, + slot_count: node_state.input_slots.len(), + value_count: inputs.len(), + }); + } + + let mut outputs: SmallVec<[Option; 4]> = + smallvec![None; node_state.output_slots.len()]; + { + let mut context = RenderGraphContext::new(graph, node_state, &inputs, &mut outputs); + if let Some(view_entity) = view_entity { + context.set_view_entity(view_entity); + } + + { + #[cfg(feature = "trace")] + let _span = info_span!("node", name = node_state.type_name).entered(); + + node_state.node.run(&mut context, render_context, world)?; + } + + 
for run_sub_graph in context.finish() { + let sub_graph = graph + .get_sub_graph(run_sub_graph.sub_graph) + .expect("sub graph exists because it was validated when queued."); + Self::run_graph( + sub_graph, + Some(run_sub_graph.sub_graph), + render_context, + world, + &run_sub_graph.inputs, + run_sub_graph.view_entity, + )?; + } + } + + let mut values: SmallVec<[SlotValue; 4]> = SmallVec::new(); + for (i, output) in outputs.into_iter().enumerate() { + if let Some(value) = output { + values.push(value); + } else { + let empty_slot = node_state.output_slots.get_slot(i).unwrap(); + return Err(RenderGraphRunnerError::EmptyNodeOutputSlot { + type_name: node_state.type_name, + slot_index: i, + slot_name: empty_slot.name.clone(), + }); + } + } + node_outputs.insert(node_state.label, values); + + for (_, node_state) in graph + .iter_node_outputs(node_state.label) + .expect("node exists") + { + node_queue.push_front(node_state); + } + } + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/renderer/mod.rs b/crates/libmarathon/src/render/renderer/mod.rs new file mode 100644 index 0000000..b003c64 --- /dev/null +++ b/crates/libmarathon/src/render/renderer/mod.rs @@ -0,0 +1,662 @@ +mod graph_runner; +#[cfg(feature = "raw_vulkan_init")] +pub mod raw_vulkan_init; +mod render_device; +mod wgpu_wrapper; + +pub use graph_runner::*; +pub use render_device::*; +pub use wgpu_wrapper::WgpuWrapper; + +use crate::render::{ + diagnostic::{internal::DiagnosticsRecorder, RecordDiagnostics}, + render_graph::RenderGraph, + render_phase::TrackedRenderPass, + render_resource::RenderPassDescriptor, + settings::{RenderResources, WgpuSettings, WgpuSettingsPriority}, + view::{ExtractedWindows, ViewTarget}, +}; +use std::sync::Arc; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{prelude::*, system::SystemState}; +use bevy_platform::time::Instant; +use bevy_time::TimeSender; +use bevy_window::RawHandleWrapperHolder; +use tracing::{debug, error, info, info_span, warn}; +use wgpu::{ + 
Adapter, AdapterInfo, Backends, CommandBuffer, CommandEncoder, DeviceType, Instance, Queue, + RequestAdapterOptions, Trace, +}; + +/// Updates the [`RenderGraph`] with all of its nodes and then runs it to render the entire frame. +pub fn render_system(world: &mut World, state: &mut SystemState>>) { + world.resource_scope(|world, mut graph: Mut| { + graph.update(world); + }); + + let diagnostics_recorder = world.remove_resource::(); + + let graph = world.resource::(); + let render_device = world.resource::(); + let render_queue = world.resource::(); + + let res = RenderGraphRunner::run( + graph, + render_device.clone(), // TODO: is this clone really necessary? + diagnostics_recorder, + &render_queue.0, + world, + |encoder| { + crate::render::view::screenshot::submit_screenshot_commands(world, encoder); + crate::render::gpu_readback::submit_readback_commands(world, encoder); + }, + ); + + match res { + Ok(Some(diagnostics_recorder)) => { + world.insert_resource(diagnostics_recorder); + } + Ok(None) => {} + Err(e) => { + error!("Error running render graph:"); + { + let mut src: &dyn core::error::Error = &e; + loop { + error!("> {}", src); + match src.source() { + Some(s) => src = s, + None => break, + } + } + } + + panic!("Error running render graph: {e}"); + } + } + + { + let _span = info_span!("present_frames").entered(); + + // Remove ViewTarget components to ensure swap chain TextureViews are dropped. + // If all TextureViews aren't dropped before present, acquiring the next swap chain texture will fail. + let view_entities = state.get(world).iter().collect::>(); + for view_entity in view_entities { + world.entity_mut(view_entity).remove::(); + } + + let mut windows = world.resource_mut::(); + for window in windows.values_mut() { + if let Some(surface_texture) = window.swap_chain_texture.take() { + // TODO(clean): winit docs recommends calling pre_present_notify before this. 
+ // though `present()` doesn't present the frame, it schedules it to be presented + // by wgpu. + // https://docs.rs/winit/0.29.9/wasm32-unknown-unknown/winit/window/struct.Window.html#method.pre_present_notify + surface_texture.present(); + } + } + + #[cfg(feature = "tracing-tracy")] + tracing::event!( + tracing::Level::INFO, + message = "finished frame", + tracy.frame_mark = true + ); + } + + crate::render::view::screenshot::collect_screenshots(world); + + // update the time and send it to the app world + let time_sender = world.resource::(); + if let Err(error) = time_sender.0.try_send(Instant::now()) { + match error { + bevy_time::TrySendError::Full(_) => { + panic!("The TimeSender channel should always be empty during render. You might need to add the bevy::core::time_system to your app.",); + } + bevy_time::TrySendError::Disconnected(_) => { + // ignore disconnected errors, the main world probably just got dropped during shutdown + } + } + } +} + +/// This queue is used to enqueue tasks for the GPU to execute asynchronously. +#[derive(Resource, Clone, Deref, DerefMut)] +pub struct RenderQueue(pub Arc>); + +/// The handle to the physical device being used for rendering. +/// See [`Adapter`] for more info. +#[derive(Resource, Clone, Debug, Deref, DerefMut)] +pub struct RenderAdapter(pub Arc>); + +/// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`], +/// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces). +#[derive(Resource, Clone, Deref, DerefMut)] +pub struct RenderInstance(pub Arc>); + +/// The [`AdapterInfo`] of the adapter in use by the renderer. +#[derive(Resource, Clone, Deref, DerefMut)] +pub struct RenderAdapterInfo(pub WgpuWrapper); + +const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") { + "Unable to find a GPU! Make sure you have installed required drivers! 
For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md" +} else { + "Unable to find a GPU! Make sure you have installed required drivers!" +}; + +#[cfg(not(target_family = "wasm"))] +fn find_adapter_by_name( + instance: &Instance, + options: &WgpuSettings, + compatible_surface: Option<&wgpu::Surface<'_>>, + adapter_name: &str, +) -> Option { + for adapter in + instance.enumerate_adapters(options.backends.expect( + "The `backends` field of `WgpuSettings` must be set to use a specific adapter.", + )) + { + tracing::trace!("Checking adapter: {:?}", adapter.get_info()); + let info = adapter.get_info(); + if let Some(surface) = compatible_surface + && !adapter.is_surface_supported(surface) + { + continue; + } + + if info.name.eq_ignore_ascii_case(adapter_name) { + return Some(adapter); + } + } + None +} + +/// Initializes the renderer by retrieving and preparing the GPU instance, device and queue +/// for the specified backend. +pub async fn initialize_renderer( + backends: Backends, + primary_window: Option, + options: &WgpuSettings, + #[cfg(feature = "raw_vulkan_init")] + raw_vulkan_init_settings: raw_vulkan_init::RawVulkanInitSettings, +) -> RenderResources { + let instance_descriptor = wgpu::InstanceDescriptor { + backends, + flags: options.instance_flags, + memory_budget_thresholds: options.instance_memory_budget_thresholds, + backend_options: wgpu::BackendOptions { + gl: wgpu::GlBackendOptions { + gles_minor_version: options.gles3_minor_version, + fence_behavior: wgpu::GlFenceBehavior::Normal, + }, + dx12: wgpu::Dx12BackendOptions { + shader_compiler: options.dx12_shader_compiler.clone(), + }, + noop: wgpu::NoopBackendOptions { enable: false }, + }, + }; + + #[cfg(not(feature = "raw_vulkan_init"))] + let instance = Instance::new(&instance_descriptor); + #[cfg(feature = "raw_vulkan_init")] + let mut additional_vulkan_features = raw_vulkan_init::AdditionalVulkanFeatures::default(); + #[cfg(feature = "raw_vulkan_init")] 
+ let instance = raw_vulkan_init::create_raw_vulkan_instance( + &instance_descriptor, + &raw_vulkan_init_settings, + &mut additional_vulkan_features, + ); + + let surface = primary_window.and_then(|wrapper| { + let maybe_handle = wrapper + .0 + .lock() + .expect("Couldn't get the window handle in time for renderer initialization"); + if let Some(wrapper) = maybe_handle.as_ref() { + // SAFETY: Plugins should be set up on the main thread. + let handle = unsafe { wrapper.get_handle() }; + Some( + instance + .create_surface(handle) + .expect("Failed to create wgpu surface"), + ) + } else { + None + } + }); + + let force_fallback_adapter = std::env::var("WGPU_FORCE_FALLBACK_ADAPTER") + .map_or(options.force_fallback_adapter, |v| { + !(v.is_empty() || v == "0" || v == "false") + }); + + let desired_adapter_name = std::env::var("WGPU_ADAPTER_NAME") + .as_deref() + .map_or(options.adapter_name.clone(), |x| Some(x.to_lowercase())); + + let request_adapter_options = RequestAdapterOptions { + power_preference: options.power_preference, + compatible_surface: surface.as_ref(), + force_fallback_adapter, + }; + + #[cfg(not(target_family = "wasm"))] + let mut selected_adapter = desired_adapter_name.and_then(|adapter_name| { + find_adapter_by_name( + &instance, + options, + request_adapter_options.compatible_surface, + &adapter_name, + ) + }); + #[cfg(target_family = "wasm")] + let mut selected_adapter = None; + + #[cfg(target_family = "wasm")] + if desired_adapter_name.is_some() { + warn!("Choosing an adapter is not supported on wasm."); + } + + if selected_adapter.is_none() { + debug!( + "Searching for adapter with options: {:?}", + request_adapter_options + ); + selected_adapter = instance + .request_adapter(&request_adapter_options) + .await + .ok(); + } + + let adapter = selected_adapter.expect(GPU_NOT_FOUND_ERROR_MESSAGE); + let adapter_info = adapter.get_info(); + info!("{:?}", adapter_info); + + if adapter_info.device_type == DeviceType::Cpu { + warn!( + "The selected 
adapter is using a driver that only supports software rendering. \ + This is likely to be very slow. See https://bevy.org/learn/errors/b0006/" + ); + } + + // Maybe get features and limits based on what is supported by the adapter/backend + let mut features = wgpu::Features::empty(); + let mut limits = options.limits.clone(); + if matches!(options.priority, WgpuSettingsPriority::Functionality) { + features = adapter.features(); + if adapter_info.device_type == DeviceType::DiscreteGpu { + // `MAPPABLE_PRIMARY_BUFFERS` can have a significant, negative performance impact for + // discrete GPUs due to having to transfer data across the PCI-E bus and so it + // should not be automatically enabled in this case. It is however beneficial for + // integrated GPUs. + features.remove(wgpu::Features::MAPPABLE_PRIMARY_BUFFERS); + } + + limits = adapter.limits(); + } + + // Enforce the disabled features + if let Some(disabled_features) = options.disabled_features { + features.remove(disabled_features); + } + // NOTE: |= is used here to ensure that any explicitly-enabled features are respected. + features |= options.features; + + // Enforce the limit constraints + if let Some(constrained_limits) = options.constrained_limits.as_ref() { + // NOTE: Respect the configured limits as an 'upper bound'. This means for 'max' limits, we + // take the minimum of the calculated limits according to the adapter/backend and the + // specified max_limits. For 'min' limits, take the maximum instead. This is intended to + // err on the side of being conservative. We can't claim 'higher' limits that are supported + // but we can constrain to 'lower' limits. 
+ limits = wgpu::Limits { + max_texture_dimension_1d: limits + .max_texture_dimension_1d + .min(constrained_limits.max_texture_dimension_1d), + max_texture_dimension_2d: limits + .max_texture_dimension_2d + .min(constrained_limits.max_texture_dimension_2d), + max_texture_dimension_3d: limits + .max_texture_dimension_3d + .min(constrained_limits.max_texture_dimension_3d), + max_texture_array_layers: limits + .max_texture_array_layers + .min(constrained_limits.max_texture_array_layers), + max_bind_groups: limits + .max_bind_groups + .min(constrained_limits.max_bind_groups), + max_dynamic_uniform_buffers_per_pipeline_layout: limits + .max_dynamic_uniform_buffers_per_pipeline_layout + .min(constrained_limits.max_dynamic_uniform_buffers_per_pipeline_layout), + max_dynamic_storage_buffers_per_pipeline_layout: limits + .max_dynamic_storage_buffers_per_pipeline_layout + .min(constrained_limits.max_dynamic_storage_buffers_per_pipeline_layout), + max_sampled_textures_per_shader_stage: limits + .max_sampled_textures_per_shader_stage + .min(constrained_limits.max_sampled_textures_per_shader_stage), + max_samplers_per_shader_stage: limits + .max_samplers_per_shader_stage + .min(constrained_limits.max_samplers_per_shader_stage), + max_storage_buffers_per_shader_stage: limits + .max_storage_buffers_per_shader_stage + .min(constrained_limits.max_storage_buffers_per_shader_stage), + max_storage_textures_per_shader_stage: limits + .max_storage_textures_per_shader_stage + .min(constrained_limits.max_storage_textures_per_shader_stage), + max_uniform_buffers_per_shader_stage: limits + .max_uniform_buffers_per_shader_stage + .min(constrained_limits.max_uniform_buffers_per_shader_stage), + max_binding_array_elements_per_shader_stage: limits + .max_binding_array_elements_per_shader_stage + .min(constrained_limits.max_binding_array_elements_per_shader_stage), + max_binding_array_sampler_elements_per_shader_stage: limits + .max_binding_array_sampler_elements_per_shader_stage + 
.min(constrained_limits.max_binding_array_sampler_elements_per_shader_stage), + max_uniform_buffer_binding_size: limits + .max_uniform_buffer_binding_size + .min(constrained_limits.max_uniform_buffer_binding_size), + max_storage_buffer_binding_size: limits + .max_storage_buffer_binding_size + .min(constrained_limits.max_storage_buffer_binding_size), + max_vertex_buffers: limits + .max_vertex_buffers + .min(constrained_limits.max_vertex_buffers), + max_vertex_attributes: limits + .max_vertex_attributes + .min(constrained_limits.max_vertex_attributes), + max_vertex_buffer_array_stride: limits + .max_vertex_buffer_array_stride + .min(constrained_limits.max_vertex_buffer_array_stride), + max_push_constant_size: limits + .max_push_constant_size + .min(constrained_limits.max_push_constant_size), + min_uniform_buffer_offset_alignment: limits + .min_uniform_buffer_offset_alignment + .max(constrained_limits.min_uniform_buffer_offset_alignment), + min_storage_buffer_offset_alignment: limits + .min_storage_buffer_offset_alignment + .max(constrained_limits.min_storage_buffer_offset_alignment), + max_inter_stage_shader_components: limits + .max_inter_stage_shader_components + .min(constrained_limits.max_inter_stage_shader_components), + max_compute_workgroup_storage_size: limits + .max_compute_workgroup_storage_size + .min(constrained_limits.max_compute_workgroup_storage_size), + max_compute_invocations_per_workgroup: limits + .max_compute_invocations_per_workgroup + .min(constrained_limits.max_compute_invocations_per_workgroup), + max_compute_workgroup_size_x: limits + .max_compute_workgroup_size_x + .min(constrained_limits.max_compute_workgroup_size_x), + max_compute_workgroup_size_y: limits + .max_compute_workgroup_size_y + .min(constrained_limits.max_compute_workgroup_size_y), + max_compute_workgroup_size_z: limits + .max_compute_workgroup_size_z + .min(constrained_limits.max_compute_workgroup_size_z), + max_compute_workgroups_per_dimension: limits + 
.max_compute_workgroups_per_dimension + .min(constrained_limits.max_compute_workgroups_per_dimension), + max_buffer_size: limits + .max_buffer_size + .min(constrained_limits.max_buffer_size), + max_bindings_per_bind_group: limits + .max_bindings_per_bind_group + .min(constrained_limits.max_bindings_per_bind_group), + max_non_sampler_bindings: limits + .max_non_sampler_bindings + .min(constrained_limits.max_non_sampler_bindings), + max_blas_primitive_count: limits + .max_blas_primitive_count + .min(constrained_limits.max_blas_primitive_count), + max_blas_geometry_count: limits + .max_blas_geometry_count + .min(constrained_limits.max_blas_geometry_count), + max_tlas_instance_count: limits + .max_tlas_instance_count + .min(constrained_limits.max_tlas_instance_count), + max_color_attachments: limits + .max_color_attachments + .min(constrained_limits.max_color_attachments), + max_color_attachment_bytes_per_sample: limits + .max_color_attachment_bytes_per_sample + .min(constrained_limits.max_color_attachment_bytes_per_sample), + min_subgroup_size: limits + .min_subgroup_size + .max(constrained_limits.min_subgroup_size), + max_subgroup_size: limits + .max_subgroup_size + .min(constrained_limits.max_subgroup_size), + max_acceleration_structures_per_shader_stage: 0, + }; + } + + let device_descriptor = wgpu::DeviceDescriptor { + label: options.device_label.as_ref().map(AsRef::as_ref), + required_features: features, + required_limits: limits, + memory_hints: options.memory_hints.clone(), + // See https://github.com/gfx-rs/wgpu/issues/5974 + trace: Trace::Off, + }; + + #[cfg(not(feature = "raw_vulkan_init"))] + let (device, queue) = adapter.request_device(&device_descriptor).await.unwrap(); + + #[cfg(feature = "raw_vulkan_init")] + let (device, queue) = raw_vulkan_init::create_raw_device( + &adapter, + &device_descriptor, + &raw_vulkan_init_settings, + &mut additional_vulkan_features, + ) + .await + .unwrap(); + + debug!("Configured wgpu adapter Limits: {:#?}", 
device.limits()); + debug!("Configured wgpu adapter Features: {:#?}", device.features()); + + RenderResources( + RenderDevice::from(device), + RenderQueue(Arc::new(WgpuWrapper::new(queue))), + RenderAdapterInfo(WgpuWrapper::new(adapter_info)), + RenderAdapter(Arc::new(WgpuWrapper::new(adapter))), + RenderInstance(Arc::new(WgpuWrapper::new(instance))), + #[cfg(feature = "raw_vulkan_init")] + additional_vulkan_features, + ) +} + +/// The context with all information required to interact with the GPU. +/// +/// The [`RenderDevice`] is used to create render resources and the +/// the [`CommandEncoder`] is used to record a series of GPU operations. +pub struct RenderContext<'w> { + render_device: RenderDevice, + command_encoder: Option, + command_buffer_queue: Vec>, + diagnostics_recorder: Option>, +} + +impl<'w> RenderContext<'w> { + /// Creates a new [`RenderContext`] from a [`RenderDevice`]. + pub fn new( + render_device: RenderDevice, + diagnostics_recorder: Option, + ) -> Self { + Self { + render_device, + command_encoder: None, + command_buffer_queue: Vec::new(), + diagnostics_recorder: diagnostics_recorder.map(Arc::new), + } + } + + /// Gets the underlying [`RenderDevice`]. + pub fn render_device(&self) -> &RenderDevice { + &self.render_device + } + + /// Gets the diagnostics recorder, used to track elapsed time and pipeline statistics + /// of various render and compute passes. + pub fn diagnostic_recorder(&self) -> impl RecordDiagnostics + use<> { + self.diagnostics_recorder.clone() + } + + /// Gets the current [`CommandEncoder`]. 
+ pub fn command_encoder(&mut self) -> &mut CommandEncoder { + self.command_encoder.get_or_insert_with(|| { + self.render_device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()) + }) + } + + pub(crate) fn has_commands(&mut self) -> bool { + self.command_encoder.is_some() || !self.command_buffer_queue.is_empty() + } + + /// Creates a new [`TrackedRenderPass`] for the context, + /// configured using the provided `descriptor`. + pub fn begin_tracked_render_pass<'a>( + &'a mut self, + descriptor: RenderPassDescriptor<'_>, + ) -> TrackedRenderPass<'a> { + // Cannot use command_encoder() as we need to split the borrow on self + let command_encoder = self.command_encoder.get_or_insert_with(|| { + self.render_device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()) + }); + + let render_pass = command_encoder.begin_render_pass(&descriptor); + TrackedRenderPass::new(&self.render_device, render_pass) + } + + /// Append a [`CommandBuffer`] to the command buffer queue. + /// + /// If present, this will flush the currently unflushed [`CommandEncoder`] + /// into a [`CommandBuffer`] into the queue before appending the provided + /// buffer. + pub fn add_command_buffer(&mut self, command_buffer: CommandBuffer) { + self.flush_encoder(); + + self.command_buffer_queue + .push(QueuedCommandBuffer::Ready(command_buffer)); + } + + /// Append a function that will generate a [`CommandBuffer`] to the + /// command buffer queue, to be ran later. + /// + /// If present, this will flush the currently unflushed [`CommandEncoder`] + /// into a [`CommandBuffer`] into the queue before appending the provided + /// buffer. 
+ pub fn add_command_buffer_generation_task( + &mut self, + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] + task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send, + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] + task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w, + ) { + self.flush_encoder(); + + self.command_buffer_queue + .push(QueuedCommandBuffer::Task(Box::new(task))); + } + + /// Finalizes and returns the queue of [`CommandBuffer`]s. + /// + /// This function will wait until all command buffer generation tasks are complete + /// by running them in parallel (where supported). + /// + /// The [`CommandBuffer`]s will be returned in the order that they were added. + pub fn finish( + mut self, + ) -> ( + Vec, + RenderDevice, + Option, + ) { + self.flush_encoder(); + + let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len()); + + #[cfg(feature = "trace")] + let _command_buffer_generation_tasks_span = + info_span!("command_buffer_generation_tasks").entered(); + + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] + { + let mut task_based_command_buffers = + bevy_tasks::ComputeTaskPool::get().scope(|task_pool| { + for (i, queued_command_buffer) in + self.command_buffer_queue.into_iter().enumerate() + { + match queued_command_buffer { + QueuedCommandBuffer::Ready(command_buffer) => { + command_buffers.push((i, command_buffer)); + } + QueuedCommandBuffer::Task(command_buffer_generation_task) => { + let render_device = self.render_device.clone(); + task_pool.spawn(async move { + (i, command_buffer_generation_task(render_device)) + }); + } + } + } + }); + command_buffers.append(&mut task_based_command_buffers); + } + + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] + for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() { + match queued_command_buffer { + QueuedCommandBuffer::Ready(command_buffer) => { + command_buffers.push((i, 
command_buffer)); + } + QueuedCommandBuffer::Task(command_buffer_generation_task) => { + let render_device = self.render_device.clone(); + command_buffers.push((i, command_buffer_generation_task(render_device))); + } + } + } + + #[cfg(feature = "trace")] + drop(_command_buffer_generation_tasks_span); + + command_buffers.sort_unstable_by_key(|(i, _)| *i); + + let mut command_buffers = command_buffers + .into_iter() + .map(|(_, cb)| cb) + .collect::>(); + + let mut diagnostics_recorder = self.diagnostics_recorder.take().map(|v| { + Arc::try_unwrap(v) + .ok() + .expect("diagnostic recorder shouldn't be held longer than necessary") + }); + + if let Some(recorder) = &mut diagnostics_recorder { + let mut command_encoder = self + .render_device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + recorder.resolve(&mut command_encoder); + command_buffers.push(command_encoder.finish()); + } + + (command_buffers, self.render_device, diagnostics_recorder) + } + + fn flush_encoder(&mut self) { + if let Some(encoder) = self.command_encoder.take() { + self.command_buffer_queue + .push(QueuedCommandBuffer::Ready(encoder.finish())); + } + } +} + +enum QueuedCommandBuffer<'w> { + Ready(CommandBuffer), + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] + Task(Box CommandBuffer + 'w + Send>), + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] + Task(Box CommandBuffer + 'w>), +} diff --git a/crates/libmarathon/src/render/renderer/raw_vulkan_init.rs b/crates/libmarathon/src/render/renderer/raw_vulkan_init.rs new file mode 100644 index 0000000..660caff --- /dev/null +++ b/crates/libmarathon/src/render/renderer/raw_vulkan_init.rs @@ -0,0 +1,148 @@ +use std::sync::Arc; +use bevy_ecs::resource::Resource; +use bevy_platform::collections::HashSet; +use core::any::{Any, TypeId}; +use thiserror::Error; +use wgpu::{ + hal::api::Vulkan, Adapter, Device, DeviceDescriptor, Instance, InstanceDescriptor, Queue, +}; + +/// When the 
`raw_vulkan_init` feature is enabled, these settings will be used to configure the raw vulkan instance. +#[derive(Resource, Default, Clone)] +pub struct RawVulkanInitSettings { + // SAFETY: this must remain private to ensure that registering callbacks is unsafe + create_instance_callbacks: Vec< + Arc< + dyn Fn( + &mut wgpu::hal::vulkan::CreateInstanceCallbackArgs, + &mut AdditionalVulkanFeatures, + ) + Send + + Sync, + >, + >, + // SAFETY: this must remain private to ensure that registering callbacks is unsafe + create_device_callbacks: Vec< + Arc< + dyn Fn( + &mut wgpu::hal::vulkan::CreateDeviceCallbackArgs, + &wgpu::hal::vulkan::Adapter, + &mut AdditionalVulkanFeatures, + ) + Send + + Sync, + >, + >, +} + +impl RawVulkanInitSettings { + /// Adds a new Vulkan create instance callback. See [`wgpu::hal::vulkan::Instance::init_with_callback`] for details. + /// + /// # Safety + /// - Callback must not remove features. + /// - Callback must not change anything to what the instance does not support. + pub unsafe fn add_create_instance_callback( + &mut self, + callback: impl Fn(&mut wgpu::hal::vulkan::CreateInstanceCallbackArgs, &mut AdditionalVulkanFeatures) + + Send + + Sync + + 'static, + ) { + self.create_instance_callbacks.push(Arc::new(callback)); + } + + /// Adds a new Vulkan create device callback. See [`wgpu::hal::vulkan::Adapter::open_with_callback`] for details. + /// + /// # Safety + /// - Callback must not remove features. + /// - Callback must not change anything to what the device does not support. 
+ pub unsafe fn add_create_device_callback( + &mut self, + callback: impl Fn( + &mut wgpu::hal::vulkan::CreateDeviceCallbackArgs, + &wgpu::hal::vulkan::Adapter, + &mut AdditionalVulkanFeatures, + ) + Send + + Sync + + 'static, + ) { + self.create_device_callbacks.push(Arc::new(callback)); + } +} + +pub(crate) fn create_raw_vulkan_instance( + instance_descriptor: &InstanceDescriptor, + settings: &RawVulkanInitSettings, + additional_features: &mut AdditionalVulkanFeatures, +) -> Instance { + // SAFETY: Registering callbacks is unsafe. Callback authors promise not to remove features + // or change the instance to something it does not support + unsafe { + wgpu::hal::vulkan::Instance::init_with_callback( + &wgpu::hal::InstanceDescriptor { + name: "wgpu", + flags: instance_descriptor.flags, + memory_budget_thresholds: instance_descriptor.memory_budget_thresholds, + backend_options: instance_descriptor.backend_options.clone(), + }, + Some(Box::new(|mut args| { + for callback in &settings.create_instance_callbacks { + (callback)(&mut args, additional_features); + } + })), + ) + .map(|raw_instance| Instance::from_hal::(raw_instance)) + .unwrap_or_else(|_| Instance::new(instance_descriptor)) + } +} + +pub(crate) async fn create_raw_device( + adapter: &Adapter, + device_descriptor: &DeviceDescriptor<'_>, + settings: &RawVulkanInitSettings, + additional_features: &mut AdditionalVulkanFeatures, +) -> Result<(Device, Queue), CreateRawVulkanDeviceError> { + // SAFETY: Registering callbacks is unsafe. 
Callback authors promise not to remove features + // or change the adapter to something it does not support + unsafe { + let Some(raw_adapter) = adapter.as_hal::() else { + return Ok(adapter.request_device(device_descriptor).await?); + }; + let open_device = raw_adapter.open_with_callback( + device_descriptor.required_features, + &device_descriptor.memory_hints, + Some(Box::new(|mut args| { + for callback in &settings.create_device_callbacks { + (callback)(&mut args, &raw_adapter, additional_features); + } + })), + )?; + + Ok(adapter.create_device_from_hal::(open_device, device_descriptor)?) + } +} + +#[derive(Error, Debug)] +pub(crate) enum CreateRawVulkanDeviceError { + #[error(transparent)] + RequestDeviceError(#[from] wgpu::RequestDeviceError), + #[error(transparent)] + DeviceError(#[from] wgpu::hal::DeviceError), +} + +/// A list of additional Vulkan features that are supported by the current wgpu instance / adapter. This is populated +/// by callbacks defined in [`RawVulkanInitSettings`] +#[derive(Resource, Default, Clone)] +pub struct AdditionalVulkanFeatures(HashSet); + +impl AdditionalVulkanFeatures { + pub fn insert(&mut self) { + self.0.insert(TypeId::of::()); + } + + pub fn has(&self) -> bool { + self.0.contains(&TypeId::of::()) + } + + pub fn remove(&mut self) { + self.0.remove(&TypeId::of::()); + } +} diff --git a/crates/libmarathon/src/render/renderer/render_device.rs b/crates/libmarathon/src/render/renderer/render_device.rs new file mode 100644 index 0000000..c56755d --- /dev/null +++ b/crates/libmarathon/src/render/renderer/render_device.rs @@ -0,0 +1,311 @@ +use super::RenderQueue; +use crate::render::render_resource::{ + BindGroup, BindGroupLayout, Buffer, ComputePipeline, RawRenderPipelineDescriptor, + RenderPipeline, Sampler, Texture, +}; +use crate::render::renderer::WgpuWrapper; +use bevy_ecs::resource::Resource; +use wgpu::{ + util::DeviceExt, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, 
BufferAsyncError, BufferBindingType, PollError, PollStatus, +}; + +/// This GPU device is responsible for the creation of most rendering and compute resources. +#[derive(Resource, Clone)] +pub struct RenderDevice { + device: WgpuWrapper, +} + +impl From for RenderDevice { + fn from(device: wgpu::Device) -> Self { + Self::new(WgpuWrapper::new(device)) + } +} + +impl RenderDevice { + pub fn new(device: WgpuWrapper) -> Self { + Self { device } + } + + /// List all [`Features`](wgpu::Features) that may be used with this device. + /// + /// Functions may panic if you use unsupported features. + #[inline] + pub fn features(&self) -> wgpu::Features { + self.device.features() + } + + /// List all [`Limits`](wgpu::Limits) that were requested of this device. + /// + /// If any of these limits are exceeded, functions may panic. + #[inline] + pub fn limits(&self) -> wgpu::Limits { + self.device.limits() + } + + /// Creates a [`ShaderModule`](wgpu::ShaderModule) from either SPIR-V or WGSL source code. + /// + /// # Safety + /// + /// Creates a shader module with user-customizable runtime checks which allows shaders to + /// perform operations which can lead to undefined behavior like indexing out of bounds, + /// To avoid UB, ensure any unchecked shaders are sound! + /// This method should never be called for user-supplied shaders. + #[inline] + pub unsafe fn create_shader_module( + &self, + desc: wgpu::ShaderModuleDescriptor, + ) -> wgpu::ShaderModule { + #[cfg(feature = "spirv_shader_passthrough")] + match &desc.source { + wgpu::ShaderSource::SpirV(source) + if self + .features() + .contains(wgpu::Features::SPIRV_SHADER_PASSTHROUGH) => + { + // SAFETY: + // This call passes binary data to the backend as-is and can potentially result in a driver crash or bogus behavior. + // No attempt is made to ensure that data is valid SPIR-V. 
+ unsafe { + self.device.create_shader_module_passthrough( + wgpu::ShaderModuleDescriptorPassthrough::SpirV( + wgpu::ShaderModuleDescriptorSpirV { + label: desc.label, + source: source.clone(), + }, + ), + ) + } + } + // SAFETY: + // + // This call passes binary data to the backend as-is and can potentially result in a driver crash or bogus behavior. + // No attempt is made to ensure that data is valid SPIR-V. + _ => unsafe { + self.device + .create_shader_module_trusted(desc, wgpu::ShaderRuntimeChecks::unchecked()) + }, + } + #[cfg(not(feature = "spirv_shader_passthrough"))] + // SAFETY: the caller is responsible for upholding the safety requirements + unsafe { + self.device + .create_shader_module_trusted(desc, wgpu::ShaderRuntimeChecks::unchecked()) + } + } + + /// Creates and validates a [`ShaderModule`](wgpu::ShaderModule) from either SPIR-V or WGSL source code. + /// + /// See [`ValidateShader`](bevy_shader::ValidateShader) for more information on the tradeoffs involved with shader validation. + #[inline] + pub fn create_and_validate_shader_module( + &self, + desc: wgpu::ShaderModuleDescriptor, + ) -> wgpu::ShaderModule { + #[cfg(feature = "spirv_shader_passthrough")] + match &desc.source { + wgpu::ShaderSource::SpirV(_source) => panic!("no safety checks are performed for spirv shaders. use `create_shader_module` instead"), + _ => self.device.create_shader_module(desc), + } + #[cfg(not(feature = "spirv_shader_passthrough"))] + self.device.create_shader_module(desc) + } + + /// Check for resource cleanups and mapping callbacks. + /// + /// Return `true` if the queue is empty, or `false` if there are more queue + /// submissions still in flight. (Note that, unless access to the [`wgpu::Queue`] is + /// coordinated somehow, this information could be out of date by the time + /// the caller receives it. `Queue`s can be shared between threads, so + /// other threads could submit new work at any time.) + /// + /// no-op on the web, device is automatically polled. 
+ #[inline] + pub fn poll(&self, maintain: wgpu::PollType) -> Result { + self.device.poll(maintain) + } + + /// Creates an empty [`CommandEncoder`](wgpu::CommandEncoder). + #[inline] + pub fn create_command_encoder( + &self, + desc: &wgpu::CommandEncoderDescriptor, + ) -> wgpu::CommandEncoder { + self.device.create_command_encoder(desc) + } + + /// Creates an empty [`RenderBundleEncoder`](wgpu::RenderBundleEncoder). + #[inline] + pub fn create_render_bundle_encoder( + &self, + desc: &wgpu::RenderBundleEncoderDescriptor, + ) -> wgpu::RenderBundleEncoder<'_> { + self.device.create_render_bundle_encoder(desc) + } + + /// Creates a new [`BindGroup`](wgpu::BindGroup). + #[inline] + pub fn create_bind_group<'a>( + &self, + label: impl Into>, + layout: &'a BindGroupLayout, + entries: &'a [BindGroupEntry<'a>], + ) -> BindGroup { + let wgpu_bind_group = self.device.create_bind_group(&BindGroupDescriptor { + label: label.into(), + layout, + entries, + }); + BindGroup::from(wgpu_bind_group) + } + + /// Creates a [`BindGroupLayout`](wgpu::BindGroupLayout). + #[inline] + pub fn create_bind_group_layout<'a>( + &self, + label: impl Into>, + entries: &'a [BindGroupLayoutEntry], + ) -> BindGroupLayout { + BindGroupLayout::from( + self.device + .create_bind_group_layout(&BindGroupLayoutDescriptor { + label: label.into(), + entries, + }), + ) + } + + /// Creates a [`PipelineLayout`](wgpu::PipelineLayout). + #[inline] + pub fn create_pipeline_layout( + &self, + desc: &wgpu::PipelineLayoutDescriptor, + ) -> wgpu::PipelineLayout { + self.device.create_pipeline_layout(desc) + } + + /// Creates a [`RenderPipeline`]. + #[inline] + pub fn create_render_pipeline(&self, desc: &RawRenderPipelineDescriptor) -> RenderPipeline { + let wgpu_render_pipeline = self.device.create_render_pipeline(desc); + RenderPipeline::from(wgpu_render_pipeline) + } + + /// Creates a [`ComputePipeline`]. 
+ #[inline] + pub fn create_compute_pipeline( + &self, + desc: &wgpu::ComputePipelineDescriptor, + ) -> ComputePipeline { + let wgpu_compute_pipeline = self.device.create_compute_pipeline(desc); + ComputePipeline::from(wgpu_compute_pipeline) + } + + /// Creates a [`Buffer`]. + pub fn create_buffer(&self, desc: &wgpu::BufferDescriptor) -> Buffer { + let wgpu_buffer = self.device.create_buffer(desc); + Buffer::from(wgpu_buffer) + } + + /// Creates a [`Buffer`] and initializes it with the specified data. + pub fn create_buffer_with_data(&self, desc: &wgpu::util::BufferInitDescriptor) -> Buffer { + let wgpu_buffer = self.device.create_buffer_init(desc); + Buffer::from(wgpu_buffer) + } + + /// Creates a new [`Texture`] and initializes it with the specified data. + /// + /// `desc` specifies the general format of the texture. + /// `data` is the raw data. + pub fn create_texture_with_data( + &self, + render_queue: &RenderQueue, + desc: &wgpu::TextureDescriptor, + order: wgpu::util::TextureDataOrder, + data: &[u8], + ) -> Texture { + let wgpu_texture = + self.device + .create_texture_with_data(render_queue.as_ref(), desc, order, data); + Texture::from(wgpu_texture) + } + + /// Creates a new [`Texture`]. + /// + /// `desc` specifies the general format of the texture. + pub fn create_texture(&self, desc: &wgpu::TextureDescriptor) -> Texture { + let wgpu_texture = self.device.create_texture(desc); + Texture::from(wgpu_texture) + } + + /// Creates a new [`Sampler`]. + /// + /// `desc` specifies the behavior of the sampler. + pub fn create_sampler(&self, desc: &wgpu::SamplerDescriptor) -> Sampler { + let wgpu_sampler = self.device.create_sampler(desc); + Sampler::from(wgpu_sampler) + } + + /// Initializes [`Surface`](wgpu::Surface) for presentation. + /// + /// # Panics + /// + /// - A old [`SurfaceTexture`](wgpu::SurfaceTexture) is still alive referencing an old surface. + /// - Texture format requested is unsupported on the surface. 
+ pub fn configure_surface(&self, surface: &wgpu::Surface, config: &wgpu::SurfaceConfiguration) { + surface.configure(&self.device, config); + } + + /// Returns the wgpu [`Device`](wgpu::Device). + pub fn wgpu_device(&self) -> &wgpu::Device { + &self.device + } + + pub fn map_buffer( + &self, + buffer: &wgpu::BufferSlice, + map_mode: wgpu::MapMode, + callback: impl FnOnce(Result<(), BufferAsyncError>) + Send + 'static, + ) { + buffer.map_async(map_mode, callback); + } + + // Rounds up `row_bytes` to be a multiple of [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`]. + pub const fn align_copy_bytes_per_row(row_bytes: usize) -> usize { + let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as usize; + + // If row_bytes is aligned calculate a value just under the next aligned value. + // Otherwise calculate a value greater than the next aligned value. + let over_aligned = row_bytes + align - 1; + + // Round the number *down* to the nearest aligned value. + (over_aligned / align) * align + } + + pub fn get_supported_read_only_binding_type( + &self, + buffers_per_shader_stage: u32, + ) -> BufferBindingType { + if self.limits().max_storage_buffers_per_shader_stage >= buffers_per_shader_stage { + BufferBindingType::Storage { read_only: true } + } else { + BufferBindingType::Uniform + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn align_copy_bytes_per_row() { + // Test for https://github.com/bevyengine/bevy/issues/16992 + let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as usize; + + assert_eq!(RenderDevice::align_copy_bytes_per_row(0), 0); + assert_eq!(RenderDevice::align_copy_bytes_per_row(1), align); + assert_eq!(RenderDevice::align_copy_bytes_per_row(align + 1), align * 2); + assert_eq!(RenderDevice::align_copy_bytes_per_row(align), align); + } +} diff --git a/crates/libmarathon/src/render/renderer/wgpu_wrapper.rs b/crates/libmarathon/src/render/renderer/wgpu_wrapper.rs new file mode 100644 index 0000000..272d0dd --- /dev/null +++ 
b/crates/libmarathon/src/render/renderer/wgpu_wrapper.rs @@ -0,0 +1,50 @@ +/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled. +/// +/// On web with `atomics` enabled the inner value can only be accessed +/// or dropped on the `wgpu` thread or else a panic will occur. +/// On other platforms the wrapper simply contains the wrapped value. +#[derive(Debug, Clone)] +pub struct WgpuWrapper( + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] T, + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] send_wrapper::SendWrapper, +); + +// SAFETY: SendWrapper is always Send + Sync. +#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] +#[expect(unsafe_code, reason = "Blanket-impl Send requires unsafe.")] +unsafe impl Send for WgpuWrapper {} +#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] +#[expect(unsafe_code, reason = "Blanket-impl Sync requires unsafe.")] +unsafe impl Sync for WgpuWrapper {} + +impl WgpuWrapper { + /// Constructs a new instance of `WgpuWrapper` which will wrap the specified value. + pub fn new(t: T) -> Self { + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] + return Self(t); + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] + return Self(send_wrapper::SendWrapper::new(t)); + } + + /// Unwraps the value. 
+ pub fn into_inner(self) -> T { + #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))] + return self.0; + #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))] + return self.0.take(); + } +} + +impl core::ops::Deref for WgpuWrapper { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl core::ops::DerefMut for WgpuWrapper { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/crates/libmarathon/src/render/settings.rs b/crates/libmarathon/src/render/settings.rs new file mode 100644 index 0000000..266c1da --- /dev/null +++ b/crates/libmarathon/src/render/settings.rs @@ -0,0 +1,226 @@ +use crate::render::renderer::{ + RenderAdapter, RenderAdapterInfo, RenderDevice, RenderInstance, RenderQueue, +}; +use std::borrow::Cow; + +pub use wgpu::{ + Backends, Dx12Compiler, Features as WgpuFeatures, Gles3MinorVersion, InstanceFlags, + Limits as WgpuLimits, MemoryHints, PowerPreference, +}; +use wgpu::{DxcShaderModel, MemoryBudgetThresholds}; + +/// Configures the priority used when automatically configuring the features/limits of `wgpu`. +#[derive(Clone)] +pub enum WgpuSettingsPriority { + /// WebGPU default features and limits + Compatibility, + /// The maximum supported features and limits of the adapter and backend + Functionality, + /// WebGPU default limits plus additional constraints in order to be compatible with WebGL2 + WebGL2, +} + +/// Provides configuration for renderer initialization. Use [`RenderDevice::features`](RenderDevice::features), +/// [`RenderDevice::limits`](RenderDevice::limits), and the [`RenderAdapterInfo`] +/// resource to get runtime information about the actual adapter, backend, features, and limits. +/// NOTE: [`Backends::DX12`](Backends::DX12), [`Backends::METAL`](Backends::METAL), and +/// [`Backends::VULKAN`](Backends::VULKAN) are enabled by default for non-web and the best choice +/// is automatically selected. 
Web using the `webgl` feature uses [`Backends::GL`](Backends::GL). +/// NOTE: If you want to use [`Backends::GL`](Backends::GL) in a native app on `Windows` and/or `macOS`, you must +/// use [`ANGLE`](https://github.com/gfx-rs/wgpu#angle) and enable the `gles` feature. This is +/// because wgpu requires EGL to create a GL context without a window and only ANGLE supports that. +#[derive(Clone)] +pub struct WgpuSettings { + pub device_label: Option>, + pub backends: Option, + pub power_preference: PowerPreference, + pub priority: WgpuSettingsPriority, + /// The features to ensure are enabled regardless of what the adapter/backend supports. + /// Setting these explicitly may cause renderer initialization to fail. + pub features: WgpuFeatures, + /// The features to ensure are disabled regardless of what the adapter/backend supports + pub disabled_features: Option, + /// The imposed limits. + pub limits: WgpuLimits, + /// The constraints on limits allowed regardless of what the adapter/backend supports + pub constrained_limits: Option, + /// The shader compiler to use for the DX12 backend. + pub dx12_shader_compiler: Dx12Compiler, + /// Allows you to choose which minor version of GLES3 to use (3.0, 3.1, 3.2, or automatic) + /// This only applies when using ANGLE and the GL backend. + pub gles3_minor_version: Gles3MinorVersion, + /// These are for controlling WGPU's debug information to eg. enable validation and shader debug info in release builds. + pub instance_flags: InstanceFlags, + /// This hints to the WGPU device about the preferred memory allocation strategy. + pub memory_hints: MemoryHints, + /// The thresholds for device memory budget. + pub instance_memory_budget_thresholds: MemoryBudgetThresholds, + /// If true, will force wgpu to use a software renderer, if available. + pub force_fallback_adapter: bool, + /// The name of the adapter to use. 
+ pub adapter_name: Option, +} + +impl Default for WgpuSettings { + fn default() -> Self { + let default_backends = if cfg!(all( + feature = "webgl", + target_arch = "wasm32", + not(feature = "webgpu") + )) { + Backends::GL + } else if cfg!(all(feature = "webgpu", target_arch = "wasm32")) { + Backends::BROWSER_WEBGPU + } else { + Backends::all() + }; + + let backends = Some(Backends::from_env().unwrap_or(default_backends)); + + let power_preference = + PowerPreference::from_env().unwrap_or(PowerPreference::HighPerformance); + + let priority = settings_priority_from_env().unwrap_or(WgpuSettingsPriority::Functionality); + + let limits = if cfg!(all( + feature = "webgl", + target_arch = "wasm32", + not(feature = "webgpu") + )) || matches!(priority, WgpuSettingsPriority::WebGL2) + { + wgpu::Limits::downlevel_webgl2_defaults() + } else { + #[expect(clippy::allow_attributes, reason = "`unused_mut` is not always linted")] + #[allow( + unused_mut, + reason = "This variable needs to be mutable if the `ci_limits` feature is enabled" + )] + let mut limits = wgpu::Limits::default(); + #[cfg(feature = "ci_limits")] + { + limits.max_storage_textures_per_shader_stage = 4; + limits.max_texture_dimension_3d = 1024; + } + limits + }; + + let dx12_shader_compiler = + Dx12Compiler::from_env().unwrap_or(if cfg!(feature = "statically-linked-dxc") { + Dx12Compiler::StaticDxc + } else { + let dxc = "dxcompiler.dll"; + + if cfg!(target_os = "windows") && std::fs::metadata(dxc).is_ok() { + Dx12Compiler::DynamicDxc { + dxc_path: String::from(dxc), + max_shader_model: DxcShaderModel::V6_7, + } + } else { + Dx12Compiler::Fxc + } + }); + + let gles3_minor_version = Gles3MinorVersion::from_env().unwrap_or_default(); + + let instance_flags = InstanceFlags::default().with_env(); + + Self { + device_label: Default::default(), + backends, + power_preference, + priority, + features: wgpu::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES, + disabled_features: None, + limits, + constrained_limits: 
None, + dx12_shader_compiler, + gles3_minor_version, + instance_flags, + memory_hints: MemoryHints::default(), + instance_memory_budget_thresholds: MemoryBudgetThresholds::default(), + force_fallback_adapter: false, + adapter_name: None, + } + } +} + +#[derive(Clone)] +pub struct RenderResources( + pub RenderDevice, + pub RenderQueue, + pub RenderAdapterInfo, + pub RenderAdapter, + pub RenderInstance, + #[cfg(feature = "raw_vulkan_init")] + pub crate::renderer::raw_vulkan_init::AdditionalVulkanFeatures, +); + +/// An enum describing how the renderer will initialize resources. This is used when creating the [`RenderPlugin`](crate::RenderPlugin). +#[expect( + clippy::large_enum_variant, + reason = "See https://github.com/bevyengine/bevy/issues/19220" +)] +pub enum RenderCreation { + /// Allows renderer resource initialization to happen outside of the rendering plugin. + Manual(RenderResources), + /// Lets the rendering plugin create resources itself. + Automatic(WgpuSettings), +} + +impl RenderCreation { + /// Function to create a [`RenderCreation::Manual`] variant. 
+ pub fn manual( + device: RenderDevice, + queue: RenderQueue, + adapter_info: RenderAdapterInfo, + adapter: RenderAdapter, + instance: RenderInstance, + #[cfg(feature = "raw_vulkan_init")] + additional_vulkan_features: crate::renderer::raw_vulkan_init::AdditionalVulkanFeatures, + ) -> Self { + RenderResources( + device, + queue, + adapter_info, + adapter, + instance, + #[cfg(feature = "raw_vulkan_init")] + additional_vulkan_features, + ) + .into() + } +} + +impl From for RenderCreation { + fn from(value: RenderResources) -> Self { + Self::Manual(value) + } +} + +impl Default for RenderCreation { + fn default() -> Self { + Self::Automatic(Default::default()) + } +} + +impl From for RenderCreation { + fn from(value: WgpuSettings) -> Self { + Self::Automatic(value) + } +} + +/// Get a features/limits priority from the environment variable `WGPU_SETTINGS_PRIO` +pub fn settings_priority_from_env() -> Option { + Some( + match std::env::var("WGPU_SETTINGS_PRIO") + .as_deref() + .map(str::to_lowercase) + .as_deref() + { + Ok("compatibility") => WgpuSettingsPriority::Compatibility, + Ok("functionality") => WgpuSettingsPriority::Functionality, + Ok("webgl2") => WgpuSettingsPriority::WebGL2, + _ => return None, + }, + ) +} diff --git a/crates/libmarathon/src/render/skybox/mod.rs b/crates/libmarathon/src/render/skybox/mod.rs new file mode 100644 index 0000000..35598bb --- /dev/null +++ b/crates/libmarathon/src/render/skybox/mod.rs @@ -0,0 +1,305 @@ +use bevy_app::{App, Plugin}; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle}; +use bevy_camera::Exposure; +use bevy_ecs::{ + prelude::{Component, Entity}, + query::{QueryItem, With}, + reflect::ReflectComponent, + resource::Resource, + schedule::IntoScheduleConfigs, + system::{Commands, Query, Res, ResMut}, +}; +use bevy_image::{BevyDefault, Image}; +use bevy_math::{Mat4, Quat}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + extract_component::{ + ComponentUniforms, 
DynamicUniformIndex, ExtractComponent, ExtractComponentPlugin, + UniformComponentPlugin, + }, + render_asset::RenderAssets, + render_resource::{ + binding_types::{sampler, texture_cube, uniform_buffer}, + *, + }, + renderer::RenderDevice, + texture::GpuImage, + view::{ExtractedView, Msaa, ViewTarget, ViewUniform, ViewUniforms}, + Render, RenderApp, RenderStartup, RenderSystems, +}; +use bevy_shader::Shader; +use bevy_transform::components::Transform; +use bevy_utils::default; +use prepass::SkyboxPrepassPipeline; + +use crate::render::{ + core_3d::CORE_3D_DEPTH_FORMAT, prepass::PreviousViewUniforms, + skybox::prepass::init_skybox_prepass_pipeline, +}; + +pub mod prepass; + +pub struct SkyboxPlugin; + +impl Plugin for SkyboxPlugin { + fn build(&self, app: &mut App) { + embedded_asset!(app, "skybox.wgsl"); + embedded_asset!(app, "skybox_prepass.wgsl"); + + app.add_plugins(( + ExtractComponentPlugin::::default(), + UniformComponentPlugin::::default(), + )); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .init_resource::>() + .init_resource::>() + .init_resource::() + .add_systems( + RenderStartup, + (init_skybox_pipeline, init_skybox_prepass_pipeline), + ) + .add_systems( + Render, + ( + prepare_skybox_pipelines.in_set(RenderSystems::Prepare), + prepass::prepare_skybox_prepass_pipelines.in_set(RenderSystems::Prepare), + prepare_skybox_bind_groups.in_set(RenderSystems::PrepareBindGroups), + prepass::prepare_skybox_prepass_bind_groups + .in_set(RenderSystems::PrepareBindGroups), + ), + ); + } +} + +/// Adds a skybox to a 3D camera, based on a cubemap texture. +/// +/// Note that this component does not (currently) affect the scene's lighting. +/// To do so, use `EnvironmentMapLight` alongside this component. +/// +/// See also . +#[derive(Component, Clone, Reflect)] +#[reflect(Component, Default, Clone)] +pub struct Skybox { + pub image: Handle, + /// Scale factor applied to the skybox image. 
+ /// After applying this multiplier to the image samples, the resulting values should + /// be in units of [cd/m^2](https://en.wikipedia.org/wiki/Candela_per_square_metre). + pub brightness: f32, + + /// View space rotation applied to the skybox cubemap. + /// This is useful for users who require a different axis, such as the Z-axis, to serve + /// as the vertical axis. + pub rotation: Quat, +} + +impl Default for Skybox { + fn default() -> Self { + Skybox { + image: Handle::default(), + brightness: 0.0, + rotation: Quat::IDENTITY, + } + } +} + +impl ExtractComponent for Skybox { + type QueryData = (&'static Self, Option<&'static Exposure>); + type QueryFilter = (); + type Out = (Self, SkyboxUniforms); + + fn extract_component( + (skybox, exposure): QueryItem<'_, '_, Self::QueryData>, + ) -> Option { + let exposure = exposure + .map(Exposure::exposure) + .unwrap_or_else(|| Exposure::default().exposure()); + + Some(( + skybox.clone(), + SkyboxUniforms { + brightness: skybox.brightness * exposure, + transform: Transform::from_rotation(skybox.rotation.inverse()).to_matrix(), + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_8b: 0, + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_12b: 0, + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_16b: 0, + }, + )) + } +} + +// TODO: Replace with a push constant once WebGPU gets support for that +#[derive(Component, ShaderType, Clone)] +pub struct SkyboxUniforms { + brightness: f32, + transform: Mat4, + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_8b: u32, + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_12b: u32, + #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] + _wasm_padding_16b: u32, +} + +#[derive(Resource)] +struct SkyboxPipeline { + 
bind_group_layout: BindGroupLayout, + shader: Handle, +} + +impl SkyboxPipeline { + fn new(render_device: &RenderDevice, shader: Handle) -> Self { + Self { + bind_group_layout: render_device.create_bind_group_layout( + "skybox_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_cube(TextureSampleType::Float { filterable: true }), + sampler(SamplerBindingType::Filtering), + uniform_buffer::(true) + .visibility(ShaderStages::VERTEX_FRAGMENT), + uniform_buffer::(true), + ), + ), + ), + shader, + } + } +} + +fn init_skybox_pipeline( + mut commands: Commands, + render_device: Res, + asset_server: Res, +) { + let shader = load_embedded_asset!(asset_server.as_ref(), "skybox.wgsl"); + commands.insert_resource(SkyboxPipeline::new(&render_device, shader)); +} + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +struct SkyboxPipelineKey { + hdr: bool, + samples: u32, + depth_format: TextureFormat, +} + +impl SpecializedRenderPipeline for SkyboxPipeline { + type Key = SkyboxPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + RenderPipelineDescriptor { + label: Some("skybox_pipeline".into()), + layout: vec![self.bind_group_layout.clone()], + vertex: VertexState { + shader: self.shader.clone(), + ..default() + }, + depth_stencil: Some(DepthStencilState { + format: key.depth_format, + depth_write_enabled: false, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState { + front: StencilFaceState::IGNORE, + back: StencilFaceState::IGNORE, + read_mask: 0, + write_mask: 0, + }, + bias: DepthBiasState { + constant: 0, + slope_scale: 0.0, + clamp: 0.0, + }, + }), + multisample: MultisampleState { + count: key.samples, + mask: !0, + alpha_to_coverage_enabled: false, + }, + fragment: Some(FragmentState { + shader: self.shader.clone(), + targets: vec![Some(ColorTargetState { + format: if key.hdr { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }, + // 
BlendState::REPLACE is not needed here, and None will be potentially much faster in some cases. + blend: None, + write_mask: ColorWrites::ALL, + })], + ..default() + }), + ..default() + } + } +} + +#[derive(Component)] +pub struct SkyboxPipelineId(pub CachedRenderPipelineId); + +fn prepare_skybox_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + pipeline: Res, + views: Query<(Entity, &ExtractedView, &Msaa), With>, +) { + for (entity, view, msaa) in &views { + let pipeline_id = pipelines.specialize( + &pipeline_cache, + &pipeline, + SkyboxPipelineKey { + hdr: view.hdr, + samples: msaa.samples(), + depth_format: CORE_3D_DEPTH_FORMAT, + }, + ); + + commands + .entity(entity) + .insert(SkyboxPipelineId(pipeline_id)); + } +} + +#[derive(Component)] +pub struct SkyboxBindGroup(pub (BindGroup, u32)); + +fn prepare_skybox_bind_groups( + mut commands: Commands, + pipeline: Res, + view_uniforms: Res, + skybox_uniforms: Res>, + images: Res>, + render_device: Res, + views: Query<(Entity, &Skybox, &DynamicUniformIndex)>, +) { + for (entity, skybox, skybox_uniform_index) in &views { + if let (Some(skybox), Some(view_uniforms), Some(skybox_uniforms)) = ( + images.get(&skybox.image), + view_uniforms.uniforms.binding(), + skybox_uniforms.binding(), + ) { + let bind_group = render_device.create_bind_group( + "skybox_bind_group", + &pipeline.bind_group_layout, + &BindGroupEntries::sequential(( + &skybox.texture_view, + &skybox.sampler, + view_uniforms, + skybox_uniforms, + )), + ); + + commands + .entity(entity) + .insert(SkyboxBindGroup((bind_group, skybox_uniform_index.index()))); + } + } +} diff --git a/crates/libmarathon/src/render/skybox/prepass.rs b/crates/libmarathon/src/render/skybox/prepass.rs new file mode 100644 index 0000000..2e15f94 --- /dev/null +++ b/crates/libmarathon/src/render/skybox/prepass.rs @@ -0,0 +1,164 @@ +//! Adds motion vector support to skyboxes. See [`SkyboxPrepassPipeline`] for details. 
+ +use bevy_asset::{load_embedded_asset, AssetServer, Handle}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::{Has, With}, + resource::Resource, + system::{Commands, Query, Res, ResMut}, +}; +use crate::render::{ + render_resource::{ + binding_types::uniform_buffer, BindGroup, BindGroupEntries, BindGroupLayout, + BindGroupLayoutEntries, CachedRenderPipelineId, CompareFunction, DepthStencilState, + FragmentState, MultisampleState, PipelineCache, RenderPipelineDescriptor, ShaderStages, + SpecializedRenderPipeline, SpecializedRenderPipelines, + }, + renderer::RenderDevice, + view::{Msaa, ViewUniform, ViewUniforms}, +}; +use bevy_shader::Shader; +use bevy_utils::prelude::default; + +use crate::render::{ + core_3d::CORE_3D_DEPTH_FORMAT, + prepass::{ + prepass_target_descriptors, MotionVectorPrepass, NormalPrepass, PreviousViewData, + PreviousViewUniforms, + }, + FullscreenShader, Skybox, +}; + +/// This pipeline writes motion vectors to the prepass for all [`Skybox`]es. +/// +/// This allows features like motion blur and TAA to work correctly on the skybox. Without this, for +/// example, motion blur would not be applied to the skybox when the camera is rotated and motion +/// blur is enabled. +#[derive(Resource)] +pub struct SkyboxPrepassPipeline { + bind_group_layout: BindGroupLayout, + fullscreen_shader: FullscreenShader, + fragment_shader: Handle, +} + +/// Used to specialize the [`SkyboxPrepassPipeline`]. +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +pub struct SkyboxPrepassPipelineKey { + samples: u32, + normal_prepass: bool, +} + +/// Stores the ID for a camera's specialized pipeline, so it can be retrieved from the +/// [`PipelineCache`]. +#[derive(Component)] +pub struct RenderSkyboxPrepassPipeline(pub CachedRenderPipelineId); + +/// Stores the [`SkyboxPrepassPipeline`] bind group for a camera. This is later used by the prepass +/// render graph node to add this binding to the prepass's render pass. 
+#[derive(Component)] +pub struct SkyboxPrepassBindGroup(pub BindGroup); + +pub fn init_skybox_prepass_pipeline( + mut commands: Commands, + render_device: Res, + fullscreen_shader: Res, + asset_server: Res, +) { + commands.insert_resource(SkyboxPrepassPipeline { + bind_group_layout: render_device.create_bind_group_layout( + "skybox_prepass_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + uniform_buffer::(true), + uniform_buffer::(true), + ), + ), + ), + fullscreen_shader: fullscreen_shader.clone(), + fragment_shader: load_embedded_asset!(asset_server.as_ref(), "skybox_prepass.wgsl"), + }); +} + +impl SpecializedRenderPipeline for SkyboxPrepassPipeline { + type Key = SkyboxPrepassPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + RenderPipelineDescriptor { + label: Some("skybox_prepass_pipeline".into()), + layout: vec![self.bind_group_layout.clone()], + vertex: self.fullscreen_shader.to_vertex_state(), + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: false, + depth_compare: CompareFunction::GreaterEqual, + stencil: default(), + bias: default(), + }), + multisample: MultisampleState { + count: key.samples, + mask: !0, + alpha_to_coverage_enabled: false, + }, + fragment: Some(FragmentState { + shader: self.fragment_shader.clone(), + targets: prepass_target_descriptors(key.normal_prepass, true, false), + ..default() + }), + ..default() + } + } +} + +/// Specialize and cache the [`SkyboxPrepassPipeline`] for each camera with a [`Skybox`]. 
+pub fn prepare_skybox_prepass_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + pipeline: Res, + views: Query<(Entity, Has, &Msaa), (With, With)>, +) { + for (entity, normal_prepass, msaa) in &views { + let pipeline_key = SkyboxPrepassPipelineKey { + samples: msaa.samples(), + normal_prepass, + }; + + let render_skybox_prepass_pipeline = + pipelines.specialize(&pipeline_cache, &pipeline, pipeline_key); + commands + .entity(entity) + .insert(RenderSkyboxPrepassPipeline(render_skybox_prepass_pipeline)); + } +} + +/// Creates the required bind groups for the [`SkyboxPrepassPipeline`]. This binds the view uniforms +/// from the CPU for access in the prepass shader on the GPU, allowing us to compute camera motion +/// between frames. This is then stored in the [`SkyboxPrepassBindGroup`] component on the camera. +pub fn prepare_skybox_prepass_bind_groups( + mut commands: Commands, + pipeline: Res, + view_uniforms: Res, + prev_view_uniforms: Res, + render_device: Res, + views: Query, With)>, +) { + for entity in &views { + let (Some(view_uniforms), Some(prev_view_uniforms)) = ( + view_uniforms.uniforms.binding(), + prev_view_uniforms.uniforms.binding(), + ) else { + continue; + }; + let bind_group = render_device.create_bind_group( + "skybox_prepass_bind_group", + &pipeline.bind_group_layout, + &BindGroupEntries::sequential((view_uniforms, prev_view_uniforms)), + ); + + commands + .entity(entity) + .insert(SkyboxPrepassBindGroup(bind_group)); + } +} diff --git a/crates/libmarathon/src/render/skybox/skybox.wgsl b/crates/libmarathon/src/render/skybox/skybox.wgsl new file mode 100644 index 0000000..7982370 --- /dev/null +++ b/crates/libmarathon/src/render/skybox/skybox.wgsl @@ -0,0 +1,81 @@ +#import bevy_render::view::View +#import bevy_pbr::utils::coords_to_viewport_uv + +struct SkyboxUniforms { + brightness: f32, + transform: mat4x4, +#ifdef SIXTEEN_BYTE_ALIGNMENT + _wasm_padding_8b: u32, + _wasm_padding_12b: u32, + _wasm_padding_16b: 
u32, +#endif +} + +@group(0) @binding(0) var skybox: texture_cube; +@group(0) @binding(1) var skybox_sampler: sampler; +@group(0) @binding(2) var view: View; +@group(0) @binding(3) var uniforms: SkyboxUniforms; + +fn coords_to_ray_direction(position: vec2, viewport: vec4) -> vec3 { + // Using world positions of the fragment and camera to calculate a ray direction + // breaks down at large translations. This code only needs to know the ray direction. + // The ray direction is along the direction from the camera to the fragment position. + // In view space, the camera is at the origin, so the view space ray direction is + // along the direction of the fragment position - (0,0,0) which is just the + // fragment position. + // Use the position on the near clipping plane to avoid -inf world position + // because the far plane of an infinite reverse projection is at infinity. + let view_position_homogeneous = view.view_from_clip * vec4( + coords_to_viewport_uv(position, viewport) * vec2(2.0, -2.0) + vec2(-1.0, 1.0), + 1.0, + 1.0, + ); + + // Transforming the view space ray direction by the skybox transform matrix, it is + // equivalent to rotating the skybox itself. + var view_ray_direction = view_position_homogeneous.xyz / view_position_homogeneous.w; + view_ray_direction = (view.world_from_view * vec4(view_ray_direction, 0.0)).xyz; + + // Transforming the view space ray direction by the view matrix, transforms the + // direction to world space. Note that the w element is set to 0.0, as this is a + // vector direction, not a position, That causes the matrix multiplication to ignore + // the translations from the view matrix. + let ray_direction = (uniforms.transform * vec4(view_ray_direction, 0.0)).xyz; + + return normalize(ray_direction); +} + +struct VertexOutput { + @builtin(position) position: vec4, +}; + +// 3 | 2. +// 2 | : `. +// 1 | x-----x. +// 0 | | s | `. +// -1 | 0-----x.....1 +// +--------------- +// -1 0 1 2 3 +// +// The axes are clip-space x and y. 
The region marked s is the visible region. +// The digits in the corners of the right-angled triangle are the vertex +// indices. +@vertex +fn skybox_vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { + // See the explanation above for how this works. + let clip_position = vec2( + f32(vertex_index & 1u), + f32((vertex_index >> 1u) & 1u), + ) * 4.0 - vec2(1.0); + + return VertexOutput(vec4(clip_position, 0.0, 1.0)); +} + +@fragment +fn skybox_fragment(in: VertexOutput) -> @location(0) vec4 { + let ray_direction = coords_to_ray_direction(in.position.xy, view.viewport); + + // Cube maps are left-handed so we negate the z coordinate. + let out = textureSample(skybox, skybox_sampler, ray_direction * vec3(1.0, 1.0, -1.0)); + return vec4(out.rgb * uniforms.brightness, out.a); +} diff --git a/crates/libmarathon/src/render/skybox/skybox_prepass.wgsl b/crates/libmarathon/src/render/skybox/skybox_prepass.wgsl new file mode 100644 index 0000000..e4ecb47 --- /dev/null +++ b/crates/libmarathon/src/render/skybox/skybox_prepass.wgsl @@ -0,0 +1,24 @@ +#import bevy_render::view::View +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput +#import bevy_pbr::view_transformations::uv_to_ndc + +struct PreviousViewUniforms { + view_from_world: mat4x4, + clip_from_world: mat4x4, + clip_from_view: mat4x4, + world_from_clip: mat4x4, + view_from_clip: mat4x4, +} + +@group(0) @binding(0) var view: View; +@group(0) @binding(1) var previous_view: PreviousViewUniforms; + +@fragment +fn fragment(in: FullscreenVertexOutput) -> @location(1) vec4 { + let clip_pos = uv_to_ndc(in.uv); // Convert from uv to clip space + let world_pos = view.world_from_clip * vec4(clip_pos, 0.0, 1.0); + let prev_clip_pos = (previous_view.clip_from_world * world_pos).xy; + let velocity = (clip_pos - prev_clip_pos) * vec2(0.5, -0.5); // Copied from mesh motion vectors + + return vec4(velocity.x, velocity.y, 0.0, 1.0); +} diff --git a/crates/libmarathon/src/render/storage.rs 
b/crates/libmarathon/src/render/storage.rs new file mode 100644 index 0000000..f2e7fa2 --- /dev/null +++ b/crates/libmarathon/src/render/storage.rs @@ -0,0 +1,135 @@ +use crate::render::{ + render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin}, + render_resource::{Buffer, BufferUsages}, + renderer::RenderDevice, +}; +use bevy_app::{App, Plugin}; +use bevy_asset::{Asset, AssetApp, AssetId, RenderAssetUsages}; +use bevy_ecs::system::{lifetimeless::SRes, SystemParamItem}; +use bevy_reflect::{prelude::ReflectDefault, Reflect}; +use bevy_utils::default; +use encase::{internal::WriteInto, ShaderType}; +use wgpu::util::BufferInitDescriptor; + +/// Adds [`ShaderStorageBuffer`] as an asset that is extracted and uploaded to the GPU. +#[derive(Default)] +pub struct StoragePlugin; + +impl Plugin for StoragePlugin { + fn build(&self, app: &mut App) { + app.add_plugins(RenderAssetPlugin::::default()) + .init_asset::() + .register_asset_reflect::(); + } +} + +/// A storage buffer that is prepared as a [`RenderAsset`] and uploaded to the GPU. +#[derive(Asset, Reflect, Debug, Clone)] +#[reflect(opaque)] +#[reflect(Default, Debug, Clone)] +pub struct ShaderStorageBuffer { + /// Optional data used to initialize the buffer. + pub data: Option>, + /// The buffer description used to create the buffer. + pub buffer_description: wgpu::BufferDescriptor<'static>, + /// The asset usage of the storage buffer. + pub asset_usage: RenderAssetUsages, +} + +impl Default for ShaderStorageBuffer { + fn default() -> Self { + Self { + data: None, + buffer_description: wgpu::BufferDescriptor { + label: None, + size: 0, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }, + asset_usage: RenderAssetUsages::default(), + } + } +} + +impl ShaderStorageBuffer { + /// Creates a new storage buffer with the given data and asset usage. 
+ pub fn new(data: &[u8], asset_usage: RenderAssetUsages) -> Self { + let mut storage = ShaderStorageBuffer { + data: Some(data.to_vec()), + ..default() + }; + storage.asset_usage = asset_usage; + storage + } + + /// Creates a new storage buffer with the given size and asset usage. + pub fn with_size(size: usize, asset_usage: RenderAssetUsages) -> Self { + let mut storage = ShaderStorageBuffer { + data: None, + ..default() + }; + storage.buffer_description.size = size as u64; + storage.buffer_description.mapped_at_creation = false; + storage.asset_usage = asset_usage; + storage + } + + /// Sets the data of the storage buffer to the given [`ShaderType`]. + pub fn set_data(&mut self, value: T) + where + T: ShaderType + WriteInto, + { + let size = value.size().get() as usize; + let mut wrapper = encase::StorageBuffer::>::new(Vec::with_capacity(size)); + wrapper.write(&value).unwrap(); + self.data = Some(wrapper.into_inner()); + } +} + +impl From for ShaderStorageBuffer +where + T: ShaderType + WriteInto, +{ + fn from(value: T) -> Self { + let size = value.size().get() as usize; + let mut wrapper = encase::StorageBuffer::>::new(Vec::with_capacity(size)); + wrapper.write(&value).unwrap(); + Self::new(wrapper.as_ref(), RenderAssetUsages::default()) + } +} + +/// A storage buffer that is prepared as a [`RenderAsset`] and uploaded to the GPU. 
+pub struct GpuShaderStorageBuffer { + pub buffer: Buffer, +} + +impl RenderAsset for GpuShaderStorageBuffer { + type SourceAsset = ShaderStorageBuffer; + type Param = SRes; + + fn asset_usage(source_asset: &Self::SourceAsset) -> RenderAssetUsages { + source_asset.asset_usage + } + + fn prepare_asset( + source_asset: Self::SourceAsset, + _: AssetId, + render_device: &mut SystemParamItem, + _: Option<&Self>, + ) -> Result> { + match source_asset.data { + Some(data) => { + let buffer = render_device.create_buffer_with_data(&BufferInitDescriptor { + label: source_asset.buffer_description.label, + contents: &data, + usage: source_asset.buffer_description.usage, + }); + Ok(GpuShaderStorageBuffer { buffer }) + } + None => { + let buffer = render_device.create_buffer(&source_asset.buffer_description); + Ok(GpuShaderStorageBuffer { buffer }) + } + } + } +} diff --git a/crates/libmarathon/src/render/sync_component.rs b/crates/libmarathon/src/render/sync_component.rs new file mode 100644 index 0000000..462fa75 --- /dev/null +++ b/crates/libmarathon/src/render/sync_component.rs @@ -0,0 +1,42 @@ +use core::marker::PhantomData; + +use bevy_app::{App, Plugin}; +use bevy_ecs::component::Component; + +use crate::render::sync_world::{EntityRecord, PendingSyncEntity, SyncToRenderWorld}; + +/// Plugin that registers a component for automatic sync to the render world. See [`SyncWorldPlugin`] for more information. +/// +/// This plugin is automatically added by [`ExtractComponentPlugin`], and only needs to be added for manual extraction implementations. +/// +/// # Implementation details +/// +/// It adds [`SyncToRenderWorld`] as a required component to make the [`SyncWorldPlugin`] aware of the component, and +/// handles cleanup of the component in the render world when it is removed from an entity. +/// +/// # Warning +/// When the component is removed from the main world entity, all components are removed from the entity in the render world. 
+/// This is done in order to handle components with custom extraction logic and derived state. +/// +/// [`ExtractComponentPlugin`]: crate::extract_component::ExtractComponentPlugin +/// [`SyncWorldPlugin`]: crate::sync_world::SyncWorldPlugin +pub struct SyncComponentPlugin(PhantomData); + +impl Default for SyncComponentPlugin { + fn default() -> Self { + Self(PhantomData) + } +} + +impl Plugin for SyncComponentPlugin { + fn build(&self, app: &mut App) { + app.register_required_components::(); + + app.world_mut() + .register_component_hooks::() + .on_remove(|mut world, context| { + let mut pending = world.resource_mut::(); + pending.push(EntityRecord::ComponentRemoved(context.entity)); + }); + } +} diff --git a/crates/libmarathon/src/render/sync_world.rs b/crates/libmarathon/src/render/sync_world.rs new file mode 100644 index 0000000..d9a8a43 --- /dev/null +++ b/crates/libmarathon/src/render/sync_world.rs @@ -0,0 +1,580 @@ +use bevy_app::Plugin; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::{ContainsEntity, Entity, EntityEquivalent, EntityHash}, + lifecycle::{Add, Remove}, + observer::On, + query::With, + reflect::ReflectComponent, + resource::Resource, + system::{Local, Query, ResMut, SystemState}, + world::{Mut, World}, +}; +use bevy_platform::collections::{HashMap, HashSet}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; + +/// A plugin that synchronizes entities with [`SyncToRenderWorld`] between the main world and the render world. +/// +/// All entities with the [`SyncToRenderWorld`] component are kept in sync. It +/// is automatically added as a required component by [`ExtractComponentPlugin`] +/// and [`SyncComponentPlugin`], so it doesn't need to be added manually when +/// spawning or as a required component when either of these plugins are used. +/// +/// # Implementation +/// +/// Bevy's renderer is architected independently from the main app. 
+/// It operates in its own separate ECS [`World`], so the renderer logic can run in parallel with the main world logic. +/// This is called "Pipelined Rendering", see [`PipelinedRenderingPlugin`] for more information. +/// +/// [`SyncWorldPlugin`] is the first thing that runs every frame and it maintains an entity-to-entity mapping +/// between the main world and the render world. +/// It does so by spawning and despawning entities in the render world, to match spawned and despawned entities in the main world. +/// The link between synced entities is maintained by the [`RenderEntity`] and [`MainEntity`] components. +/// +/// The [`RenderEntity`] contains the corresponding render world entity of a main world entity, while [`MainEntity`] contains +/// the corresponding main world entity of a render world entity. +/// For convenience, [`QueryData`](bevy_ecs::query::QueryData) implementations are provided for both components: +/// adding [`MainEntity`] to a query (without a `&`) will return the corresponding main world [`Entity`], +/// and adding [`RenderEntity`] will return the corresponding render world [`Entity`]. +/// If you have access to the component itself, the underlying entities can be accessed by calling `.id()`. +/// +/// Synchronization is necessary preparation for extraction ([`ExtractSchedule`](crate::ExtractSchedule)), which copies over component data from the main +/// to the render world for these entities. 
+/// +/// ```text +/// |--------------------------------------------------------------------| +/// | | | Main world update | +/// | sync | extract |---------------------------------------------------| +/// | | | Render world update | +/// |--------------------------------------------------------------------| +/// ``` +/// +/// An example for synchronized main entities 1v1 and 18v1 +/// +/// ```text +/// |---------------------------Main World------------------------------| +/// | Entity | Component | +/// |-------------------------------------------------------------------| +/// | ID: 1v1 | PointLight | RenderEntity(ID: 3V1) | SyncToRenderWorld | +/// | ID: 18v1 | PointLight | RenderEntity(ID: 5V1) | SyncToRenderWorld | +/// |-------------------------------------------------------------------| +/// +/// |----------Render World-----------| +/// | Entity | Component | +/// |---------------------------------| +/// | ID: 3v1 | MainEntity(ID: 1V1) | +/// | ID: 5v1 | MainEntity(ID: 18V1) | +/// |---------------------------------| +/// +/// ``` +/// +/// Note that this effectively establishes a link between the main world entity and the render world entity. +/// Not every entity needs to be synchronized, however; only entities with the [`SyncToRenderWorld`] component are synced. +/// Adding [`SyncToRenderWorld`] to a main world component will establish such a link. +/// Once a synchronized main entity is despawned, its corresponding render entity will be automatically +/// despawned in the next `sync`. +/// +/// The sync step does not copy any of component data between worlds, since its often not necessary to transfer over all +/// the components of a main world entity. +/// The render world probably cares about a `Position` component, but not a `Velocity` component. +/// The extraction happens in its own step, independently from, and after synchronization. +/// +/// Moreover, [`SyncWorldPlugin`] only synchronizes *entities*. 
[`RenderAsset`](crate::render_asset::RenderAsset)s like meshes and textures are handled +/// differently. +/// +/// [`PipelinedRenderingPlugin`]: crate::pipelined_rendering::PipelinedRenderingPlugin +/// [`ExtractComponentPlugin`]: crate::extract_component::ExtractComponentPlugin +/// [`SyncComponentPlugin`]: crate::sync_component::SyncComponentPlugin +#[derive(Default)] +pub struct SyncWorldPlugin; + +impl Plugin for SyncWorldPlugin { + fn build(&self, app: &mut bevy_app::App) { + app.init_resource::(); + app.add_observer( + |add: On, mut pending: ResMut| { + pending.push(EntityRecord::Added(add.entity)); + }, + ); + app.add_observer( + |remove: On, + mut pending: ResMut, + query: Query<&RenderEntity>| { + if let Ok(e) = query.get(remove.entity) { + pending.push(EntityRecord::Removed(*e)); + }; + }, + ); + } +} +/// Marker component that indicates that its entity needs to be synchronized to the render world. +/// +/// This component is automatically added as a required component by [`ExtractComponentPlugin`] and [`SyncComponentPlugin`]. +/// For more information see [`SyncWorldPlugin`]. +/// +/// NOTE: This component should persist throughout the entity's entire lifecycle. +/// If this component is removed from its entity, the entity will be despawned. +/// +/// [`ExtractComponentPlugin`]: crate::extract_component::ExtractComponentPlugin +/// [`SyncComponentPlugin`]: crate::sync_component::SyncComponentPlugin +#[derive(Component, Copy, Clone, Debug, Default, Reflect)] +#[reflect[Component, Default, Clone]] +#[component(storage = "SparseSet")] +pub struct SyncToRenderWorld; + +/// Component added on the main world entities that are synced to the Render World in order to keep track of the corresponding render world entity. +/// +/// Can also be used as a newtype wrapper for render world entities. 
+#[derive(Component, Deref, Copy, Clone, Debug, Eq, Hash, PartialEq, Reflect)] +#[component(clone_behavior = Ignore)] +#[reflect(Component, Clone)] +pub struct RenderEntity(Entity); +impl RenderEntity { + #[inline] + pub fn id(&self) -> Entity { + self.0 + } +} + +impl From for RenderEntity { + fn from(entity: Entity) -> Self { + RenderEntity(entity) + } +} + +impl ContainsEntity for RenderEntity { + fn entity(&self) -> Entity { + self.id() + } +} + +// SAFETY: RenderEntity is a newtype around Entity that derives its comparison traits. +unsafe impl EntityEquivalent for RenderEntity {} + +/// Component added on the render world entities to keep track of the corresponding main world entity. +/// +/// Can also be used as a newtype wrapper for main world entities. +#[derive(Component, Deref, Copy, Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Reflect)] +#[reflect(Component, Clone)] +pub struct MainEntity(Entity); +impl MainEntity { + #[inline] + pub fn id(&self) -> Entity { + self.0 + } +} + +impl From for MainEntity { + fn from(entity: Entity) -> Self { + MainEntity(entity) + } +} + +impl ContainsEntity for MainEntity { + fn entity(&self) -> Entity { + self.id() + } +} + +// SAFETY: RenderEntity is a newtype around Entity that derives its comparison traits. +unsafe impl EntityEquivalent for MainEntity {} + +/// A [`HashMap`] pre-configured to use [`EntityHash`] hashing with a [`MainEntity`]. +pub type MainEntityHashMap = HashMap; + +/// A [`HashSet`] pre-configured to use [`EntityHash`] hashing with a [`MainEntity`].. +pub type MainEntityHashSet = HashSet; + +/// Marker component that indicates that its entity needs to be despawned at the end of the frame. +#[derive(Component, Copy, Clone, Debug, Default, Reflect)] +#[reflect(Component, Default, Clone)] +pub struct TemporaryRenderEntity; + +/// A record enum to what entities with [`SyncToRenderWorld`] have been added or removed. 
+#[derive(Debug)] +pub(crate) enum EntityRecord { + /// When an entity is spawned on the main world, notify the render world so that it can spawn a corresponding + /// entity. This contains the main world entity. + Added(Entity), + /// When an entity is despawned on the main world, notify the render world so that the corresponding entity can be + /// despawned. This contains the render world entity. + Removed(RenderEntity), + /// When a component is removed from an entity, notify the render world so that the corresponding component can be + /// removed. This contains the main world entity. + ComponentRemoved(Entity), +} + +// Entity Record in MainWorld pending to Sync +#[derive(Resource, Default, Deref, DerefMut)] +pub(crate) struct PendingSyncEntity { + records: Vec, +} + +pub(crate) fn entity_sync_system(main_world: &mut World, render_world: &mut World) { + main_world.resource_scope(|world, mut pending: Mut| { + // TODO : batching record + for record in pending.drain(..) { + match record { + EntityRecord::Added(e) => { + if let Ok(mut main_entity) = world.get_entity_mut(e) { + match main_entity.entry::() { + bevy_ecs::world::ComponentEntry::Occupied(_) => { + panic!("Attempting to synchronize an entity that has already been synchronized!"); + } + bevy_ecs::world::ComponentEntry::Vacant(entry) => { + let id = render_world.spawn(MainEntity(e)).id(); + + entry.insert(RenderEntity(id)); + } + }; + } + } + EntityRecord::Removed(render_entity) => { + if let Ok(ec) = render_world.get_entity_mut(render_entity.id()) { + ec.despawn(); + }; + } + EntityRecord::ComponentRemoved(main_entity) => { + let Some(mut render_entity) = world.get_mut::(main_entity) else { + continue; + }; + if let Ok(render_world_entity) = render_world.get_entity_mut(render_entity.id()) { + // In order to handle components that extract to derived components, we clear the entity + // and let the extraction system re-add the components. 
+ render_world_entity.despawn(); + + let id = render_world.spawn(MainEntity(main_entity)).id(); + render_entity.0 = id; + } + }, + } + } + }); +} + +pub(crate) fn despawn_temporary_render_entities( + world: &mut World, + state: &mut SystemState>>, + mut local: Local>, +) { + let query = state.get(world); + + local.extend(query.iter()); + + // Ensure next frame allocation keeps order + local.sort_unstable_by_key(|e| e.index()); + for e in local.drain(..).rev() { + world.despawn(e); + } +} + +/// This module exists to keep the complex unsafe code out of the main module. +/// +/// The implementations for both [`MainEntity`] and [`RenderEntity`] should stay in sync, +/// and are based off of the `&T` implementation in `bevy_ecs`. +mod render_entities_world_query_impls { + use super::{MainEntity, RenderEntity}; + + use bevy_ecs::{ + archetype::Archetype, + component::{ComponentId, Components, Tick}, + entity::Entity, + query::{FilteredAccess, QueryData, ReadOnlyQueryData, ReleaseStateQueryData, WorldQuery}, + storage::{Table, TableRow}, + world::{unsafe_world_cell::UnsafeWorldCell, World}, + }; + + /// SAFETY: defers completely to `&RenderEntity` implementation, + /// and then only modifies the output safely. + unsafe impl WorldQuery for RenderEntity { + type Fetch<'w> = <&'static RenderEntity as WorldQuery>::Fetch<'w>; + type State = <&'static RenderEntity as WorldQuery>::State; + + fn shrink_fetch<'wlong: 'wshort, 'wshort>( + fetch: Self::Fetch<'wlong>, + ) -> Self::Fetch<'wshort> { + fetch + } + + #[inline] + unsafe fn init_fetch<'w, 's>( + world: UnsafeWorldCell<'w>, + component_id: &'s ComponentId, + last_run: Tick, + this_run: Tick, + ) -> Self::Fetch<'w> { + // SAFETY: defers to the `&T` implementation, with T set to `RenderEntity`. 
+ unsafe { + <&RenderEntity as WorldQuery>::init_fetch(world, component_id, last_run, this_run) + } + } + + const IS_DENSE: bool = <&'static RenderEntity as WorldQuery>::IS_DENSE; + + #[inline] + unsafe fn set_archetype<'w, 's>( + fetch: &mut Self::Fetch<'w>, + component_id: &'s ComponentId, + archetype: &'w Archetype, + table: &'w Table, + ) { + // SAFETY: defers to the `&T` implementation, with T set to `RenderEntity`. + unsafe { + <&RenderEntity as WorldQuery>::set_archetype(fetch, component_id, archetype, table); + } + } + + #[inline] + unsafe fn set_table<'w, 's>( + fetch: &mut Self::Fetch<'w>, + &component_id: &'s ComponentId, + table: &'w Table, + ) { + // SAFETY: defers to the `&T` implementation, with T set to `RenderEntity`. + unsafe { <&RenderEntity as WorldQuery>::set_table(fetch, &component_id, table) } + } + + fn update_component_access(&component_id: &ComponentId, access: &mut FilteredAccess) { + <&RenderEntity as WorldQuery>::update_component_access(&component_id, access); + } + + fn init_state(world: &mut World) -> ComponentId { + <&RenderEntity as WorldQuery>::init_state(world) + } + + fn get_state(components: &Components) -> Option { + <&RenderEntity as WorldQuery>::get_state(components) + } + + fn matches_component_set( + &state: &ComponentId, + set_contains_id: &impl Fn(ComponentId) -> bool, + ) -> bool { + <&RenderEntity as WorldQuery>::matches_component_set(&state, set_contains_id) + } + } + + // SAFETY: Component access of Self::ReadOnly is a subset of Self. + // Self::ReadOnly matches exactly the same archetypes/tables as Self. 
+ unsafe impl QueryData for RenderEntity { + const IS_READ_ONLY: bool = true; + type ReadOnly = RenderEntity; + type Item<'w, 's> = Entity; + + fn shrink<'wlong: 'wshort, 'wshort, 's>( + item: Self::Item<'wlong, 's>, + ) -> Self::Item<'wshort, 's> { + item + } + + #[inline(always)] + unsafe fn fetch<'w, 's>( + state: &'s Self::State, + fetch: &mut Self::Fetch<'w>, + entity: Entity, + table_row: TableRow, + ) -> Self::Item<'w, 's> { + // SAFETY: defers to the `&T` implementation, with T set to `RenderEntity`. + let component = + unsafe { <&RenderEntity as QueryData>::fetch(state, fetch, entity, table_row) }; + component.id() + } + } + + // SAFETY: the underlying `Entity` is copied, and no mutable access is provided. + unsafe impl ReadOnlyQueryData for RenderEntity {} + + impl ReleaseStateQueryData for RenderEntity { + fn release_state<'w>(item: Self::Item<'w, '_>) -> Self::Item<'w, 'static> { + item + } + } + + /// SAFETY: defers completely to `&RenderEntity` implementation, + /// and then only modifies the output safely. + unsafe impl WorldQuery for MainEntity { + type Fetch<'w> = <&'static MainEntity as WorldQuery>::Fetch<'w>; + type State = <&'static MainEntity as WorldQuery>::State; + + fn shrink_fetch<'wlong: 'wshort, 'wshort>( + fetch: Self::Fetch<'wlong>, + ) -> Self::Fetch<'wshort> { + fetch + } + + #[inline] + unsafe fn init_fetch<'w, 's>( + world: UnsafeWorldCell<'w>, + component_id: &'s ComponentId, + last_run: Tick, + this_run: Tick, + ) -> Self::Fetch<'w> { + // SAFETY: defers to the `&T` implementation, with T set to `MainEntity`. + unsafe { + <&MainEntity as WorldQuery>::init_fetch(world, component_id, last_run, this_run) + } + } + + const IS_DENSE: bool = <&'static MainEntity as WorldQuery>::IS_DENSE; + + #[inline] + unsafe fn set_archetype<'w, 's>( + fetch: &mut Self::Fetch<'w>, + component_id: &ComponentId, + archetype: &'w Archetype, + table: &'w Table, + ) { + // SAFETY: defers to the `&T` implementation, with T set to `MainEntity`. 
+ unsafe { + <&MainEntity as WorldQuery>::set_archetype(fetch, component_id, archetype, table); + } + } + + #[inline] + unsafe fn set_table<'w, 's>( + fetch: &mut Self::Fetch<'w>, + &component_id: &'s ComponentId, + table: &'w Table, + ) { + // SAFETY: defers to the `&T` implementation, with T set to `MainEntity`. + unsafe { <&MainEntity as WorldQuery>::set_table(fetch, &component_id, table) } + } + + fn update_component_access(&component_id: &ComponentId, access: &mut FilteredAccess) { + <&MainEntity as WorldQuery>::update_component_access(&component_id, access); + } + + fn init_state(world: &mut World) -> ComponentId { + <&MainEntity as WorldQuery>::init_state(world) + } + + fn get_state(components: &Components) -> Option { + <&MainEntity as WorldQuery>::get_state(components) + } + + fn matches_component_set( + &state: &ComponentId, + set_contains_id: &impl Fn(ComponentId) -> bool, + ) -> bool { + <&MainEntity as WorldQuery>::matches_component_set(&state, set_contains_id) + } + } + + // SAFETY: Component access of Self::ReadOnly is a subset of Self. + // Self::ReadOnly matches exactly the same archetypes/tables as Self. + unsafe impl QueryData for MainEntity { + const IS_READ_ONLY: bool = true; + type ReadOnly = MainEntity; + type Item<'w, 's> = Entity; + + fn shrink<'wlong: 'wshort, 'wshort, 's>( + item: Self::Item<'wlong, 's>, + ) -> Self::Item<'wshort, 's> { + item + } + + #[inline(always)] + unsafe fn fetch<'w, 's>( + state: &'s Self::State, + fetch: &mut Self::Fetch<'w>, + entity: Entity, + table_row: TableRow, + ) -> Self::Item<'w, 's> { + // SAFETY: defers to the `&T` implementation, with T set to `MainEntity`. + let component = + unsafe { <&MainEntity as QueryData>::fetch(state, fetch, entity, table_row) }; + component.id() + } + } + + // SAFETY: the underlying `Entity` is copied, and no mutable access is provided. 
+ unsafe impl ReadOnlyQueryData for MainEntity {} + + impl ReleaseStateQueryData for MainEntity { + fn release_state<'w>(item: Self::Item<'w, '_>) -> Self::Item<'w, 'static> { + item + } + } +} + +#[cfg(test)] +mod tests { + use bevy_ecs::{ + component::Component, + entity::Entity, + lifecycle::{Add, Remove}, + observer::On, + query::With, + system::{Query, ResMut}, + world::World, + }; + + use super::{ + entity_sync_system, EntityRecord, MainEntity, PendingSyncEntity, RenderEntity, + SyncToRenderWorld, + }; + + #[derive(Component)] + struct RenderDataComponent; + + #[test] + fn sync_world() { + let mut main_world = World::new(); + let mut render_world = World::new(); + main_world.init_resource::(); + + main_world.add_observer( + |add: On, mut pending: ResMut| { + pending.push(EntityRecord::Added(add.entity)); + }, + ); + main_world.add_observer( + |remove: On, + mut pending: ResMut, + query: Query<&RenderEntity>| { + if let Ok(e) = query.get(remove.entity) { + pending.push(EntityRecord::Removed(*e)); + }; + }, + ); + + // spawn some empty entities for test + for _ in 0..99 { + main_world.spawn_empty(); + } + + // spawn + let main_entity = main_world + .spawn(RenderDataComponent) + // indicates that its entity needs to be synchronized to the render world + .insert(SyncToRenderWorld) + .id(); + + entity_sync_system(&mut main_world, &mut render_world); + + let mut q = render_world.query_filtered::>(); + + // Only one synchronized entity + assert!(q.iter(&render_world).count() == 1); + + let render_entity = q.single(&render_world).unwrap(); + let render_entity_component = main_world.get::(main_entity).unwrap(); + + assert!(render_entity_component.id() == render_entity); + + let main_entity_component = render_world + .get::(render_entity_component.id()) + .unwrap(); + + assert!(main_entity_component.id() == main_entity); + + // despawn + main_world.despawn(main_entity); + + entity_sync_system(&mut main_world, &mut render_world); + + // Only one synchronized entity + 
assert!(q.iter(&render_world).count() == 0); + } +} diff --git a/crates/libmarathon/src/render/texture/fallback_image.rs b/crates/libmarathon/src/render/texture/fallback_image.rs new file mode 100644 index 0000000..bea52ed --- /dev/null +++ b/crates/libmarathon/src/render/texture/fallback_image.rs @@ -0,0 +1,272 @@ +use crate::render::{ + render_resource::*, + renderer::{RenderDevice, RenderQueue}, + texture::{DefaultImageSampler, GpuImage}, +}; +use bevy_asset::RenderAssetUsages; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + prelude::{FromWorld, Res, ResMut}, + resource::Resource, + system::SystemParam, +}; +use bevy_image::{BevyDefault, Image, ImageSampler, TextureFormatPixelInfo}; +use bevy_platform::collections::HashMap; + +/// A [`RenderApp`](crate::RenderApp) resource that contains the default "fallback image", +/// which can be used in situations where an image was not explicitly defined. The most common +/// use case is [`AsBindGroup`] implementations (such as materials) that support optional textures. +/// +/// Defaults to a 1x1 fully opaque white texture, (1.0, 1.0, 1.0, 1.0) which makes multiplying +/// it with other colors a no-op. +#[derive(Resource)] +pub struct FallbackImage { + /// Fallback image for [`TextureViewDimension::D1`]. + pub d1: GpuImage, + /// Fallback image for [`TextureViewDimension::D2`]. + pub d2: GpuImage, + /// Fallback image for [`TextureViewDimension::D2Array`]. + pub d2_array: GpuImage, + /// Fallback image for [`TextureViewDimension::Cube`]. + pub cube: GpuImage, + /// Fallback image for [`TextureViewDimension::CubeArray`]. + pub cube_array: GpuImage, + /// Fallback image for [`TextureViewDimension::D3`]. + pub d3: GpuImage, +} + +impl FallbackImage { + /// Returns the appropriate fallback image for the given texture dimension. 
+ pub fn get(&self, texture_dimension: TextureViewDimension) -> &GpuImage { + match texture_dimension { + TextureViewDimension::D1 => &self.d1, + TextureViewDimension::D2 => &self.d2, + TextureViewDimension::D2Array => &self.d2_array, + TextureViewDimension::Cube => &self.cube, + TextureViewDimension::CubeArray => &self.cube_array, + TextureViewDimension::D3 => &self.d3, + } + } +} + +/// A [`RenderApp`](crate::RenderApp) resource that contains a _zero-filled_ "fallback image", +/// which can be used in place of [`FallbackImage`], when a fully transparent or black fallback +/// is required instead of fully opaque white. +/// +/// Defaults to a 1x1 fully transparent black texture, (0.0, 0.0, 0.0, 0.0) which makes adding +/// or alpha-blending it to other colors a no-op. +#[derive(Resource, Deref)] +pub struct FallbackImageZero(GpuImage); + +/// A [`RenderApp`](crate::RenderApp) resource that contains a "cubemap fallback image", +/// which can be used in situations where an image was not explicitly defined. The most common +/// use case is [`AsBindGroup`] implementations (such as materials) that support optional textures. 
+#[derive(Resource, Deref)] +pub struct FallbackImageCubemap(GpuImage); + +fn fallback_image_new( + render_device: &RenderDevice, + render_queue: &RenderQueue, + default_sampler: &DefaultImageSampler, + format: TextureFormat, + dimension: TextureViewDimension, + samples: u32, + value: u8, +) -> GpuImage { + // TODO make this configurable per channel + + let extents = Extent3d { + width: 1, + height: 1, + depth_or_array_layers: match dimension { + TextureViewDimension::Cube | TextureViewDimension::CubeArray => 6, + _ => 1, + }, + }; + + // We can't create textures with data when it's a depth texture or when using multiple samples + let create_texture_with_data = !format.is_depth_stencil_format() && samples == 1; + + let image_dimension = dimension.compatible_texture_dimension(); + let mut image = if create_texture_with_data { + let data = vec![value; format.pixel_size().unwrap_or(0)]; + Image::new_fill( + extents, + image_dimension, + &data, + format, + RenderAssetUsages::RENDER_WORLD, + ) + } else { + let mut image = Image::default_uninit(); + image.texture_descriptor.dimension = TextureDimension::D2; + image.texture_descriptor.size = extents; + image.texture_descriptor.format = format; + image + }; + image.texture_descriptor.sample_count = samples; + if image_dimension == TextureDimension::D2 { + image.texture_descriptor.usage |= TextureUsages::RENDER_ATTACHMENT; + } + + let texture = if create_texture_with_data { + render_device.create_texture_with_data( + render_queue, + &image.texture_descriptor, + TextureDataOrder::default(), + &image.data.expect("Image has no data"), + ) + } else { + render_device.create_texture(&image.texture_descriptor) + }; + + let texture_view = texture.create_view(&TextureViewDescriptor { + dimension: Some(dimension), + array_layer_count: Some(extents.depth_or_array_layers), + ..TextureViewDescriptor::default() + }); + let sampler = match image.sampler { + ImageSampler::Default => (**default_sampler).clone(), + 
ImageSampler::Descriptor(ref descriptor) => { + render_device.create_sampler(&descriptor.as_wgpu()) + } + }; + GpuImage { + texture, + texture_view, + texture_format: image.texture_descriptor.format, + sampler, + size: image.texture_descriptor.size, + mip_level_count: image.texture_descriptor.mip_level_count, + } +} + +impl FromWorld for FallbackImage { + fn from_world(world: &mut bevy_ecs::prelude::World) -> Self { + let render_device = world.resource::(); + let render_queue = world.resource::(); + let default_sampler = world.resource::(); + Self { + d1: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::D1, + 1, + 255, + ), + d2: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::D2, + 1, + 255, + ), + d2_array: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::D2Array, + 1, + 255, + ), + cube: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::Cube, + 1, + 255, + ), + cube_array: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::CubeArray, + 1, + 255, + ), + d3: fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::D3, + 1, + 255, + ), + } + } +} + +impl FromWorld for FallbackImageZero { + fn from_world(world: &mut bevy_ecs::prelude::World) -> Self { + let render_device = world.resource::(); + let render_queue = world.resource::(); + let default_sampler = world.resource::(); + Self(fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::D2, + 1, + 0, + )) + } +} + +impl FromWorld for FallbackImageCubemap { + fn from_world(world: 
&mut bevy_ecs::prelude::World) -> Self { + let render_device = world.resource::(); + let render_queue = world.resource::(); + let default_sampler = world.resource::(); + Self(fallback_image_new( + render_device, + render_queue, + default_sampler, + TextureFormat::bevy_default(), + TextureViewDimension::Cube, + 1, + 255, + )) + } +} + +/// A Cache of fallback textures that uses the sample count and `TextureFormat` as a key +/// +/// # WARNING +/// Images using MSAA with sample count > 1 are not initialized with data, therefore, +/// you shouldn't sample them before writing data to them first. +#[derive(Resource, Deref, DerefMut, Default)] +pub struct FallbackImageFormatMsaaCache(HashMap<(u32, TextureFormat), GpuImage>); + +#[derive(SystemParam)] +pub struct FallbackImageMsaa<'w> { + cache: ResMut<'w, FallbackImageFormatMsaaCache>, + render_device: Res<'w, RenderDevice>, + render_queue: Res<'w, RenderQueue>, + default_sampler: Res<'w, DefaultImageSampler>, +} + +impl<'w> FallbackImageMsaa<'w> { + pub fn image_for_samplecount(&mut self, sample_count: u32, format: TextureFormat) -> &GpuImage { + self.cache.entry((sample_count, format)).or_insert_with(|| { + fallback_image_new( + &self.render_device, + &self.render_queue, + &self.default_sampler, + format, + TextureViewDimension::D2, + sample_count, + 255, + ) + }) + } +} diff --git a/crates/libmarathon/src/render/texture/gpu_image.rs b/crates/libmarathon/src/render/texture/gpu_image.rs new file mode 100644 index 0000000..f72ad79 --- /dev/null +++ b/crates/libmarathon/src/render/texture/gpu_image.rs @@ -0,0 +1,131 @@ +use crate::render::{ + render_asset::{PrepareAssetError, RenderAsset}, + render_resource::{DefaultImageSampler, Sampler, Texture, TextureView}, + renderer::{RenderDevice, RenderQueue}, +}; +use bevy_asset::{AssetId, RenderAssetUsages}; +use bevy_ecs::system::{lifetimeless::SRes, SystemParamItem}; +use bevy_image::{Image, ImageSampler}; +use bevy_math::{AspectRatio, UVec2}; +use tracing::warn; +use 
wgpu::{Extent3d, TextureFormat, TextureViewDescriptor}; + +/// The GPU-representation of an [`Image`]. +/// Consists of the [`Texture`], its [`TextureView`] and the corresponding [`Sampler`], and the texture's size. +#[derive(Debug, Clone)] +pub struct GpuImage { + pub texture: Texture, + pub texture_view: TextureView, + pub texture_format: TextureFormat, + pub sampler: Sampler, + pub size: Extent3d, + pub mip_level_count: u32, +} + +impl RenderAsset for GpuImage { + type SourceAsset = Image; + type Param = ( + SRes, + SRes, + SRes, + ); + + #[inline] + fn asset_usage(image: &Self::SourceAsset) -> RenderAssetUsages { + image.asset_usage + } + + #[inline] + fn byte_len(image: &Self::SourceAsset) -> Option { + image.data.as_ref().map(Vec::len) + } + + /// Converts the extracted image into a [`GpuImage`]. + fn prepare_asset( + image: Self::SourceAsset, + _: AssetId, + (render_device, render_queue, default_sampler): &mut SystemParamItem, + previous_asset: Option<&Self>, + ) -> Result> { + let texture = if let Some(ref data) = image.data { + render_device.create_texture_with_data( + render_queue, + &image.texture_descriptor, + image.data_order, + data, + ) + } else { + let new_texture = render_device.create_texture(&image.texture_descriptor); + if image.copy_on_resize { + if let Some(previous) = previous_asset { + let mut command_encoder = + render_device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("copy_image_on_resize"), + }); + let copy_size = Extent3d { + width: image.texture_descriptor.size.width.min(previous.size.width), + height: image + .texture_descriptor + .size + .height + .min(previous.size.height), + depth_or_array_layers: image + .texture_descriptor + .size + .depth_or_array_layers + .min(previous.size.depth_or_array_layers), + }; + + command_encoder.copy_texture_to_texture( + previous.texture.as_image_copy(), + new_texture.as_image_copy(), + copy_size, + ); + render_queue.submit([command_encoder.finish()]); + } else { + 
warn!("No previous asset to copy from for image: {:?}", image); + } + } + new_texture + }; + + let texture_view = texture.create_view( + image + .texture_view_descriptor + .or_else(|| Some(TextureViewDescriptor::default())) + .as_ref() + .unwrap(), + ); + let sampler = match image.sampler { + ImageSampler::Default => (***default_sampler).clone(), + ImageSampler::Descriptor(descriptor) => { + render_device.create_sampler(&descriptor.as_wgpu()) + } + }; + + Ok(GpuImage { + texture, + texture_view, + texture_format: image.texture_descriptor.format, + sampler, + size: image.texture_descriptor.size, + mip_level_count: image.texture_descriptor.mip_level_count, + }) + } +} + +impl GpuImage { + /// Returns the aspect ratio (width / height) of a 2D image. + #[inline] + pub fn aspect_ratio(&self) -> AspectRatio { + AspectRatio::try_from_pixels(self.size.width, self.size.height).expect( + "Failed to calculate aspect ratio: Image dimensions must be positive, non-zero values", + ) + } + + /// Returns the size of a 2D image. + #[inline] + pub fn size_2d(&self) -> UVec2 { + UVec2::new(self.size.width, self.size.height) + } +} diff --git a/crates/libmarathon/src/render/texture/manual_texture_view.rs b/crates/libmarathon/src/render/texture/manual_texture_view.rs new file mode 100644 index 0000000..898d041 --- /dev/null +++ b/crates/libmarathon/src/render/texture/manual_texture_view.rs @@ -0,0 +1,68 @@ +use bevy_camera::ManualTextureViewHandle; +use bevy_ecs::{prelude::Component, resource::Resource}; +use bevy_image::BevyDefault; +use bevy_math::UVec2; +use bevy_platform::collections::HashMap; +use macros::ExtractResource; +use wgpu::TextureFormat; + +use crate::render::render_resource::TextureView; + +/// A manually managed [`TextureView`] for use as a [`bevy_camera::RenderTarget`]. 
+#[derive(Debug, Clone, Component)] +pub struct ManualTextureView { + pub texture_view: TextureView, + pub size: UVec2, + pub format: TextureFormat, +} + +impl ManualTextureView { + pub fn with_default_format(texture_view: TextureView, size: UVec2) -> Self { + Self { + texture_view, + size, + format: TextureFormat::bevy_default(), + } + } +} + +/// Resource that stores manually managed [`ManualTextureView`]s for use as a [`RenderTarget`](bevy_camera::RenderTarget). +/// This type dereferences to a `HashMap`. +/// To add a new texture view, pick a new [`ManualTextureViewHandle`] and insert it into the map. +/// Then, to render to the view, set a [`Camera`](bevy_camera::Camera)s `target` to `RenderTarget::TextureView(handle)`. +/// ```ignore +/// # use bevy_ecs::prelude::*; +/// # let mut world = World::default(); +/// # world.insert_resource(ManualTextureViews::default()); +/// # let texture_view = todo!(); +/// let manual_views = world.resource_mut::(); +/// let manual_view = ManualTextureView::with_default_format(texture_view, UVec2::new(1024, 1024)); +/// +/// // Choose an unused handle value; it's likely only you are inserting manual views. +/// const MANUAL_VIEW_HANDLE: ManualTextureViewHandle = ManualTextureViewHandle::new(42); +/// manual_views.insert(MANUAL_VIEW_HANDLE, manual_view); +/// +/// // Now you can spawn a Cemera that renders to the manual view: +/// # use bevy_camera::{Camera, RenderTarget}; +/// world.spawn(Camera { +/// target: RenderTarget::TextureView(MANUAL_VIEW_HANDLE), +/// ..Default::default() +/// }); +/// ``` +/// Bevy will then use the `ManualTextureViews` resource to find your texture view and render to it. 
+#[derive(Default, Clone, Resource, ExtractResource)] +pub struct ManualTextureViews(HashMap); + +impl core::ops::Deref for ManualTextureViews { + type Target = HashMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl core::ops::DerefMut for ManualTextureViews { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/crates/libmarathon/src/render/texture/mod.rs b/crates/libmarathon/src/render/texture/mod.rs new file mode 100644 index 0000000..13e042d --- /dev/null +++ b/crates/libmarathon/src/render/texture/mod.rs @@ -0,0 +1,73 @@ +mod fallback_image; +mod gpu_image; +mod manual_texture_view; +mod texture_attachment; +mod texture_cache; + +pub use crate::render::render_resource::DefaultImageSampler; +use bevy_image::{CompressedImageFormatSupport, CompressedImageFormats, ImageLoader, ImagePlugin}; +pub use fallback_image::*; +pub use gpu_image::*; +pub use manual_texture_view::*; +pub use texture_attachment::*; +pub use texture_cache::*; + +use crate::render::{ + extract_resource::ExtractResourcePlugin, render_asset::RenderAssetPlugin, + renderer::RenderDevice, Render, RenderApp, RenderSystems, +}; +use bevy_app::{App, Plugin}; +use bevy_asset::AssetApp; +use bevy_ecs::prelude::*; +use tracing::warn; + +#[derive(Default)] +pub struct TexturePlugin; + +impl Plugin for TexturePlugin { + fn build(&self, app: &mut App) { + app.add_plugins(( + RenderAssetPlugin::::default(), + ExtractResourcePlugin::::default(), + )) + .init_resource::(); + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.init_resource::().add_systems( + Render, + update_texture_cache_system.in_set(RenderSystems::Cleanup), + ); + } + } + + fn finish(&self, app: &mut App) { + if !ImageLoader::SUPPORTED_FORMATS.is_empty() { + let supported_compressed_formats = if let Some(resource) = + app.world().get_resource::() + { + resource.0 + } else { + warn!("CompressedImageFormatSupport resource not found. 
It should either be initialized in finish() of \ + RenderPlugin, or manually if not using the RenderPlugin or the WGPU backend."); + CompressedImageFormats::NONE + }; + + app.register_asset_loader(ImageLoader::new(supported_compressed_formats)); + } + let default_sampler = app.get_added_plugins::()[0] + .default_sampler + .clone(); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + let default_sampler = { + let device = render_app.world().resource::(); + device.create_sampler(&default_sampler.as_wgpu()) + }; + render_app + .insert_resource(DefaultImageSampler(default_sampler)) + .init_resource::() + .init_resource::() + .init_resource::() + .init_resource::(); + } + } +} diff --git a/crates/libmarathon/src/render/texture/texture_attachment.rs b/crates/libmarathon/src/render/texture/texture_attachment.rs new file mode 100644 index 0000000..fb5a3cf --- /dev/null +++ b/crates/libmarathon/src/render/texture/texture_attachment.rs @@ -0,0 +1,162 @@ +use super::CachedTexture; +use crate::render::render_resource::{TextureFormat, TextureView}; +use std::sync::Arc; +use bevy_color::LinearRgba; +use core::sync::atomic::{AtomicBool, Ordering}; +use wgpu::{ + LoadOp, Operations, RenderPassColorAttachment, RenderPassDepthStencilAttachment, StoreOp, +}; + +/// A wrapper for a [`CachedTexture`] that is used as a [`RenderPassColorAttachment`]. +#[derive(Clone)] +pub struct ColorAttachment { + pub texture: CachedTexture, + pub resolve_target: Option, + clear_color: Option, + is_first_call: Arc, +} + +impl ColorAttachment { + pub fn new( + texture: CachedTexture, + resolve_target: Option, + clear_color: Option, + ) -> Self { + Self { + texture, + resolve_target, + clear_color, + is_first_call: Arc::new(AtomicBool::new(true)), + } + } + + /// Get this texture view as an attachment. The attachment will be cleared with a value of + /// `clear_color` if this is the first time calling this function, otherwise it will be loaded. 
+ /// + /// The returned attachment will always have writing enabled (`store: StoreOp::Load`). + pub fn get_attachment(&self) -> RenderPassColorAttachment<'_> { + if let Some(resolve_target) = self.resolve_target.as_ref() { + let first_call = self.is_first_call.fetch_and(false, Ordering::SeqCst); + + RenderPassColorAttachment { + view: &resolve_target.default_view, + depth_slice: None, + resolve_target: Some(&self.texture.default_view), + ops: Operations { + load: match (self.clear_color, first_call) { + (Some(clear_color), true) => LoadOp::Clear(clear_color.into()), + (None, _) | (Some(_), false) => LoadOp::Load, + }, + store: StoreOp::Store, + }, + } + } else { + self.get_unsampled_attachment() + } + } + + /// Get this texture view as an attachment, without the resolve target. The attachment will be cleared with + /// a value of `clear_color` if this is the first time calling this function, otherwise it will be loaded. + /// + /// The returned attachment will always have writing enabled (`store: StoreOp::Load`). + pub fn get_unsampled_attachment(&self) -> RenderPassColorAttachment<'_> { + let first_call = self.is_first_call.fetch_and(false, Ordering::SeqCst); + + RenderPassColorAttachment { + view: &self.texture.default_view, + depth_slice: None, + resolve_target: None, + ops: Operations { + load: match (self.clear_color, first_call) { + (Some(clear_color), true) => LoadOp::Clear(clear_color.into()), + (None, _) | (Some(_), false) => LoadOp::Load, + }, + store: StoreOp::Store, + }, + } + } + + pub(crate) fn mark_as_cleared(&self) { + self.is_first_call.store(false, Ordering::SeqCst); + } +} + +/// A wrapper for a [`TextureView`] that is used as a depth-only [`RenderPassDepthStencilAttachment`]. 
+#[derive(Clone)] +pub struct DepthAttachment { + pub view: TextureView, + clear_value: Option, + is_first_call: Arc, +} + +impl DepthAttachment { + pub fn new(view: TextureView, clear_value: Option) -> Self { + Self { + view, + clear_value, + is_first_call: Arc::new(AtomicBool::new(clear_value.is_some())), + } + } + + /// Get this texture view as an attachment. The attachment will be cleared with a value of + /// `clear_value` if this is the first time calling this function with `store` == [`StoreOp::Store`], + /// and a clear value was provided, otherwise it will be loaded. + pub fn get_attachment(&self, store: StoreOp) -> RenderPassDepthStencilAttachment<'_> { + let first_call = self + .is_first_call + .fetch_and(store != StoreOp::Store, Ordering::SeqCst); + + RenderPassDepthStencilAttachment { + view: &self.view, + depth_ops: Some(Operations { + load: if first_call { + // If first_call is true, then a clear value will always have been provided in the constructor + LoadOp::Clear(self.clear_value.unwrap()) + } else { + LoadOp::Load + }, + store, + }), + stencil_ops: None, + } + } +} + +/// A wrapper for a [`TextureView`] that is used as a [`RenderPassColorAttachment`] for a view +/// target's final output texture. +#[derive(Clone)] +pub struct OutputColorAttachment { + pub view: TextureView, + pub format: TextureFormat, + is_first_call: Arc, +} + +impl OutputColorAttachment { + pub fn new(view: TextureView, format: TextureFormat) -> Self { + Self { + view, + format, + is_first_call: Arc::new(AtomicBool::new(true)), + } + } + + /// Get this texture view as an attachment. The attachment will be cleared with a value of + /// the provided `clear_color` if this is the first time calling this function, otherwise it + /// will be loaded. 
+ pub fn get_attachment(&self, clear_color: Option) -> RenderPassColorAttachment<'_> { + let first_call = self.is_first_call.fetch_and(false, Ordering::SeqCst); + + RenderPassColorAttachment { + view: &self.view, + depth_slice: None, + resolve_target: None, + ops: Operations { + load: match (clear_color, first_call) { + (Some(clear_color), true) => LoadOp::Clear(clear_color.into()), + (None, _) | (Some(_), false) => LoadOp::Load, + }, + store: StoreOp::Store, + }, + } + } +} diff --git a/crates/libmarathon/src/render/texture/texture_cache.rs b/crates/libmarathon/src/render/texture/texture_cache.rs new file mode 100644 index 0000000..6d79d4b --- /dev/null +++ b/crates/libmarathon/src/render/texture/texture_cache.rs @@ -0,0 +1,108 @@ +use crate::render::{ + render_resource::{Texture, TextureView}, + renderer::RenderDevice, +}; +use bevy_ecs::{prelude::ResMut, resource::Resource}; +use bevy_platform::collections::{hash_map::Entry, HashMap}; +use wgpu::{TextureDescriptor, TextureViewDescriptor}; + +/// The internal representation of a [`CachedTexture`] used to track whether it was recently used +/// and is currently taken. +struct CachedTextureMeta { + texture: Texture, + default_view: TextureView, + taken: bool, + frames_since_last_use: usize, +} + +/// A cached GPU [`Texture`] with corresponding [`TextureView`]. +/// +/// This is useful for textures that are created repeatedly (each frame) in the rendering process +/// to reduce the amount of GPU memory allocations. +#[derive(Clone)] +pub struct CachedTexture { + pub texture: Texture, + pub default_view: TextureView, +} + +/// This resource caches textures that are created repeatedly in the rendering process and +/// are only required for one frame. +#[derive(Resource, Default)] +pub struct TextureCache { + textures: HashMap, Vec>, +} + +impl TextureCache { + /// Retrieves a texture that matches the `descriptor`. If no matching one is found a new + /// [`CachedTexture`] is created. 
+ pub fn get( + &mut self, + render_device: &RenderDevice, + descriptor: TextureDescriptor<'static>, + ) -> CachedTexture { + match self.textures.entry(descriptor) { + Entry::Occupied(mut entry) => { + for texture in entry.get_mut().iter_mut() { + if !texture.taken { + texture.frames_since_last_use = 0; + texture.taken = true; + return CachedTexture { + texture: texture.texture.clone(), + default_view: texture.default_view.clone(), + }; + } + } + + let texture = render_device.create_texture(&entry.key().clone()); + let default_view = texture.create_view(&TextureViewDescriptor::default()); + entry.get_mut().push(CachedTextureMeta { + texture: texture.clone(), + default_view: default_view.clone(), + frames_since_last_use: 0, + taken: true, + }); + CachedTexture { + texture, + default_view, + } + } + Entry::Vacant(entry) => { + let texture = render_device.create_texture(entry.key()); + let default_view = texture.create_view(&TextureViewDescriptor::default()); + entry.insert(vec![CachedTextureMeta { + texture: texture.clone(), + default_view: default_view.clone(), + taken: true, + frames_since_last_use: 0, + }]); + CachedTexture { + texture, + default_view, + } + } + } + } + + /// Returns `true` if the texture cache contains no textures. + pub fn is_empty(&self) -> bool { + self.textures.is_empty() + } + + /// Updates the cache and only retains recently used textures. + pub fn update(&mut self) { + self.textures.retain(|_, textures| { + for texture in textures.iter_mut() { + texture.frames_since_last_use += 1; + texture.taken = false; + } + + textures.retain(|texture| texture.frames_since_last_use < 3); + !textures.is_empty() + }); + } +} + +/// Updates the [`TextureCache`] to only retains recently used textures. 
+pub fn update_texture_cache_system(mut texture_cache: ResMut) { + texture_cache.update(); +} diff --git a/crates/libmarathon/src/render/tonemapping/lut_bindings.wgsl b/crates/libmarathon/src/render/tonemapping/lut_bindings.wgsl new file mode 100644 index 0000000..997f9ef --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/lut_bindings.wgsl @@ -0,0 +1,5 @@ +#define_import_path bevy_core_pipeline::tonemapping_lut_bindings + +@group(0) @binding(#TONEMAPPING_LUT_TEXTURE_BINDING_INDEX) var dt_lut_texture: texture_3d; +@group(0) @binding(#TONEMAPPING_LUT_SAMPLER_BINDING_INDEX) var dt_lut_sampler: sampler; + diff --git a/crates/libmarathon/src/render/tonemapping/luts/AgX-default_contrast.ktx2 b/crates/libmarathon/src/render/tonemapping/luts/AgX-default_contrast.ktx2 new file mode 100644 index 0000000..040fb1d Binary files /dev/null and b/crates/libmarathon/src/render/tonemapping/luts/AgX-default_contrast.ktx2 differ diff --git a/crates/libmarathon/src/render/tonemapping/luts/Blender_-11_12.ktx2 b/crates/libmarathon/src/render/tonemapping/luts/Blender_-11_12.ktx2 new file mode 100644 index 0000000..db07c84 Binary files /dev/null and b/crates/libmarathon/src/render/tonemapping/luts/Blender_-11_12.ktx2 differ diff --git a/crates/libmarathon/src/render/tonemapping/luts/info.txt b/crates/libmarathon/src/render/tonemapping/luts/info.txt new file mode 100644 index 0000000..e3b6b8a --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/luts/info.txt @@ -0,0 +1,22 @@ +--- Process for recreating AgX-default_contrast.ktx2 --- +Download: +https://github.com/MrLixm/AgXc/blob/898198e0490b0551ed81412a0c22e0b72fffb7cd/obs/obs-script/AgX-default_contrast.lut.png +Convert to vertical strip exr with: +https://gist.github.com/DGriffin91/fc8e0cfd55aaa175ac10199403bc19b8 +Convert exr to 3D ktx2 with: +https://gist.github.com/DGriffin91/49401c43378b58bce32059291097d4ca + +--- Process for recreating tony_mc_mapface.ktx2 --- +Download: 
+https://github.com/h3r2tic/tony-mc-mapface/blob/909e51c8a74251fd828770248476cb084081e08c/tony_mc_mapface.dds +Convert dds to 3D ktx2 with: +https://gist.github.com/DGriffin91/49401c43378b58bce32059291097d4ca + +--- Process for recreating Blender_-11_12.ktx2 --- +Create LUT stimulus with: +https://gist.github.com/DGriffin91/e119bf32b520e219f6e102a6eba4a0cf +Open LUT image in Blender's image editor and make sure color space is set to linear. +Export from Blender as 32bit EXR, override color space to Filmic sRGB. +Import EXR back into blender set color space to sRGB, then export as 32bit EXR override color space to linear. +Convert exr to 3D ktx2 with: +https://gist.github.com/DGriffin91/49401c43378b58bce32059291097d4ca diff --git a/crates/libmarathon/src/render/tonemapping/luts/tony_mc_mapface.ktx2 b/crates/libmarathon/src/render/tonemapping/luts/tony_mc_mapface.ktx2 new file mode 100644 index 0000000..1437594 Binary files /dev/null and b/crates/libmarathon/src/render/tonemapping/luts/tony_mc_mapface.ktx2 differ diff --git a/crates/libmarathon/src/render/tonemapping/mod.rs b/crates/libmarathon/src/render/tonemapping/mod.rs new file mode 100644 index 0000000..272dcaa --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/mod.rs @@ -0,0 +1,456 @@ +use bevy_app::prelude::*; +use bevy_asset::{ + embedded_asset, load_embedded_asset, AssetServer, Assets, Handle, RenderAssetUsages, +}; +use bevy_camera::Camera; +use bevy_ecs::prelude::*; +use bevy_image::{CompressedImageFormats, Image, ImageSampler, ImageType}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use crate::render::{ + extract_component::{ExtractComponent, ExtractComponentPlugin}, + extract_resource::{ExtractResource, ExtractResourcePlugin}, + render_asset::RenderAssets, + render_resource::{ + binding_types::{sampler, texture_2d, texture_3d, uniform_buffer}, + *, + }, + renderer::RenderDevice, + texture::{FallbackImage, GpuImage}, + view::{ExtractedView, ViewTarget, ViewUniform}, + Render, 
RenderApp, RenderStartup, RenderSystems, +}; +use bevy_shader::{load_shader_library, Shader, ShaderDefVal}; +use bitflags::bitflags; +#[cfg(not(feature = "tonemapping_luts"))] +use tracing::error; + +mod node; + +use bevy_utils::default; +pub use node::TonemappingNode; + +use crate::render::FullscreenShader; + +/// 3D LUT (look up table) textures used for tonemapping +#[derive(Resource, Clone, ExtractResource)] +pub struct TonemappingLuts { + pub blender_filmic: Handle, + pub agx: Handle, + pub tony_mc_mapface: Handle, +} + +pub struct TonemappingPlugin; + +impl Plugin for TonemappingPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "tonemapping_shared.wgsl"); + load_shader_library!(app, "lut_bindings.wgsl"); + + embedded_asset!(app, "tonemapping.wgsl"); + + if !app.world().is_resource_added::() { + let mut images = app.world_mut().resource_mut::>(); + + #[cfg(feature = "tonemapping_luts")] + let tonemapping_luts = { + TonemappingLuts { + blender_filmic: images.add(setup_tonemapping_lut_image( + include_bytes!("luts/Blender_-11_12.ktx2"), + ImageType::Extension("ktx2"), + )), + agx: images.add(setup_tonemapping_lut_image( + include_bytes!("luts/AgX-default_contrast.ktx2"), + ImageType::Extension("ktx2"), + )), + tony_mc_mapface: images.add(setup_tonemapping_lut_image( + include_bytes!("luts/tony_mc_mapface.ktx2"), + ImageType::Extension("ktx2"), + )), + } + }; + + #[cfg(not(feature = "tonemapping_luts"))] + let tonemapping_luts = { + let placeholder = images.add(lut_placeholder()); + TonemappingLuts { + blender_filmic: placeholder.clone(), + agx: placeholder.clone(), + tony_mc_mapface: placeholder, + } + }; + + app.insert_resource(tonemapping_luts); + } + + app.add_plugins(ExtractResourcePlugin::::default()); + + app.add_plugins(( + ExtractComponentPlugin::::default(), + ExtractComponentPlugin::::default(), + )); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .init_resource::>() + 
.add_systems(RenderStartup, init_tonemapping_pipeline) + .add_systems( + Render, + prepare_view_tonemapping_pipelines.in_set(RenderSystems::Prepare), + ); + } +} + +#[derive(Resource)] +pub struct TonemappingPipeline { + texture_bind_group: BindGroupLayout, + sampler: Sampler, + fullscreen_shader: FullscreenShader, + fragment_shader: Handle, +} + +/// Optionally enables a tonemapping shader that attempts to map linear input stimulus into a perceptually uniform image for a given [`Camera`] entity. +#[derive( + Component, Debug, Hash, Clone, Copy, Reflect, Default, ExtractComponent, PartialEq, Eq, +)] +#[extract_component_filter(With)] +#[reflect(Component, Debug, Hash, Default, PartialEq)] +pub enum Tonemapping { + /// Bypass tonemapping. + None, + /// Suffers from lots hue shifting, brights don't desaturate naturally. + /// Bright primaries and secondaries don't desaturate at all. + Reinhard, + /// Suffers from hue shifting. Brights don't desaturate much at all across the spectrum. + ReinhardLuminance, + /// Same base implementation that Godot 4.0 uses for Tonemap ACES. + /// + /// Not neutral, has a very specific aesthetic, intentional and dramatic hue shifting. + /// Bright greens and reds turn orange. Bright blues turn magenta. + /// Significantly increased contrast. Brights desaturate across the spectrum. + AcesFitted, + /// By Troy Sobotka + /// + /// Very neutral. Image is somewhat desaturated when compared to other tonemappers. + /// Little to no hue shifting. Subtle [Abney shifting](https://en.wikipedia.org/wiki/Abney_effect). + /// NOTE: Requires the `tonemapping_luts` cargo feature. + AgX, + /// By Tomasz Stachowiak + /// Has little hue shifting in the darks and mids, but lots in the brights. Brights desaturate across the spectrum. + /// Is sort of between Reinhard and `ReinhardLuminance`. Conceptually similar to reinhard-jodie. + /// Designed as a compromise if you want e.g. 
decent skin tones in low light, but can't afford to re-do your + /// VFX to look good without hue shifting. + SomewhatBoringDisplayTransform, + /// Current Bevy default. + /// By Tomasz Stachowiak + /// + /// Very neutral. Subtle but intentional hue shifting. Brights desaturate across the spectrum. + /// Comment from author: + /// Tony is a display transform intended for real-time applications such as games. + /// It is intentionally boring, does not increase contrast or saturation, and stays close to the + /// input stimulus where compression isn't necessary. + /// Brightness-equivalent luminance of the input stimulus is compressed. The non-linearity resembles Reinhard. + /// Color hues are preserved during compression, except for a deliberate [Bezold–Brücke shift](https://en.wikipedia.org/wiki/Bezold%E2%80%93Br%C3%BCcke_shift). + /// To avoid posterization, selective desaturation is employed, with care to avoid the [Abney effect](https://en.wikipedia.org/wiki/Abney_effect). + /// NOTE: Requires the `tonemapping_luts` cargo feature. + #[default] + TonyMcMapface, + /// Default Filmic Display Transform from blender. + /// Somewhat neutral. Suffers from hue shifting. Brights desaturate across the spectrum. + /// NOTE: Requires the `tonemapping_luts` cargo feature. + BlenderFilmic, +} + +impl Tonemapping { + pub fn is_enabled(&self) -> bool { + *self != Tonemapping::None + } +} + +bitflags! { + /// Various flags describing what tonemapping needs to do. + /// + /// This allows the shader to skip unneeded steps. + #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] + pub struct TonemappingPipelineKeyFlags: u8 { + /// The hue needs to be changed. + const HUE_ROTATE = 0x01; + /// The white balance needs to be adjusted. + const WHITE_BALANCE = 0x02; + /// Saturation/contrast/gamma/gain/lift for one or more sections + /// (shadows, midtones, highlights) need to be adjusted. 
+ const SECTIONAL_COLOR_GRADING = 0x04; + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct TonemappingPipelineKey { + deband_dither: DebandDither, + tonemapping: Tonemapping, + flags: TonemappingPipelineKeyFlags, +} + +impl SpecializedRenderPipeline for TonemappingPipeline { + type Key = TonemappingPipelineKey; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + let mut shader_defs = Vec::new(); + + shader_defs.push(ShaderDefVal::UInt( + "TONEMAPPING_LUT_TEXTURE_BINDING_INDEX".into(), + 3, + )); + shader_defs.push(ShaderDefVal::UInt( + "TONEMAPPING_LUT_SAMPLER_BINDING_INDEX".into(), + 4, + )); + + if let DebandDither::Enabled = key.deband_dither { + shader_defs.push("DEBAND_DITHER".into()); + } + + // Define shader flags depending on the color grading options in use. + if key.flags.contains(TonemappingPipelineKeyFlags::HUE_ROTATE) { + shader_defs.push("HUE_ROTATE".into()); + } + if key + .flags + .contains(TonemappingPipelineKeyFlags::WHITE_BALANCE) + { + shader_defs.push("WHITE_BALANCE".into()); + } + if key + .flags + .contains(TonemappingPipelineKeyFlags::SECTIONAL_COLOR_GRADING) + { + shader_defs.push("SECTIONAL_COLOR_GRADING".into()); + } + + match key.tonemapping { + Tonemapping::None => shader_defs.push("TONEMAP_METHOD_NONE".into()), + Tonemapping::Reinhard => shader_defs.push("TONEMAP_METHOD_REINHARD".into()), + Tonemapping::ReinhardLuminance => { + shader_defs.push("TONEMAP_METHOD_REINHARD_LUMINANCE".into()); + } + Tonemapping::AcesFitted => shader_defs.push("TONEMAP_METHOD_ACES_FITTED".into()), + Tonemapping::AgX => { + #[cfg(not(feature = "tonemapping_luts"))] + error!( + "AgX tonemapping requires the `tonemapping_luts` feature. + Either enable the `tonemapping_luts` feature for bevy in `Cargo.toml` (recommended), + or use a different `Tonemapping` method for your `Camera2d`/`Camera3d`." 
+ ); + shader_defs.push("TONEMAP_METHOD_AGX".into()); + } + Tonemapping::SomewhatBoringDisplayTransform => { + shader_defs.push("TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM".into()); + } + Tonemapping::TonyMcMapface => { + #[cfg(not(feature = "tonemapping_luts"))] + error!( + "TonyMcMapFace tonemapping requires the `tonemapping_luts` feature. + Either enable the `tonemapping_luts` feature for bevy in `Cargo.toml` (recommended), + or use a different `Tonemapping` method for your `Camera2d`/`Camera3d`." + ); + shader_defs.push("TONEMAP_METHOD_TONY_MC_MAPFACE".into()); + } + Tonemapping::BlenderFilmic => { + #[cfg(not(feature = "tonemapping_luts"))] + error!( + "BlenderFilmic tonemapping requires the `tonemapping_luts` feature. + Either enable the `tonemapping_luts` feature for bevy in `Cargo.toml` (recommended), + or use a different `Tonemapping` method for your `Camera2d`/`Camera3d`." + ); + shader_defs.push("TONEMAP_METHOD_BLENDER_FILMIC".into()); + } + } + RenderPipelineDescriptor { + label: Some("tonemapping pipeline".into()), + layout: vec![self.texture_bind_group.clone()], + vertex: self.fullscreen_shader.to_vertex_state(), + fragment: Some(FragmentState { + shader: self.fragment_shader.clone(), + shader_defs, + targets: vec![Some(ColorTargetState { + format: ViewTarget::TEXTURE_FORMAT_HDR, + blend: None, + write_mask: ColorWrites::ALL, + })], + ..default() + }), + ..default() + } + } +} + +pub fn init_tonemapping_pipeline( + mut commands: Commands, + render_device: Res, + fullscreen_shader: Res, + asset_server: Res, +) { + let mut entries = DynamicBindGroupLayoutEntries::new_with_indices( + ShaderStages::FRAGMENT, + ( + (0, uniform_buffer::(true)), + ( + 1, + texture_2d(TextureSampleType::Float { filterable: false }), + ), + (2, sampler(SamplerBindingType::NonFiltering)), + ), + ); + let lut_layout_entries = get_lut_bind_group_layout_entries(); + entries = entries.extend_with_indices(((3, lut_layout_entries[0]), (4, lut_layout_entries[1]))); + + let 
tonemap_texture_bind_group = render_device + .create_bind_group_layout("tonemapping_hdr_texture_bind_group_layout", &entries); + + let sampler = render_device.create_sampler(&SamplerDescriptor::default()); + + commands.insert_resource(TonemappingPipeline { + texture_bind_group: tonemap_texture_bind_group, + sampler, + fullscreen_shader: fullscreen_shader.clone(), + fragment_shader: load_embedded_asset!(asset_server.as_ref(), "tonemapping.wgsl"), + }); +} + +#[derive(Component)] +pub struct ViewTonemappingPipeline(CachedRenderPipelineId); + +pub fn prepare_view_tonemapping_pipelines( + mut commands: Commands, + pipeline_cache: Res, + mut pipelines: ResMut>, + upscaling_pipeline: Res, + view_targets: Query< + ( + Entity, + &ExtractedView, + Option<&Tonemapping>, + Option<&DebandDither>, + ), + With, + >, +) { + for (entity, view, tonemapping, dither) in view_targets.iter() { + // As an optimization, we omit parts of the shader that are unneeded. + let mut flags = TonemappingPipelineKeyFlags::empty(); + flags.set( + TonemappingPipelineKeyFlags::HUE_ROTATE, + view.color_grading.global.hue != 0.0, + ); + flags.set( + TonemappingPipelineKeyFlags::WHITE_BALANCE, + view.color_grading.global.temperature != 0.0 || view.color_grading.global.tint != 0.0, + ); + flags.set( + TonemappingPipelineKeyFlags::SECTIONAL_COLOR_GRADING, + view.color_grading + .all_sections() + .any(|section| *section != default()), + ); + + let key = TonemappingPipelineKey { + deband_dither: *dither.unwrap_or(&DebandDither::Disabled), + tonemapping: *tonemapping.unwrap_or(&Tonemapping::None), + flags, + }; + let pipeline = pipelines.specialize(&pipeline_cache, &upscaling_pipeline, key); + + commands + .entity(entity) + .insert(ViewTonemappingPipeline(pipeline)); + } +} +/// Enables a debanding shader that applies dithering to mitigate color banding in the final image for a given [`Camera`] entity. 
+#[derive( + Component, Debug, Hash, Clone, Copy, Reflect, Default, ExtractComponent, PartialEq, Eq, +)] +#[extract_component_filter(With)] +#[reflect(Component, Debug, Hash, Default, PartialEq)] +pub enum DebandDither { + #[default] + Disabled, + Enabled, +} + +pub fn get_lut_bindings<'a>( + images: &'a RenderAssets, + tonemapping_luts: &'a TonemappingLuts, + tonemapping: &Tonemapping, + fallback_image: &'a FallbackImage, +) -> (&'a TextureView, &'a Sampler) { + let image = match tonemapping { + // AgX lut texture used when tonemapping doesn't need a texture since it's very small (32x32x32) + Tonemapping::None + | Tonemapping::Reinhard + | Tonemapping::ReinhardLuminance + | Tonemapping::AcesFitted + | Tonemapping::AgX + | Tonemapping::SomewhatBoringDisplayTransform => &tonemapping_luts.agx, + Tonemapping::TonyMcMapface => &tonemapping_luts.tony_mc_mapface, + Tonemapping::BlenderFilmic => &tonemapping_luts.blender_filmic, + }; + let lut_image = images.get(image).unwrap_or(&fallback_image.d3); + (&lut_image.texture_view, &lut_image.sampler) +} + +pub fn get_lut_bind_group_layout_entries() -> [BindGroupLayoutEntryBuilder; 2] { + [ + texture_3d(TextureSampleType::Float { filterable: true }), + sampler(SamplerBindingType::Filtering), + ] +} + +#[expect(clippy::allow_attributes, reason = "`dead_code` is not always linted.")] +#[allow( + dead_code, + reason = "There is unused code when the `tonemapping_luts` feature is disabled." 
+)] +fn setup_tonemapping_lut_image(bytes: &[u8], image_type: ImageType) -> Image { + let image_sampler = ImageSampler::Descriptor(bevy_image::ImageSamplerDescriptor { + label: Some("Tonemapping LUT sampler".to_string()), + address_mode_u: bevy_image::ImageAddressMode::ClampToEdge, + address_mode_v: bevy_image::ImageAddressMode::ClampToEdge, + address_mode_w: bevy_image::ImageAddressMode::ClampToEdge, + mag_filter: bevy_image::ImageFilterMode::Linear, + min_filter: bevy_image::ImageFilterMode::Linear, + mipmap_filter: bevy_image::ImageFilterMode::Linear, + ..default() + }); + Image::from_buffer( + bytes, + image_type, + CompressedImageFormats::NONE, + false, + image_sampler, + RenderAssetUsages::RENDER_WORLD, + ) + .unwrap() +} + +pub fn lut_placeholder() -> Image { + let format = TextureFormat::Rgba8Unorm; + let data = vec![255, 0, 255, 255]; + Image { + data: Some(data), + data_order: TextureDataOrder::default(), + texture_descriptor: TextureDescriptor { + size: Extent3d::default(), + format, + dimension: TextureDimension::D3, + label: None, + mip_level_count: 1, + sample_count: 1, + usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST, + view_formats: &[], + }, + sampler: ImageSampler::Default, + texture_view_descriptor: None, + asset_usage: RenderAssetUsages::RENDER_WORLD, + copy_on_resize: false, + } +} diff --git a/crates/libmarathon/src/render/tonemapping/node.rs b/crates/libmarathon/src/render/tonemapping/node.rs new file mode 100644 index 0000000..02c5f96 --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/node.rs @@ -0,0 +1,148 @@ +use std::sync::Mutex; + +use crate::render::tonemapping::{TonemappingLuts, TonemappingPipeline, ViewTonemappingPipeline}; + +use bevy_ecs::{prelude::*, query::QueryItem}; +use crate::render::{ + diagnostic::RecordDiagnostics, + render_asset::RenderAssets, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_resource::{ + BindGroup, BindGroupEntries, BufferId, LoadOp, Operations, 
PipelineCache, + RenderPassColorAttachment, RenderPassDescriptor, StoreOp, TextureViewId, + }, + renderer::RenderContext, + texture::{FallbackImage, GpuImage}, + view::{ViewTarget, ViewUniformOffset, ViewUniforms}, +}; + +use super::{get_lut_bindings, Tonemapping}; + +#[derive(Default)] +pub struct TonemappingNode { + cached_bind_group: Mutex>, + last_tonemapping: Mutex>, +} + +impl ViewNode for TonemappingNode { + type ViewQuery = ( + &'static ViewUniformOffset, + &'static ViewTarget, + &'static ViewTonemappingPipeline, + &'static Tonemapping, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (view_uniform_offset, target, view_tonemapping_pipeline, tonemapping): QueryItem< + Self::ViewQuery, + >, + world: &World, + ) -> Result<(), NodeRunError> { + let pipeline_cache = world.resource::(); + let tonemapping_pipeline = world.resource::(); + let gpu_images = world.get_resource::>().unwrap(); + let fallback_image = world.resource::(); + let view_uniforms_resource = world.resource::(); + let view_uniforms = &view_uniforms_resource.uniforms; + let view_uniforms_id = view_uniforms.buffer().unwrap().id(); + + if *tonemapping == Tonemapping::None { + return Ok(()); + } + + if !target.is_hdr() { + return Ok(()); + } + + let Some(pipeline) = pipeline_cache.get_render_pipeline(view_tonemapping_pipeline.0) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let post_process = target.post_process_write(); + let source = post_process.source; + let destination = post_process.destination; + + let mut last_tonemapping = self.last_tonemapping.lock().unwrap(); + + let tonemapping_changed = if let Some(last_tonemapping) = &*last_tonemapping { + tonemapping != last_tonemapping + } else { + true + }; + if tonemapping_changed { + *last_tonemapping = Some(*tonemapping); + } + + let mut cached_bind_group = self.cached_bind_group.lock().unwrap(); + let bind_group = match &mut *cached_bind_group { + 
Some((buffer_id, texture_id, lut_id, bind_group)) + if view_uniforms_id == *buffer_id + && source.id() == *texture_id + && *lut_id != fallback_image.d3.texture_view.id() + && !tonemapping_changed => + { + bind_group + } + cached_bind_group => { + let tonemapping_luts = world.resource::(); + + let lut_bindings = + get_lut_bindings(gpu_images, tonemapping_luts, tonemapping, fallback_image); + + let bind_group = render_context.render_device().create_bind_group( + None, + &tonemapping_pipeline.texture_bind_group, + &BindGroupEntries::sequential(( + view_uniforms, + source, + &tonemapping_pipeline.sampler, + lut_bindings.0, + lut_bindings.1, + )), + ); + + let (_, _, _, bind_group) = cached_bind_group.insert(( + view_uniforms_id, + source.id(), + lut_bindings.0.id(), + bind_group, + )); + bind_group + } + }; + + let pass_descriptor = RenderPassDescriptor { + label: Some("tonemapping"), + color_attachments: &[Some(RenderPassColorAttachment { + view: destination, + depth_slice: None, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(Default::default()), // TODO shouldn't need to be cleared + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + let mut render_pass = render_context + .command_encoder() + .begin_render_pass(&pass_descriptor); + let pass_span = diagnostics.pass_span(&mut render_pass, "tonemapping"); + + render_pass.set_pipeline(pipeline); + render_pass.set_bind_group(0, bind_group, &[view_uniform_offset.offset]); + render_pass.draw(0..3, 0..1); + + pass_span.end(&mut render_pass); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/tonemapping/tonemapping.wgsl b/crates/libmarathon/src/render/tonemapping/tonemapping.wgsl new file mode 100644 index 0000000..015cd48 --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/tonemapping.wgsl @@ -0,0 +1,34 @@ +#define TONEMAPPING_PASS + +#import bevy_render::{ + view::View, + maths::powsafe, +} +#import 
bevy_core_pipeline::{ + fullscreen_vertex_shader::FullscreenVertexOutput, + tonemapping::{tone_mapping, screen_space_dither}, +} + +@group(0) @binding(0) var view: View; + +@group(0) @binding(1) var hdr_texture: texture_2d; +@group(0) @binding(2) var hdr_sampler: sampler; +@group(0) @binding(3) var dt_lut_texture: texture_3d; +@group(0) @binding(4) var dt_lut_sampler: sampler; + +@fragment +fn fragment(in: FullscreenVertexOutput) -> @location(0) vec4 { + let hdr_color = textureSample(hdr_texture, hdr_sampler, in.uv); + + var output_rgb = tone_mapping(hdr_color, view.color_grading).rgb; + +#ifdef DEBAND_DITHER + output_rgb = powsafe(output_rgb.rgb, 1.0 / 2.2); + output_rgb = output_rgb + screen_space_dither(in.position.xy); + // This conversion back to linear space is required because our output texture format is + // SRGB; the GPU will assume our output is linear and will apply an SRGB conversion. + output_rgb = powsafe(output_rgb.rgb, 2.2); +#endif + + return vec4(output_rgb, hdr_color.a); +} diff --git a/crates/libmarathon/src/render/tonemapping/tonemapping_shared.wgsl b/crates/libmarathon/src/render/tonemapping/tonemapping_shared.wgsl new file mode 100644 index 0000000..52d1ddc --- /dev/null +++ b/crates/libmarathon/src/render/tonemapping/tonemapping_shared.wgsl @@ -0,0 +1,405 @@ +#define_import_path bevy_core_pipeline::tonemapping + +#import bevy_render::{ + view::ColorGrading, + color_operations::{hsv_to_rgb, rgb_to_hsv}, + maths::{PI_2, powsafe}, +} + +#import bevy_core_pipeline::tonemapping_lut_bindings::{ + dt_lut_texture, + dt_lut_sampler, +} + +// Half the size of the crossfade region between shadows and midtones and +// between midtones and highlights. This value, 0.1, corresponds to 10% of the +// gamut on either side of the cutoff point. +const LEVEL_MARGIN: f32 = 0.1; + +// The inverse reciprocal of twice the above, used when scaling the midtone +// region. 
+const LEVEL_MARGIN_DIV: f32 = 0.5 / LEVEL_MARGIN; + +fn sample_current_lut(p: vec3) -> vec3 { + // Don't include code that will try to sample from LUTs if tonemap method doesn't require it + // Allows this file to be imported without necessarily needing the lut texture bindings +#ifdef TONEMAP_METHOD_AGX + return textureSampleLevel(dt_lut_texture, dt_lut_sampler, p, 0.0).rgb; +#else ifdef TONEMAP_METHOD_TONY_MC_MAPFACE + return textureSampleLevel(dt_lut_texture, dt_lut_sampler, p, 0.0).rgb; +#else ifdef TONEMAP_METHOD_BLENDER_FILMIC + return textureSampleLevel(dt_lut_texture, dt_lut_sampler, p, 0.0).rgb; +#else + return vec3(1.0, 0.0, 1.0); + #endif +} + +// -------------------------------------- +// --- SomewhatBoringDisplayTransform --- +// -------------------------------------- +// By Tomasz Stachowiak + +fn rgb_to_ycbcr(col: vec3) -> vec3 { + let m = mat3x3( + 0.2126, 0.7152, 0.0722, + -0.1146, -0.3854, 0.5, + 0.5, -0.4542, -0.0458 + ); + return col * m; +} + +fn ycbcr_to_rgb(col: vec3) -> vec3 { + let m = mat3x3( + 1.0, 0.0, 1.5748, + 1.0, -0.1873, -0.4681, + 1.0, 1.8556, 0.0 + ); + return max(vec3(0.0), col * m); +} + +fn tonemap_curve(v: f32) -> f32 { +#ifdef 0 + // Large linear part in the lows, but compresses highs. 
+ float c = v + v * v + 0.5 * v * v * v; + return c / (1.0 + c); +#else + return 1.0 - exp(-v); +#endif +} + +fn tonemap_curve3_(v: vec3) -> vec3 { + return vec3(tonemap_curve(v.r), tonemap_curve(v.g), tonemap_curve(v.b)); +} + +fn somewhat_boring_display_transform(col: vec3) -> vec3 { + var boring_color = col; + let ycbcr = rgb_to_ycbcr(boring_color); + + let bt = tonemap_curve(length(ycbcr.yz) * 2.4); + var desat = max((bt - 0.7) * 0.8, 0.0); + desat *= desat; + + let desat_col = mix(boring_color.rgb, ycbcr.xxx, desat); + + let tm_luma = tonemap_curve(ycbcr.x); + let tm0 = boring_color.rgb * max(0.0, tm_luma / max(1e-5, tonemapping_luminance(boring_color.rgb))); + let final_mult = 0.97; + let tm1 = tonemap_curve3_(desat_col); + + boring_color = mix(tm0, tm1, bt * bt); + + return boring_color * final_mult; +} + +// ------------------------------------------ +// ------------- Tony McMapface ------------- +// ------------------------------------------ +// By Tomasz Stachowiak +// https://github.com/h3r2tic/tony-mc-mapface + +const TONY_MC_MAPFACE_LUT_DIMS: f32 = 48.0; + +fn sample_tony_mc_mapface_lut(stimulus: vec3) -> vec3 { + var uv = (stimulus / (stimulus + 1.0)) * (f32(TONY_MC_MAPFACE_LUT_DIMS - 1.0) / f32(TONY_MC_MAPFACE_LUT_DIMS)) + 0.5 / f32(TONY_MC_MAPFACE_LUT_DIMS); + return sample_current_lut(saturate(uv)).rgb; +} + +// --------------------------------- +// ---------- ACES Fitted ---------- +// --------------------------------- + +// Same base implementation that Godot 4.0 uses for Tonemap ACES. + +// https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl + +// The code in this file was originally written by Stephen Hill (@self_shadow), who deserves all +// credit for coming up with this fit and implementing it. Buy him a beer next time you see him. 
:) + +fn RRTAndODTFit(v: vec3) -> vec3 { + let a = v * (v + 0.0245786) - 0.000090537; + let b = v * (0.983729 * v + 0.4329510) + 0.238081; + return a / b; +} + +fn ACESFitted(color: vec3) -> vec3 { + var fitted_color = color; + + // sRGB => XYZ => D65_2_D60 => AP1 => RRT_SAT + let rgb_to_rrt = mat3x3( + vec3(0.59719, 0.35458, 0.04823), + vec3(0.07600, 0.90834, 0.01566), + vec3(0.02840, 0.13383, 0.83777) + ); + + // ODT_SAT => XYZ => D60_2_D65 => sRGB + let odt_to_rgb = mat3x3( + vec3(1.60475, -0.53108, -0.07367), + vec3(-0.10208, 1.10813, -0.00605), + vec3(-0.00327, -0.07276, 1.07602) + ); + + fitted_color *= rgb_to_rrt; + + // Apply RRT and ODT + fitted_color = RRTAndODTFit(fitted_color); + + fitted_color *= odt_to_rgb; + + // Clamp to [0, 1] + fitted_color = saturate(fitted_color); + + return fitted_color; +} + +// ------------------------------- +// ------------- AgX ------------- +// ------------------------------- +// By Troy Sobotka +// https://github.com/MrLixm/AgXc +// https://github.com/sobotka/AgX + +/* + Increase color saturation of the given color data. + :param color: expected sRGB primaries input + :param saturationAmount: expected 0-1 range with 1=neutral, 0=no saturation. + -- ref[2] [4] +*/ +fn saturation(color: vec3, saturationAmount: f32) -> vec3 { + let luma = tonemapping_luminance(color); + return mix(vec3(luma), color, vec3(saturationAmount)); +} + +/* + Output log domain encoded data. + Similar to OCIO lg2 AllocationTransform. 
+ ref[0] +*/ +fn convertOpenDomainToNormalizedLog2_(color: vec3, minimum_ev: f32, maximum_ev: f32) -> vec3 { + let in_midgray = 0.18; + + // remove negative before log transform + var normalized_color = max(vec3(0.0), color); + // avoid infinite issue with log -- ref[1] + normalized_color = select(normalized_color, 0.00001525878 + normalized_color, normalized_color < vec3(0.00003051757)); + normalized_color = clamp( + log2(normalized_color / in_midgray), + vec3(minimum_ev), + vec3(maximum_ev) + ); + let total_exposure = maximum_ev - minimum_ev; + + return (normalized_color - minimum_ev) / total_exposure; +} + +// Inverse of above +fn convertNormalizedLog2ToOpenDomain(color: vec3, minimum_ev: f32, maximum_ev: f32) -> vec3 { + var open_color = color; + let in_midgray = 0.18; + let total_exposure = maximum_ev - minimum_ev; + + open_color = (open_color * total_exposure) + minimum_ev; + open_color = pow(vec3(2.0), open_color); + open_color = open_color * in_midgray; + + return open_color; +} + + +/*================= + Main processes +=================*/ + +// Prepare the data for display encoding. Converted to log domain. 
+fn applyAgXLog(Image: vec3) -> vec3 { + var prepared_image = max(vec3(0.0), Image); // clamp negatives + let r = dot(prepared_image, vec3(0.84247906, 0.0784336, 0.07922375)); + let g = dot(prepared_image, vec3(0.04232824, 0.87846864, 0.07916613)); + let b = dot(prepared_image, vec3(0.04237565, 0.0784336, 0.87914297)); + prepared_image = vec3(r, g, b); + + prepared_image = convertOpenDomainToNormalizedLog2_(prepared_image, -10.0, 6.5); + + prepared_image = clamp(prepared_image, vec3(0.0), vec3(1.0)); + return prepared_image; +} + +fn applyLUT3D(Image: vec3, block_size: f32) -> vec3 { + return sample_current_lut(Image * ((block_size - 1.0) / block_size) + 0.5 / block_size).rgb; +} + +// ------------------------- +// ------------------------- +// ------------------------- + +fn sample_blender_filmic_lut(stimulus: vec3) -> vec3 { + let block_size = 64.0; + let normalized = saturate(convertOpenDomainToNormalizedLog2_(stimulus, -11.0, 12.0)); + return applyLUT3D(normalized, block_size); +} + +// from https://64.github.io/tonemapping/ +// reinhard on RGB oversaturates colors +fn tonemapping_reinhard(color: vec3) -> vec3 { + return color / (1.0 + color); +} + +fn tonemapping_reinhard_extended(color: vec3, max_white: f32) -> vec3 { + let numerator = color * (1.0 + (color / vec3(max_white * max_white))); + return numerator / (1.0 + color); +} + +// luminance coefficients from Rec. 709. 
+// https://en.wikipedia.org/wiki/Rec._709 +fn tonemapping_luminance(v: vec3) -> f32 { + return dot(v, vec3(0.2126, 0.7152, 0.0722)); +} + +fn tonemapping_change_luminance(c_in: vec3, l_out: f32) -> vec3 { + let l_in = tonemapping_luminance(c_in); + return c_in * (l_out / l_in); +} + +fn tonemapping_reinhard_luminance(color: vec3) -> vec3 { + let l_old = tonemapping_luminance(color); + let l_new = l_old / (1.0 + l_old); + return tonemapping_change_luminance(color, l_new); +} + +fn rgb_to_srgb_simple(color: vec3) -> vec3 { + return pow(color, vec3(1.0 / 2.2)); +} + +// Source: Advanced VR Rendering, GDC 2015, Alex Vlachos, Valve, Slide 49 +// https://media.steampowered.com/apps/valve/2015/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf +fn screen_space_dither(frag_coord: vec2) -> vec3 { + var dither = vec3(dot(vec2(171.0, 231.0), frag_coord)).xxx; + dither = fract(dither.rgb / vec3(103.0, 71.0, 97.0)); + return (dither - 0.5) / 255.0; +} + +// Performs the "sectional" color grading: i.e. the color grading that applies +// individually to shadows, midtones, and highlights. +fn sectional_color_grading( + in: vec3, + color_grading: ptr, +) -> vec3 { + var color = in; + + // Determine whether the color is a shadow, midtone, or highlight. Colors + // close to the edges are considered a mix of both, to avoid sharp + // discontinuities. The formulas are taken from Blender's compositor. + + let level = (color.r + color.g + color.b) / 3.0; + + // Determine whether this color is a shadow, midtone, or highlight. If close + // to the cutoff points, blend between the two to avoid sharp color + // discontinuities. 
+ var levels = vec3(0.0); + let midtone_range = (*color_grading).midtone_range; + if (level < midtone_range.x - LEVEL_MARGIN) { + levels.x = 1.0; + } else if (level < midtone_range.x + LEVEL_MARGIN) { + levels.y = ((level - midtone_range.x) * LEVEL_MARGIN_DIV) + 0.5; + levels.z = 1.0 - levels.y; + } else if (level < midtone_range.y - LEVEL_MARGIN) { + levels.y = 1.0; + } else if (level < midtone_range.y + LEVEL_MARGIN) { + levels.z = ((level - midtone_range.y) * LEVEL_MARGIN_DIV) + 0.5; + levels.y = 1.0 - levels.z; + } else { + levels.z = 1.0; + } + + // Calculate contrast/saturation/gamma/gain/lift. + let contrast = dot(levels, (*color_grading).contrast); + let saturation = dot(levels, (*color_grading).saturation); + let gamma = dot(levels, (*color_grading).gamma); + let gain = dot(levels, (*color_grading).gain); + let lift = dot(levels, (*color_grading).lift); + + // Adjust saturation and contrast. + let luma = tonemapping_luminance(color); + color = luma + saturation * (color - luma); + color = 0.5 + (color - 0.5) * contrast; + + // The [ASC CDL] formula for color correction. Given *i*, an input color, we + // have: + // + // out = (i × s + o)ⁿ + // + // Following the normal photographic naming convention, *gain* is the *s* + // factor, *lift* is the *o* term, and the inverse of *gamma* is the *n* + // exponent. + // + // [ASC CDL]: https://en.wikipedia.org/wiki/ASC_CDL#Combined_Function + color = powsafe(color * gain + lift, 1.0 / gamma); + + // Account for exposure. + color = color * powsafe(vec3(2.0), (*color_grading).exposure); + return max(color, vec3(0.0)); +} + +fn tone_mapping(in: vec4, in_color_grading: ColorGrading) -> vec4 { + var color = max(in.rgb, vec3(0.0)); + var color_grading = in_color_grading; // So we can take pointers to it. + + // Rotate hue if needed, by converting to and from HSV. Remember that hue is + // an angle, so it needs to be modulo 2π. 
+#ifdef HUE_ROTATE + var hsv = rgb_to_hsv(color); + hsv.r = (hsv.r + color_grading.hue) % PI_2; + color = hsv_to_rgb(hsv); +#endif + + // Perform white balance correction. Conveniently, this is a linear + // transform. The matrix was pre-calculated from the temperature and tint + // values on the CPU. +#ifdef WHITE_BALANCE + color = max(color_grading.balance * color, vec3(0.0)); +#endif + + // Perform the "sectional" color grading: i.e. the color grading that + // applies individually to shadows, midtones, and highlights. +#ifdef SECTIONAL_COLOR_GRADING + color = sectional_color_grading(color, &color_grading); +#else + // If we're not doing sectional color grading, the exposure might still need + // to be applied, for example when using auto exposure. + color = color * powsafe(vec3(2.0), color_grading.exposure); +#endif + + // tone_mapping +#ifdef TONEMAP_METHOD_NONE + color = color; +#else ifdef TONEMAP_METHOD_REINHARD + color = tonemapping_reinhard(color.rgb); +#else ifdef TONEMAP_METHOD_REINHARD_LUMINANCE + color = tonemapping_reinhard_luminance(color.rgb); +#else ifdef TONEMAP_METHOD_ACES_FITTED + color = ACESFitted(color.rgb); +#else ifdef TONEMAP_METHOD_AGX + color = applyAgXLog(color); + color = applyLUT3D(color, 32.0); +#else ifdef TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM + color = somewhat_boring_display_transform(color.rgb); +#else ifdef TONEMAP_METHOD_TONY_MC_MAPFACE + color = sample_tony_mc_mapface_lut(color); +#else ifdef TONEMAP_METHOD_BLENDER_FILMIC + color = sample_blender_filmic_lut(color.rgb); +#endif + + // Perceptual post tonemapping grading + color = saturation(color, color_grading.post_saturation); + + return vec4(color, in.a); +} + +// This is an **incredibly crude** approximation of the inverse of the tone mapping function. 
+// We assume here that there's a simple linear relationship between the input and output +// which is not true at all, but useful to at least preserve the overall luminance of colors +// when sampling from an already tonemapped image. (e.g. for transmissive materials when HDR is off) +fn approximate_inverse_tone_mapping(in: vec4, color_grading: ColorGrading) -> vec4 { + let out = tone_mapping(in, color_grading); + let approximate_ratio = length(in.rgb) / length(out.rgb); + return vec4(in.rgb * approximate_ratio, in.a); +} diff --git a/crates/libmarathon/src/render/upscaling/mod.rs b/crates/libmarathon/src/render/upscaling/mod.rs new file mode 100644 index 0000000..2a679ad --- /dev/null +++ b/crates/libmarathon/src/render/upscaling/mod.rs @@ -0,0 +1,88 @@ +use crate::render::blit::{BlitPipeline, BlitPipelineKey}; +use bevy_app::prelude::*; +use bevy_camera::CameraOutputMode; +use bevy_ecs::prelude::*; +use bevy_platform::collections::HashSet; +use crate::render::{ + camera::ExtractedCamera, render_resource::*, view::ViewTarget, Render, RenderApp, RenderSystems, +}; + +mod node; + +pub use node::UpscalingNode; + +pub struct UpscalingPlugin; + +impl Plugin for UpscalingPlugin { + fn build(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.add_systems( + Render, + // This system should probably technically be run *after* all of the other systems + // that might modify `PipelineCache` via interior mutability, but for now, + // we've chosen to simply ignore the ambiguities out of a desire for a better refactor + // and aversion to extensive and intrusive system ordering. + // See https://github.com/bevyengine/bevy/issues/14770 for more context. 
+ prepare_view_upscaling_pipelines + .in_set(RenderSystems::Prepare) + .ambiguous_with_all(), + ); + } + } +} + +#[derive(Component)] +pub struct ViewUpscalingPipeline(CachedRenderPipelineId); + +fn prepare_view_upscaling_pipelines( + mut commands: Commands, + mut pipeline_cache: ResMut, + mut pipelines: ResMut>, + blit_pipeline: Res, + view_targets: Query<(Entity, &ViewTarget, Option<&ExtractedCamera>)>, +) { + let mut output_textures = >::default(); + for (entity, view_target, camera) in view_targets.iter() { + let out_texture_id = view_target.out_texture().id(); + let blend_state = if let Some(extracted_camera) = camera { + match extracted_camera.output_mode { + CameraOutputMode::Skip => None, + CameraOutputMode::Write { blend_state, .. } => { + let already_seen = output_textures.contains(&out_texture_id); + output_textures.insert(out_texture_id); + + match blend_state { + None => { + // If we've already seen this output for a camera and it doesn't have an output blend + // mode configured, default to alpha blend so that we don't accidentally overwrite + // the output texture + if already_seen { + Some(BlendState::ALPHA_BLENDING) + } else { + None + } + } + _ => blend_state, + } + } + } + } else { + output_textures.insert(out_texture_id); + None + }; + + let key = BlitPipelineKey { + texture_format: view_target.out_texture_format(), + blend_state, + samples: 1, + }; + let pipeline = pipelines.specialize(&pipeline_cache, &blit_pipeline, key); + + // Ensure the pipeline is loaded before continuing the frame to prevent frames without any GPU work submitted + pipeline_cache.block_on_render_pipeline(pipeline); + + commands + .entity(entity) + .insert(ViewUpscalingPipeline(pipeline)); + } +} diff --git a/crates/libmarathon/src/render/upscaling/node.rs b/crates/libmarathon/src/render/upscaling/node.rs new file mode 100644 index 0000000..57ac164 --- /dev/null +++ b/crates/libmarathon/src/render/upscaling/node.rs @@ -0,0 +1,104 @@ +use crate::render::{blit::BlitPipeline, 
upscaling::ViewUpscalingPipeline}; +use bevy_camera::{CameraOutputMode, ClearColor, ClearColorConfig}; +use bevy_ecs::{prelude::*, query::QueryItem}; +use crate::render::{ + camera::ExtractedCamera, + diagnostic::RecordDiagnostics, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_resource::{BindGroup, PipelineCache, RenderPassDescriptor, TextureViewId}, + renderer::RenderContext, + view::ViewTarget, +}; +use std::sync::Mutex; + +#[derive(Default)] +pub struct UpscalingNode { + cached_texture_bind_group: Mutex>, +} + +impl ViewNode for UpscalingNode { + type ViewQuery = ( + &'static ViewTarget, + &'static ViewUpscalingPipeline, + Option<&'static ExtractedCamera>, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + (target, upscaling_target, camera): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + let pipeline_cache = world.resource::(); + let blit_pipeline = world.resource::(); + let clear_color_global = world.resource::(); + + let diagnostics = render_context.diagnostic_recorder(); + + let clear_color = if let Some(camera) = camera { + match camera.output_mode { + CameraOutputMode::Write { clear_color, .. 
} => clear_color, + CameraOutputMode::Skip => return Ok(()), + } + } else { + ClearColorConfig::Default + }; + let clear_color = match clear_color { + ClearColorConfig::Default => Some(clear_color_global.0), + ClearColorConfig::Custom(color) => Some(color), + ClearColorConfig::None => None, + }; + let converted_clear_color = clear_color.map(Into::into); + // texture to be upscaled to the output texture + let main_texture_view = target.main_texture_view(); + + let mut cached_bind_group = self.cached_texture_bind_group.lock().unwrap(); + let bind_group = match &mut *cached_bind_group { + Some((id, bind_group)) if main_texture_view.id() == *id => bind_group, + cached_bind_group => { + let bind_group = blit_pipeline + .create_bind_group(render_context.render_device(), main_texture_view); + + let (_, bind_group) = + cached_bind_group.insert((main_texture_view.id(), bind_group)); + bind_group + } + }; + + let Some(pipeline) = pipeline_cache.get_render_pipeline(upscaling_target.0) else { + return Ok(()); + }; + + let pass_descriptor = RenderPassDescriptor { + label: Some("upscaling"), + color_attachments: &[Some( + target.out_texture_color_attachment(converted_clear_color), + )], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + let mut render_pass = render_context + .command_encoder() + .begin_render_pass(&pass_descriptor); + let pass_span = diagnostics.pass_span(&mut render_pass, "upscaling"); + + if let Some(camera) = camera + && let Some(viewport) = &camera.viewport + { + let size = viewport.physical_size; + let position = viewport.physical_position; + render_pass.set_scissor_rect(position.x, position.y, size.x, size.y); + } + + render_pass.set_pipeline(pipeline); + render_pass.set_bind_group(0, bind_group, &[]); + render_pass.draw(0..3, 0..1); + + pass_span.end(&mut render_pass); + + Ok(()) + } +} diff --git a/crates/libmarathon/src/render/view/mod.rs b/crates/libmarathon/src/render/view/mod.rs new file mode 100644 
index 0000000..d116455 --- /dev/null +++ b/crates/libmarathon/src/render/view/mod.rs @@ -0,0 +1,1135 @@ +pub mod visibility; +pub mod window; + +use bevy_camera::{ + primitives::Frustum, CameraMainTextureUsages, ClearColor, ClearColorConfig, Exposure, + MainPassResolutionOverride, NormalizedRenderTarget, +}; +use bevy_diagnostic::FrameCount; +pub use visibility::*; +pub use window::*; + +use crate::render::{ + camera::{ExtractedCamera, MipBias, NormalizedRenderTargetExt as _, TemporalJitter}, + experimental::occlusion_culling::OcclusionCulling, + extract_component::ExtractComponentPlugin, + render_asset::RenderAssets, + render_phase::ViewRangefinder3d, + render_resource::{DynamicUniformBuffer, ShaderType, Texture, TextureView}, + renderer::{RenderDevice, RenderQueue}, + sync_world::MainEntity, + texture::{ + CachedTexture, ColorAttachment, DepthAttachment, GpuImage, ManualTextureViews, + OutputColorAttachment, TextureCache, + }, + Render, RenderApp, RenderSystems, +}; +use std::sync::Arc; +use bevy_app::{App, Plugin}; +use bevy_color::LinearRgba; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::prelude::*; +use bevy_image::{BevyDefault as _, ToExtents}; +use bevy_math::{mat3, vec2, vec3, Mat3, Mat4, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; +use bevy_platform::collections::{hash_map::Entry, HashMap}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use macros::ExtractComponent; +use bevy_shader::load_shader_library; +use bevy_transform::components::GlobalTransform; +use core::{ + ops::Range, + sync::atomic::{AtomicUsize, Ordering}, +}; +use wgpu::{ + BufferUsages, RenderPassColorAttachment, RenderPassDepthStencilAttachment, StoreOp, + TextureDescriptor, TextureDimension, TextureFormat, TextureUsages, +}; + +/// The matrix that converts from the RGB to the LMS color space. 
+/// +/// To derive this, first we convert from RGB to [CIE 1931 XYZ]: +/// +/// ```text +/// ⎡ X ⎤ ⎡ 0.490 0.310 0.200 ⎤ ⎡ R ⎤ +/// ⎢ Y ⎥ = ⎢ 0.177 0.812 0.011 ⎥ ⎢ G ⎥ +/// ⎣ Z ⎦ ⎣ 0.000 0.010 0.990 ⎦ ⎣ B ⎦ +/// ``` +/// +/// Then we convert to LMS according to the [CAM16 standard matrix]: +/// +/// ```text +/// ⎡ L ⎤ ⎡ 0.401 0.650 -0.051 ⎤ ⎡ X ⎤ +/// ⎢ M ⎥ = ⎢ -0.250 1.204 0.046 ⎥ ⎢ Y ⎥ +/// ⎣ S ⎦ ⎣ -0.002 0.049 0.953 ⎦ ⎣ Z ⎦ +/// ``` +/// +/// The resulting matrix is just the concatenation of these two matrices, to do +/// the conversion in one step. +/// +/// [CIE 1931 XYZ]: https://en.wikipedia.org/wiki/CIE_1931_color_space +/// [CAM16 standard matrix]: https://en.wikipedia.org/wiki/LMS_color_space +static RGB_TO_LMS: Mat3 = mat3( + vec3(0.311692, 0.0905138, 0.00764433), + vec3(0.652085, 0.901341, 0.0486554), + vec3(0.0362225, 0.00814478, 0.943700), +); + +/// The inverse of the [`RGB_TO_LMS`] matrix, converting from the LMS color +/// space back to RGB. +static LMS_TO_RGB: Mat3 = mat3( + vec3(4.06305, -0.40791, -0.0118812), + vec3(-2.93241, 1.40437, -0.0486532), + vec3(-0.130646, 0.00353630, 1.0605344), +); + +/// The [CIE 1931] *xy* chromaticity coordinates of the [D65 white point]. +/// +/// [CIE 1931]: https://en.wikipedia.org/wiki/CIE_1931_color_space +/// [D65 white point]: https://en.wikipedia.org/wiki/Standard_illuminant#D65_values +static D65_XY: Vec2 = vec2(0.31272, 0.32903); + +/// The [D65 white point] in [LMS color space]. 
+/// +/// [LMS color space]: https://en.wikipedia.org/wiki/LMS_color_space +/// [D65 white point]: https://en.wikipedia.org/wiki/Standard_illuminant#D65_values +static D65_LMS: Vec3 = vec3(0.975538, 1.01648, 1.08475); + +pub struct ViewPlugin; + +impl Plugin for ViewPlugin { + fn build(&self, app: &mut App) { + load_shader_library!(app, "view.wgsl"); + + app + // NOTE: windows.is_changed() handles cases where a window was resized + .add_plugins(( + ExtractComponentPlugin::::default(), + ExtractComponentPlugin::::default(), + ExtractComponentPlugin::::default(), + RenderVisibilityRangePlugin, + )); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.add_systems( + Render, + ( + // `TextureView`s need to be dropped before reconfiguring window surfaces. + clear_view_attachments + .in_set(RenderSystems::ManageViews) + .before(create_surfaces), + prepare_view_attachments + .in_set(RenderSystems::ManageViews) + .before(prepare_view_targets) + .after(prepare_windows), + prepare_view_targets + .in_set(RenderSystems::ManageViews) + .after(prepare_windows) + .after(crate::render::render_asset::prepare_assets::) + .ambiguous_with(crate::render::camera::sort_cameras), // doesn't use `sorted_camera_index_for_target` + prepare_view_uniforms.in_set(RenderSystems::PrepareResources), + ), + ); + } + } + + fn finish(&self, app: &mut App) { + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::(); + } + } +} + +/// Component for configuring the number of samples for [Multi-Sample Anti-Aliasing](https://en.wikipedia.org/wiki/Multisample_anti-aliasing) +/// for a [`Camera`](bevy_camera::Camera). +/// +/// Defaults to 4 samples. A higher number of samples results in smoother edges. +/// +/// Some advanced rendering features may require that MSAA is disabled. +/// +/// Note that the web currently only supports 1 or 4 samples. 
+#[derive( + Component, + Default, + Clone, + Copy, + ExtractComponent, + Reflect, + PartialEq, + PartialOrd, + Eq, + Hash, + Debug, +)] +#[reflect(Component, Default, PartialEq, Hash, Debug)] +pub enum Msaa { + Off = 1, + Sample2 = 2, + #[default] + Sample4 = 4, + Sample8 = 8, +} + +impl Msaa { + #[inline] + pub fn samples(&self) -> u32 { + *self as u32 + } + + pub fn from_samples(samples: u32) -> Self { + match samples { + 1 => Msaa::Off, + 2 => Msaa::Sample2, + 4 => Msaa::Sample4, + 8 => Msaa::Sample8, + _ => panic!("Unsupported MSAA sample count: {samples}"), + } + } +} + +/// If this component is added to a camera, the camera will use an intermediate "high dynamic range" render texture. +/// This allows rendering with a wider range of lighting values. However, this does *not* affect +/// whether the camera will render with hdr display output (which bevy does not support currently) +/// and only affects the intermediate render texture. +#[derive( + Component, Default, Copy, Clone, ExtractComponent, Reflect, PartialEq, Eq, Hash, Debug, +)] +#[reflect(Component, Default, PartialEq, Hash, Debug)] +pub struct Hdr; + +/// An identifier for a view that is stable across frames. +/// +/// We can't use [`Entity`] for this because render world entities aren't +/// stable, and we can't use just [`MainEntity`] because some main world views +/// extract to multiple render world views. For example, a directional light +/// extracts to one render world view per cascade, and a point light extracts to +/// one render world view per cubemap face. So we pair the main entity with an +/// *auxiliary entity* and a *subview index*, which *together* uniquely identify +/// a view in the render world in a way that's stable from frame to frame. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct RetainedViewEntity { + /// The main entity that this view corresponds to. + pub main_entity: MainEntity, + + /// Another entity associated with the view entity. 
+ /// + /// This is currently used for shadow cascades. If there are multiple + /// cameras, each camera needs to have its own set of shadow cascades. Thus + /// the light and subview index aren't themselves enough to uniquely + /// identify a shadow cascade: we need the camera that the cascade is + /// associated with as well. This entity stores that camera. + /// + /// If not present, this will be `MainEntity(Entity::PLACEHOLDER)`. + pub auxiliary_entity: MainEntity, + + /// The index of the view corresponding to the entity. + /// + /// For example, for point lights that cast shadows, this is the index of + /// the cubemap face (0 through 5 inclusive). For directional lights, this + /// is the index of the cascade. + pub subview_index: u32, +} + +impl RetainedViewEntity { + /// Creates a new [`RetainedViewEntity`] from the given main world entity, + /// auxiliary main world entity, and subview index. + /// + /// See [`RetainedViewEntity::subview_index`] for an explanation of what + /// `auxiliary_entity` and `subview_index` are. + pub fn new( + main_entity: MainEntity, + auxiliary_entity: Option, + subview_index: u32, + ) -> Self { + Self { + main_entity, + auxiliary_entity: auxiliary_entity.unwrap_or(Entity::PLACEHOLDER.into()), + subview_index, + } + } +} + +/// Describes a camera in the render world. +/// +/// Each entity in the main world can potentially extract to multiple subviews, +/// each of which has a [`RetainedViewEntity::subview_index`]. For instance, 3D +/// cameras extract to both a 3D camera subview with index 0 and a special UI +/// subview with index 1. Likewise, point lights with shadows extract to 6 +/// subviews, one for each side of the shadow cubemap. +#[derive(Component)] +pub struct ExtractedView { + /// The entity in the main world corresponding to this render world view. 
+ pub retained_view_entity: RetainedViewEntity, + /// Typically a column-major right-handed projection matrix, one of either: + /// + /// Perspective (infinite reverse z) + /// ```text + /// f = 1 / tan(fov_y_radians / 2) + /// + /// ⎡ f / aspect 0 0 0 ⎤ + /// ⎢ 0 f 0 0 ⎥ + /// ⎢ 0 0 0 near ⎥ + /// ⎣ 0 0 -1 0 ⎦ + /// ``` + /// + /// Orthographic + /// ```text + /// w = right - left + /// h = top - bottom + /// d = far - near + /// cw = -right - left + /// ch = -top - bottom + /// + /// ⎡ 2 / w 0 0 cw / w ⎤ + /// ⎢ 0 2 / h 0 ch / h ⎥ + /// ⎢ 0 0 1 / d far / d ⎥ + /// ⎣ 0 0 0 1 ⎦ + /// ``` + /// + /// `clip_from_view[3][3] == 1.0` is the standard way to check if a projection is orthographic + /// + /// Glam matrices are column major, so for example getting the near plane of a perspective projection is `clip_from_view[3][2]` + /// + /// Custom projections are also possible however. + pub clip_from_view: Mat4, + pub world_from_view: GlobalTransform, + // The view-projection matrix. When provided it is used instead of deriving it from + // `projection` and `transform` fields, which can be helpful in cases where numerical + // stability matters and there is a more direct way to derive the view-projection matrix. + pub clip_from_world: Option, + pub hdr: bool, + // uvec4(origin.x, origin.y, width, height) + pub viewport: UVec4, + pub color_grading: ColorGrading, +} + +impl ExtractedView { + /// Creates a 3D rangefinder for a view + pub fn rangefinder3d(&self) -> ViewRangefinder3d { + ViewRangefinder3d::from_world_from_view(&self.world_from_view.affine()) + } +} + +/// Configures filmic color grading parameters to adjust the image appearance. +/// +/// Color grading is applied just before tonemapping for a given +/// [`Camera`](bevy_camera::Camera) entity, with the sole exception of the +/// `post_saturation` value in [`ColorGradingGlobal`], which is applied after +/// tonemapping. 
+#[derive(Component, Reflect, Debug, Default, Clone)] +#[reflect(Component, Default, Debug, Clone)] +pub struct ColorGrading { + /// Filmic color grading values applied to the image as a whole (as opposed + /// to individual sections, like shadows and highlights). + pub global: ColorGradingGlobal, + + /// Color grading values that are applied to the darker parts of the image. + /// + /// The cutoff points can be customized with the + /// [`ColorGradingGlobal::midtones_range`] field. + pub shadows: ColorGradingSection, + + /// Color grading values that are applied to the parts of the image with + /// intermediate brightness. + /// + /// The cutoff points can be customized with the + /// [`ColorGradingGlobal::midtones_range`] field. + pub midtones: ColorGradingSection, + + /// Color grading values that are applied to the lighter parts of the image. + /// + /// The cutoff points can be customized with the + /// [`ColorGradingGlobal::midtones_range`] field. + pub highlights: ColorGradingSection, +} + +/// Filmic color grading values applied to the image as a whole (as opposed to +/// individual sections, like shadows and highlights). +#[derive(Clone, Debug, Reflect)] +#[reflect(Default, Clone)] +pub struct ColorGradingGlobal { + /// Exposure value (EV) offset, measured in stops. + pub exposure: f32, + + /// An adjustment made to the [CIE 1931] chromaticity *x* value. + /// + /// Positive values make the colors redder. Negative values make the colors + /// bluer. This has no effect on luminance (brightness). + /// + /// [CIE 1931]: https://en.wikipedia.org/wiki/CIE_1931_color_space#CIE_xy_chromaticity_diagram_and_the_CIE_xyY_color_space + pub temperature: f32, + + /// An adjustment made to the [CIE 1931] chromaticity *y* value. + /// + /// Positive values make the colors more magenta. Negative values make the + /// colors greener. This has no effect on luminance (brightness). 
+ /// + /// [CIE 1931]: https://en.wikipedia.org/wiki/CIE_1931_color_space#CIE_xy_chromaticity_diagram_and_the_CIE_xyY_color_space + pub tint: f32, + + /// An adjustment to the [hue], in radians. + /// + /// Adjusting this value changes the perceived colors in the image: red to + /// yellow to green to blue, etc. It has no effect on the saturation or + /// brightness of the colors. + /// + /// [hue]: https://en.wikipedia.org/wiki/HSL_and_HSV#Formal_derivation + pub hue: f32, + + /// Saturation adjustment applied after tonemapping. + /// Values below 1.0 desaturate, with a value of 0.0 resulting in a grayscale image + /// with luminance defined by ITU-R BT.709 + /// Values above 1.0 increase saturation. + pub post_saturation: f32, + + /// The luminance (brightness) ranges that are considered part of the + /// "midtones" of the image. + /// + /// This affects which [`ColorGradingSection`]s apply to which colors. Note + /// that the sections smoothly blend into one another, to avoid abrupt + /// transitions. + /// + /// The default value is 0.2 to 0.7. + pub midtones_range: Range, +} + +/// The [`ColorGrading`] structure, packed into the most efficient form for the +/// GPU. +#[derive(Clone, Copy, Debug, ShaderType)] +pub struct ColorGradingUniform { + pub balance: Mat3, + pub saturation: Vec3, + pub contrast: Vec3, + pub gamma: Vec3, + pub gain: Vec3, + pub lift: Vec3, + pub midtone_range: Vec2, + pub exposure: f32, + pub hue: f32, + pub post_saturation: f32, +} + +/// A section of color grading values that can be selectively applied to +/// shadows, midtones, and highlights. +#[derive(Reflect, Debug, Copy, Clone, PartialEq)] +#[reflect(Clone, PartialEq)] +pub struct ColorGradingSection { + /// Values below 1.0 desaturate, with a value of 0.0 resulting in a grayscale image + /// with luminance defined by ITU-R BT.709. + /// Values above 1.0 increase saturation. + pub saturation: f32, + + /// Adjusts the range of colors. + /// + /// A value of 1.0 applies no changes. 
Values below 1.0 move the colors more + /// toward a neutral gray. Values above 1.0 spread the colors out away from + /// the neutral gray. + pub contrast: f32, + + /// A nonlinear luminance adjustment, mainly affecting the high end of the + /// range. + /// + /// This is the *n* exponent in the standard [ASC CDL] formula for color + /// correction: + /// + /// ```text + /// out = (i × s + o)ⁿ + /// ``` + /// + /// [ASC CDL]: https://en.wikipedia.org/wiki/ASC_CDL#Combined_Function + pub gamma: f32, + + /// A linear luminance adjustment, mainly affecting the middle part of the + /// range. + /// + /// This is the *s* factor in the standard [ASC CDL] formula for color + /// correction: + /// + /// ```text + /// out = (i × s + o)ⁿ + /// ``` + /// + /// [ASC CDL]: https://en.wikipedia.org/wiki/ASC_CDL#Combined_Function + pub gain: f32, + + /// A fixed luminance adjustment, mainly affecting the lower part of the + /// range. + /// + /// This is the *o* term in the standard [ASC CDL] formula for color + /// correction: + /// + /// ```text + /// out = (i × s + o)ⁿ + /// ``` + /// + /// [ASC CDL]: https://en.wikipedia.org/wiki/ASC_CDL#Combined_Function + pub lift: f32, +} + +impl Default for ColorGradingGlobal { + fn default() -> Self { + Self { + exposure: 0.0, + temperature: 0.0, + tint: 0.0, + hue: 0.0, + post_saturation: 1.0, + midtones_range: 0.2..0.7, + } + } +} + +impl Default for ColorGradingSection { + fn default() -> Self { + Self { + saturation: 1.0, + contrast: 1.0, + gamma: 1.0, + gain: 1.0, + lift: 0.0, + } + } +} + +impl ColorGrading { + /// Creates a new [`ColorGrading`] instance in which shadows, midtones, and + /// highlights all have the same set of color grading values. 
+ pub fn with_identical_sections( + global: ColorGradingGlobal, + section: ColorGradingSection, + ) -> ColorGrading { + ColorGrading { + global, + highlights: section, + midtones: section, + shadows: section, + } + } + + /// Returns an iterator that visits the shadows, midtones, and highlights + /// sections, in that order. + pub fn all_sections(&self) -> impl Iterator<Item = &ColorGradingSection> { + [&self.shadows, &self.midtones, &self.highlights].into_iter() + } + + /// Applies the given mutating function to the shadows, midtones, and + /// highlights sections, in that order. + /// + /// Returns an array composed of the results of such evaluation, in that + /// order. + pub fn all_sections_mut(&mut self) -> impl Iterator<Item = &mut ColorGradingSection> { + [&mut self.shadows, &mut self.midtones, &mut self.highlights].into_iter() + } +} + +#[derive(Clone, ShaderType)] +pub struct ViewUniform { + pub clip_from_world: Mat4, + pub unjittered_clip_from_world: Mat4, + pub world_from_clip: Mat4, + pub world_from_view: Mat4, + pub view_from_world: Mat4, + /// Typically a column-major right-handed projection matrix, one of either: + /// + /// Perspective (infinite reverse z) + /// ```text + /// f = 1 / tan(fov_y_radians / 2) + /// + /// ⎡ f / aspect 0 0 0 ⎤ + /// ⎢ 0 f 0 0 ⎥ + /// ⎢ 0 0 0 near ⎥ + /// ⎣ 0 0 -1 0 ⎦ + /// ``` + /// + /// Orthographic + /// ```text + /// w = right - left + /// h = top - bottom + /// d = far - near + /// cw = -right - left + /// ch = -top - bottom + /// + /// ⎡ 2 / w 0 0 cw / w ⎤ + /// ⎢ 0 2 / h 0 ch / h ⎥ + /// ⎢ 0 0 1 / d far / d ⎥ + /// ⎣ 0 0 0 1 ⎦ + /// ``` + /// + /// `clip_from_view[3][3] == 1.0` is the standard way to check if a projection is orthographic + /// + /// Glam matrices are column major, so for example getting the near plane of a perspective projection is `clip_from_view[3][2]` + /// + /// Custom projections are also possible however. 
+ pub clip_from_view: Mat4, + pub view_from_clip: Mat4, + pub world_position: Vec3, + pub exposure: f32, + // viewport(x_origin, y_origin, width, height) + pub viewport: Vec4, + pub main_pass_viewport: Vec4, + /// 6 world-space half spaces (normal: vec3, distance: f32) ordered left, right, top, bottom, near, far. + /// The normal vectors point towards the interior of the frustum. + /// A half space contains `p` if `normal.dot(p) + distance > 0.` + pub frustum: [Vec4; 6], + pub color_grading: ColorGradingUniform, + pub mip_bias: f32, + pub frame_count: u32, +} + +#[derive(Resource)] +pub struct ViewUniforms { + pub uniforms: DynamicUniformBuffer<ViewUniform>, +} + +impl FromWorld for ViewUniforms { + fn from_world(world: &mut World) -> Self { + let mut uniforms = DynamicUniformBuffer::default(); + uniforms.set_label(Some("view_uniforms_buffer")); + + let render_device = world.resource::<RenderDevice>(); + if render_device.limits().max_storage_buffers_per_shader_stage > 0 { + uniforms.add_usages(BufferUsages::STORAGE); + } + + Self { uniforms } + } +} + +#[derive(Component)] +pub struct ViewUniformOffset { + pub offset: u32, +} + +#[derive(Component)] +pub struct ViewTarget { + main_textures: MainTargetTextures, + main_texture_format: TextureFormat, + /// 0 represents `main_textures.a`, 1 represents `main_textures.b` + /// This is shared across view targets with the same render target + main_texture: Arc<AtomicUsize>, + out_texture: OutputColorAttachment, +} + +/// Contains [`OutputColorAttachment`] used for each target present on any view in the current +/// frame, after being prepared by [`prepare_view_attachments`]. Users that want to override +/// the default output color attachment for a specific target can do so by adding a +/// [`OutputColorAttachment`] to this resource before [`prepare_view_targets`] is called. 
+#[derive(Resource, Default, Deref, DerefMut)] +pub struct ViewTargetAttachments(HashMap<NormalizedRenderTarget, OutputColorAttachment>); + +pub struct PostProcessWrite<'a> { + pub source: &'a TextureView, + pub source_texture: &'a Texture, + pub destination: &'a TextureView, + pub destination_texture: &'a Texture, +} + +impl From<ColorGrading> for ColorGradingUniform { + fn from(component: ColorGrading) -> Self { + // Compute the balance matrix that will be used to apply the white + // balance adjustment to an RGB color. Our general approach will be to + // convert both the color and the developer-supplied white point to the + // LMS color space, apply the conversion, and then convert back. + // + // First, we start with the CIE 1931 *xy* values of the standard D65 + // illuminant: + // + // + // We then adjust them based on the developer's requested white balance. + let white_point_xy = D65_XY + vec2(-component.global.temperature, component.global.tint); + + // Convert the white point from CIE 1931 *xy* to LMS. First, we convert to XYZ: + // + // Y Y + // Y = 1 X = ─ x Z = ─ (1 - x - y) + // y y + // + // Then we convert from XYZ to LMS color space, using the CAM16 matrix + // from : + // + // ⎡ L ⎤ ⎡ 0.401 0.650 -0.051 ⎤ ⎡ X ⎤ + // ⎢ M ⎥ = ⎢ -0.250 1.204 0.046 ⎥ ⎢ Y ⎥ + // ⎣ S ⎦ ⎣ -0.002 0.049 0.953 ⎦ ⎣ Z ⎦ + // + // The following formula is just a simplification of the above. + + let white_point_lms = vec3(0.701634, 1.15856, -0.904175) + + (vec3(-0.051461, 0.045854, 0.953127) + + vec3(0.452749, -0.296122, -0.955206) * white_point_xy.x) + / white_point_xy.y; + + // Now that we're in LMS space, perform the white point scaling. + let white_point_adjustment = Mat3::from_diagonal(D65_LMS / white_point_lms); + + // Finally, combine the RGB → LMS → corrected LMS → corrected RGB + // pipeline into a single 3×3 matrix. 
+ let balance = LMS_TO_RGB * white_point_adjustment * RGB_TO_LMS; + + Self { + balance, + saturation: vec3( + component.shadows.saturation, + component.midtones.saturation, + component.highlights.saturation, + ), + contrast: vec3( + component.shadows.contrast, + component.midtones.contrast, + component.highlights.contrast, + ), + gamma: vec3( + component.shadows.gamma, + component.midtones.gamma, + component.highlights.gamma, + ), + gain: vec3( + component.shadows.gain, + component.midtones.gain, + component.highlights.gain, + ), + lift: vec3( + component.shadows.lift, + component.midtones.lift, + component.highlights.lift, + ), + midtone_range: vec2( + component.global.midtones_range.start, + component.global.midtones_range.end, + ), + exposure: component.global.exposure, + hue: component.global.hue, + post_saturation: component.global.post_saturation, + } + } +} + +/// Add this component to a camera to disable *indirect mode*. +/// +/// Indirect mode, automatically enabled on supported hardware, allows Bevy to +/// offload transform and cull operations to the GPU, reducing CPU overhead. +/// Doing this, however, reduces the amount of control that your app has over +/// instancing decisions. In certain circumstances, you may want to disable +/// indirect drawing so that your app can manually instance meshes as it sees +/// fit. See the `custom_shader_instancing` example. +/// +/// The vast majority of applications will not need to use this component, as it +/// generally reduces rendering performance. +/// +/// Note: This component should only be added when initially spawning a camera. Adding +/// or removing after spawn can result in unspecified behavior. +#[derive(Component, Default)] +pub struct NoIndirectDrawing; + +impl ViewTarget { + pub const TEXTURE_FORMAT_HDR: TextureFormat = TextureFormat::Rgba16Float; + + /// Retrieve this target's main texture's color attachment. 
+ pub fn get_color_attachment(&self) -> RenderPassColorAttachment<'_> { + if self.main_texture.load(Ordering::SeqCst) == 0 { + self.main_textures.a.get_attachment() + } else { + self.main_textures.b.get_attachment() + } + } + + /// Retrieve this target's "unsampled" main texture's color attachment. + pub fn get_unsampled_color_attachment(&self) -> RenderPassColorAttachment<'_> { + if self.main_texture.load(Ordering::SeqCst) == 0 { + self.main_textures.a.get_unsampled_attachment() + } else { + self.main_textures.b.get_unsampled_attachment() + } + } + + /// The "main" unsampled texture. + pub fn main_texture(&self) -> &Texture { + if self.main_texture.load(Ordering::SeqCst) == 0 { + &self.main_textures.a.texture.texture + } else { + &self.main_textures.b.texture.texture + } + } + + /// The _other_ "main" unsampled texture. + /// In most cases you should use [`Self::main_texture`] instead and never this. + /// The textures will naturally be swapped when [`Self::post_process_write`] is called. + /// + /// A use case for this is to be able to prepare a bind group for all main textures + /// ahead of time. + pub fn main_texture_other(&self) -> &Texture { + if self.main_texture.load(Ordering::SeqCst) == 0 { + &self.main_textures.b.texture.texture + } else { + &self.main_textures.a.texture.texture + } + } + + /// The "main" unsampled texture. + pub fn main_texture_view(&self) -> &TextureView { + if self.main_texture.load(Ordering::SeqCst) == 0 { + &self.main_textures.a.texture.default_view + } else { + &self.main_textures.b.texture.default_view + } + } + + /// The _other_ "main" unsampled texture view. + /// In most cases you should use [`Self::main_texture_view`] instead and never this. + /// The textures will naturally be swapped when [`Self::post_process_write`] is called. + /// + /// A use case for this is to be able to prepare a bind group for all main textures + /// ahead of time. 
+ pub fn main_texture_other_view(&self) -> &TextureView { + if self.main_texture.load(Ordering::SeqCst) == 0 { + &self.main_textures.b.texture.default_view + } else { + &self.main_textures.a.texture.default_view + } + } + + /// The "main" sampled texture. + pub fn sampled_main_texture(&self) -> Option<&Texture> { + self.main_textures + .a + .resolve_target + .as_ref() + .map(|sampled| &sampled.texture) + } + + /// The "main" sampled texture view. + pub fn sampled_main_texture_view(&self) -> Option<&TextureView> { + self.main_textures + .a + .resolve_target + .as_ref() + .map(|sampled| &sampled.default_view) + } + + #[inline] + pub fn main_texture_format(&self) -> TextureFormat { + self.main_texture_format + } + + /// Returns `true` if and only if the main texture is [`Self::TEXTURE_FORMAT_HDR`] + #[inline] + pub fn is_hdr(&self) -> bool { + self.main_texture_format == ViewTarget::TEXTURE_FORMAT_HDR + } + + /// The final texture this view will render to. + #[inline] + pub fn out_texture(&self) -> &TextureView { + &self.out_texture.view + } + + pub fn out_texture_color_attachment( + &self, + clear_color: Option, + ) -> RenderPassColorAttachment<'_> { + self.out_texture.get_attachment(clear_color) + } + + /// The format of the final texture this view will render to + #[inline] + pub fn out_texture_format(&self) -> TextureFormat { + self.out_texture.format + } + + /// This will start a new "post process write", which assumes that the caller + /// will write the [`PostProcessWrite`]'s `source` to the `destination`. + /// + /// `source` is the "current" main texture. This will internally flip this + /// [`ViewTarget`]'s main texture to the `destination` texture, so the caller + /// _must_ ensure `source` is copied to `destination`, with or without modifications. + /// Failing to do so will cause the current main texture information to be lost. 
+ pub fn post_process_write(&self) -> PostProcessWrite<'_> { + let old_is_a_main_texture = self.main_texture.fetch_xor(1, Ordering::SeqCst); + // if the old main texture is a, then the post processing must write from a to b + if old_is_a_main_texture == 0 { + self.main_textures.b.mark_as_cleared(); + PostProcessWrite { + source: &self.main_textures.a.texture.default_view, + source_texture: &self.main_textures.a.texture.texture, + destination: &self.main_textures.b.texture.default_view, + destination_texture: &self.main_textures.b.texture.texture, + } + } else { + self.main_textures.a.mark_as_cleared(); + PostProcessWrite { + source: &self.main_textures.b.texture.default_view, + source_texture: &self.main_textures.b.texture.texture, + destination: &self.main_textures.a.texture.default_view, + destination_texture: &self.main_textures.a.texture.texture, + } + } + } +} + +#[derive(Component)] +pub struct ViewDepthTexture { + pub texture: Texture, + attachment: DepthAttachment, +} + +impl ViewDepthTexture { + pub fn new(texture: CachedTexture, clear_value: Option) -> Self { + Self { + texture: texture.texture, + attachment: DepthAttachment::new(texture.default_view, clear_value), + } + } + + pub fn get_attachment(&self, store: StoreOp) -> RenderPassDepthStencilAttachment<'_> { + self.attachment.get_attachment(store) + } + + pub fn view(&self) -> &TextureView { + &self.attachment.view + } +} + +pub fn prepare_view_uniforms( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mut view_uniforms: ResMut, + views: Query<( + Entity, + Option<&ExtractedCamera>, + &ExtractedView, + Option<&Frustum>, + Option<&TemporalJitter>, + Option<&MipBias>, + Option<&MainPassResolutionOverride>, + )>, + frame_count: Res, +) { + let view_iter = views.iter(); + let view_count = view_iter.len(); + let Some(mut writer) = + view_uniforms + .uniforms + .get_writer(view_count, &render_device, &render_queue) + else { + return; + }; + for ( + entity, + extracted_camera, + 
extracted_view, + frustum, + temporal_jitter, + mip_bias, + resolution_override, + ) in &views + { + let viewport = extracted_view.viewport.as_vec4(); + let mut main_pass_viewport = viewport; + if let Some(resolution_override) = resolution_override { + main_pass_viewport.z = resolution_override.0.x as f32; + main_pass_viewport.w = resolution_override.0.y as f32; + } + + let unjittered_projection = extracted_view.clip_from_view; + let mut clip_from_view = unjittered_projection; + + if let Some(temporal_jitter) = temporal_jitter { + temporal_jitter.jitter_projection(&mut clip_from_view, main_pass_viewport.zw()); + } + + let view_from_clip = clip_from_view.inverse(); + let world_from_view = extracted_view.world_from_view.to_matrix(); + let view_from_world = world_from_view.inverse(); + + let clip_from_world = if temporal_jitter.is_some() { + clip_from_view * view_from_world + } else { + extracted_view + .clip_from_world + .unwrap_or_else(|| clip_from_view * view_from_world) + }; + + // Map Frustum type to shader array, 6> + let frustum = frustum + .map(|frustum| frustum.half_spaces.map(|h| h.normal_d())) + .unwrap_or([Vec4::ZERO; 6]); + + let view_uniforms = ViewUniformOffset { + offset: writer.write(&ViewUniform { + clip_from_world, + unjittered_clip_from_world: unjittered_projection * view_from_world, + world_from_clip: world_from_view * view_from_clip, + world_from_view, + view_from_world, + clip_from_view, + view_from_clip, + world_position: extracted_view.world_from_view.translation(), + exposure: extracted_camera + .map(|c| c.exposure) + .unwrap_or_else(|| Exposure::default().exposure()), + viewport, + main_pass_viewport, + frustum, + color_grading: extracted_view.color_grading.clone().into(), + mip_bias: mip_bias.unwrap_or(&MipBias(0.0)).0, + frame_count: frame_count.0, + }), + }; + + commands.entity(entity).insert(view_uniforms); + } +} + +#[derive(Clone)] +struct MainTargetTextures { + a: ColorAttachment, + b: ColorAttachment, + /// 0 represents 
`main_textures.a`, 1 represents `main_textures.b` + /// This is shared across view targets with the same render target + main_texture: Arc<AtomicUsize>, +} + +/// Prepares the view target [`OutputColorAttachment`] for each view in the current frame. +pub fn prepare_view_attachments( + windows: Res<ExtractedWindows>, + images: Res<RenderAssets<GpuImage>>, + manual_texture_views: Res<ManualTextureViews>, + cameras: Query<&ExtractedCamera>, + mut view_target_attachments: ResMut<ViewTargetAttachments>, +) { + for camera in cameras.iter() { + let Some(target) = &camera.target else { + continue; + }; + + match view_target_attachments.entry(target.clone()) { + Entry::Occupied(_) => {} + Entry::Vacant(entry) => { + let Some(attachment) = target + .get_texture_view(&windows, &images, &manual_texture_views) + .cloned() + .zip(target.get_texture_format(&windows, &images, &manual_texture_views)) + .map(|(view, format)| { + OutputColorAttachment::new(view.clone(), format.add_srgb_suffix()) + }) + else { + continue; + }; + entry.insert(attachment); + } + }; + } +} + +/// Clears the view target [`OutputColorAttachment`]s. 
+pub fn clear_view_attachments(mut view_target_attachments: ResMut) { + view_target_attachments.clear(); +} + +pub fn prepare_view_targets( + mut commands: Commands, + clear_color_global: Res, + render_device: Res, + mut texture_cache: ResMut, + cameras: Query<( + Entity, + &ExtractedCamera, + &ExtractedView, + &CameraMainTextureUsages, + &Msaa, + )>, + view_target_attachments: Res, +) { + let mut textures = >::default(); + for (entity, camera, view, texture_usage, msaa) in cameras.iter() { + let (Some(target_size), Some(target)) = (camera.physical_target_size, &camera.target) + else { + continue; + }; + + let Some(out_attachment) = view_target_attachments.get(target) else { + continue; + }; + + let main_texture_format = if view.hdr { + ViewTarget::TEXTURE_FORMAT_HDR + } else { + TextureFormat::bevy_default() + }; + + let clear_color = match camera.clear_color { + ClearColorConfig::Custom(color) => Some(color), + ClearColorConfig::None => None, + _ => Some(clear_color_global.0), + }; + + let (a, b, sampled, main_texture) = textures + .entry((camera.target.clone(), texture_usage.0, view.hdr, msaa)) + .or_insert_with(|| { + let descriptor = TextureDescriptor { + label: None, + size: target_size.to_extents(), + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: main_texture_format, + usage: texture_usage.0, + view_formats: match main_texture_format { + TextureFormat::Bgra8Unorm => &[TextureFormat::Bgra8UnormSrgb], + TextureFormat::Rgba8Unorm => &[TextureFormat::Rgba8UnormSrgb], + _ => &[], + }, + }; + let a = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("main_texture_a"), + ..descriptor + }, + ); + let b = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("main_texture_b"), + ..descriptor + }, + ); + let sampled = if msaa.samples() > 1 { + let sampled = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("main_texture_sampled"), + size: target_size.to_extents(), + 
mip_level_count: 1, + sample_count: msaa.samples(), + dimension: TextureDimension::D2, + format: main_texture_format, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: descriptor.view_formats, + }, + ); + Some(sampled) + } else { + None + }; + let main_texture = Arc::new(AtomicUsize::new(0)); + (a, b, sampled, main_texture) + }); + + let converted_clear_color = clear_color.map(Into::into); + + let main_textures = MainTargetTextures { + a: ColorAttachment::new(a.clone(), sampled.clone(), converted_clear_color), + b: ColorAttachment::new(b.clone(), sampled.clone(), converted_clear_color), + main_texture: main_texture.clone(), + }; + + commands.entity(entity).insert(ViewTarget { + main_texture: main_textures.main_texture.clone(), + main_textures, + main_texture_format, + out_texture: out_attachment.clone(), + }); + } +} diff --git a/crates/libmarathon/src/render/view/view.wgsl b/crates/libmarathon/src/render/view/view.wgsl new file mode 100644 index 0000000..23ded53 --- /dev/null +++ b/crates/libmarathon/src/render/view/view.wgsl @@ -0,0 +1,272 @@ +#define_import_path bevy_render::view + +struct ColorGrading { + balance: mat3x3, + saturation: vec3, + contrast: vec3, + gamma: vec3, + gain: vec3, + lift: vec3, + midtone_range: vec2, + exposure: f32, + hue: f32, + post_saturation: f32, +} + +struct View { + clip_from_world: mat4x4, + unjittered_clip_from_world: mat4x4, + world_from_clip: mat4x4, + world_from_view: mat4x4, + view_from_world: mat4x4, + // Typically a column-major right-handed projection matrix, one of either: + // + // Perspective (infinite reverse z) + // ``` + // f = 1 / tan(fov_y_radians / 2) + // + // ⎡ f / aspect 0 0 0 ⎤ + // ⎢ 0 f 0 0 ⎥ + // ⎢ 0 0 0 near ⎥ + // ⎣ 0 0 -1 0 ⎦ + // ``` + // + // Orthographic + // ``` + // w = right - left + // h = top - bottom + // d = far - near + // cw = -right - left + // ch = -top - bottom + // + // ⎡ 2 / w 0 0 cw / w ⎤ + // ⎢ 0 2 / h 0 ch / h ⎥ + // ⎢ 0 0 1 / d far / d ⎥ + // ⎣ 0 0 0 1 ⎦ + // ``` + // + // 
`clip_from_view[3][3] == 1.0` is the standard way to check if a projection is orthographic + // + // Wgsl matrices are column major, so for example getting the near plane of a perspective projection is `clip_from_view[3][2]` + // + // Custom projections are also possible however. + clip_from_view: mat4x4, + view_from_clip: mat4x4, + world_position: vec3, + exposure: f32, + // viewport(x_origin, y_origin, width, height) + viewport: vec4, + main_pass_viewport: vec4, + // 6 world-space half spaces (normal: vec3, distance: f32) ordered left, right, top, bottom, near, far. + // The normal vectors point towards the interior of the frustum. + // A half space contains `p` if `normal.dot(p) + distance > 0.` + frustum: array, 6>, + color_grading: ColorGrading, + mip_bias: f32, + frame_count: u32, +}; + +/// World space: +/// +y is up + +/// View space: +/// -z is forward, +x is right, +y is up +/// Forward is from the camera position into the scene. +/// (0.0, 0.0, -1.0) is linear distance of 1.0 in front of the camera's view relative to the camera's rotation +/// (0.0, 1.0, 0.0) is linear distance of 1.0 above the camera's view relative to the camera's rotation + +/// NDC (normalized device coordinate): +/// https://www.w3.org/TR/webgpu/#coordinate-systems +/// (-1.0, -1.0) in NDC is located at the bottom-left corner of NDC +/// (1.0, 1.0) in NDC is located at the top-right corner of NDC +/// Z is depth where: +/// 1.0 is near clipping plane +/// Perspective projection: 0.0 is inf far away +/// Orthographic projection: 0.0 is far clipping plane + +/// Clip space: +/// This is NDC before the perspective divide, still in homogenous coordinate space. +/// Dividing a clip space point by its w component yields a point in NDC space. 
+ +/// UV space: +/// 0.0, 0.0 is the top left +/// 1.0, 1.0 is the bottom right + + +// ----------------- +// TO WORLD -------- +// ----------------- + +/// Convert a view space position to world space +fn position_view_to_world(view_pos: vec3, world_from_view: mat4x4) -> vec3 { + let world_pos = world_from_view * vec4(view_pos, 1.0); + return world_pos.xyz; +} + +/// Convert a clip space position to world space +fn position_clip_to_world(clip_pos: vec4, world_from_clip: mat4x4) -> vec3 { + let world_pos = world_from_clip * clip_pos; + return world_pos.xyz; +} + +/// Convert a ndc space position to world space +fn position_ndc_to_world(ndc_pos: vec3, world_from_clip: mat4x4) -> vec3 { + let world_pos = world_from_clip * vec4(ndc_pos, 1.0); + return world_pos.xyz / world_pos.w; +} + +/// Convert a view space direction to world space +fn direction_view_to_world(view_dir: vec3, world_from_view: mat4x4) -> vec3 { + let world_dir = world_from_view * vec4(view_dir, 0.0); + return world_dir.xyz; +} + +/// Convert a clip space direction to world space +fn direction_clip_to_world(clip_dir: vec4, world_from_clip: mat4x4) -> vec3 { + let world_dir = world_from_clip * clip_dir; + return world_dir.xyz; +} + +// ----------------- +// TO VIEW --------- +// ----------------- + +/// Convert a world space position to view space +fn position_world_to_view(world_pos: vec3, view_from_world: mat4x4) -> vec3 { + let view_pos = view_from_world * vec4(world_pos, 1.0); + return view_pos.xyz; +} + +/// Convert a clip space position to view space +fn position_clip_to_view(clip_pos: vec4, view_from_clip: mat4x4) -> vec3 { + let view_pos = view_from_clip * clip_pos; + return view_pos.xyz; +} + +/// Convert a ndc space position to view space +fn position_ndc_to_view(ndc_pos: vec3, view_from_clip: mat4x4) -> vec3 { + let view_pos = view_from_clip * vec4(ndc_pos, 1.0); + return view_pos.xyz / view_pos.w; +} + +/// Convert a world space direction to view space +fn 
direction_world_to_view(world_dir: vec3, view_from_world: mat4x4) -> vec3 { + let view_dir = view_from_world * vec4(world_dir, 0.0); + return view_dir.xyz; +} + +/// Convert a clip space direction to view space +fn direction_clip_to_view(clip_dir: vec4, view_from_clip: mat4x4) -> vec3 { + let view_dir = view_from_clip * clip_dir; + return view_dir.xyz; +} + +// ----------------- +// TO CLIP --------- +// ----------------- + +/// Convert a world space position to clip space +fn position_world_to_clip(world_pos: vec3, clip_from_world: mat4x4) -> vec4 { + let clip_pos = clip_from_world * vec4(world_pos, 1.0); + return clip_pos; +} + +/// Convert a view space position to clip space +fn position_view_to_clip(view_pos: vec3, clip_from_view: mat4x4) -> vec4 { + let clip_pos = clip_from_view * vec4(view_pos, 1.0); + return clip_pos; +} + +/// Convert a world space direction to clip space +fn direction_world_to_clip(world_dir: vec3, clip_from_world: mat4x4) -> vec4 { + let clip_dir = clip_from_world * vec4(world_dir, 0.0); + return clip_dir; +} + +/// Convert a view space direction to clip space +fn direction_view_to_clip(view_dir: vec3, clip_from_view: mat4x4) -> vec4 { + let clip_dir = clip_from_view * vec4(view_dir, 0.0); + return clip_dir; +} + +// ----------------- +// TO NDC ---------- +// ----------------- + +/// Convert a world space position to ndc space +fn position_world_to_ndc(world_pos: vec3, clip_from_world: mat4x4) -> vec3 { + let ndc_pos = clip_from_world * vec4(world_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + +/// Convert a view space position to ndc space +fn position_view_to_ndc(view_pos: vec3, clip_from_view: mat4x4) -> vec3 { + let ndc_pos = clip_from_view * vec4(view_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + +// ----------------- +// DEPTH ----------- +// ----------------- + +/// Retrieve the perspective camera near clipping plane +fn perspective_camera_near(clip_from_view: mat4x4) -> f32 { + return clip_from_view[3][2]; +} + +/// 
Convert ndc depth to linear view z. +/// Note: Depth values in front of the camera will be negative as -z is forward +fn depth_ndc_to_view_z(ndc_depth: f32, clip_from_view: mat4x4, view_from_clip: mat4x4) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near(clip_from_view) / ndc_depth; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return -(clip_from_view[3][2] - ndc_depth) / clip_from_view[2][2]; +#else + let view_pos = view_from_clip * vec4(0.0, 0.0, ndc_depth, 1.0); + return view_pos.z / view_pos.w; +#endif +} + +/// Convert linear view z to ndc depth. +/// Note: View z input should be negative for values in front of the camera as -z is forward +fn view_z_to_depth_ndc(view_z: f32, clip_from_view: mat4x4) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near(clip_from_view) / view_z; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return clip_from_view[3][2] + view_z * clip_from_view[2][2]; +#else + let ndc_pos = clip_from_view * vec4(0.0, 0.0, view_z, 1.0); + return ndc_pos.z / ndc_pos.w; +#endif +} + +// ----------------- +// UV -------------- +// ----------------- + +/// Convert ndc space xy coordinate [-1.0 .. 1.0] to uv [0.0 .. 1.0] +fn ndc_to_uv(ndc: vec2) -> vec2 { + return ndc * vec2(0.5, -0.5) + vec2(0.5); +} + +/// Convert uv [0.0 .. 1.0] coordinate to ndc space xy [-1.0 .. 1.0] +fn uv_to_ndc(uv: vec2) -> vec2 { + return uv * vec2(2.0, -2.0) + vec2(-1.0, 1.0); +} + +/// returns the (0.0, 0.0) .. (1.0, 1.0) position within the viewport for the current render target +/// [0 .. render target viewport size] eg. [(0.0, 0.0) .. (1280.0, 720.0)] to [(0.0, 0.0) .. 
(1.0, 1.0)] +fn frag_coord_to_uv(frag_coord: vec2, viewport: vec4) -> vec2 { + return (frag_coord - viewport.xy) / viewport.zw; +} + +/// Convert frag coord to ndc +fn frag_coord_to_ndc(frag_coord: vec4, viewport: vec4) -> vec3 { + return vec3(uv_to_ndc(frag_coord_to_uv(frag_coord.xy, viewport)), frag_coord.z); +} + +/// Convert ndc space xy coordinate [-1.0 .. 1.0] to [0 .. render target +/// viewport size] +fn ndc_to_frag_coord(ndc: vec2, viewport: vec4) -> vec2 { + return ndc_to_uv(ndc) * viewport.zw; +} diff --git a/crates/libmarathon/src/render/view/visibility/mod.rs b/crates/libmarathon/src/render/view/visibility/mod.rs new file mode 100644 index 0000000..353da4b --- /dev/null +++ b/crates/libmarathon/src/render/view/visibility/mod.rs @@ -0,0 +1,54 @@ +use core::any::TypeId; + +use bevy_ecs::{component::Component, entity::Entity, prelude::ReflectComponent}; +use bevy_reflect::{prelude::ReflectDefault, Reflect}; +use bevy_utils::TypeIdMap; + +use crate::render::sync_world::MainEntity; + +mod range; +use bevy_camera::visibility::*; +pub use range::*; + +/// Collection of entities visible from the current view. +/// +/// This component is extracted from [`VisibleEntities`]. 
+#[derive(Clone, Component, Default, Debug, Reflect)] +#[reflect(Component, Default, Debug, Clone)] +pub struct RenderVisibleEntities { + #[reflect(ignore, clone)] + pub entities: TypeIdMap<Vec<(Entity, MainEntity)>>, +} + +impl RenderVisibleEntities { + pub fn get<QF>(&self) -> &[(Entity, MainEntity)] + where + QF: 'static, + { + match self.entities.get(&TypeId::of::<QF>()) { + Some(entities) => &entities[..], + None => &[], + } + } + + pub fn iter<QF>(&self) -> impl DoubleEndedIterator<Item = &(Entity, MainEntity)> + where + QF: 'static, + { + self.get::<QF>().iter() + } + + pub fn len<QF>(&self) -> usize + where + QF: 'static, + { + self.get::<QF>().len() + } + + pub fn is_empty<QF>(&self) -> bool + where + QF: 'static, + { + self.get::<QF>().is_empty() + } +} diff --git a/crates/libmarathon/src/render/view/visibility/range.rs b/crates/libmarathon/src/render/view/visibility/range.rs new file mode 100644 index 0000000..a6d2532 --- /dev/null +++ b/crates/libmarathon/src/render/view/visibility/range.rs @@ -0,0 +1,228 @@ +//! Specific distances from the camera in which entities are visible, also known +//! as *hierarchical levels of detail* or *HLOD*s. + +use super::VisibilityRange; +use bevy_app::{App, Plugin}; +use bevy_ecs::{ + entity::Entity, + lifecycle::RemovedComponents, + query::Changed, + resource::Resource, + schedule::IntoScheduleConfigs as _, + system::{Query, Res, ResMut}, +}; +use bevy_math::{vec4, Vec4}; +use bevy_platform::collections::HashMap; +use bevy_utils::prelude::default; +use nonmax::NonMaxU16; +use wgpu::{BufferBindingType, BufferUsages}; + +use crate::render::{ + render_resource::BufferVec, + renderer::{RenderDevice, RenderQueue}, + sync_world::{MainEntity, MainEntityHashMap}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; + +/// We need at least 4 storage buffer bindings available to enable the +/// visibility range buffer. +/// +/// Even though we only use one storage buffer, the first 3 available storage +/// buffers will go to various light-related buffers. We will grab the fourth +/// buffer slot. 
+pub const VISIBILITY_RANGES_STORAGE_BUFFER_COUNT: u32 = 4; + +/// The size of the visibility ranges buffer in elements (not bytes) when fewer +/// than 6 storage buffers are available and we're forced to use a uniform +/// buffer instead (most notably, on WebGL 2). +const VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE: usize = 64; + +/// A plugin that enables [`RenderVisibilityRanges`]s, which allow entities to be +/// hidden or shown based on distance to the camera. +pub struct RenderVisibilityRangePlugin; + +impl Plugin for RenderVisibilityRangePlugin { + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::<RenderVisibilityRanges>() + .add_systems(ExtractSchedule, extract_visibility_ranges) + .add_systems( + Render, + write_render_visibility_ranges.in_set(RenderSystems::PrepareResourcesFlush), + ); + } +} + +/// Stores information related to [`VisibilityRange`]s in the render world. +#[derive(Resource)] +pub struct RenderVisibilityRanges { + /// Information corresponding to each entity. + entities: MainEntityHashMap<RenderVisibilityEntityInfo>, + + /// Maps a [`VisibilityRange`] to its index within the `buffer`. + /// + /// This map allows us to deduplicate identical visibility ranges, which + /// saves GPU memory. + range_to_index: HashMap<VisibilityRange, NonMaxU16>, + + /// The GPU buffer that stores [`VisibilityRange`]s. + /// + /// Each [`Vec4`] contains the start margin start, start margin end, end + /// margin start, and end margin end distances, in that order. + buffer: BufferVec<Vec4>, + + /// True if the buffer has been changed since the last frame and needs to be + /// reuploaded to the GPU. + buffer_dirty: bool, +} + +/// Per-entity information related to [`VisibilityRange`]s. +struct RenderVisibilityEntityInfo { + /// The index of the range within the GPU buffer. + buffer_index: NonMaxU16, + /// True if the range is abrupt: i.e. has no crossfade. 
+ is_abrupt: bool, +} + +impl Default for RenderVisibilityRanges { + fn default() -> Self { + Self { + entities: default(), + range_to_index: default(), + buffer: BufferVec::new( + BufferUsages::STORAGE | BufferUsages::UNIFORM | BufferUsages::VERTEX, + ), + buffer_dirty: true, + } + } +} + +impl RenderVisibilityRanges { + /// Clears out the [`RenderVisibilityRanges`] in preparation for a new + /// frame. + fn clear(&mut self) { + self.entities.clear(); + self.range_to_index.clear(); + self.buffer.clear(); + self.buffer_dirty = true; + } + + /// Inserts a new entity into the [`RenderVisibilityRanges`]. + fn insert(&mut self, entity: MainEntity, visibility_range: &VisibilityRange) { + // Grab a slot in the GPU buffer, or take the existing one if there + // already is one. + let buffer_index = *self + .range_to_index + .entry(visibility_range.clone()) + .or_insert_with(|| { + NonMaxU16::try_from(self.buffer.push(vec4( + visibility_range.start_margin.start, + visibility_range.start_margin.end, + visibility_range.end_margin.start, + visibility_range.end_margin.end, + )) as u16) + .unwrap_or_default() + }); + + self.entities.insert( + entity, + RenderVisibilityEntityInfo { + buffer_index, + is_abrupt: visibility_range.is_abrupt(), + }, + ); + } + + /// Returns the index in the GPU buffer corresponding to the visible range + /// for the given entity. + /// + /// If the entity has no visible range, returns `None`. + #[inline] + pub fn lod_index_for_entity(&self, entity: MainEntity) -> Option { + self.entities.get(&entity).map(|info| info.buffer_index) + } + + /// Returns true if the entity has a visibility range and it isn't abrupt: + /// i.e. if it has a crossfade. + #[inline] + pub fn entity_has_crossfading_visibility_ranges(&self, entity: MainEntity) -> bool { + self.entities + .get(&entity) + .is_some_and(|info| !info.is_abrupt) + } + + /// Returns a reference to the GPU buffer that stores visibility ranges. 
+ #[inline] + pub fn buffer(&self) -> &BufferVec { + &self.buffer + } +} + +/// Extracts all [`VisibilityRange`] components from the main world to the +/// render world and inserts them into [`RenderVisibilityRanges`]. +pub fn extract_visibility_ranges( + mut render_visibility_ranges: ResMut, + visibility_ranges_query: Extract>, + changed_ranges_query: Extract>>, + mut removed_visibility_ranges: Extract>, +) { + if changed_ranges_query.is_empty() && removed_visibility_ranges.read().next().is_none() { + return; + } + + render_visibility_ranges.clear(); + for (entity, visibility_range) in visibility_ranges_query.iter() { + render_visibility_ranges.insert(entity.into(), visibility_range); + } +} + +/// Writes the [`RenderVisibilityRanges`] table to the GPU. +pub fn write_render_visibility_ranges( + render_device: Res, + render_queue: Res, + mut render_visibility_ranges: ResMut, +) { + // If there haven't been any changes, early out. + if !render_visibility_ranges.buffer_dirty { + return; + } + + // Mess with the length of the buffer to meet API requirements if necessary. + match render_device.get_supported_read_only_binding_type(VISIBILITY_RANGES_STORAGE_BUFFER_COUNT) + { + // If we're using a uniform buffer, we must have *exactly* + // `VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE` elements. + BufferBindingType::Uniform + if render_visibility_ranges.buffer.len() > VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE => + { + render_visibility_ranges + .buffer + .truncate(VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE); + } + BufferBindingType::Uniform + if render_visibility_ranges.buffer.len() < VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE => + { + while render_visibility_ranges.buffer.len() < VISIBILITY_RANGE_UNIFORM_BUFFER_SIZE { + render_visibility_ranges.buffer.push(default()); + } + } + + // Otherwise, if we're using a storage buffer, just ensure there's + // something in the buffer, or else it won't get allocated. + BufferBindingType::Storage { .. 
} if render_visibility_ranges.buffer.is_empty() => { + render_visibility_ranges.buffer.push(default()); + } + + _ => {} + } + + // Schedule the write. + render_visibility_ranges + .buffer + .write_buffer(&render_device, &render_queue); + render_visibility_ranges.buffer_dirty = false; +} diff --git a/crates/libmarathon/src/render/view/window/mod.rs b/crates/libmarathon/src/render/view/window/mod.rs new file mode 100644 index 0000000..d73e734 --- /dev/null +++ b/crates/libmarathon/src/render/view/window/mod.rs @@ -0,0 +1,401 @@ +use crate::render::renderer::WgpuWrapper; +use crate::render::{ + render_resource::{SurfaceTexture, TextureView}, + renderer::{RenderAdapter, RenderDevice, RenderInstance}, + Extract, ExtractSchedule, Render, RenderApp, RenderSystems, +}; +use bevy_app::{App, Plugin}; +use bevy_ecs::{entity::EntityHashMap, prelude::*}; +use bevy_platform::collections::HashSet; +use bevy_utils::default; +use bevy_window::{ + CompositeAlphaMode, PresentMode, PrimaryWindow, RawHandleWrapper, Window, WindowClosing, +}; +use core::{ + num::NonZero, + ops::{Deref, DerefMut}, +}; +use tracing::{debug, warn}; +use wgpu::{ + SurfaceConfiguration, SurfaceTargetUnsafe, TextureFormat, TextureUsages, TextureViewDescriptor, +}; + +pub mod screenshot; + +use screenshot::ScreenshotPlugin; + +pub struct WindowRenderPlugin; + +impl Plugin for WindowRenderPlugin { + fn build(&self, app: &mut App) { + app.add_plugins(ScreenshotPlugin); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::() + .add_systems(ExtractSchedule, extract_windows) + .add_systems( + Render, + create_surfaces + .run_if(need_surface_configuration) + .before(prepare_windows), + ) + .add_systems(Render, prepare_windows.in_set(RenderSystems::ManageViews)); + } + } +} + +pub struct ExtractedWindow { + /// An entity that contains the components in [`Window`]. 
+ pub entity: Entity, + pub handle: RawHandleWrapper, + pub physical_width: u32, + pub physical_height: u32, + pub present_mode: PresentMode, + pub desired_maximum_frame_latency: Option>, + /// Note: this will not always be the swap chain texture view. When taking a screenshot, + /// this will point to an alternative texture instead to allow for copying the render result + /// to CPU memory. + pub swap_chain_texture_view: Option, + pub swap_chain_texture: Option, + pub swap_chain_texture_format: Option, + pub size_changed: bool, + pub present_mode_changed: bool, + pub alpha_mode: CompositeAlphaMode, +} + +impl ExtractedWindow { + fn set_swapchain_texture(&mut self, frame: wgpu::SurfaceTexture) { + let texture_view_descriptor = TextureViewDescriptor { + format: Some(frame.texture.format().add_srgb_suffix()), + ..default() + }; + self.swap_chain_texture_view = Some(TextureView::from( + frame.texture.create_view(&texture_view_descriptor), + )); + self.swap_chain_texture = Some(SurfaceTexture::from(frame)); + } +} + +#[derive(Default, Resource)] +pub struct ExtractedWindows { + pub primary: Option, + pub windows: EntityHashMap, +} + +impl Deref for ExtractedWindows { + type Target = EntityHashMap; + + fn deref(&self) -> &Self::Target { + &self.windows + } +} + +impl DerefMut for ExtractedWindows { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.windows + } +} + +fn extract_windows( + mut extracted_windows: ResMut, + mut closing: Extract>, + windows: Extract)>>, + mut removed: Extract>, + mut window_surfaces: ResMut, +) { + for (entity, window, handle, primary) in windows.iter() { + if primary.is_some() { + extracted_windows.primary = Some(entity); + } + + let (new_width, new_height) = ( + window.resolution.physical_width().max(1), + window.resolution.physical_height().max(1), + ); + + let extracted_window = extracted_windows.entry(entity).or_insert(ExtractedWindow { + entity, + handle: handle.clone(), + physical_width: new_width, + physical_height: 
new_height, + present_mode: window.present_mode, + desired_maximum_frame_latency: window.desired_maximum_frame_latency, + swap_chain_texture: None, + swap_chain_texture_view: None, + size_changed: false, + swap_chain_texture_format: None, + present_mode_changed: false, + alpha_mode: window.composite_alpha_mode, + }); + + // NOTE: Drop the swap chain frame here + extracted_window.swap_chain_texture_view = None; + extracted_window.size_changed = new_width != extracted_window.physical_width + || new_height != extracted_window.physical_height; + extracted_window.present_mode_changed = + window.present_mode != extracted_window.present_mode; + + if extracted_window.size_changed { + debug!( + "Window size changed from {}x{} to {}x{}", + extracted_window.physical_width, + extracted_window.physical_height, + new_width, + new_height + ); + extracted_window.physical_width = new_width; + extracted_window.physical_height = new_height; + } + + if extracted_window.present_mode_changed { + debug!( + "Window Present Mode changed from {:?} to {:?}", + extracted_window.present_mode, window.present_mode + ); + extracted_window.present_mode = window.present_mode; + } + } + + for closing_window in closing.read() { + extracted_windows.remove(&closing_window.window); + window_surfaces.remove(&closing_window.window); + } + for removed_window in removed.read() { + extracted_windows.remove(&removed_window); + window_surfaces.remove(&removed_window); + } +} + +struct SurfaceData { + // TODO: what lifetime should this be? 
+ surface: WgpuWrapper>, + configuration: SurfaceConfiguration, +} + +#[derive(Resource, Default)] +pub struct WindowSurfaces { + surfaces: EntityHashMap, + /// List of windows that we have already called the initial `configure_surface` for + configured_windows: HashSet, +} + +impl WindowSurfaces { + fn remove(&mut self, window: &Entity) { + self.surfaces.remove(window); + self.configured_windows.remove(window); + } +} + +/// (re)configures window surfaces, and obtains a swapchain texture for rendering. +/// +/// NOTE: `get_current_texture` in `prepare_windows` can take a long time if the GPU workload is +/// the performance bottleneck. This can be seen in profiles as multiple prepare-set systems all +/// taking an unusually long time to complete, and all finishing at about the same time as the +/// `prepare_windows` system. Improvements in bevy are planned to avoid this happening when it +/// should not but it will still happen as it is easy for a user to create a large GPU workload +/// relative to the GPU performance and/or CPU workload. +/// This can be caused by many reasons, but several of them are: +/// - GPU workload is more than your current GPU can manage +/// - Error / performance bug in your custom shaders +/// - wgpu was unable to detect a proper GPU hardware-accelerated device given the chosen +/// [`Backends`](crate::settings::Backends), [`WgpuLimits`](crate::settings::WgpuLimits), +/// and/or [`WgpuFeatures`](crate::settings::WgpuFeatures). For example, on Windows currently +/// `DirectX 11` is not supported by wgpu 0.12 and so if your GPU/drivers do not support Vulkan, +/// it may be that a software renderer called "Microsoft Basic Render Driver" using `DirectX 12` +/// will be chosen and performance will be very poor. This is visible in a log message that is +/// output during renderer initialization. 
+/// Another alternative is to try to use [`ANGLE`](https://github.com/gfx-rs/wgpu#angle) and +/// [`Backends::GL`](crate::settings::Backends::GL) with the `gles` feature enabled if your +/// GPU/drivers support `OpenGL 4.3` / `OpenGL ES 3.0` or later. +pub fn prepare_windows( + mut windows: ResMut, + mut window_surfaces: ResMut, + render_device: Res, + #[cfg(target_os = "linux")] render_instance: Res, +) { + for window in windows.windows.values_mut() { + let window_surfaces = window_surfaces.deref_mut(); + let Some(surface_data) = window_surfaces.surfaces.get(&window.entity) else { + continue; + }; + + // A recurring issue is hitting `wgpu::SurfaceError::Timeout` on certain Linux + // mesa driver implementations. This seems to be a quirk of some drivers. + // We'd rather keep panicking when not on Linux mesa, because in those case, + // the `Timeout` is still probably the symptom of a degraded unrecoverable + // application state. + // see https://github.com/bevyengine/bevy/pull/5957 + // and https://github.com/gfx-rs/wgpu/issues/1218 + #[cfg(target_os = "linux")] + let may_erroneously_timeout = || { + render_instance + .enumerate_adapters(wgpu::Backends::VULKAN) + .iter() + .any(|adapter| { + let name = adapter.get_info().name; + name.starts_with("Radeon") + || name.starts_with("AMD") + || name.starts_with("Intel") + }) + }; + + let surface = &surface_data.surface; + match surface.get_current_texture() { + Ok(frame) => { + window.set_swapchain_texture(frame); + } + Err(wgpu::SurfaceError::Outdated) => { + render_device.configure_surface(surface, &surface_data.configuration); + let frame = match surface.get_current_texture() { + Ok(frame) => frame, + Err(err) => { + // This is a common occurrence on X11 and Xwayland with NVIDIA drivers + // when opening and resizing the window. + warn!("Couldn't get swap chain texture after configuring. 
Cause: '{err}'"); + continue; + } + }; + window.set_swapchain_texture(frame); + } + #[cfg(target_os = "linux")] + Err(wgpu::SurfaceError::Timeout) if may_erroneously_timeout() => { + tracing::trace!( + "Couldn't get swap chain texture. This is probably a quirk \ + of your Linux GPU driver, so it can be safely ignored." + ); + } + Err(err) => { + panic!("Couldn't get swap chain texture, operation unrecoverable: {err}"); + } + } + window.swap_chain_texture_format = Some(surface_data.configuration.format); + } +} + +pub fn need_surface_configuration( + windows: Res, + window_surfaces: Res, +) -> bool { + for window in windows.windows.values() { + if !window_surfaces.configured_windows.contains(&window.entity) + || window.size_changed + || window.present_mode_changed + { + return true; + } + } + false +} + +// 2 is wgpu's default/what we've been using so far. +// 1 is the minimum, but may cause lower framerates due to the cpu waiting for the gpu to finish +// all work for the previous frame before starting work on the next frame, which then means the gpu +// has to wait for the cpu to finish to start on the next frame. +const DEFAULT_DESIRED_MAXIMUM_FRAME_LATENCY: u32 = 2; + +/// Creates window surfaces. 
+pub fn create_surfaces( + // By accessing a NonSend resource, we tell the scheduler to put this system on the main thread, + // which is necessary for some OS's + #[cfg(any(target_os = "macos", target_os = "ios"))] _marker: bevy_ecs::system::NonSendMarker, + windows: Res, + mut window_surfaces: ResMut, + render_instance: Res, + render_adapter: Res, + render_device: Res, +) { + for window in windows.windows.values() { + let data = window_surfaces + .surfaces + .entry(window.entity) + .or_insert_with(|| { + let surface_target = SurfaceTargetUnsafe::RawHandle { + raw_display_handle: window.handle.get_display_handle(), + raw_window_handle: window.handle.get_window_handle(), + }; + // SAFETY: The window handles in ExtractedWindows will always be valid objects to create surfaces on + let surface = unsafe { + // NOTE: On some OSes this MUST be called from the main thread. + // As of wgpu 0.15, only fallible if the given window is a HTML canvas and obtaining a WebGPU or WebGL2 context fails. + render_instance + .create_surface_unsafe(surface_target) + .expect("Failed to create wgpu surface") + }; + let caps = surface.get_capabilities(&render_adapter); + let formats = caps.formats; + // For future HDR output support, we'll need to request a format that supports HDR, + // but as of wgpu 0.15 that is not yet supported. + // Prefer sRGB formats for surfaces, but fall back to first available format if no sRGB formats are available. + let mut format = *formats.first().expect("No supported formats for surface"); + for available_format in formats { + // Rgba8UnormSrgb and Bgra8UnormSrgb and the only sRGB formats wgpu exposes that we can use for surfaces. 
+ if available_format == TextureFormat::Rgba8UnormSrgb + || available_format == TextureFormat::Bgra8UnormSrgb + { + format = available_format; + break; + } + } + + let configuration = SurfaceConfiguration { + format, + width: window.physical_width, + height: window.physical_height, + usage: TextureUsages::RENDER_ATTACHMENT, + present_mode: match window.present_mode { + PresentMode::Fifo => wgpu::PresentMode::Fifo, + PresentMode::FifoRelaxed => wgpu::PresentMode::FifoRelaxed, + PresentMode::Mailbox => wgpu::PresentMode::Mailbox, + PresentMode::Immediate => wgpu::PresentMode::Immediate, + PresentMode::AutoVsync => wgpu::PresentMode::AutoVsync, + PresentMode::AutoNoVsync => wgpu::PresentMode::AutoNoVsync, + }, + desired_maximum_frame_latency: window + .desired_maximum_frame_latency + .map(NonZero::::get) + .unwrap_or(DEFAULT_DESIRED_MAXIMUM_FRAME_LATENCY), + alpha_mode: match window.alpha_mode { + CompositeAlphaMode::Auto => wgpu::CompositeAlphaMode::Auto, + CompositeAlphaMode::Opaque => wgpu::CompositeAlphaMode::Opaque, + CompositeAlphaMode::PreMultiplied => { + wgpu::CompositeAlphaMode::PreMultiplied + } + CompositeAlphaMode::PostMultiplied => { + wgpu::CompositeAlphaMode::PostMultiplied + } + CompositeAlphaMode::Inherit => wgpu::CompositeAlphaMode::Inherit, + }, + view_formats: if !format.is_srgb() { + vec![format.add_srgb_suffix()] + } else { + vec![] + }, + }; + + render_device.configure_surface(&surface, &configuration); + + SurfaceData { + surface: WgpuWrapper::new(surface), + configuration, + } + }); + + if window.size_changed || window.present_mode_changed { + data.configuration.width = window.physical_width; + data.configuration.height = window.physical_height; + data.configuration.present_mode = match window.present_mode { + PresentMode::Fifo => wgpu::PresentMode::Fifo, + PresentMode::FifoRelaxed => wgpu::PresentMode::FifoRelaxed, + PresentMode::Mailbox => wgpu::PresentMode::Mailbox, + PresentMode::Immediate => wgpu::PresentMode::Immediate, + 
PresentMode::AutoVsync => wgpu::PresentMode::AutoVsync, + PresentMode::AutoNoVsync => wgpu::PresentMode::AutoNoVsync, + }; + render_device.configure_surface(&data.surface, &data.configuration); + } + + window_surfaces.configured_windows.insert(window.entity); + } +} diff --git a/crates/libmarathon/src/render/view/window/screenshot.rs b/crates/libmarathon/src/render/view/window/screenshot.rs new file mode 100644 index 0000000..f56ac4a --- /dev/null +++ b/crates/libmarathon/src/render/view/window/screenshot.rs @@ -0,0 +1,695 @@ +use super::ExtractedWindows; +use crate::render::{ + gpu_readback, + render_asset::RenderAssets, + render_resource::{ + binding_types::texture_2d, BindGroup, BindGroupEntries, BindGroupLayout, + BindGroupLayoutEntries, Buffer, BufferUsages, CachedRenderPipelineId, FragmentState, + PipelineCache, RenderPipelineDescriptor, SpecializedRenderPipeline, + SpecializedRenderPipelines, Texture, TextureUsages, TextureView, VertexState, + }, + renderer::RenderDevice, + texture::{GpuImage, ManualTextureViews, OutputColorAttachment}, + view::{prepare_view_attachments, prepare_view_targets, ViewTargetAttachments, WindowSurfaces}, + ExtractSchedule, MainWorld, Render, RenderApp, RenderStartup, RenderSystems, +}; +use std::{borrow::Cow, sync::Arc}; +use bevy_app::{First, Plugin, Update}; +use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle, RenderAssetUsages}; +use bevy_camera::{ManualTextureViewHandle, NormalizedRenderTarget, RenderTarget}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + entity::EntityHashMap, message::message_update_system, prelude::*, system::SystemState, +}; +use bevy_image::{Image, TextureFormatPixelInfo, ToExtents}; +use bevy_platform::collections::HashSet; +use bevy_reflect::Reflect; +use bevy_shader::Shader; +use bevy_tasks::AsyncComputeTaskPool; +use bevy_utils::default; +use bevy_window::{PrimaryWindow, WindowRef}; +use core::ops::Deref; +use std::{ + path::Path, + sync::{ + mpsc::{Receiver, Sender}, + 
Mutex, + }, +}; +use tracing::{error, info, warn}; +use wgpu::{CommandEncoder, Extent3d, TextureFormat}; + +#[derive(EntityEvent, Reflect, Deref, DerefMut, Debug)] +#[reflect(Debug)] +pub struct ScreenshotCaptured { + pub entity: Entity, + #[deref] + pub image: Image, +} + +/// A component that signals to the renderer to capture a screenshot this frame. +/// +/// This component should be spawned on a new entity with an observer that will trigger +/// with [`ScreenshotCaptured`] when the screenshot is ready. +/// +/// Screenshots are captured asynchronously and may not be available immediately after the frame +/// that the component is spawned on. The observer should be used to handle the screenshot when it +/// is ready. +/// +/// Note that the screenshot entity will be despawned after the screenshot is captured and the +/// observer is triggered. +/// +/// # Usage +/// +/// ``` +/// # use bevy_ecs::prelude::*; +/// # use crate::render::view::screenshot::{save_to_disk, Screenshot}; +/// +/// fn take_screenshot(mut commands: Commands) { +/// commands.spawn(Screenshot::primary_window()) +/// .observe(save_to_disk("screenshot.png")); +/// } +/// ``` +#[derive(Component, Deref, DerefMut, Reflect, Debug)] +#[reflect(Component, Debug)] +pub struct Screenshot(pub RenderTarget); + +/// A marker component that indicates that a screenshot is currently being captured. +#[derive(Component, Default)] +pub struct Capturing; + +/// A marker component that indicates that a screenshot has been captured, the image is ready, and +/// the screenshot entity can be despawned. +#[derive(Component, Default)] +pub struct Captured; + +impl Screenshot { + /// Capture a screenshot of the provided window entity. + pub fn window(window: Entity) -> Self { + Self(RenderTarget::Window(WindowRef::Entity(window))) + } + + /// Capture a screenshot of the primary window, if one exists. 
+ pub fn primary_window() -> Self { + Self(RenderTarget::Window(WindowRef::Primary)) + } + + /// Capture a screenshot of the provided render target image. + pub fn image(image: Handle) -> Self { + Self(RenderTarget::Image(image.into())) + } + + /// Capture a screenshot of the provided manual texture view. + pub fn texture_view(texture_view: ManualTextureViewHandle) -> Self { + Self(RenderTarget::TextureView(texture_view)) + } +} + +struct ScreenshotPreparedState { + pub texture: Texture, + pub buffer: Buffer, + pub bind_group: BindGroup, + pub pipeline_id: CachedRenderPipelineId, + pub size: Extent3d, +} + +#[derive(Resource, Deref, DerefMut)] +pub struct CapturedScreenshots(pub Arc>>); + +#[derive(Resource, Deref, DerefMut, Default)] +struct RenderScreenshotTargets(EntityHashMap); + +#[derive(Resource, Deref, DerefMut, Default)] +struct RenderScreenshotsPrepared(EntityHashMap); + +#[derive(Resource, Deref, DerefMut)] +struct RenderScreenshotsSender(Sender<(Entity, Image)>); + +/// Saves the captured screenshot to disk at the provided path. 
+pub fn save_to_disk(path: impl AsRef) -> impl FnMut(On) { + let path = path.as_ref().to_owned(); + move |screenshot_captured| { + let img = screenshot_captured.image.clone(); + match img.try_into_dynamic() { + Ok(dyn_img) => match image::ImageFormat::from_path(&path) { + Ok(format) => { + // discard the alpha channel which stores brightness values when HDR is enabled to make sure + // the screenshot looks right + let img = dyn_img.to_rgb8(); + #[cfg(not(target_arch = "wasm32"))] + match img.save_with_format(&path, format) { + Ok(_) => info!("Screenshot saved to {}", path.display()), + Err(e) => error!("Cannot save screenshot, IO error: {e}"), + } + + #[cfg(target_arch = "wasm32")] + { + let save_screenshot = || { + use image::EncodableLayout; + use wasm_bindgen::{JsCast, JsValue}; + + let mut image_buffer = std::io::Cursor::new(Vec::new()); + img.write_to(&mut image_buffer, format) + .map_err(|e| JsValue::from_str(&format!("{e}")))?; + // SAFETY: `image_buffer` only exist in this closure, and is not used after this line + let parts = js_sys::Array::of1(&unsafe { + js_sys::Uint8Array::view(image_buffer.into_inner().as_bytes()) + .into() + }); + let blob = web_sys::Blob::new_with_u8_array_sequence(&parts)?; + let url = web_sys::Url::create_object_url_with_blob(&blob)?; + let window = web_sys::window().unwrap(); + let document = window.document().unwrap(); + let link = document.create_element("a")?; + link.set_attribute("href", &url)?; + link.set_attribute( + "download", + path.file_name() + .and_then(|filename| filename.to_str()) + .ok_or_else(|| JsValue::from_str("Invalid filename"))?, + )?; + let html_element = link.dyn_into::()?; + html_element.click(); + web_sys::Url::revoke_object_url(&url)?; + Ok::<(), JsValue>(()) + }; + + match (save_screenshot)() { + Ok(_) => info!("Screenshot saved to {}", path.display()), + Err(e) => error!("Cannot save screenshot, error: {e:?}"), + }; + } + } + Err(e) => error!("Cannot save screenshot, requested format not recognized: 
{e}"), + }, + Err(e) => error!("Cannot save screenshot, screen format cannot be understood: {e}"), + } + } +} + +fn clear_screenshots(mut commands: Commands, screenshots: Query>) { + for entity in screenshots.iter() { + commands.entity(entity).despawn(); + } +} + +pub fn trigger_screenshots( + mut commands: Commands, + captured_screenshots: ResMut, +) { + let captured_screenshots = captured_screenshots.lock().unwrap(); + while let Ok((entity, image)) = captured_screenshots.try_recv() { + commands.entity(entity).insert(Captured); + commands.trigger(ScreenshotCaptured { image, entity }); + } +} + +fn extract_screenshots( + mut targets: ResMut, + mut main_world: ResMut, + mut system_state: Local< + Option< + SystemState<( + Commands, + Query>, + Query<(Entity, &Screenshot), Without>, + )>, + >, + >, + mut seen_targets: Local>, +) { + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (mut commands, primary_window, screenshots) = system_state.get_mut(&mut main_world); + + targets.clear(); + seen_targets.clear(); + + let primary_window = primary_window.iter().next(); + + for (entity, screenshot) in screenshots.iter() { + let render_target = screenshot.0.clone(); + let Some(render_target) = render_target.normalize(primary_window) else { + warn!( + "Unknown render target for screenshot, skipping: {:?}", + render_target + ); + continue; + }; + if seen_targets.contains(&render_target) { + warn!( + "Duplicate render target for screenshot, skipping entity {}: {:?}", + entity, render_target + ); + // If we don't despawn the entity here, it will be captured again in the next frame + commands.entity(entity).despawn(); + continue; + } + seen_targets.insert(render_target.clone()); + targets.insert(entity, render_target); + commands.entity(entity).insert(Capturing); + } + + system_state.apply(&mut main_world); +} + +fn prepare_screenshots( + targets: Res, + mut prepared: ResMut, + 
window_surfaces: Res, + render_device: Res, + screenshot_pipeline: Res, + pipeline_cache: Res, + mut pipelines: ResMut>, + images: Res>, + manual_texture_views: Res, + mut view_target_attachments: ResMut, +) { + prepared.clear(); + for (entity, target) in targets.iter() { + match target { + NormalizedRenderTarget::Window(window) => { + let window = window.entity(); + let Some(surface_data) = window_surfaces.surfaces.get(&window) else { + warn!("Unknown window for screenshot, skipping: {}", window); + continue; + }; + let format = surface_data.configuration.format.add_srgb_suffix(); + let size = Extent3d { + width: surface_data.configuration.width, + height: surface_data.configuration.height, + ..default() + }; + let (texture_view, state) = prepare_screenshot_state( + size, + format, + &render_device, + &screenshot_pipeline, + &pipeline_cache, + &mut pipelines, + ); + prepared.insert(*entity, state); + view_target_attachments.insert( + target.clone(), + OutputColorAttachment::new(texture_view.clone(), format.add_srgb_suffix()), + ); + } + NormalizedRenderTarget::Image(image) => { + let Some(gpu_image) = images.get(&image.handle) else { + warn!("Unknown image for screenshot, skipping: {:?}", image); + continue; + }; + let format = gpu_image.texture_format; + let (texture_view, state) = prepare_screenshot_state( + gpu_image.size, + format, + &render_device, + &screenshot_pipeline, + &pipeline_cache, + &mut pipelines, + ); + prepared.insert(*entity, state); + view_target_attachments.insert( + target.clone(), + OutputColorAttachment::new(texture_view.clone(), format.add_srgb_suffix()), + ); + } + NormalizedRenderTarget::TextureView(texture_view) => { + let Some(manual_texture_view) = manual_texture_views.get(texture_view) else { + warn!( + "Unknown manual texture view for screenshot, skipping: {:?}", + texture_view + ); + continue; + }; + let format = manual_texture_view.format; + let size = manual_texture_view.size.to_extents(); + let (texture_view, state) = 
prepare_screenshot_state( + size, + format, + &render_device, + &screenshot_pipeline, + &pipeline_cache, + &mut pipelines, + ); + prepared.insert(*entity, state); + view_target_attachments.insert( + target.clone(), + OutputColorAttachment::new(texture_view.clone(), format.add_srgb_suffix()), + ); + } + NormalizedRenderTarget::None { .. } => { + // Nothing to screenshot! + } + } + } +} + +fn prepare_screenshot_state( + size: Extent3d, + format: TextureFormat, + render_device: &RenderDevice, + pipeline: &ScreenshotToScreenPipeline, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedRenderPipelines, +) -> (TextureView, ScreenshotPreparedState) { + let texture = render_device.create_texture(&wgpu::TextureDescriptor { + label: Some("screenshot-capture-rendertarget"), + size, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: TextureUsages::RENDER_ATTACHMENT + | TextureUsages::COPY_SRC + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let texture_view = texture.create_view(&Default::default()); + let buffer = render_device.create_buffer(&wgpu::BufferDescriptor { + label: Some("screenshot-transfer-buffer"), + size: gpu_readback::get_aligned_size(size, format.pixel_size().unwrap_or(0) as u32) as u64, + usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let bind_group = render_device.create_bind_group( + "screenshot-to-screen-bind-group", + &pipeline.bind_group_layout, + &BindGroupEntries::single(&texture_view), + ); + let pipeline_id = pipelines.specialize(pipeline_cache, pipeline, format); + + ( + texture_view, + ScreenshotPreparedState { + texture, + buffer, + bind_group, + pipeline_id, + size, + }, + ) +} + +pub struct ScreenshotPlugin; + +impl Plugin for ScreenshotPlugin { + fn build(&self, app: &mut bevy_app::App) { + embedded_asset!(app, "screenshot.wgsl"); + + let (tx, rx) = std::sync::mpsc::channel(); + 
app.insert_resource(CapturedScreenshots(Arc::new(Mutex::new(rx)))) + .add_systems( + First, + clear_screenshots + .after(message_update_system) + .before(ApplyDeferred), + ) + .add_systems(Update, trigger_screenshots); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .insert_resource(RenderScreenshotsSender(tx)) + .init_resource::() + .init_resource::() + .init_resource::>() + .add_systems(RenderStartup, init_screenshot_to_screen_pipeline) + .add_systems(ExtractSchedule, extract_screenshots.ambiguous_with_all()) + .add_systems( + Render, + prepare_screenshots + .after(prepare_view_attachments) + .before(prepare_view_targets) + .in_set(RenderSystems::ManageViews), + ); + } +} + +#[derive(Resource)] +pub struct ScreenshotToScreenPipeline { + pub bind_group_layout: BindGroupLayout, + pub shader: Handle, +} + +pub fn init_screenshot_to_screen_pipeline( + mut commands: Commands, + render_device: Res, + asset_server: Res, +) { + let bind_group_layout = render_device.create_bind_group_layout( + "screenshot-to-screen-bgl", + &BindGroupLayoutEntries::single( + wgpu::ShaderStages::FRAGMENT, + texture_2d(wgpu::TextureSampleType::Float { filterable: false }), + ), + ); + + let shader = load_embedded_asset!(asset_server.as_ref(), "screenshot.wgsl"); + + commands.insert_resource(ScreenshotToScreenPipeline { + bind_group_layout, + shader, + }); +} + +impl SpecializedRenderPipeline for ScreenshotToScreenPipeline { + type Key = TextureFormat; + + fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor { + RenderPipelineDescriptor { + label: Some(Cow::Borrowed("screenshot-to-screen")), + layout: vec![self.bind_group_layout.clone()], + vertex: VertexState { + shader: self.shader.clone(), + ..default() + }, + primitive: wgpu::PrimitiveState { + cull_mode: Some(wgpu::Face::Back), + ..Default::default() + }, + multisample: Default::default(), + fragment: Some(FragmentState { + shader: self.shader.clone(), + targets: 
vec![Some(wgpu::ColorTargetState { + format: key, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + ..default() + }), + ..default() + } + } +} + +pub(crate) fn submit_screenshot_commands(world: &World, encoder: &mut CommandEncoder) { + let targets = world.resource::(); + let prepared = world.resource::(); + let pipelines = world.resource::(); + let gpu_images = world.resource::>(); + let windows = world.resource::(); + let manual_texture_views = world.resource::(); + + for (entity, render_target) in targets.iter() { + match render_target { + NormalizedRenderTarget::Window(window) => { + let window = window.entity(); + let Some(window) = windows.get(&window) else { + continue; + }; + let width = window.physical_width; + let height = window.physical_height; + let Some(texture_format) = window.swap_chain_texture_format else { + continue; + }; + let Some(swap_chain_texture) = window.swap_chain_texture.as_ref() else { + continue; + }; + let texture_view = swap_chain_texture.texture.create_view(&Default::default()); + render_screenshot( + encoder, + prepared, + pipelines, + entity, + width, + height, + texture_format, + &texture_view, + ); + } + NormalizedRenderTarget::Image(image) => { + let Some(gpu_image) = gpu_images.get(&image.handle) else { + warn!("Unknown image for screenshot, skipping: {:?}", image); + continue; + }; + let width = gpu_image.size.width; + let height = gpu_image.size.height; + let texture_format = gpu_image.texture_format; + let texture_view = gpu_image.texture_view.deref(); + render_screenshot( + encoder, + prepared, + pipelines, + entity, + width, + height, + texture_format, + texture_view, + ); + } + NormalizedRenderTarget::TextureView(texture_view) => { + let Some(texture_view) = manual_texture_views.get(texture_view) else { + warn!( + "Unknown manual texture view for screenshot, skipping: {:?}", + texture_view + ); + continue; + }; + let width = texture_view.size.x; + let height = texture_view.size.y; + let texture_format = 
texture_view.format; + let texture_view = texture_view.texture_view.deref(); + render_screenshot( + encoder, + prepared, + pipelines, + entity, + width, + height, + texture_format, + texture_view, + ); + } + NormalizedRenderTarget::None { .. } => { + // Nothing to screenshot! + } + }; + } +} + +fn render_screenshot( + encoder: &mut CommandEncoder, + prepared: &RenderScreenshotsPrepared, + pipelines: &PipelineCache, + entity: &Entity, + width: u32, + height: u32, + texture_format: TextureFormat, + texture_view: &wgpu::TextureView, +) { + if let Some(prepared_state) = &prepared.get(entity) { + let extent = Extent3d { + width, + height, + depth_or_array_layers: 1, + }; + encoder.copy_texture_to_buffer( + prepared_state.texture.as_image_copy(), + wgpu::TexelCopyBufferInfo { + buffer: &prepared_state.buffer, + layout: gpu_readback::layout_data(extent, texture_format), + }, + extent, + ); + + if let Some(pipeline) = pipelines.get_render_pipeline(prepared_state.pipeline_id) { + let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("screenshot_to_screen_pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: texture_view, + depth_slice: None, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + pass.set_pipeline(pipeline); + pass.set_bind_group(0, &prepared_state.bind_group, &[]); + pass.draw(0..3, 0..1); + } + } +} + +pub(crate) fn collect_screenshots(world: &mut World) { + #[cfg(feature = "trace")] + let _span = tracing::info_span!("collect_screenshots").entered(); + + let sender = world.resource::().deref().clone(); + let prepared = world.resource::(); + + for (entity, prepared) in prepared.iter() { + let entity = *entity; + let sender = sender.clone(); + let width = prepared.size.width; + let height = prepared.size.height; + let texture_format = 
prepared.texture.format(); + let Ok(pixel_size) = texture_format.pixel_size() else { + continue; + }; + let buffer = prepared.buffer.clone(); + + let finish = async move { + let (tx, rx) = async_channel::bounded(1); + let buffer_slice = buffer.slice(..); + // The polling for this map call is done every frame when the command queue is submitted. + buffer_slice.map_async(wgpu::MapMode::Read, move |result| { + let err = result.err(); + if err.is_some() { + panic!("{}", err.unwrap().to_string()); + } + tx.try_send(()).unwrap(); + }); + rx.recv().await.unwrap(); + let data = buffer_slice.get_mapped_range(); + // we immediately move the data to CPU memory to avoid holding the mapped view for long + let mut result = Vec::from(&*data); + drop(data); + + if result.len() != ((width * height) as usize * pixel_size) { + // Our buffer has been padded because we needed to align to a multiple of 256. + // We remove this padding here + let initial_row_bytes = width as usize * pixel_size; + let buffered_row_bytes = + gpu_readback::align_byte_size(width * pixel_size as u32) as usize; + + let mut take_offset = buffered_row_bytes; + let mut place_offset = initial_row_bytes; + for _ in 1..height { + result.copy_within(take_offset..take_offset + buffered_row_bytes, place_offset); + take_offset += buffered_row_bytes; + place_offset += initial_row_bytes; + } + result.truncate(initial_row_bytes * height as usize); + } + + if let Err(e) = sender.send(( + entity, + Image::new( + Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + wgpu::TextureDimension::D2, + result, + texture_format, + RenderAssetUsages::RENDER_WORLD, + ), + )) { + error!("Failed to send screenshot: {}", e); + } + }; + + AsyncComputeTaskPool::get().spawn(finish).detach(); + } +} diff --git a/crates/libmarathon/src/render/view/window/screenshot.wgsl b/crates/libmarathon/src/render/view/window/screenshot.wgsl new file mode 100644 index 0000000..2743fa1 --- /dev/null +++ 
b/crates/libmarathon/src/render/view/window/screenshot.wgsl @@ -0,0 +1,16 @@ +// This vertex shader will create a triangle that will cover the entire screen +// with minimal effort, avoiding the need for a vertex buffer etc. +@vertex +fn vs_main(@builtin(vertex_index) in_vertex_index: u32) -> @builtin(position) vec4 { + let x = f32((in_vertex_index & 1u) << 2u); + let y = f32((in_vertex_index & 2u) << 1u); + return vec4(x - 1.0, y - 1.0, 0.0, 1.0); +} + +@group(0) @binding(0) var t: texture_2d; + +@fragment +fn fs_main(@builtin(position) pos: vec4) -> @location(0) vec4 { + let coords = floor(pos.xy); + return textureLoad(t, vec2(coords), 0i); +} diff --git a/crates/libmarathon/src/sync.rs b/crates/libmarathon/src/sync.rs index a181ebc..98accd2 100644 --- a/crates/libmarathon/src/sync.rs +++ b/crates/libmarathon/src/sync.rs @@ -17,8 +17,8 @@ use serde::{ Deserialize, Serialize, }; -// Re-export the Synced derive macro -pub use sync_macros::Synced; +// TODO: Re-export the Synced derive macro (not part of bevy_render_macros) +// pub use macros::Synced; pub type NodeId = uuid::Uuid; diff --git a/crates/sync-macros/Cargo.toml b/crates/macros/Cargo.toml similarity index 90% rename from crates/sync-macros/Cargo.toml rename to crates/macros/Cargo.toml index 24d5e82..88bfd7a 100644 --- a/crates/sync-macros/Cargo.toml +++ b/crates/macros/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "sync-macros" +name = "macros" version = "0.1.0" edition.workspace = true @@ -12,6 +12,7 @@ quote = "1.0" proc-macro2 = "1.0" inventory = { workspace = true } bytes = "1.0" +bevy_macro_utils = "0.17.2" [dev-dependencies] libmarathon = { path = "../libmarathon" } diff --git a/crates/macros/src/as_bind_group.rs b/crates/macros/src/as_bind_group.rs new file mode 100644 index 0000000..2df46dd --- /dev/null +++ b/crates/macros/src/as_bind_group.rs @@ -0,0 +1,1817 @@ +use bevy_macro_utils::{get_lit_bool, get_lit_str, BevyManifest, Symbol}; +use proc_macro::TokenStream; +use proc_macro2::{Ident, Span}; 
+use quote::{quote, ToTokens}; +use syn::{ + parenthesized, + parse::{Parse, ParseStream}, + punctuated::Punctuated, + token::{Comma, DotDot}, + Data, DataStruct, Error, Fields, LitInt, LitStr, Meta, MetaList, Result, +}; + +const UNIFORM_ATTRIBUTE_NAME: Symbol = Symbol("uniform"); +const TEXTURE_ATTRIBUTE_NAME: Symbol = Symbol("texture"); +const STORAGE_TEXTURE_ATTRIBUTE_NAME: Symbol = Symbol("storage_texture"); +const SAMPLER_ATTRIBUTE_NAME: Symbol = Symbol("sampler"); +const STORAGE_ATTRIBUTE_NAME: Symbol = Symbol("storage"); +const BIND_GROUP_DATA_ATTRIBUTE_NAME: Symbol = Symbol("bind_group_data"); +const BINDLESS_ATTRIBUTE_NAME: Symbol = Symbol("bindless"); +const DATA_ATTRIBUTE_NAME: Symbol = Symbol("data"); +const BINDING_ARRAY_MODIFIER_NAME: Symbol = Symbol("binding_array"); +const LIMIT_MODIFIER_NAME: Symbol = Symbol("limit"); +const INDEX_TABLE_MODIFIER_NAME: Symbol = Symbol("index_table"); +const RANGE_MODIFIER_NAME: Symbol = Symbol("range"); +const BINDING_MODIFIER_NAME: Symbol = Symbol("binding"); + +#[derive(Copy, Clone, Debug)] +enum BindingType { + Uniform, + Texture, + StorageTexture, + Sampler, + Storage, +} + +#[derive(Clone)] +enum BindingState<'a> { + Free, + Occupied { + binding_type: BindingType, + ident: &'a Ident, + }, + OccupiedConvertedUniform, + OccupiedMergeableUniform { + uniform_fields: Vec<&'a syn::Field>, + }, +} + +enum BindlessSlabResourceLimitAttr { + Auto, + Limit(LitInt), +} + +// The `bindless(index_table(range(M..N)))` attribute. 
+struct BindlessIndexTableRangeAttr { + start: LitInt, + end: LitInt, +} + +pub fn derive_as_bind_group(ast: syn::DeriveInput) -> Result { + let manifest = BevyManifest::shared(); + let render_path = crate::bevy_render_path(); + let image_path = manifest.get_path("bevy_image"); + let asset_path = manifest.get_path("bevy_asset"); + let ecs_path = manifest.get_path("bevy_ecs"); + + let mut binding_states: Vec = Vec::new(); + let mut binding_impls = Vec::new(); + let mut bindless_binding_layouts = Vec::new(); + let mut non_bindless_binding_layouts = Vec::new(); + let mut bindless_resource_types = Vec::new(); + let mut bindless_buffer_descriptors = Vec::new(); + let mut attr_prepared_data_ident = None; + // After the first attribute pass, this will be `None` if the object isn't + // bindless and `Some` if it is. + let mut attr_bindless_count = None; + let mut attr_bindless_index_table_range = None; + let mut attr_bindless_index_table_binding = None; + + // `actual_bindless_slot_count` holds the actual number of bindless slots + // per bind group, taking into account whether the current platform supports + // bindless resources. + let actual_bindless_slot_count = Ident::new("actual_bindless_slot_count", Span::call_site()); + let bind_group_layout_entries = Ident::new("bind_group_layout_entries", Span::call_site()); + + // The `BufferBindingType` and corresponding `BufferUsages` used for + // uniforms. We need this because bindless uniforms don't exist, so in + // bindless mode we must promote uniforms to storage buffers. + let uniform_binding_type = Ident::new("uniform_binding_type", Span::call_site()); + let uniform_buffer_usages = Ident::new("uniform_buffer_usages", Span::call_site()); + + // Read struct-level attributes, first pass. 
+ for attr in &ast.attrs { + if let Some(attr_ident) = attr.path().get_ident() { + if attr_ident == BIND_GROUP_DATA_ATTRIBUTE_NAME { + if let Ok(prepared_data_ident) = + attr.parse_args_with(|input: ParseStream| input.parse::()) + { + attr_prepared_data_ident = Some(prepared_data_ident); + } + } else if attr_ident == BINDLESS_ATTRIBUTE_NAME { + attr_bindless_count = Some(BindlessSlabResourceLimitAttr::Auto); + if let Meta::List(_) = attr.meta { + // Parse bindless features. + attr.parse_nested_meta(|submeta| { + if submeta.path.is_ident(&LIMIT_MODIFIER_NAME) { + let content; + parenthesized!(content in submeta.input); + let lit: LitInt = content.parse()?; + + attr_bindless_count = Some(BindlessSlabResourceLimitAttr::Limit(lit)); + return Ok(()); + } + + if submeta.path.is_ident(&INDEX_TABLE_MODIFIER_NAME) { + submeta.parse_nested_meta(|subsubmeta| { + if subsubmeta.path.is_ident(&RANGE_MODIFIER_NAME) { + let content; + parenthesized!(content in subsubmeta.input); + let start: LitInt = content.parse()?; + content.parse::()?; + let end: LitInt = content.parse()?; + attr_bindless_index_table_range = + Some(BindlessIndexTableRangeAttr { start, end }); + return Ok(()); + } + + if subsubmeta.path.is_ident(&BINDING_MODIFIER_NAME) { + let content; + parenthesized!(content in subsubmeta.input); + let lit: LitInt = content.parse()?; + + attr_bindless_index_table_binding = Some(lit); + return Ok(()); + } + + Err(Error::new_spanned( + attr, + "Expected `range(M..N)` or `binding(N)`", + )) + })?; + return Ok(()); + } + + Err(Error::new_spanned( + attr, + "Expected `limit` or `index_table`", + )) + })?; + } + } + } + } + + // Read struct-level attributes, second pass. 
+ for attr in &ast.attrs { + if let Some(attr_ident) = attr.path().get_ident() + && (attr_ident == UNIFORM_ATTRIBUTE_NAME || attr_ident == DATA_ATTRIBUTE_NAME) + { + let UniformBindingAttr { + binding_type, + binding_index, + converted_shader_type, + binding_array: binding_array_binding, + } = get_uniform_binding_attr(attr)?; + match binding_type { + UniformBindingAttrType::Uniform => { + binding_impls.push(quote! {{ + use #render_path::render_resource::AsBindGroupShaderType; + let mut buffer = #render_path::render_resource::encase::UniformBuffer::new(Vec::new()); + let converted: #converted_shader_type = self.as_bind_group_shader_type(&images); + buffer.write(&converted).unwrap(); + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Buffer(render_device.create_buffer_with_data( + &#render_path::render_resource::BufferInitDescriptor { + label: None, + usage: #uniform_buffer_usages, + contents: buffer.as_ref(), + }, + )) + ) + }}); + + match (&binding_array_binding, &attr_bindless_count) { + (&None, &Some(_)) => { + return Err(Error::new_spanned( + attr, + "Must specify `binding_array(...)` with `#[uniform]` if the \ + object is bindless", + )); + } + (&Some(_), &None) => { + return Err(Error::new_spanned( + attr, + "`binding_array(...)` with `#[uniform]` requires the object to \ + be bindless", + )); + } + _ => {} + } + + let binding_array_binding = binding_array_binding.unwrap_or(0); + bindless_binding_layouts.push(quote! 
{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_array_binding, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #uniform_binding_type, + has_dynamic_offset: false, + min_binding_size: Some(<#converted_shader_type as #render_path::render_resource::ShaderType>::min_size()), + }, + count: #actual_bindless_slot_count, + } + ); + }); + + add_bindless_resource_type( + &render_path, + &mut bindless_resource_types, + binding_index, + quote! { #render_path::render_resource::BindlessResourceType::Buffer }, + ); + } + + UniformBindingAttrType::Data => { + binding_impls.push(quote! {{ + use #render_path::render_resource::AsBindGroupShaderType; + use #render_path::render_resource::encase::{ShaderType, internal::WriteInto}; + let mut buffer: Vec = Vec::new(); + let converted: #converted_shader_type = self.as_bind_group_shader_type(&images); + converted.write_into( + &mut #render_path::render_resource::encase::internal::Writer::new( + &converted, + &mut buffer, + 0, + ).unwrap(), + ); + let min_size = <#converted_shader_type as #render_path::render_resource::ShaderType>::min_size().get() as usize; + while buffer.len() < min_size { + buffer.push(0); + } + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Data( + #render_path::render_resource::OwnedData(buffer) + ) + ) + }}); + + let binding_array_binding = binding_array_binding.unwrap_or(0); + bindless_binding_layouts.push(quote! 
{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_array_binding, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #uniform_binding_type, + has_dynamic_offset: false, + min_binding_size: Some(<#converted_shader_type as #render_path::render_resource::ShaderType>::min_size()), + }, + count: None, + } + ); + }); + + add_bindless_resource_type( + &render_path, + &mut bindless_resource_types, + binding_index, + quote! { #render_path::render_resource::BindlessResourceType::DataBuffer }, + ); + } + } + + // Push the non-bindless binding layout. + + non_bindless_binding_layouts.push(quote!{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #uniform_binding_type, + has_dynamic_offset: false, + min_binding_size: Some(<#converted_shader_type as #render_path::render_resource::ShaderType>::min_size()), + }, + count: None, + } + ); + }); + + bindless_buffer_descriptors.push(quote! { + #render_path::render_resource::BindlessBufferDescriptor { + // Note that, because this is bindless, *binding + // index* here refers to the index in the + // bindless index table (`bindless_index`), and + // the actual binding number is the *binding + // array binding*. 
+ binding_number: #render_path::render_resource::BindingNumber( + #binding_array_binding + ), + bindless_index: + #render_path::render_resource::BindlessIndex(#binding_index), + size: Some( + < + #converted_shader_type as + #render_path::render_resource::ShaderType + >::min_size().get() as usize + ), + } + }); + + let required_len = binding_index as usize + 1; + if required_len > binding_states.len() { + binding_states.resize(required_len, BindingState::Free); + } + binding_states[binding_index as usize] = BindingState::OccupiedConvertedUniform; + } + } + + let fields = match &ast.data { + Data::Struct(DataStruct { + fields: Fields::Named(fields), + .. + }) => &fields.named, + _ => { + return Err(Error::new_spanned( + ast, + "Expected a struct with named fields", + )); + } + }; + + // Count the number of sampler fields needed. We might have to disable + // bindless if bindless arrays take the GPU over the maximum number of + // samplers. + let mut sampler_binding_count: u32 = 0; + + // Read field-level attributes + for field in fields { + // Search ahead for texture attributes so we can use them with any + // corresponding sampler attribute. 
+ let mut tex_attrs = None; + for attr in &field.attrs { + let Some(attr_ident) = attr.path().get_ident() else { + continue; + }; + if attr_ident == TEXTURE_ATTRIBUTE_NAME { + let (_binding_index, nested_meta_items) = get_binding_nested_attr(attr)?; + tex_attrs = Some(get_texture_attrs(nested_meta_items)?); + } + } + + for attr in &field.attrs { + let Some(attr_ident) = attr.path().get_ident() else { + continue; + }; + + let binding_type = if attr_ident == UNIFORM_ATTRIBUTE_NAME { + BindingType::Uniform + } else if attr_ident == TEXTURE_ATTRIBUTE_NAME { + BindingType::Texture + } else if attr_ident == STORAGE_TEXTURE_ATTRIBUTE_NAME { + BindingType::StorageTexture + } else if attr_ident == SAMPLER_ATTRIBUTE_NAME { + BindingType::Sampler + } else if attr_ident == STORAGE_ATTRIBUTE_NAME { + BindingType::Storage + } else { + continue; + }; + + let (binding_index, nested_meta_items) = get_binding_nested_attr(attr)?; + + let field_name = field.ident.as_ref().unwrap(); + let required_len = binding_index as usize + 1; + if required_len > binding_states.len() { + binding_states.resize(required_len, BindingState::Free); + } + + match &mut binding_states[binding_index as usize] { + value @ BindingState::Free => { + *value = match binding_type { + BindingType::Uniform => BindingState::OccupiedMergeableUniform { + uniform_fields: vec![field], + }, + _ => { + // only populate bind group entries for non-uniforms + // uniform entries are deferred until the end + BindingState::Occupied { + binding_type, + ident: field_name, + } + } + } + } + BindingState::Occupied { + binding_type, + ident: occupied_ident, + } => { + return Err(Error::new_spanned( + attr, + format!("The '{field_name}' field cannot be assigned to binding {binding_index} because it is already occupied by the field '{occupied_ident}' of type {binding_type:?}.") + )); + } + BindingState::OccupiedConvertedUniform => { + return Err(Error::new_spanned( + attr, + format!("The '{field_name}' field cannot be assigned to 
binding {binding_index} because it is already occupied by a struct-level uniform binding at the same index.") + )); + } + BindingState::OccupiedMergeableUniform { uniform_fields } => match binding_type { + BindingType::Uniform => { + uniform_fields.push(field); + } + _ => { + return Err(Error::new_spanned( + attr, + format!("The '{field_name}' field cannot be assigned to binding {binding_index} because it is already occupied by a {:?}.", BindingType::Uniform) + )); + } + }, + } + + match binding_type { + BindingType::Uniform => { + if attr_bindless_count.is_some() { + return Err(Error::new_spanned( + attr, + "Only structure-level `#[uniform]` attributes are supported in \ + bindless mode", + )); + } + + // uniform codegen is deferred to account for combined uniform bindings + } + + BindingType::Storage => { + let StorageAttrs { + visibility, + binding_array: binding_array_binding, + read_only, + buffer, + } = get_storage_binding_attr(nested_meta_items)?; + let visibility = + visibility.hygienic_quote("e! { #render_path::render_resource }); + + let field_name = field.ident.as_ref().unwrap(); + + if buffer { + binding_impls.push(quote! { + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Buffer({ + self.#field_name.clone() + }) + ) + }); + } else { + binding_impls.push(quote! { + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Buffer({ + let handle: &#asset_path::Handle<#render_path::storage::ShaderStorageBuffer> = (&self.#field_name); + storage_buffers.get(handle).ok_or_else(|| #render_path::render_resource::AsBindGroupError::RetryNextUpdate)?.buffer.clone() + }) + ) + }); + } + + non_bindless_binding_layouts.push(quote! 
{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #visibility, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #render_path::render_resource::BufferBindingType::Storage { read_only: #read_only }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: #actual_bindless_slot_count, + } + ); + }); + + if let Some(binding_array_binding) = binding_array_binding { + // Add the storage buffer to the `BindlessResourceType` list + // in the bindless descriptor. + let bindless_resource_type = quote! { + #render_path::render_resource::BindlessResourceType::Buffer + }; + add_bindless_resource_type( + &render_path, + &mut bindless_resource_types, + binding_index, + bindless_resource_type, + ); + + // Push the buffer descriptor. + bindless_buffer_descriptors.push(quote! { + #render_path::render_resource::BindlessBufferDescriptor { + // Note that, because this is bindless, *binding + // index* here refers to the index in the bindless + // index table (`bindless_index`), and the actual + // binding number is the *binding array binding*. + binding_number: #render_path::render_resource::BindingNumber( + #binding_array_binding + ), + bindless_index: + #render_path::render_resource::BindlessIndex(#binding_index), + size: None, + } + }); + + // Declare the binding array. 
+ bindless_binding_layouts.push(quote!{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_array_binding, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #render_path::render_resource::BufferBindingType::Storage { + read_only: #read_only + }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: #actual_bindless_slot_count, + } + ); + }); + } + } + + BindingType::StorageTexture => { + if attr_bindless_count.is_some() { + return Err(Error::new_spanned( + attr, + "Storage textures are unsupported in bindless mode", + )); + } + + let StorageTextureAttrs { + dimension, + image_format, + access, + visibility, + } = get_storage_texture_binding_attr(nested_meta_items)?; + + let visibility = + visibility.hygienic_quote("e! { #render_path::render_resource }); + + let fallback_image = get_fallback_image(&render_path, dimension); + + // insert fallible texture-based entries at 0 so that if we fail here, we exit before allocating any buffers + binding_impls.insert(0, quote! { + ( #binding_index, + #render_path::render_resource::OwnedBindingResource::TextureView( + #render_path::render_resource::#dimension, + { + let handle: Option<&#asset_path::Handle<#image_path::Image>> = (&self.#field_name).into(); + if let Some(handle) = handle { + images.get(handle).ok_or_else(|| #render_path::render_resource::AsBindGroupError::RetryNextUpdate)?.texture_view.clone() + } else { + #fallback_image.texture_view.clone() + } + } + ) + ) + }); + + non_bindless_binding_layouts.push(quote! 
{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #visibility, + ty: #render_path::render_resource::BindingType::StorageTexture { + access: #render_path::render_resource::StorageTextureAccess::#access, + format: #render_path::render_resource::TextureFormat::#image_format, + view_dimension: #render_path::render_resource::#dimension, + }, + count: #actual_bindless_slot_count, + } + ); + }); + } + + BindingType::Texture => { + let TextureAttrs { + dimension, + sample_type, + multisampled, + visibility, + } = tex_attrs.as_ref().unwrap(); + + let visibility = + visibility.hygienic_quote("e! { #render_path::render_resource }); + + let fallback_image = get_fallback_image(&render_path, *dimension); + + // insert fallible texture-based entries at 0 so that if we fail here, we exit before allocating any buffers + binding_impls.insert(0, quote! { + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::TextureView( + #render_path::render_resource::#dimension, + { + let handle: Option<&#asset_path::Handle<#image_path::Image>> = (&self.#field_name).into(); + if let Some(handle) = handle { + images.get(handle).ok_or_else(|| #render_path::render_resource::AsBindGroupError::RetryNextUpdate)?.texture_view.clone() + } else { + #fallback_image.texture_view.clone() + } + } + ) + ) + }); + + sampler_binding_count += 1; + + non_bindless_binding_layouts.push(quote! { + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #visibility, + ty: #render_path::render_resource::BindingType::Texture { + multisampled: #multisampled, + sample_type: #render_path::render_resource::#sample_type, + view_dimension: #render_path::render_resource::#dimension, + }, + count: #actual_bindless_slot_count, + } + ); + }); + + let bindless_resource_type = match *dimension { + BindingTextureDimension::D1 => { + quote! 
{ + #render_path::render_resource::BindlessResourceType::Texture1d + } + } + BindingTextureDimension::D2 => { + quote! { + #render_path::render_resource::BindlessResourceType::Texture2d + } + } + BindingTextureDimension::D2Array => { + quote! { + #render_path::render_resource::BindlessResourceType::Texture2dArray + } + } + BindingTextureDimension::Cube => { + quote! { + #render_path::render_resource::BindlessResourceType::TextureCube + } + } + BindingTextureDimension::CubeArray => { + quote! { + #render_path::render_resource::BindlessResourceType::TextureCubeArray + } + } + BindingTextureDimension::D3 => { + quote! { + #render_path::render_resource::BindlessResourceType::Texture3d + } + } + }; + + // Add the texture to the `BindlessResourceType` list in the + // bindless descriptor. + add_bindless_resource_type( + &render_path, + &mut bindless_resource_types, + binding_index, + bindless_resource_type, + ); + } + + BindingType::Sampler => { + let SamplerAttrs { + sampler_binding_type, + visibility, + .. + } = get_sampler_attrs(nested_meta_items)?; + let TextureAttrs { dimension, .. } = tex_attrs + .as_ref() + .expect("sampler attribute must have matching texture attribute"); + + let visibility = + visibility.hygienic_quote("e! 
{ #render_path::render_resource }); + + let fallback_image = get_fallback_image(&render_path, *dimension); + + let expected_samplers = match sampler_binding_type { + SamplerBindingType::Filtering => { + quote!( [#render_path::render_resource::TextureSampleType::Float { filterable: true }] ) + } + SamplerBindingType::NonFiltering => quote!([ + #render_path::render_resource::TextureSampleType::Float { filterable: false }, + #render_path::render_resource::TextureSampleType::Sint, + #render_path::render_resource::TextureSampleType::Uint, + ]), + SamplerBindingType::Comparison => { + quote!( [#render_path::render_resource::TextureSampleType::Depth] ) + } + }; + + // insert fallible texture-based entries at 0 so that if we fail here, we exit before allocating any buffers + binding_impls.insert(0, quote! { + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Sampler( + // TODO: Support other types. + #render_path::render_resource::WgpuSamplerBindingType::Filtering, + { + let handle: Option<&#asset_path::Handle<#image_path::Image>> = (&self.#field_name).into(); + if let Some(handle) = handle { + let image = images.get(handle).ok_or_else(|| #render_path::render_resource::AsBindGroupError::RetryNextUpdate)?; + + let Some(sample_type) = image.texture_format.sample_type(None, Some(render_device.features())) else { + return Err(#render_path::render_resource::AsBindGroupError::InvalidSamplerType( + #binding_index, + "None".to_string(), + format!("{:?}", #expected_samplers), + )); + }; + + let valid = #expected_samplers.contains(&sample_type); + + if !valid { + return Err(#render_path::render_resource::AsBindGroupError::InvalidSamplerType( + #binding_index, + format!("{:?}", sample_type), + format!("{:?}", #expected_samplers), + )); + } + image.sampler.clone() + } else { + #fallback_image.sampler.clone() + } + }) + ) + }); + + sampler_binding_count += 1; + + non_bindless_binding_layouts.push(quote!{ + #bind_group_layout_entries.push( + 
#render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #visibility, + ty: #render_path::render_resource::BindingType::Sampler(#render_path::render_resource::#sampler_binding_type), + count: #actual_bindless_slot_count, + } + ); + }); + + // Add the sampler to the `BindlessResourceType` list in the + // bindless descriptor. + // + // TODO: Support other types of samplers. + add_bindless_resource_type( + &render_path, + &mut bindless_resource_types, + binding_index, + quote! { + #render_path::render_resource::BindlessResourceType::SamplerFiltering + }, + ); + } + } + } + } + + // Produce impls for fields with uniform bindings + let struct_name = &ast.ident; + let struct_name_literal = struct_name.to_string(); + let struct_name_literal = struct_name_literal.as_str(); + let mut field_struct_impls = Vec::new(); + + let uniform_binding_type_declarations = match attr_bindless_count { + Some(_) => { + quote! { + let (#uniform_binding_type, #uniform_buffer_usages) = + if Self::bindless_supported(render_device) && !force_no_bindless { + ( + #render_path::render_resource::BufferBindingType::Storage { read_only: true }, + #render_path::render_resource::BufferUsages::STORAGE, + ) + } else { + ( + #render_path::render_resource::BufferBindingType::Uniform, + #render_path::render_resource::BufferUsages::UNIFORM, + ) + }; + } + } + None => { + quote! 
{ + let (#uniform_binding_type, #uniform_buffer_usages) = ( + #render_path::render_resource::BufferBindingType::Uniform, + #render_path::render_resource::BufferUsages::UNIFORM, + ); + } + } + }; + + for (binding_index, binding_state) in binding_states.iter().enumerate() { + let binding_index = binding_index as u32; + if let BindingState::OccupiedMergeableUniform { uniform_fields } = binding_state { + // single field uniform bindings for a given index can use a straightforward binding + if uniform_fields.len() == 1 { + let field = &uniform_fields[0]; + let field_name = field.ident.as_ref().unwrap(); + let field_ty = &field.ty; + binding_impls.push(quote! {{ + let mut buffer = #render_path::render_resource::encase::UniformBuffer::new(Vec::new()); + buffer.write(&self.#field_name).unwrap(); + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Buffer(render_device.create_buffer_with_data( + &#render_path::render_resource::BufferInitDescriptor { + label: None, + usage: #uniform_buffer_usages, + contents: buffer.as_ref(), + }, + )) + ) + }}); + + non_bindless_binding_layouts.push(quote!{ + #bind_group_layout_entries.push( + #render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #uniform_binding_type, + has_dynamic_offset: false, + min_binding_size: Some(<#field_ty as #render_path::render_resource::ShaderType>::min_size()), + }, + count: #actual_bindless_slot_count, + } + ); + }); + // multi-field uniform bindings for a given index require an intermediate struct to derive ShaderType + } else { + let uniform_struct_name = Ident::new( + &format!("_{struct_name}AsBindGroupUniformStructBindGroup{binding_index}"), + Span::call_site(), + ); + + let field_name = uniform_fields.iter().map(|f| 
f.ident.as_ref().unwrap()); + let field_type = uniform_fields.iter().map(|f| &f.ty); + field_struct_impls.push(quote! { + #[derive(#render_path::render_resource::ShaderType)] + struct #uniform_struct_name<'a> { + #(#field_name: &'a #field_type,)* + } + }); + + let field_name = uniform_fields.iter().map(|f| f.ident.as_ref().unwrap()); + binding_impls.push(quote! {{ + let mut buffer = #render_path::render_resource::encase::UniformBuffer::new(Vec::new()); + buffer.write(&#uniform_struct_name { + #(#field_name: &self.#field_name,)* + }).unwrap(); + ( + #binding_index, + #render_path::render_resource::OwnedBindingResource::Buffer(render_device.create_buffer_with_data( + &#render_path::render_resource::BufferInitDescriptor { + label: None, + usage: #uniform_buffer_usages, + contents: buffer.as_ref(), + }, + )) + ) + }}); + + non_bindless_binding_layouts.push(quote!{ + #bind_group_layout_entries.push(#render_path::render_resource::BindGroupLayoutEntry { + binding: #binding_index, + visibility: #render_path::render_resource::ShaderStages::FRAGMENT | #render_path::render_resource::ShaderStages::VERTEX | #render_path::render_resource::ShaderStages::COMPUTE, + ty: #render_path::render_resource::BindingType::Buffer { + ty: #uniform_binding_type, + has_dynamic_offset: false, + min_binding_size: Some(<#uniform_struct_name as #render_path::render_resource::ShaderType>::min_size()), + }, + count: #actual_bindless_slot_count, + }); + }); + } + } + } + + let generics = ast.generics; + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let (prepared_data, get_prepared_data) = if let Some(prepared) = attr_prepared_data_ident { + let get_prepared_data = quote! { self.into() }; + (quote! {#prepared}, get_prepared_data) + } else { + let prepared_data = quote! { () }; + (prepared_data.clone(), prepared_data) + }; + + // Calculate the number of samplers that we need, so that we don't go over + // the limit on certain platforms. 
See + // https://github.com/bevyengine/bevy/issues/16988. + let bindless_count_syntax = match attr_bindless_count { + Some(BindlessSlabResourceLimitAttr::Auto) => { + quote! { #render_path::render_resource::AUTO_BINDLESS_SLAB_RESOURCE_LIMIT } + } + Some(BindlessSlabResourceLimitAttr::Limit(ref count)) => { + quote! { #count } + } + None => quote! { 0 }, + }; + + // Calculate the actual bindless index table range, taking the + // `#[bindless(index_table(range(M..N)))]` attribute into account. + let bindless_index_table_range = match attr_bindless_index_table_range { + None => { + let resource_count = bindless_resource_types.len() as u32; + quote! { + #render_path::render_resource::BindlessIndex(0).. + #render_path::render_resource::BindlessIndex(#resource_count) + } + } + Some(BindlessIndexTableRangeAttr { start, end }) => { + quote! { + #render_path::render_resource::BindlessIndex(#start).. + #render_path::render_resource::BindlessIndex(#end) + } + } + }; + + // Calculate the actual binding number of the bindless index table, taking + // the `#[bindless(index_table(binding(B)))]` into account. + let bindless_index_table_binding_number = match attr_bindless_index_table_binding { + None => quote! { #render_path::render_resource::BindingNumber(0) }, + Some(binding_number) => { + quote! { #render_path::render_resource::BindingNumber(#binding_number) } + } + }; + + // Calculate the actual number of bindless slots, taking hardware + // limitations into account. + let (bindless_slot_count, actual_bindless_slot_count_declaration, bindless_descriptor_syntax) = + match attr_bindless_count { + Some(ref bindless_count) => { + let bindless_supported_syntax = quote! 
{ + fn bindless_supported( + render_device: &#render_path::renderer::RenderDevice + ) -> bool { + render_device.features().contains( + #render_path::settings::WgpuFeatures::BUFFER_BINDING_ARRAY | + #render_path::settings::WgpuFeatures::TEXTURE_BINDING_ARRAY + ) && + render_device.limits().max_storage_buffers_per_shader_stage > 0 && + render_device.limits().max_samplers_per_shader_stage >= + (#sampler_binding_count * #bindless_count_syntax) + } + }; + let actual_bindless_slot_count_declaration = quote! { + let #actual_bindless_slot_count = if Self::bindless_supported(render_device) && + !force_no_bindless { + ::core::num::NonZeroU32::new(#bindless_count_syntax) + } else { + None + }; + }; + let bindless_slot_count_declaration = match bindless_count { + BindlessSlabResourceLimitAttr::Auto => { + quote! { + fn bindless_slot_count() -> Option< + #render_path::render_resource::BindlessSlabResourceLimit + > { + Some(#render_path::render_resource::BindlessSlabResourceLimit::Auto) + } + } + } + BindlessSlabResourceLimitAttr::Limit(lit) => { + quote! { + fn bindless_slot_count() -> Option< + #render_path::render_resource::BindlessSlabResourceLimit + > { + Some(#render_path::render_resource::BindlessSlabResourceLimit::Custom(#lit)) + } + } + } + }; + + let bindless_buffer_descriptor_count = bindless_buffer_descriptors.len(); + + // We use `LazyLock` so that we can call `min_size`, which isn't + // a `const fn`. + let bindless_descriptor_syntax = quote! 
{ + static RESOURCES: &[#render_path::render_resource::BindlessResourceType] = &[ + #(#bindless_resource_types),* + ]; + static BUFFERS: ::std::sync::LazyLock<[ + #render_path::render_resource::BindlessBufferDescriptor; + #bindless_buffer_descriptor_count + ]> = ::std::sync::LazyLock::new(|| { + [#(#bindless_buffer_descriptors),*] + }); + static INDEX_TABLES: &[ + #render_path::render_resource::BindlessIndexTableDescriptor + ] = &[ + #render_path::render_resource::BindlessIndexTableDescriptor { + indices: #bindless_index_table_range, + binding_number: #bindless_index_table_binding_number, + } + ]; + Some(#render_path::render_resource::BindlessDescriptor { + resources: ::std::borrow::Cow::Borrowed(RESOURCES), + buffers: ::std::borrow::Cow::Borrowed(&*BUFFERS), + index_tables: ::std::borrow::Cow::Borrowed(&*INDEX_TABLES), + }) + }; + + ( + quote! { + #bindless_slot_count_declaration + #bindless_supported_syntax + }, + actual_bindless_slot_count_declaration, + bindless_descriptor_syntax, + ) + } + None => ( + TokenStream::new().into(), + quote! { let #actual_bindless_slot_count: Option<::core::num::NonZeroU32> = None; }, + quote! { None }, + ), + }; + + Ok(TokenStream::from(quote! 
{ + #(#field_struct_impls)* + + impl #impl_generics #render_path::render_resource::AsBindGroup for #struct_name #ty_generics #where_clause { + type Data = #prepared_data; + + type Param = ( + #ecs_path::system::lifetimeless::SRes<#render_path::render_asset::RenderAssets<#render_path::texture::GpuImage>>, + #ecs_path::system::lifetimeless::SRes<#render_path::texture::FallbackImage>, + #ecs_path::system::lifetimeless::SRes<#render_path::render_asset::RenderAssets<#render_path::storage::GpuShaderStorageBuffer>>, + ); + + #bindless_slot_count + + fn label() -> Option<&'static str> { + Some(#struct_name_literal) + } + + fn unprepared_bind_group( + &self, + layout: &#render_path::render_resource::BindGroupLayout, + render_device: &#render_path::renderer::RenderDevice, + (images, fallback_image, storage_buffers): &mut #ecs_path::system::SystemParamItem<'_, '_, Self::Param>, + force_no_bindless: bool, + ) -> Result<#render_path::render_resource::UnpreparedBindGroup, #render_path::render_resource::AsBindGroupError> { + #uniform_binding_type_declarations + + let bindings = #render_path::render_resource::BindingResources(vec![#(#binding_impls,)*]); + + Ok(#render_path::render_resource::UnpreparedBindGroup { + bindings, + }) + } + + #[allow(clippy::unused_unit)] + fn bind_group_data(&self) -> Self::Data { + #get_prepared_data + } + + fn bind_group_layout_entries( + render_device: &#render_path::renderer::RenderDevice, + force_no_bindless: bool + ) -> Vec<#render_path::render_resource::BindGroupLayoutEntry> { + #actual_bindless_slot_count_declaration + #uniform_binding_type_declarations + + let mut #bind_group_layout_entries = Vec::new(); + match #actual_bindless_slot_count { + Some(bindless_slot_count) => { + let bindless_index_table_range = #bindless_index_table_range; + #bind_group_layout_entries.extend( + #render_path::render_resource::create_bindless_bind_group_layout_entries( + bindless_index_table_range.end.0 - + bindless_index_table_range.start.0, + 
bindless_slot_count.into(), + #bindless_index_table_binding_number, + ).into_iter() + ); + #(#bindless_binding_layouts)*; + } + None => { + #(#non_bindless_binding_layouts)*; + } + }; + #bind_group_layout_entries + } + + fn bindless_descriptor() -> Option<#render_path::render_resource::BindlessDescriptor> { + #bindless_descriptor_syntax + } + } + })) +} + +/// Adds a bindless resource type to the `BindlessResourceType` array in the +/// bindless descriptor we're building up. +/// +/// See the `bevy_render::render_resource::bindless::BindlessResourceType` +/// documentation for more information. +fn add_bindless_resource_type( + render_path: &syn::Path, + bindless_resource_types: &mut Vec, + binding_index: u32, + bindless_resource_type: proc_macro2::TokenStream, +) { + // If we need to grow the array, pad the unused fields with + // `BindlessResourceType::None`. + if bindless_resource_types.len() < (binding_index as usize + 1) { + bindless_resource_types.resize_with(binding_index as usize + 1, || { + quote! { #render_path::render_resource::BindlessResourceType::None } + }); + } + + // Assign the `BindlessResourceType`. + bindless_resource_types[binding_index as usize] = bindless_resource_type; +} + +fn get_fallback_image( + render_path: &syn::Path, + dimension: BindingTextureDimension, +) -> proc_macro2::TokenStream { + quote! { + match #render_path::render_resource::#dimension { + #render_path::render_resource::TextureViewDimension::D1 => &fallback_image.d1, + #render_path::render_resource::TextureViewDimension::D2 => &fallback_image.d2, + #render_path::render_resource::TextureViewDimension::D2Array => &fallback_image.d2_array, + #render_path::render_resource::TextureViewDimension::Cube => &fallback_image.cube, + #render_path::render_resource::TextureViewDimension::CubeArray => &fallback_image.cube_array, + #render_path::render_resource::TextureViewDimension::D3 => &fallback_image.d3, + } + } +} + +/// Represents the arguments for the `uniform` binding attribute. 
+/// +/// If parsed, represents an attribute +/// like `#[uniform(LitInt, Ident)]` +struct UniformBindingMeta { + lit_int: LitInt, + ident: Ident, + binding_array: Option, +} + +/// The parsed structure-level `#[uniform]` or `#[data]` attribute. +/// +/// The corresponding syntax is `#[uniform(BINDING_INDEX, CONVERTED_SHADER_TYPE, +/// binding_array(BINDING_ARRAY)]`, optionally replacing `uniform` with `data`. +struct UniformBindingAttr { + /// Whether the declaration is `#[uniform]` or `#[data]`. + binding_type: UniformBindingAttrType, + /// The binding index. + binding_index: u32, + /// The uniform data type. + converted_shader_type: Ident, + /// The binding number of the binding array, if this is a bindless material. + binding_array: Option, +} + +/// Whether a structure-level shader type declaration is `#[uniform]` or +/// `#[data]`. +enum UniformBindingAttrType { + /// `#[uniform]`: i.e. in bindless mode, we need a separate buffer per data + /// instance. + Uniform, + /// `#[data]`: i.e. in bindless mode, we concatenate all instance data into + /// a single buffer. + Data, +} + +/// Represents the arguments for any general binding attribute. +/// +/// If parsed, represents an attribute +/// like `#[foo(LitInt, ...)]` where the rest is optional [`Meta`]. +enum BindingMeta { + IndexOnly(LitInt), + IndexWithOptions(BindingIndexOptions), +} + +/// Represents the arguments for an attribute with a list of arguments. +/// +/// This represents, for example, `#[texture(0, dimension = "2d_array")]`. 
+struct BindingIndexOptions { + lit_int: LitInt, + _comma: Comma, + meta_list: Punctuated, +} + +impl Parse for BindingMeta { + fn parse(input: ParseStream) -> Result { + if input.peek2(Comma) { + input.parse().map(Self::IndexWithOptions) + } else { + input.parse().map(Self::IndexOnly) + } + } +} + +impl Parse for BindingIndexOptions { + fn parse(input: ParseStream) -> Result { + Ok(Self { + lit_int: input.parse()?, + _comma: input.parse()?, + meta_list: input.parse_terminated(Meta::parse, Comma)?, + }) + } +} + +impl Parse for UniformBindingMeta { + // Parse syntax like `#[uniform(0, StandardMaterial, binding_array(10))]`. + fn parse(input: ParseStream) -> Result { + let lit_int = input.parse()?; + input.parse::()?; + let ident = input.parse()?; + + // Look for a `binding_array(BINDING_NUMBER)` declaration. + let mut binding_array: Option = None; + if input.parse::().is_ok() { + if input + .parse::()? + .get_ident() + .is_none_or(|ident| *ident != BINDING_ARRAY_MODIFIER_NAME) + { + return Err(Error::new_spanned(ident, "Expected `binding_array`")); + } + let parser; + parenthesized!(parser in input); + binding_array = Some(parser.parse()?); + } + + Ok(Self { + lit_int, + ident, + binding_array, + }) + } +} + +/// Parses a structure-level `#[uniform]` attribute (not a field-level +/// `#[uniform]` attribute). 
+fn get_uniform_binding_attr(attr: &syn::Attribute) -> Result { + let attr_ident = attr + .path() + .get_ident() + .expect("Shouldn't be here if we didn't have an attribute"); + + let uniform_binding_meta = attr.parse_args_with(UniformBindingMeta::parse)?; + + let binding_index = uniform_binding_meta.lit_int.base10_parse()?; + let ident = uniform_binding_meta.ident; + let binding_array = match uniform_binding_meta.binding_array { + None => None, + Some(binding_array) => Some(binding_array.base10_parse()?), + }; + + Ok(UniformBindingAttr { + binding_type: if attr_ident == UNIFORM_ATTRIBUTE_NAME { + UniformBindingAttrType::Uniform + } else { + UniformBindingAttrType::Data + }, + binding_index, + converted_shader_type: ident, + binding_array, + }) +} + +fn get_binding_nested_attr(attr: &syn::Attribute) -> Result<(u32, Vec)> { + let binding_meta = attr.parse_args_with(BindingMeta::parse)?; + + match binding_meta { + BindingMeta::IndexOnly(lit_int) => Ok((lit_int.base10_parse()?, Vec::new())), + BindingMeta::IndexWithOptions(BindingIndexOptions { + lit_int, + _comma: _, + meta_list, + }) => Ok((lit_int.base10_parse()?, meta_list.into_iter().collect())), + } +} + +#[derive(Default)] +enum ShaderStageVisibility { + #[default] + All, + None, + Flags(VisibilityFlags), +} + +#[derive(Default)] +struct VisibilityFlags { + vertex: bool, + fragment: bool, + compute: bool, +} + +impl ShaderStageVisibility { + fn vertex_fragment() -> Self { + Self::Flags(VisibilityFlags::vertex_fragment()) + } + + fn compute() -> Self { + Self::Flags(VisibilityFlags::compute()) + } +} + +impl VisibilityFlags { + fn vertex_fragment() -> Self { + Self { + vertex: true, + fragment: true, + ..Default::default() + } + } + + fn compute() -> Self { + Self { + compute: true, + ..Default::default() + } + } +} + +impl ShaderStageVisibility { + fn hygienic_quote(&self, path: &proc_macro2::TokenStream) -> proc_macro2::TokenStream { + match self { + ShaderStageVisibility::All => quote! 
{ + if cfg!(feature = "webgpu") { + todo!("Please use a more specific shader stage: https://github.com/gfx-rs/wgpu/issues/7708") + } else { + #path::ShaderStages::all() + } + }, + ShaderStageVisibility::None => quote! { #path::ShaderStages::NONE }, + ShaderStageVisibility::Flags(flags) => { + let mut quoted = Vec::new(); + + if flags.vertex { + quoted.push(quote! { #path::ShaderStages::VERTEX }); + } + if flags.fragment { + quoted.push(quote! { #path::ShaderStages::FRAGMENT }); + } + if flags.compute { + quoted.push(quote! { #path::ShaderStages::COMPUTE }); + } + + quote! { #(#quoted)|* } + } + } + } +} + +const VISIBILITY: Symbol = Symbol("visibility"); +const VISIBILITY_VERTEX: Symbol = Symbol("vertex"); +const VISIBILITY_FRAGMENT: Symbol = Symbol("fragment"); +const VISIBILITY_COMPUTE: Symbol = Symbol("compute"); +const VISIBILITY_ALL: Symbol = Symbol("all"); +const VISIBILITY_NONE: Symbol = Symbol("none"); + +fn get_visibility_flag_value(meta_list: &MetaList) -> Result { + let mut flags = Vec::new(); + + meta_list.parse_nested_meta(|meta| { + flags.push(meta.path); + Ok(()) + })?; + + if flags.is_empty() { + return Err(Error::new_spanned( + meta_list, + "Invalid visibility format. Must be `visibility(flags)`, flags can be `all`, `none`, or a list-combination of `vertex`, `fragment` and/or `compute`." + )); + } + + if flags.len() == 1 + && let Some(flag) = flags.first() + { + if flag == VISIBILITY_ALL { + return Ok(ShaderStageVisibility::All); + } else if flag == VISIBILITY_NONE { + return Ok(ShaderStageVisibility::None); + } + } + + let mut visibility = VisibilityFlags::default(); + + for flag in flags { + if flag == VISIBILITY_VERTEX { + visibility.vertex = true; + } else if flag == VISIBILITY_FRAGMENT { + visibility.fragment = true; + } else if flag == VISIBILITY_COMPUTE { + visibility.compute = true; + } else { + return Err(Error::new_spanned( + flag, + "Not a valid visibility flag. 
Must be `all`, `none`, or a list-combination of `vertex`, `fragment` and/or `compute`." + )); + } + } + + Ok(ShaderStageVisibility::Flags(visibility)) +} + +// Returns the `binding_array(10)` part of a field-level declaration like +// `#[storage(binding_array(10))]`. +fn get_binding_array_flag_value(meta_list: &MetaList) -> Result { + meta_list + .parse_args_with(|input: ParseStream| input.parse::())? + .base10_parse() +} + +#[derive(Clone, Copy, Default)] +enum BindingTextureDimension { + D1, + #[default] + D2, + D2Array, + Cube, + CubeArray, + D3, +} + +enum BindingTextureSampleType { + Float { filterable: bool }, + Depth, + Sint, + Uint, +} + +impl ToTokens for BindingTextureDimension { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + tokens.extend(match self { + BindingTextureDimension::D1 => quote! { TextureViewDimension::D1 }, + BindingTextureDimension::D2 => quote! { TextureViewDimension::D2 }, + BindingTextureDimension::D2Array => quote! { TextureViewDimension::D2Array }, + BindingTextureDimension::Cube => quote! { TextureViewDimension::Cube }, + BindingTextureDimension::CubeArray => quote! { TextureViewDimension::CubeArray }, + BindingTextureDimension::D3 => quote! { TextureViewDimension::D3 }, + }); + } +} + +impl ToTokens for BindingTextureSampleType { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + tokens.extend(match self { + BindingTextureSampleType::Float { filterable } => { + quote! { TextureSampleType::Float { filterable: #filterable } } + } + BindingTextureSampleType::Depth => quote! { TextureSampleType::Depth }, + BindingTextureSampleType::Sint => quote! { TextureSampleType::Sint }, + BindingTextureSampleType::Uint => quote! 
{ TextureSampleType::Uint }, + }); + } +} + +struct TextureAttrs { + dimension: BindingTextureDimension, + sample_type: BindingTextureSampleType, + multisampled: bool, + visibility: ShaderStageVisibility, +} + +impl Default for BindingTextureSampleType { + fn default() -> Self { + BindingTextureSampleType::Float { filterable: true } + } +} + +impl Default for TextureAttrs { + fn default() -> Self { + Self { + dimension: Default::default(), + sample_type: Default::default(), + multisampled: true, + visibility: Default::default(), + } + } +} + +struct StorageTextureAttrs { + dimension: BindingTextureDimension, + // Parsing of the image_format parameter is deferred to the type checker, + // which will error if the format is not member of the TextureFormat enum. + image_format: proc_macro2::TokenStream, + // Parsing of the access parameter is deferred to the type checker, + // which will error if the access is not member of the StorageTextureAccess enum. + access: proc_macro2::TokenStream, + visibility: ShaderStageVisibility, +} + +impl Default for StorageTextureAttrs { + fn default() -> Self { + Self { + dimension: Default::default(), + image_format: quote! { Rgba8Unorm }, + access: quote! { ReadWrite }, + visibility: ShaderStageVisibility::compute(), + } + } +} + +fn get_storage_texture_binding_attr(metas: Vec) -> Result { + let mut storage_texture_attrs = StorageTextureAttrs::default(); + + for meta in metas { + use syn::Meta::{List, NameValue}; + match meta { + // Parse #[storage_texture(0, dimension = "...")]. + NameValue(m) if m.path == DIMENSION => { + let value = get_lit_str(DIMENSION, &m.value)?; + storage_texture_attrs.dimension = get_texture_dimension_value(value)?; + } + // Parse #[storage_texture(0, format = ...))]. + NameValue(m) if m.path == IMAGE_FORMAT => { + storage_texture_attrs.image_format = m.value.into_token_stream(); + } + // Parse #[storage_texture(0, access = ...))]. 
+ NameValue(m) if m.path == ACCESS => { + storage_texture_attrs.access = m.value.into_token_stream(); + } + // Parse #[storage_texture(0, visibility(...))]. + List(m) if m.path == VISIBILITY => { + storage_texture_attrs.visibility = get_visibility_flag_value(&m)?; + } + NameValue(m) => { + return Err(Error::new_spanned( + m.path, + "Not a valid name. Available attributes: `dimension`, `image_format`, `access`.", + )); + } + _ => { + return Err(Error::new_spanned( + meta, + "Not a name value pair: `foo = \"...\"`", + )); + } + } + } + + Ok(storage_texture_attrs) +} + +const DIMENSION: Symbol = Symbol("dimension"); +const IMAGE_FORMAT: Symbol = Symbol("image_format"); +const ACCESS: Symbol = Symbol("access"); +const SAMPLE_TYPE: Symbol = Symbol("sample_type"); +const FILTERABLE: Symbol = Symbol("filterable"); +const MULTISAMPLED: Symbol = Symbol("multisampled"); + +// Values for `dimension` attribute. +const DIM_1D: &str = "1d"; +const DIM_2D: &str = "2d"; +const DIM_3D: &str = "3d"; +const DIM_2D_ARRAY: &str = "2d_array"; +const DIM_CUBE: &str = "cube"; +const DIM_CUBE_ARRAY: &str = "cube_array"; + +// Values for sample `type` attribute. +const FLOAT: &str = "float"; +const DEPTH: &str = "depth"; +const S_INT: &str = "s_int"; +const U_INT: &str = "u_int"; + +fn get_texture_attrs(metas: Vec) -> Result { + let mut dimension = Default::default(); + let mut sample_type = Default::default(); + let mut multisampled = Default::default(); + let mut filterable = None; + let mut filterable_ident = None; + + let mut visibility = ShaderStageVisibility::vertex_fragment(); + + for meta in metas { + use syn::Meta::{List, NameValue}; + match meta { + // Parse #[texture(0, dimension = "...")]. + NameValue(m) if m.path == DIMENSION => { + let value = get_lit_str(DIMENSION, &m.value)?; + dimension = get_texture_dimension_value(value)?; + } + // Parse #[texture(0, sample_type = "...")]. 
+ NameValue(m) if m.path == SAMPLE_TYPE => { + let value = get_lit_str(SAMPLE_TYPE, &m.value)?; + sample_type = get_texture_sample_type_value(value)?; + } + // Parse #[texture(0, multisampled = "...")]. + NameValue(m) if m.path == MULTISAMPLED => { + multisampled = get_lit_bool(MULTISAMPLED, &m.value)?; + } + // Parse #[texture(0, filterable = "...")]. + NameValue(m) if m.path == FILTERABLE => { + filterable = get_lit_bool(FILTERABLE, &m.value)?.into(); + filterable_ident = m.path.into(); + } + // Parse #[texture(0, visibility(...))]. + List(m) if m.path == VISIBILITY => { + visibility = get_visibility_flag_value(&m)?; + } + NameValue(m) => { + return Err(Error::new_spanned( + m.path, + "Not a valid name. Available attributes: `dimension`, `sample_type`, `multisampled`, or `filterable`." + )); + } + _ => { + return Err(Error::new_spanned( + meta, + "Not a name value pair: `foo = \"...\"`", + )); + } + } + } + + // Resolve `filterable` since the float + // sample type is the one that contains the value. + if let Some(filterable) = filterable { + let path = filterable_ident.unwrap(); + match sample_type { + BindingTextureSampleType::Float { filterable: _ } => { + sample_type = BindingTextureSampleType::Float { filterable } + } + _ => { + return Err(Error::new_spanned( + path, + "Type must be `float` to use the `filterable` attribute.", + )); + } + }; + } + + Ok(TextureAttrs { + dimension, + sample_type, + multisampled, + visibility, + }) +} + +fn get_texture_dimension_value(lit_str: &LitStr) -> Result { + match lit_str.value().as_str() { + DIM_1D => Ok(BindingTextureDimension::D1), + DIM_2D => Ok(BindingTextureDimension::D2), + DIM_2D_ARRAY => Ok(BindingTextureDimension::D2Array), + DIM_3D => Ok(BindingTextureDimension::D3), + DIM_CUBE => Ok(BindingTextureDimension::Cube), + DIM_CUBE_ARRAY => Ok(BindingTextureDimension::CubeArray), + + _ => Err(Error::new_spanned( + lit_str, + "Not a valid dimension. 
Must be `1d`, `2d`, `2d_array`, `3d`, `cube` or `cube_array`.", + )), + } +} + +fn get_texture_sample_type_value(lit_str: &LitStr) -> Result { + match lit_str.value().as_str() { + FLOAT => Ok(BindingTextureSampleType::Float { filterable: true }), + DEPTH => Ok(BindingTextureSampleType::Depth), + S_INT => Ok(BindingTextureSampleType::Sint), + U_INT => Ok(BindingTextureSampleType::Uint), + + _ => Err(Error::new_spanned( + lit_str, + "Not a valid sample type. Must be `float`, `depth`, `s_int` or `u_int`.", + )), + } +} + +#[derive(Default)] +struct SamplerAttrs { + sampler_binding_type: SamplerBindingType, + visibility: ShaderStageVisibility, +} + +#[derive(Default)] +enum SamplerBindingType { + #[default] + Filtering, + NonFiltering, + Comparison, +} + +impl ToTokens for SamplerBindingType { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + tokens.extend(match self { + SamplerBindingType::Filtering => quote! { SamplerBindingType::Filtering }, + SamplerBindingType::NonFiltering => quote! { SamplerBindingType::NonFiltering }, + SamplerBindingType::Comparison => quote! { SamplerBindingType::Comparison }, + }); + } +} + +const SAMPLER_TYPE: Symbol = Symbol("sampler_type"); + +const FILTERING: &str = "filtering"; +const NON_FILTERING: &str = "non_filtering"; +const COMPARISON: &str = "comparison"; + +fn get_sampler_attrs(metas: Vec) -> Result { + let mut sampler_binding_type = Default::default(); + let mut visibility = ShaderStageVisibility::vertex_fragment(); + + for meta in metas { + use syn::Meta::{List, NameValue}; + match meta { + // Parse #[sampler(0, sampler_type = "..."))]. + NameValue(m) if m.path == SAMPLER_TYPE => { + let value = get_lit_str(DIMENSION, &m.value)?; + sampler_binding_type = get_sampler_binding_type_value(value)?; + } + // Parse #[sampler(0, visibility(...))]. + List(m) if m.path == VISIBILITY => { + visibility = get_visibility_flag_value(&m)?; + } + NameValue(m) => { + return Err(Error::new_spanned( + m.path, + "Not a valid name. 
Available attributes: `sampler_type`.", + )); + } + _ => { + return Err(Error::new_spanned( + meta, + "Not a name value pair: `foo = \"...\"`", + )); + } + } + } + + Ok(SamplerAttrs { + sampler_binding_type, + visibility, + }) +} + +fn get_sampler_binding_type_value(lit_str: &LitStr) -> Result { + match lit_str.value().as_str() { + FILTERING => Ok(SamplerBindingType::Filtering), + NON_FILTERING => Ok(SamplerBindingType::NonFiltering), + COMPARISON => Ok(SamplerBindingType::Comparison), + + _ => Err(Error::new_spanned( + lit_str, + "Not a valid dimension. Must be `filtering`, `non_filtering`, or `comparison`.", + )), + } +} + +#[derive(Default)] +struct StorageAttrs { + visibility: ShaderStageVisibility, + binding_array: Option, + read_only: bool, + buffer: bool, +} + +const READ_ONLY: Symbol = Symbol("read_only"); +const BUFFER: Symbol = Symbol("buffer"); + +fn get_storage_binding_attr(metas: Vec) -> Result { + let mut visibility = ShaderStageVisibility::vertex_fragment(); + let mut binding_array = None; + let mut read_only = false; + let mut buffer = false; + + for meta in metas { + use syn::Meta::{List, Path}; + match meta { + // Parse #[storage(0, visibility(...))]. + List(m) if m.path == VISIBILITY => { + visibility = get_visibility_flag_value(&m)?; + } + // Parse #[storage(0, binding_array(...))] for bindless mode. + List(m) if m.path == BINDING_ARRAY_MODIFIER_NAME => { + binding_array = Some(get_binding_array_flag_value(&m)?); + } + Path(path) if path == READ_ONLY => { + read_only = true; + } + Path(path) if path == BUFFER => { + buffer = true; + } + _ => { + return Err(Error::new_spanned( + meta, + "Not a valid attribute. 
Available attributes: `read_only`, `visibility`", + )); + } + } + } + + Ok(StorageAttrs { + visibility, + binding_array, + read_only, + buffer, + }) +} diff --git a/crates/macros/src/extract_component.rs b/crates/macros/src/extract_component.rs new file mode 100644 index 0000000..8526f7b --- /dev/null +++ b/crates/macros/src/extract_component.rs @@ -0,0 +1,51 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, parse_quote, DeriveInput, Path}; + +pub fn derive_extract_component(input: TokenStream) -> TokenStream { + let mut ast = parse_macro_input!(input as DeriveInput); + let bevy_render_path: Path = crate::bevy_render_path(); + let bevy_ecs_path: Path = bevy_macro_utils::BevyManifest::shared() + .maybe_get_path("bevy_ecs") + .expect("bevy_ecs should be found in manifest"); + + ast.generics + .make_where_clause() + .predicates + .push(parse_quote! { Self: Clone }); + + let struct_name = &ast.ident; + let (impl_generics, type_generics, where_clause) = &ast.generics.split_for_impl(); + + let filter = if let Some(attr) = ast + .attrs + .iter() + .find(|a| a.path().is_ident("extract_component_filter")) + { + let filter = match attr.parse_args::() { + Ok(filter) => filter, + Err(e) => return e.to_compile_error().into(), + }; + + quote! { + #filter + } + } else { + quote! { + () + } + }; + + TokenStream::from(quote! 
{ + impl #impl_generics #bevy_render_path::extract_component::ExtractComponent for #struct_name #type_generics #where_clause { + type QueryData = &'static Self; + + type QueryFilter = #filter; + type Out = Self; + + fn extract_component(item: #bevy_ecs_path::query::QueryItem<'_, '_, Self::QueryData>) -> Option { + Some(item.clone()) + } + } + }) +} diff --git a/crates/macros/src/extract_resource.rs b/crates/macros/src/extract_resource.rs new file mode 100644 index 0000000..0a35eb4 --- /dev/null +++ b/crates/macros/src/extract_resource.rs @@ -0,0 +1,26 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, parse_quote, DeriveInput, Path}; + +pub fn derive_extract_resource(input: TokenStream) -> TokenStream { + let mut ast = parse_macro_input!(input as DeriveInput); + let bevy_render_path: Path = crate::bevy_render_path(); + + ast.generics + .make_where_clause() + .predicates + .push(parse_quote! { Self: Clone }); + + let struct_name = &ast.ident; + let (impl_generics, type_generics, where_clause) = &ast.generics.split_for_impl(); + + TokenStream::from(quote! 
{ + impl #impl_generics #bevy_render_path::extract_resource::ExtractResource for #struct_name #type_generics #where_clause { + type Source = Self; + + fn extract_resource(source: &Self::Source) -> Self { + source.clone() + } + } + }) +} diff --git a/crates/macros/src/lib.rs b/crates/macros/src/lib.rs new file mode 100644 index 0000000..dbe6363 --- /dev/null +++ b/crates/macros/src/lib.rs @@ -0,0 +1,152 @@ +#![expect(missing_docs, reason = "Not all docs are written yet, see #3492.")] +#![cfg_attr(docsrs, feature(doc_cfg))] + +mod as_bind_group; +mod extract_component; +mod extract_resource; +mod specializer; + +use bevy_macro_utils::{derive_label, BevyManifest}; +use proc_macro::TokenStream; +use quote::format_ident; +use syn::{parse_macro_input, DeriveInput}; + +pub(crate) fn bevy_render_path() -> syn::Path { + // Use our vendored render module + // When used from within libmarathon, use crate::render + // When used from other crates, they would use libmarathon::render + syn::parse_quote!(crate::render) +} + +pub(crate) fn bevy_ecs_path() -> syn::Path { + // Still use bevy_ecs from the external crate + BevyManifest::shared().get_path("bevy_ecs") +} + +#[proc_macro_derive(ExtractResource)] +pub fn derive_extract_resource(input: TokenStream) -> TokenStream { + extract_resource::derive_extract_resource(input) +} + +/// Implements `ExtractComponent` trait for a component. +/// +/// The component must implement [`Clone`]. +/// The component will be extracted into the render world via cloning. +/// Note that this only enables extraction of the component, it does not execute the extraction. +/// See `ExtractComponentPlugin` to actually perform the extraction. +/// +/// If you only want to extract a component conditionally, you may use the `extract_component_filter` attribute. 
+/// +/// # Example +/// +/// ```no_compile +/// use bevy_ecs::component::Component; +/// use bevy_render_macros::ExtractComponent; +/// +/// #[derive(Component, Clone, ExtractComponent)] +/// #[extract_component_filter(With)] +/// pub struct Foo { +/// pub should_foo: bool, +/// } +/// +/// // Without a filter (unconditional). +/// #[derive(Component, Clone, ExtractComponent)] +/// pub struct Bar { +/// pub should_bar: bool, +/// } +/// ``` +#[proc_macro_derive(ExtractComponent, attributes(extract_component_filter))] +pub fn derive_extract_component(input: TokenStream) -> TokenStream { + extract_component::derive_extract_component(input) +} + +#[proc_macro_derive( + AsBindGroup, + attributes( + uniform, + storage_texture, + texture, + sampler, + bind_group_data, + storage, + bindless, + data + ) +)] +pub fn derive_as_bind_group(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + + as_bind_group::derive_as_bind_group(input).unwrap_or_else(|err| err.to_compile_error().into()) +} + +/// Derive macro generating an impl of the trait `RenderLabel`. +/// +/// This does not work for unions. +#[proc_macro_derive(RenderLabel)] +pub fn derive_render_label(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let mut trait_path = bevy_render_path(); + trait_path + .segments + .push(format_ident!("render_graph").into()); + trait_path + .segments + .push(format_ident!("RenderLabel").into()); + derive_label(input, "RenderLabel", &trait_path) +} + +/// Derive macro generating an impl of the trait `RenderSubGraph`. +/// +/// This does not work for unions. 
+#[proc_macro_derive(RenderSubGraph)] +pub fn derive_render_sub_graph(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let mut trait_path = bevy_render_path(); + trait_path + .segments + .push(format_ident!("render_graph").into()); + trait_path + .segments + .push(format_ident!("RenderSubGraph").into()); + derive_label(input, "RenderSubGraph", &trait_path) +} + +/// Derive macro generating an impl of the trait `Specializer` +/// +/// This only works for structs whose members all implement `Specializer` +#[proc_macro_derive(Specializer, attributes(specialize, key, base_descriptor))] +pub fn derive_specialize(input: TokenStream) -> TokenStream { + specializer::impl_specializer(input) +} + +/// Derive macro generating the most common impl of the trait `SpecializerKey` +#[proc_macro_derive(SpecializerKey)] +pub fn derive_specializer_key(input: TokenStream) -> TokenStream { + specializer::impl_specializer_key(input) +} + +#[proc_macro_derive(ShaderLabel)] +pub fn derive_shader_label(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let mut trait_path = bevy_render_path(); + trait_path + .segments + .push(format_ident!("render_phase").into()); + trait_path + .segments + .push(format_ident!("ShaderLabel").into()); + derive_label(input, "ShaderLabel", &trait_path) +} + +#[proc_macro_derive(DrawFunctionLabel)] +pub fn derive_draw_function_label(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let mut trait_path = bevy_render_path(); + trait_path + .segments + .push(format_ident!("render_phase").into()); + trait_path + .segments + .push(format_ident!("DrawFunctionLabel").into()); + derive_label(input, "DrawFunctionLabel", &trait_path) +} diff --git a/crates/macros/src/specializer.rs b/crates/macros/src/specializer.rs new file mode 100644 index 0000000..d59d496 --- /dev/null +++ b/crates/macros/src/specializer.rs @@ -0,0 +1,379 @@ +use 
bevy_macro_utils::{ + fq_std::{FQDefault, FQResult}, + get_struct_fields, +}; +use proc_macro::TokenStream; +use proc_macro2::Span; +use quote::{format_ident, quote}; +use syn::{ + parse::{Parse, ParseStream}, + parse_macro_input, parse_quote, + punctuated::Punctuated, + spanned::Spanned, + DeriveInput, Expr, Field, Ident, Index, Member, Meta, MetaList, Pat, Path, Token, Type, + WherePredicate, +}; + +const SPECIALIZE_ATTR_IDENT: &str = "specialize"; +const SPECIALIZE_ALL_IDENT: &str = "all"; + +const KEY_ATTR_IDENT: &str = "key"; +const KEY_DEFAULT_IDENT: &str = "default"; + +enum SpecializeImplTargets { + All, + Specific(Vec), +} + +impl Parse for SpecializeImplTargets { + fn parse(input: ParseStream) -> syn::Result { + let paths = input.parse_terminated(Path::parse, Token![,])?; + if paths + .first() + .is_some_and(|p| p.is_ident(SPECIALIZE_ALL_IDENT)) + { + Ok(SpecializeImplTargets::All) + } else { + Ok(SpecializeImplTargets::Specific(paths.into_iter().collect())) + } + } +} + +#[derive(Clone)] +enum Key { + Whole, + Default, + Index(Index), + Custom(Expr), +} + +impl Key { + fn expr(&self) -> Expr { + match self { + Key::Whole => parse_quote!(key), + Key::Default => parse_quote!(#FQDefault::default()), + Key::Index(index) => { + let member = Member::Unnamed(index.clone()); + parse_quote!(key.#member) + } + Key::Custom(expr) => expr.clone(), + } + } +} + +const KEY_ERROR_MSG: &str = "Invalid key override. 
Must be either `default` or a valid Rust expression of the correct key type"; + +impl Parse for Key { + fn parse(input: ParseStream) -> syn::Result { + if let Ok(ident) = input.parse::() { + if ident == KEY_DEFAULT_IDENT { + Ok(Key::Default) + } else { + Err(syn::Error::new_spanned(ident, KEY_ERROR_MSG)) + } + } else { + input.parse::().map(Key::Custom).map_err(|mut err| { + err.extend(syn::Error::new(err.span(), KEY_ERROR_MSG)); + err + }) + } + } +} + +#[derive(Clone)] +struct FieldInfo { + ty: Type, + member: Member, + key: Key, +} + +impl FieldInfo { + fn key_ty(&self, specialize_path: &Path, target_path: &Path) -> Option { + let ty = &self.ty; + matches!(self.key, Key::Whole | Key::Index(_)) + .then_some(parse_quote!(<#ty as #specialize_path::Specializer<#target_path>>::Key)) + } + + fn key_ident(&self, ident: Ident) -> Option { + matches!(self.key, Key::Whole | Key::Index(_)).then_some(ident) + } + + fn specialize_expr(&self, specialize_path: &Path, target_path: &Path) -> Expr { + let FieldInfo { + ty, member, key, .. 
+ } = &self; + let key_expr = key.expr(); + parse_quote!(<#ty as #specialize_path::Specializer<#target_path>>::specialize(&self.#member, #key_expr, descriptor)) + } + + fn specialize_predicate(&self, specialize_path: &Path, target_path: &Path) -> WherePredicate { + let ty = &self.ty; + if matches!(&self.key, Key::Default) { + parse_quote!(#ty: #specialize_path::Specializer<#target_path, Key: #FQDefault>) + } else { + parse_quote!(#ty: #specialize_path::Specializer<#target_path>) + } + } +} + +fn get_field_info( + fields: &Punctuated, + targets: &SpecializeImplTargets, +) -> syn::Result> { + let mut field_info: Vec = Vec::new(); + let mut used_count = 0; + let mut single_index = 0; + for (index, field) in fields.iter().enumerate() { + let field_ty = field.ty.clone(); + let field_member = field.ident.clone().map_or( + Member::Unnamed(Index { + index: index as u32, + span: field.span(), + }), + Member::Named, + ); + let key_index = Index { + index: used_count, + span: field.span(), + }; + + let mut use_key_field = true; + let mut key = Key::Index(key_index); + for attr in &field.attrs { + match &attr.meta { + Meta::List(MetaList { path, tokens, .. 
}) if path.is_ident(&KEY_ATTR_IDENT) => { + let owned_tokens = tokens.clone().into(); + let Ok(parsed_key) = syn::parse::(owned_tokens) else { + return Err(syn::Error::new( + attr.span(), + "Invalid key override attribute", + )); + }; + key = parsed_key; + if matches!( + (&key, &targets), + (Key::Custom(_), SpecializeImplTargets::All) + ) { + return Err(syn::Error::new( + attr.span(), + "#[key(default)] is the only key override type allowed with #[specialize(all)]", + )); + } + use_key_field = false; + } + _ => {} + } + } + + if use_key_field { + used_count += 1; + single_index = index; + } + + field_info.push(FieldInfo { + ty: field_ty, + member: field_member, + key, + }); + } + + if used_count == 1 { + field_info[single_index].key = Key::Whole; + } + + Ok(field_info) +} + +fn get_specialize_targets( + ast: &DeriveInput, + derive_name: &str, +) -> syn::Result { + let specialize_attr = ast.attrs.iter().find_map(|attr| { + if attr.path().is_ident(SPECIALIZE_ATTR_IDENT) + && let Meta::List(meta_list) = &attr.meta + { + return Some(meta_list); + } + None + }); + let Some(specialize_meta_list) = specialize_attr else { + return Err(syn::Error::new( + Span::call_site(), + format!("#[derive({derive_name})] must be accompanied by #[specialize(..targets)].\n Example usages: #[specialize(RenderPipeline)], #[specialize(all)]") + )); + }; + syn::parse::(specialize_meta_list.tokens.clone().into()) +} + +macro_rules! 
guard { + ($expr: expr) => { + match $expr { + Ok(__val) => __val, + Err(err) => return err.to_compile_error().into(), + } + }; +} + +pub fn impl_specializer(input: TokenStream) -> TokenStream { + let bevy_render_path: Path = crate::bevy_render_path(); + let specialize_path = { + let mut path = bevy_render_path.clone(); + path.segments.push(format_ident!("render_resource").into()); + path + }; + + let ecs_path = crate::bevy_ecs_path(); + + let ast = parse_macro_input!(input as DeriveInput); + let targets = guard!(get_specialize_targets(&ast, "Specializer")); + let fields = guard!(get_struct_fields(&ast.data, "Specializer")); + let field_info = guard!(get_field_info(fields, &targets)); + + let key_idents: Vec> = field_info + .iter() + .enumerate() + .map(|(i, field_info)| field_info.key_ident(format_ident!("key{i}"))) + .collect(); + let key_tuple_idents: Vec = key_idents.iter().flatten().cloned().collect(); + let ignore_pat: Pat = parse_quote!(_); + let key_patterns: Vec = key_idents + .iter() + .map(|key_ident| match key_ident { + Some(key_ident) => parse_quote!(#key_ident), + None => ignore_pat.clone(), + }) + .collect(); + + match targets { + SpecializeImplTargets::All => impl_specialize_all( + &specialize_path, + &ecs_path, + &ast, + &field_info, + &key_patterns, + &key_tuple_idents, + ), + SpecializeImplTargets::Specific(targets) => targets + .iter() + .map(|target| { + impl_specialize_specific( + &specialize_path, + &ecs_path, + &ast, + &field_info, + target, + &key_patterns, + &key_tuple_idents, + ) + }) + .collect(), + } +} + +fn impl_specialize_all( + specialize_path: &Path, + ecs_path: &Path, + ast: &DeriveInput, + field_info: &[FieldInfo], + key_patterns: &[Pat], + key_tuple_idents: &[Ident], +) -> TokenStream { + let target_path = Path::from(format_ident!("T")); + let key_elems: Vec = field_info + .iter() + .filter_map(|field_info| field_info.key_ty(specialize_path, &target_path)) + .collect(); + let specialize_exprs: Vec = field_info + .iter() + 
.map(|field_info| field_info.specialize_expr(specialize_path, &target_path)) + .collect(); + + let struct_name = &ast.ident; + let mut generics = ast.generics.clone(); + generics.params.insert( + 0, + parse_quote!(#target_path: #specialize_path::Specializable), + ); + + if !field_info.is_empty() { + let where_clause = generics.make_where_clause(); + for field in field_info { + where_clause + .predicates + .push(field.specialize_predicate(specialize_path, &target_path)); + } + } + + let (_, type_generics, _) = ast.generics.split_for_impl(); + let (impl_generics, _, where_clause) = &generics.split_for_impl(); + + TokenStream::from(quote! { + impl #impl_generics #specialize_path::Specializer<#target_path> for #struct_name #type_generics #where_clause { + type Key = (#(#key_elems),*); + + fn specialize( + &self, + key: Self::Key, + descriptor: &mut <#target_path as #specialize_path::Specializable>::Descriptor + ) -> #FQResult<#specialize_path::Canonical, #ecs_path::error::BevyError> { + #(let #key_patterns = #specialize_exprs?;)* + #FQResult::Ok((#(#key_tuple_idents),*)) + } + } + }) +} + +fn impl_specialize_specific( + specialize_path: &Path, + ecs_path: &Path, + ast: &DeriveInput, + field_info: &[FieldInfo], + target_path: &Path, + key_patterns: &[Pat], + key_tuple_idents: &[Ident], +) -> TokenStream { + let key_elems: Vec = field_info + .iter() + .filter_map(|field_info| field_info.key_ty(specialize_path, target_path)) + .collect(); + let specialize_exprs: Vec = field_info + .iter() + .map(|field_info| field_info.specialize_expr(specialize_path, target_path)) + .collect(); + + let struct_name = &ast.ident; + let (impl_generics, type_generics, where_clause) = &ast.generics.split_for_impl(); + + TokenStream::from(quote! 
{ + impl #impl_generics #specialize_path::Specializer<#target_path> for #struct_name #type_generics #where_clause { + type Key = (#(#key_elems),*); + + fn specialize( + &self, + key: Self::Key, + descriptor: &mut <#target_path as #specialize_path::Specializable>::Descriptor + ) -> #FQResult<#specialize_path::Canonical, #ecs_path::error::BevyError> { + #(let #key_patterns = #specialize_exprs?;)* + #FQResult::Ok((#(#key_tuple_idents),*)) + } + } + }) +} + +pub fn impl_specializer_key(input: TokenStream) -> TokenStream { + let bevy_render_path: Path = crate::bevy_render_path(); + let specialize_path = { + let mut path = bevy_render_path.clone(); + path.segments.push(format_ident!("render_resource").into()); + path + }; + + let ast = parse_macro_input!(input as DeriveInput); + let ident = ast.ident; + TokenStream::from(quote!( + impl #specialize_path::SpecializerKey for #ident { + const IS_CANONICAL: bool = true; + type Canonical = Self; + } + )) +} diff --git a/crates/sync-macros/tests/basic_macro_test.rs b/crates/macros/tests/basic_macro_test.rs similarity index 100% rename from crates/sync-macros/tests/basic_macro_test.rs rename to crates/macros/tests/basic_macro_test.rs diff --git a/crates/sync-macros/src/lib.rs b/crates/sync-macros/src/lib.rs deleted file mode 100644 index 6315d4e..0000000 --- a/crates/sync-macros/src/lib.rs +++ /dev/null @@ -1,578 +0,0 @@ -use proc_macro::TokenStream; -use quote::quote; -use syn::{ - DeriveInput, - ItemStruct, - parse_macro_input, -}; - -/// Sync strategy types -#[derive(Debug, Clone, PartialEq)] -enum SyncStrategy { - LastWriteWins, - Set, - Sequence, - Custom, -} - -impl SyncStrategy { - fn from_str(s: &str) -> Result { - match s { - | "LastWriteWins" => Ok(SyncStrategy::LastWriteWins), - | "Set" => Ok(SyncStrategy::Set), - | "Sequence" => Ok(SyncStrategy::Sequence), - | "Custom" => Ok(SyncStrategy::Custom), - | _ => Err(format!( - "Unknown strategy '{}'. 
Choose one of: \"LastWriteWins\", \"Set\", \"Sequence\", \"Custom\"", - s - )), - } - } - - fn to_tokens(&self) -> proc_macro2::TokenStream { - match self { - | SyncStrategy::LastWriteWins => { - quote! { libmarathon::networking::SyncStrategy::LastWriteWins } - }, - | SyncStrategy::Set => quote! { libmarathon::networking::SyncStrategy::Set }, - | SyncStrategy::Sequence => quote! { libmarathon::networking::SyncStrategy::Sequence }, - | SyncStrategy::Custom => quote! { libmarathon::networking::SyncStrategy::Custom }, - } - } -} - -/// Parsed sync attributes -struct SyncAttributes { - version: u32, - strategy: SyncStrategy, -} - -impl SyncAttributes { - fn parse(input: &DeriveInput) -> Result { - let mut version: Option = None; - let mut strategy: Option = None; - - // Find the #[sync(...)] attribute - for attr in &input.attrs { - if !attr.path().is_ident("sync") { - continue; - } - - attr.parse_nested_meta(|meta| { - if meta.path.is_ident("version") { - let value: syn::LitInt = meta.value()?.parse()?; - version = Some(value.base10_parse()?); - Ok(()) - } else if meta.path.is_ident("strategy") { - let value: syn::LitStr = meta.value()?.parse()?; - let strategy_str = value.value(); - strategy = Some( - SyncStrategy::from_str(&strategy_str) - .map_err(|e| syn::Error::new_spanned(&value, e))?, - ); - Ok(()) - } else { - Err(meta.error("unrecognized sync attribute")) - } - })?; - } - - // Require version and strategy - let version = version.ok_or_else(|| { - syn::Error::new( - proc_macro2::Span::call_site(), - "Missing required attribute `version`\n\n \n\n = help: Add #[sync(version = 1, strategy = \"...\")] to your struct\n\n = note: See documentation: https://docs.rs/lonni/sync/strategies.html", - ) - })?; - - let strategy = strategy.ok_or_else(|| { - syn::Error::new( - proc_macro2::Span::call_site(), - "Missing required attribute `strategy`\n\n \n\n = help: Choose one of: \"LastWriteWins\", \"Set\", \"Sequence\", \"Custom\"\n\n = help: Add #[sync(version = 1, strategy 
= \"LastWriteWins\")] to your struct\n\n = note: See documentation: https://docs.rs/lonni/sync/strategies.html", - ) - })?; - - Ok(SyncAttributes { - version, - strategy, - }) - } -} - -/// RFC 0003 macro: Generate SyncComponent trait implementation -/// -/// # Example -/// ```ignore -/// use bevy::prelude::*; -/// use libmarathon::networking::Synced; -/// use sync_macros::Synced as SyncedDerive; -/// -/// #[derive(Component, Clone)] -/// #[derive(Synced)] -/// #[sync(version = 1, strategy = "LastWriteWins")] -/// struct Health(f32); -/// -/// // In a Bevy system: -/// fn spawn_health(mut commands: Commands) { -/// commands.spawn((Health(100.0), Synced)); -/// } -/// ``` -#[proc_macro_derive(Synced, attributes(sync))] -pub fn derive_synced(input: TokenStream) -> TokenStream { - let input = parse_macro_input!(input as DeriveInput); - - // Parse attributes - let attrs = match SyncAttributes::parse(&input) { - | Ok(attrs) => attrs, - | Err(e) => return TokenStream::from(e.to_compile_error()), - }; - - let name = &input.ident; - let name_str = name.to_string(); - let version = attrs.version; - let strategy_tokens = attrs.strategy.to_tokens(); - - // Generate serialization method based on type - let serialize_impl = generate_serialize(&input); - let deserialize_impl = generate_deserialize(&input, name); - - // Generate merge method based on strategy - let merge_impl = generate_merge(&input, &attrs.strategy); - - // Extract struct attributes and visibility for re-emission - let vis = &input.vis; - let attrs_without_sync: Vec<_> = input - .attrs - .iter() - .filter(|attr| !attr.path().is_ident("sync")) - .collect(); - let struct_token = match &input.data { - | syn::Data::Struct(_) => quote! { struct }, - | _ => quote! {}, - }; - - // Re-emit the struct with rkyv derives added - let rkyv_struct = match &input.data { - | syn::Data::Struct(data_struct) => { - let fields = &data_struct.fields; - quote! 
{ - #[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] - #(#attrs_without_sync)* - #vis #struct_token #name #fields - } - }, - | _ => quote! {}, - }; - - let expanded = quote! { - // Re-emit struct with rkyv derives - #rkyv_struct - - // Register component with inventory for type registry - // Build type path at compile time using concat! and module_path! - // since std::any::type_name() is not yet const - const _: () = { - const TYPE_PATH: &str = concat!(module_path!(), "::", stringify!(#name)); - - inventory::submit! { - libmarathon::persistence::ComponentMeta { - type_name: #name_str, - type_path: TYPE_PATH, - type_id: std::any::TypeId::of::<#name>(), - deserialize_fn: |bytes: &[u8]| -> anyhow::Result> { - let component: #name = rkyv::from_bytes::<#name, rkyv::rancor::Failure>(bytes)?; - Ok(Box::new(component)) - }, - serialize_fn: |world: &bevy::ecs::world::World, entity: bevy::ecs::entity::Entity| -> Option { - world.get::<#name>(entity).and_then(|component| { - rkyv::to_bytes::(component) - .map(|vec| bytes::Bytes::from(vec.to_vec())) - .ok() - }) - }, - insert_fn: |entity_mut: &mut bevy::ecs::world::EntityWorldMut, boxed: Box| { - if let Ok(component) = boxed.downcast::<#name>() { - entity_mut.insert(*component); - } - }, - } - }; - }; - - impl libmarathon::networking::SyncComponent for #name { - const VERSION: u32 = #version; - const STRATEGY: libmarathon::networking::SyncStrategy = #strategy_tokens; - - #[inline] - fn serialize_sync(&self) -> anyhow::Result { - #serialize_impl - } - - #[inline] - fn deserialize_sync(data: &[u8]) -> anyhow::Result { - #deserialize_impl - } - - #[inline] - fn merge(&mut self, remote: Self, clock_cmp: libmarathon::networking::ClockComparison) -> libmarathon::networking::ComponentMergeDecision { - #merge_impl - } - } - }; - - TokenStream::from(expanded) -} - -/// Generate specialized serialization code -fn generate_serialize(_input: &DeriveInput) -> proc_macro2::TokenStream { - // Use rkyv for zero-copy serialization 
- // Later we can optimize for specific types (e.g., f32 -> to_le_bytes) - quote! { - rkyv::to_bytes::(self).map(|bytes| bytes::Bytes::from(bytes.to_vec())).map_err(|e| anyhow::anyhow!("Serialization failed: {}", e)) - } -} - -/// Generate specialized deserialization code -fn generate_deserialize(_input: &DeriveInput, _name: &syn::Ident) -> proc_macro2::TokenStream { - quote! { - rkyv::from_bytes::(data).map_err(|e| anyhow::anyhow!("Deserialization failed: {}", e)) - } -} - -/// Generate merge logic based on strategy -fn generate_merge(input: &DeriveInput, strategy: &SyncStrategy) -> proc_macro2::TokenStream { - match strategy { - | SyncStrategy::LastWriteWins => generate_lww_merge(input), - | SyncStrategy::Set => generate_set_merge(input), - | SyncStrategy::Sequence => generate_sequence_merge(input), - | SyncStrategy::Custom => generate_custom_merge(input), - } -} - -/// Generate hash calculation code for tiebreaking in concurrent merges -/// -/// Returns a TokenStream that computes hashes for both local and remote values -/// and compares them for deterministic conflict resolution. -fn generate_hash_tiebreaker() -> proc_macro2::TokenStream { - quote! { - let local_hash = { - let bytes = rkyv::to_bytes::(self).map(|b| b.to_vec()).unwrap_or_default(); - bytes.iter().fold(0u64, |acc, &b| acc.wrapping_mul(31).wrapping_add(b as u64)) - }; - let remote_hash = { - let bytes = rkyv::to_bytes::(&remote).map(|b| b.to_vec()).unwrap_or_default(); - bytes.iter().fold(0u64, |acc, &b| acc.wrapping_mul(31).wrapping_add(b as u64)) - }; - } -} - -/// Generate Last-Write-Wins merge logic -fn generate_lww_merge(_input: &DeriveInput) -> proc_macro2::TokenStream { - let hash_tiebreaker = generate_hash_tiebreaker(); - - quote! 
{ - use tracing::info; - - match clock_cmp { - libmarathon::networking::ClockComparison::RemoteNewer => { - info!( - component = std::any::type_name::(), - ?clock_cmp, - "Taking remote (newer)" - ); - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } - libmarathon::networking::ClockComparison::LocalNewer => { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - libmarathon::networking::ClockComparison::Concurrent => { - // Tiebreaker: Compare serialized representations for deterministic choice - // In a real implementation, we'd use node_id, but for now use a simple hash - #hash_tiebreaker - - if remote_hash > local_hash { - info!( - component = std::any::type_name::(), - ?clock_cmp, - "Taking remote (concurrent, tiebreaker)" - ); - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } else { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - } - } - } -} - -/// Generate OR-Set merge logic -/// -/// For OR-Set strategy, the component must contain an OrSet field. -/// We merge by calling the OrSet's merge method which implements add-wins -/// semantics. -fn generate_set_merge(_input: &DeriveInput) -> proc_macro2::TokenStream { - let hash_tiebreaker = generate_hash_tiebreaker(); - - quote! 
{ - use tracing::info; - - // For Set strategy, we always merge the sets - // The OrSet CRDT handles the conflict resolution with add-wins semantics - info!( - component = std::any::type_name::(), - "Merging OR-Set (add-wins semantics)" - ); - - // Assuming the component wraps an OrSet or has a field with merge() - // For now, we'll do a structural merge by replacing the whole value - // This is a simplified implementation - full implementation would require - // the component to expose merge() method or implement it directly - - match clock_cmp { - libmarathon::networking::ClockComparison::RemoteNewer => { - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } - libmarathon::networking::ClockComparison::LocalNewer => { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - libmarathon::networking::ClockComparison::Concurrent => { - // In a full implementation, we would merge the OrSet here - // For now, use LWW with tiebreaker as fallback - #hash_tiebreaker - - if remote_hash > local_hash { - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } else { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - } - } - } -} - -/// Generate RGA/Sequence merge logic -/// -/// For Sequence strategy, the component must contain an Rga field. -/// We merge by calling the Rga's merge method which maintains causal ordering. -fn generate_sequence_merge(_input: &DeriveInput) -> proc_macro2::TokenStream { - let hash_tiebreaker = generate_hash_tiebreaker(); - - quote! 
{ - use tracing::info; - - // For Sequence strategy, we always merge the sequences - // The RGA CRDT handles the conflict resolution with causal ordering - info!( - component = std::any::type_name::(), - "Merging RGA sequence (causal ordering)" - ); - - // Assuming the component wraps an Rga or has a field with merge() - // For now, we'll do a structural merge by replacing the whole value - // This is a simplified implementation - full implementation would require - // the component to expose merge() method or implement it directly - - match clock_cmp { - libmarathon::networking::ClockComparison::RemoteNewer => { - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } - libmarathon::networking::ClockComparison::LocalNewer => { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - libmarathon::networking::ClockComparison::Concurrent => { - // In a full implementation, we would merge the Rga here - // For now, use LWW with tiebreaker as fallback - #hash_tiebreaker - - if remote_hash > local_hash { - *self = remote; - libmarathon::networking::ComponentMergeDecision::TookRemote - } else { - libmarathon::networking::ComponentMergeDecision::KeptLocal - } - } - } - } -} - -/// Generate custom merge logic placeholder -fn generate_custom_merge(input: &DeriveInput) -> proc_macro2::TokenStream { - let name = &input.ident; - quote! { - compile_error!( - concat!( - "Custom strategy requires implementing ConflictResolver trait for ", - stringify!(#name) - ) - ); - libmarathon::networking::ComponentMergeDecision::KeptLocal - } -} - - -/// Attribute macro for synced components -/// -/// This is an alternative to the derive macro that automatically adds rkyv derives. 
-/// -/// # Example -/// ```ignore -/// #[synced(version = 1, strategy = "LastWriteWins")] -/// struct Health(f32); -/// ``` -#[proc_macro_attribute] -pub fn synced(attr: TokenStream, item: TokenStream) -> TokenStream { - let input_struct = match syn::parse::(item.clone()) { - Ok(s) => s, - Err(e) => { - return syn::Error::new_spanned( - proc_macro2::TokenStream::from(item), - format!("synced attribute can only be applied to structs: {}", e), - ) - .to_compile_error() - .into(); - } - }; - - // Parse the attribute arguments manually - let attr_str = attr.to_string(); - let (version, strategy) = parse_attr_string(&attr_str); - - // Generate the same implementations as the derive macro - let name = &input_struct.ident; - let name_str = name.to_string(); - let strategy_tokens = strategy.to_tokens(); - let vis = &input_struct.vis; - let attrs = &input_struct.attrs; - let generics = &input_struct.generics; - let fields = &input_struct.fields; - - // Convert ItemStruct to DeriveInput for compatibility with existing functions - // Build it manually to avoid parse_quote issues with tuple structs - let derive_input = DeriveInput { - attrs: attrs.clone(), - vis: vis.clone(), - ident: name.clone(), - generics: generics.clone(), - data: syn::Data::Struct(syn::DataStruct { - struct_token: syn::token::Struct::default(), - fields: fields.clone(), - semi_token: if matches!(fields, syn::Fields::Unit) { - Some(syn::token::Semi::default()) - } else { - None - }, - }), - }; - - let serialize_impl = generate_serialize(&derive_input); - let deserialize_impl = generate_deserialize(&derive_input, name); - let merge_impl = generate_merge(&derive_input, &strategy); - - // Add semicolon for tuple/unit structs - let semi = if matches!(fields, syn::Fields::Named(_)) { - quote! {} - } else { - quote! { ; } - }; - - let expanded = quote! 
{ - // Output the struct with rkyv derives added - #[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] - #(#attrs)* - #vis struct #name #generics #fields #semi - - // Register component with inventory for type registry - const _: () = { - const TYPE_PATH: &str = concat!(module_path!(), "::", stringify!(#name)); - - inventory::submit! { - libmarathon::persistence::ComponentMeta { - type_name: #name_str, - type_path: TYPE_PATH, - type_id: std::any::TypeId::of::<#name>(), - deserialize_fn: |bytes: &[u8]| -> anyhow::Result> { - let component: #name = rkyv::from_bytes::<#name, rkyv::rancor::Failure>(bytes)?; - Ok(Box::new(component)) - }, - serialize_fn: |world: &bevy::ecs::world::World, entity: bevy::ecs::entity::Entity| -> Option { - world.get::<#name>(entity).and_then(|component| { - rkyv::to_bytes::(component) - .map(|vec| bytes::Bytes::from(vec.to_vec())) - .ok() - }) - }, - insert_fn: |entity_mut: &mut bevy::ecs::world::EntityWorldMut, boxed: Box| { - if let Ok(component) = boxed.downcast::<#name>() { - entity_mut.insert(*component); - } - }, - } - }; - }; - - impl libmarathon::networking::SyncComponent for #name { - const VERSION: u32 = #version; - const STRATEGY: libmarathon::networking::SyncStrategy = #strategy_tokens; - - #[inline] - fn serialize_sync(&self) -> anyhow::Result { - #serialize_impl - } - - #[inline] - fn deserialize_sync(data: &[u8]) -> anyhow::Result { - #deserialize_impl - } - - #[inline] - fn merge(&mut self, remote: Self, clock_cmp: libmarathon::networking::ClockComparison) -> libmarathon::networking::ComponentMergeDecision { - #merge_impl - } - } - }; - - TokenStream::from(expanded) -} - -/// Parse attribute string (simple parser for version and strategy) -fn parse_attr_string(attr: &str) -> (u32, SyncStrategy) { - let mut version = 1; - let mut strategy = SyncStrategy::LastWriteWins; - - // Simple parsing - look for version = N and strategy = "..." 
- if let Some(v_pos) = attr.find("version") { - if let Some(eq_pos) = attr[v_pos..].find('=') { - let start = v_pos + eq_pos + 1; - let rest = &attr[start..].trim(); - if let Some(comma_pos) = rest.find(',') { - if let Ok(v) = rest[..comma_pos].trim().parse() { - version = v; - } - } else if let Ok(v) = rest.trim().parse() { - version = v; - } - } - } - - if let Some(s_pos) = attr.find("strategy") { - if let Some(eq_pos) = attr[s_pos..].find('=') { - let start = s_pos + eq_pos + 1; - let rest = &attr[start..].trim(); - if let Some(quote_start) = rest.find('"') { - if let Some(quote_end) = rest[quote_start + 1..].find('"') { - let strategy_str = &rest[quote_start + 1..quote_start + 1 + quote_end]; - if let Ok(s) = SyncStrategy::from_str(strategy_str) { - strategy = s; - } - } - } - } - } - - (version, strategy) -}