Vendor Bevy rendering crates (Phase 1 complete)

Closes #6, #7, #8, #9, #10
Refs #2, #122

Vendored bevy_render, bevy_core_pipeline, and bevy_pbr from Bevy v0.17.2
(commit 566358363126dd69f6e457e47f306c68f8041d2a) into libmarathon.

- ~51K LOC vendored to crates/libmarathon/src/render/
- Merged bevy_render_macros into crates/macros/
- Fixed 773→0 compilation errors
- Updated dependencies (encase 0.10→0.11, added 4 new deps)
- Removed bevy_render/pbr/core_pipeline from app Cargo features

All builds passing, macOS smoke test successful.

Signed-off-by: Sienna Meridian Satterwhite <sienna@r3t.io>
This commit is contained in:
2025-12-23 23:50:49 +00:00
parent 7b8fed178e
commit f3f8094530
265 changed files with 83142 additions and 643 deletions

116
Cargo.lock generated
View File

@@ -766,7 +766,7 @@ dependencies = [
"bevy_reflect",
"bytemuck",
"derive_more 2.0.1",
"encase 0.11.2",
"encase",
"serde",
"thiserror 2.0.17",
"wgpu-types",
@@ -877,7 +877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7449e5903594a00f007732ba232af0c527ad4e6e3d29bc3e195ec78dbd20c8b2"
dependencies = [
"bevy_macro_utils",
"encase_derive_impl 0.11.2",
"encase_derive_impl",
]
[[package]]
@@ -1379,7 +1379,7 @@ dependencies = [
"bytemuck",
"derive_more 2.0.1",
"downcast-rs 2.0.2",
"encase 0.11.2",
"encase",
"fixedbitset",
"image",
"indexmap",
@@ -2866,18 +2866,6 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "encase"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0a05902cf601ed11d564128448097b98ebe3c6574bd7b6a653a3d56d54aa020"
dependencies = [
"const_panic",
"encase_derive 0.10.0",
"glam 0.29.3",
"thiserror 1.0.69",
]
[[package]]
name = "encase"
version = "0.11.2"
@@ -2885,38 +2873,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02ba239319a4f60905966390f5e52799d868103a533bb7e27822792332504ddd"
dependencies = [
"const_panic",
"encase_derive 0.11.2",
"encase_derive",
"glam 0.30.9",
"thiserror 2.0.17",
]
[[package]]
name = "encase_derive"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "181d475b694e2dd56ae919ce7699d344d1fd259292d590c723a50d1189a2ea85"
dependencies = [
"encase_derive_impl 0.10.0",
]
[[package]]
name = "encase_derive"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5223d6c647f09870553224f6e37261fe5567bc5a4f4cf13ed337476e79990f2f"
dependencies = [
"encase_derive_impl 0.11.2",
]
[[package]]
name = "encase_derive_impl"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f97b51c5cc57ef7c5f7a0c57c250251c49ee4c28f819f87ac32f4aceabc36792"
dependencies = [
"proc-macro2",
"quote",
"syn",
"encase_derive_impl",
]
[[package]]
@@ -4597,7 +4565,29 @@ version = "0.1.0"
dependencies = [
"anyhow",
"arboard",
"async-channel",
"bevy",
"bevy_app",
"bevy_asset",
"bevy_camera",
"bevy_color",
"bevy_derive",
"bevy_diagnostic",
"bevy_ecs",
"bevy_encase_derive",
"bevy_image",
"bevy_light",
"bevy_math",
"bevy_mesh",
"bevy_platform",
"bevy_reflect",
"bevy_shader",
"bevy_tasks",
"bevy_time",
"bevy_transform",
"bevy_utils",
"bevy_window",
"bitflags 2.10.0",
"blake3",
"blocking",
"bytemuck",
@@ -4606,16 +4596,26 @@ dependencies = [
"crdts",
"criterion",
"crossbeam-channel",
"derive_more 2.0.1",
"dirs",
"downcast-rs 2.0.2",
"egui",
"encase 0.10.0",
"encase",
"fixedbitset",
"futures-lite",
"glam 0.29.3",
"image",
"indexmap",
"inventory",
"iroh",
"iroh-gossip",
"itertools 0.14.0",
"macros",
"naga",
"nonmax",
"offset-allocator",
"proptest",
"radsort",
"rand 0.8.5",
"raw-window-handle",
"rkyv",
@@ -4623,7 +4623,8 @@ dependencies = [
"serde",
"serde_json",
"sha2 0.10.9",
"sync-macros",
"smallvec",
"static_assertions",
"tempfile",
"thiserror 2.0.17",
"tokio",
@@ -4631,6 +4632,8 @@ dependencies = [
"tracing",
"tracing-oslog",
"uuid",
"variadics_please",
"wgpu",
"wgpu-types",
"winit",
]
@@ -4749,6 +4752,24 @@ dependencies = [
"libc",
]
[[package]]
name = "macros"
version = "0.1.0"
dependencies = [
"anyhow",
"bevy",
"bevy_macro_utils",
"bytes",
"inventory",
"libmarathon",
"proc-macro2",
"quote",
"rkyv",
"serde",
"syn",
"tracing",
]
[[package]]
name = "malloc_buf"
version = "0.0.6"
@@ -7241,23 +7262,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "sync-macros"
version = "0.1.0"
dependencies = [
"anyhow",
"bevy",
"bytes",
"inventory",
"libmarathon",
"proc-macro2",
"quote",
"rkyv",
"serde",
"syn",
"tracing",
]
[[package]]
name = "sync_wrapper"
version = "1.0.2"

View File

@@ -1,5 +1,5 @@
[workspace]
members = ["crates/libmarathon", "crates/sync-macros", "crates/app", "crates/xtask"]
members = ["crates/libmarathon", "crates/macros", "crates/app", "crates/xtask"]
resolver = "2"
[workspace.package]

View File

@@ -12,9 +12,7 @@ headless = []
[dependencies]
libmarathon = { path = "../libmarathon" }
bevy = { version = "0.17", default-features = false, features = [
"bevy_render",
"bevy_core_pipeline",
"bevy_pbr",
# bevy_render, bevy_core_pipeline, bevy_pbr are now vendored in libmarathon
"bevy_ui",
"bevy_text",
"png",

View File

@@ -8,6 +8,47 @@ anyhow.workspace = true
arboard = "3.4"
bevy.workspace = true
rkyv.workspace = true
# Bevy subcrates required by vendored rendering (bevy_render, bevy_core_pipeline, bevy_pbr)
bevy_app = "0.17.2"
bevy_asset = "0.17.2"
bevy_camera = "0.17.2"
bevy_color = "0.17.2"
bevy_derive = "0.17.2"
bevy_diagnostic = "0.17.2"
bevy_ecs = "0.17.2"
bevy_encase_derive = "0.17.2"
bevy_image = "0.17.2"
bevy_light = "0.17.2"
bevy_math = "0.17.2"
bevy_mesh = "0.17.2"
bevy_platform = { version = "0.17.2", default-features = false }
bevy_reflect = "0.17.2"
macros = { path = "../macros" }
bevy_shader = "0.17.2"
bevy_tasks = "0.17.2"
bevy_time = "0.17.2"
bevy_transform = "0.17.2"
bevy_utils = "0.17.2"
bevy_window = "0.17.2"
# Additional dependencies required by vendored rendering crates
wgpu = { version = "26", default-features = false, features = ["dx12", "metal"] }
naga = { version = "26", features = ["wgsl-in"] }
downcast-rs = { version = "2", default-features = false, features = ["std"] }
derive_more = { version = "2", default-features = false, features = ["from"] }
image = { version = "0.25.2", default-features = false }
bitflags = { version = "2.3", features = ["bytemuck"] }
fixedbitset = "0.5"
radsort = "0.1"
nonmax = "0.5"
smallvec = { version = "1", default-features = false }
indexmap = "2.0"
async-channel = "2.3"
offset-allocator = "0.2"
variadics_please = "1.1"
static_assertions = "1.1"
blake3 = "1.5"
blocking = "1.6"
bytemuck = { version = "1.14", features = ["derive"] }
@@ -17,7 +58,7 @@ crdts.workspace = true
crossbeam-channel = "0.5"
dirs = "5.0"
egui = { version = "0.33", default-features = false, features = ["bytemuck", "default_fonts"] }
encase = { version = "0.10", features = ["glam"] }
encase = { version = "0.11", features = ["glam"] }
futures-lite = "2.0"
glam = "0.29"
inventory.workspace = true
@@ -30,7 +71,6 @@ rusqlite = { version = "0.37.0", features = ["bundled"] }
serde = { version = "1.0", features = ["derive"] }
serde_json.workspace = true
sha2 = "0.10"
sync-macros = { path = "../sync-macros" }
thiserror = "2.0"
tokio.workspace = true
toml.workspace = true

View File

@@ -28,6 +28,7 @@ pub mod engine;
pub mod networking;
pub mod persistence;
pub mod platform;
pub mod render; // Vendored Bevy rendering (bevy_render + bevy_core_pipeline + bevy_pbr)
pub mod utils;
pub mod sync;

View File

@@ -0,0 +1,62 @@
use bevy_reflect::{std_traits::ReflectDefault, Reflect};

// TODO: add discussion about performance (e.g. relative cost of the blended
// modes vs. `Opaque`/`Mask` — upstream Bevy has not written this yet).
/// Sets how a material's base color alpha channel is used for transparency.
#[derive(Debug, Default, Reflect, Copy, Clone, PartialEq)]
#[reflect(Default, Debug, Clone)]
pub enum AlphaMode {
    /// Base color alpha values are overridden to be fully opaque (1.0).
    #[default]
    Opaque,
    /// Reduce transparency to fully opaque or fully transparent
    /// based on a threshold.
    ///
    /// Compares the base color alpha value to the specified threshold.
    /// If the value is below the threshold,
    /// considers the color to be fully transparent (alpha is set to 0.0).
    /// If it is equal to or above the threshold,
    /// considers the color to be fully opaque (alpha is set to 1.0).
    Mask(f32),
    /// The base color alpha value defines the opacity of the color.
    /// Standard alpha-blending is used to blend the fragment's color
    /// with the color behind it.
    Blend,
    /// Similar to [`AlphaMode::Blend`], however assumes RGB channel values are
    /// [premultiplied](https://en.wikipedia.org/wiki/Alpha_compositing#Straight_versus_premultiplied).
    ///
    /// For otherwise constant RGB values, behaves more like [`AlphaMode::Blend`] for
    /// alpha values closer to 1.0, and more like [`AlphaMode::Add`] for
    /// alpha values closer to 0.0.
    ///
    /// Can be used to avoid “border” or “outline” artifacts that can occur
    /// when using plain alpha-blended textures.
    Premultiplied,
    /// Spreads the fragment out over a hardware-dependent number of sample
    /// locations proportional to the alpha value. This requires multisample
    /// antialiasing; if MSAA isn't on, this is identical to
    /// [`AlphaMode::Mask`] with a value of 0.5.
    ///
    /// Alpha to coverage provides improved performance and better visual
    /// fidelity over [`AlphaMode::Blend`], as Bevy doesn't have to sort objects
    /// when it's in use. It's especially useful for complex transparent objects
    /// like foliage.
    ///
    /// [alpha to coverage]: https://en.wikipedia.org/wiki/Alpha_to_coverage
    AlphaToCoverage,
    /// Combines the color of the fragments with the colors behind them in an
    /// additive process, (i.e. like light) producing lighter results.
    ///
    /// Black produces no effect. Alpha values can be used to modulate the result.
    ///
    /// Useful for effects like holograms, ghosts, lasers and other energy beams.
    Add,
    /// Combines the color of the fragments with the colors behind them in a
    /// multiplicative process, (i.e. like pigments) producing darker results.
    ///
    /// White produces no effect. Alpha values can be used to modulate the result.
    ///
    /// Useful for effects like stained glass, window tint film and some colored liquids.
    Multiply,
}

// `Eq` cannot be derived because `Mask(f32)` contains an `f32`, which is only
// `PartialEq`; this manual impl opts into full equivalence anyway.
impl Eq for AlphaMode {}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,225 @@
use bevy_ecs::{
component::Component,
entity::Entity,
system::{ResMut, SystemParam, SystemParamItem},
};
use bytemuck::Pod;
use gpu_preprocessing::UntypedPhaseIndirectParametersBuffers;
use nonmax::NonMaxU32;
use crate::render::{
render_phase::{
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItemExtraIndex,
SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases,
},
render_resource::{CachedRenderPipelineId, GpuArrayBufferable},
sync_world::MainEntity,
};
pub mod gpu_preprocessing;
pub mod no_gpu_preprocessing;
/// Add this component to mesh entities to disable automatic batching
#[derive(Component, Default)]
pub struct NoAutomaticBatching;

/// Data necessary to be equal for two draw commands to be mergeable
///
/// This is based on the following assumptions:
/// - Only entities with prepared assets (pipelines, materials, meshes) are
///   queued to phases
/// - View bindings are constant across a phase for a given draw function as
///   phases are per-view
/// - `batch_and_prepare_render_phase` is the only system that performs this
///   batching and has sole responsibility for preparing the per-object data.
///   As such the mesh binding and dynamic offsets are assumed to only be
///   variable as a result of the `batch_and_prepare_render_phase` system, e.g.
///   due to having to split data across separate uniform bindings within the
///   same buffer due to the maximum uniform buffer binding size.
#[derive(PartialEq)]
struct BatchMeta<T: PartialEq> {
    /// The pipeline id encompasses all pipeline configuration including vertex
    /// buffers and layouts, shaders and their specializations, bind group
    /// layouts, etc.
    pipeline_id: CachedRenderPipelineId,
    /// The draw function id defines the `RenderCommands` that are called to
    /// set the pipeline and bindings, and make the draw command
    draw_function_id: DrawFunctionId,
    /// The dynamic uniform offset of the per-object data, if any.
    dynamic_offset: Option<NonMaxU32>,
    /// Caller-supplied comparison data (`GetBatchData::CompareData`).
    user_data: T,
}

impl<T: PartialEq> BatchMeta<T> {
    /// Captures the batching-relevant state of `item` together with
    /// `user_data`, for later equality comparison against neighboring items.
    fn new(item: &impl CachedRenderPipelinePhaseItem, user_data: T) -> Self {
        BatchMeta {
            pipeline_id: item.cached_pipeline(),
            draw_function_id: item.draw_function(),
            // Only a dynamic-offset extra index participates in the
            // comparison; `None` and indirect-parameters indices both map to
            // `None` here.
            dynamic_offset: match item.extra_index() {
                PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => {
                    NonMaxU32::new(dynamic_offset)
                }
                PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => {
                    None
                }
            },
            user_data,
        }
    }
}
/// A trait to support getting data used for batching draw commands via phase
/// items.
///
/// This is a simple version that only allows for sorting, not binning, as well
/// as only CPU processing, not GPU preprocessing. For these fancier features,
/// see [`GetFullBatchData`].
pub trait GetBatchData {
    /// The system parameters [`GetBatchData::get_batch_data`] needs in
    /// order to compute the batch data.
    type Param: SystemParam + 'static;
    /// Data used for comparison between phase items. If the pipeline id, draw
    /// function id, per-instance data buffer dynamic offset and this data
    /// matches, the draws can be batched.
    type CompareData: PartialEq;
    /// The per-instance data to be inserted into the
    /// [`crate::render_resource::GpuArrayBuffer`] containing these data for all
    /// instances.
    type BufferData: GpuArrayBufferable + Sync + Send + 'static;
    /// Get the per-instance data to be inserted into the
    /// [`crate::render_resource::GpuArrayBuffer`]. If the instance can be
    /// batched, also return the data used for comparison when deciding whether
    /// draws can be batched, else return None for the `CompareData`.
    ///
    /// This is only called when building instance data on CPU. In the GPU
    /// instance data building path, we use
    /// [`GetFullBatchData::get_index_and_compare_data`] instead.
    fn get_batch_data(
        param: &SystemParamItem<Self::Param>,
        query_item: (Entity, MainEntity),
    ) -> Option<(Self::BufferData, Option<Self::CompareData>)>;
}

/// A trait to support getting data used for batching draw commands via phase
/// items.
///
/// This version allows for binning and GPU preprocessing.
pub trait GetFullBatchData: GetBatchData {
    /// The per-instance data that was inserted into the
    /// [`crate::render_resource::BufferVec`] during extraction.
    type BufferInputData: Pod + Default + Sync + Send;
    /// Get the per-instance data to be inserted into the
    /// [`crate::render_resource::GpuArrayBuffer`].
    ///
    /// This is only called when building uniforms on CPU. In the GPU instance
    /// buffer building path, we use
    /// [`GetFullBatchData::get_index_and_compare_data`] instead.
    fn get_binned_batch_data(
        param: &SystemParamItem<Self::Param>,
        query_item: MainEntity,
    ) -> Option<Self::BufferData>;
    /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
    /// GPU preprocessing phase will use.
    ///
    /// We already inserted the [`GetFullBatchData::BufferInputData`] during the
    /// extraction phase before we got here, so this function shouldn't need to
    /// look up any render data. If CPU instance buffer building is in use, this
    /// function will never be called.
    fn get_index_and_compare_data(
        param: &SystemParamItem<Self::Param>,
        query_item: MainEntity,
    ) -> Option<(NonMaxU32, Option<Self::CompareData>)>;
    /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
    /// GPU preprocessing phase will use.
    ///
    /// We already inserted the [`GetFullBatchData::BufferInputData`] during the
    /// extraction phase before we got here, so this function shouldn't need to
    /// look up any render data.
    ///
    /// This function is currently only called for unbatchable entities when GPU
    /// instance buffer building is in use. For batchable entities, the uniform
    /// index is written during queuing (e.g. in `queue_material_meshes`). In
    /// the case of CPU instance buffer building, the CPU writes the uniforms,
    /// so there's no index to return.
    fn get_binned_index(
        param: &SystemParamItem<Self::Param>,
        query_item: MainEntity,
    ) -> Option<NonMaxU32>;
    /// Writes the [`gpu_preprocessing::IndirectParametersGpuMetadata`]
    /// necessary to draw this batch into the given metadata buffer at the given
    /// index.
    ///
    /// This is only used if GPU culling is enabled (which requires GPU
    /// preprocessing).
    ///
    /// * `indexed` is true if the mesh is indexed or false if it's non-indexed.
    ///
    /// * `base_output_index` is the index of the first mesh instance in this
    ///   batch in the `MeshUniform` output buffer.
    ///
    /// * `batch_set_index` is the index of the batch set in the
    ///   [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to
    ///   a batch set.
    ///
    /// * `indirect_parameters_buffers` is the buffer in which to write the
    ///   metadata.
    ///
    /// * `indirect_parameters_offset` is the index in that buffer at which to
    ///   write the metadata.
    fn write_batch_indirect_parameters_metadata(
        indexed: bool,
        base_output_index: u32,
        batch_set_index: Option<NonMaxU32>,
        indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
        indirect_parameters_offset: u32,
    );
}
/// Sorts a render phase that uses bins.
///
/// Every bin collection of every view's binned phase has its keys sorted so
/// that downstream batching passes iterate the bins in a deterministic order.
pub fn sort_binned_render_phase<BPI>(mut phases: ResMut<ViewBinnedRenderPhases<BPI>>)
where
    BPI: BinnedPhaseItem,
{
    phases.values_mut().for_each(|phase| {
        // Key-sort each of the four bin maps in place.
        phase.multidrawable_meshes.sort_unstable_keys();
        phase.batchable_meshes.sort_unstable_keys();
        phase.unbatchable_meshes.sort_unstable_keys();
        phase.non_mesh_items.sort_unstable_keys();
    });
}
/// Batches the items in a sorted render phase.
///
/// This means comparing metadata needed to draw each phase item and trying to
/// combine the draws into a batch.
///
/// This is common code factored out from
/// [`gpu_preprocessing::batch_and_prepare_sorted_render_phase`] and
/// [`no_gpu_preprocessing::batch_and_prepare_sorted_render_phase`].
fn batch_and_prepare_sorted_render_phase<I, GBD>(
    phase: &mut SortedRenderPhase<I>,
    mut process_item: impl FnMut(&mut I) -> Option<GBD::CompareData>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GBD: GetBatchData,
{
    // Lazily map each item to (its mutable batch range, its batch metadata).
    // `process_item` runs per item as the iterator advances; a `None` result
    // (or `AUTOMATIC_BATCHING == false`) marks the item as unbatchable.
    let items = phase.items.iter_mut().map(|item| {
        let batch_data = match process_item(item) {
            Some(compare_data) if I::AUTOMATIC_BATCHING => Some(BatchMeta::new(item, compare_data)),
            _ => None,
        };
        (item.batch_range_mut(), batch_data)
    });
    // Walk adjacent pairs: when consecutive items have equal metadata, extend
    // the running batch's range to swallow the next item; otherwise the next
    // item starts a new candidate batch. `reduce` is used only to drive this
    // pairwise walk — its final value is intentionally discarded.
    items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
        if batch_meta.is_some() && prev_batch_meta == batch_meta {
            start_range.end = range.end;
            (start_range, prev_batch_meta)
        } else {
            (range, batch_meta)
        }
    });
}

View File

@@ -0,0 +1,182 @@
//! Batching functionality when GPU preprocessing isn't in use.
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::entity::Entity;
use bevy_ecs::resource::Resource;
use bevy_ecs::system::{Res, ResMut, StaticSystemParam};
use smallvec::{smallvec, SmallVec};
use tracing::error;
use wgpu::BindingResource;
use crate::render::{
render_phase::{
BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSets,
CachedRenderPipelinePhaseItem, PhaseItemExtraIndex, SortedPhaseItem,
ViewBinnedRenderPhases, ViewSortedRenderPhases,
},
render_resource::{GpuArrayBuffer, GpuArrayBufferable},
renderer::{RenderDevice, RenderQueue},
};
use super::{GetBatchData, GetFullBatchData};
/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that mode.
#[derive(Resource, Deref, DerefMut)]
pub struct BatchedInstanceBuffer<BD>(pub GpuArrayBuffer<BD>)
where
    BD: GpuArrayBufferable + Sync + Send + 'static;

impl<BD> BatchedInstanceBuffer<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// Creates a new buffer.
    pub fn new(render_device: &RenderDevice) -> Self {
        BatchedInstanceBuffer(GpuArrayBuffer::new(render_device))
    }
    /// Returns the binding of the buffer that contains the per-instance data.
    ///
    /// If we're in the GPU instance buffer building mode, this buffer needs to
    /// be filled in via a compute shader.
    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
        // Delegates to the wrapped `GpuArrayBuffer` through `Deref`.
        self.binding()
    }
}
/// A system that clears out the [`BatchedInstanceBuffer`] for the frame.
///
/// This needs to run before the CPU batched instance buffers are used.
pub fn clear_batched_cpu_instance_buffers<GBD>(
    cpu_batched_instance_buffer: Option<ResMut<BatchedInstanceBuffer<GBD::BufferData>>>,
) where
    GBD: GetBatchData,
{
    // The resource is optional: when CPU batching isn't active for this
    // `GBD`, there is simply nothing to clear.
    match cpu_batched_instance_buffer {
        Some(mut buffer) => buffer.clear(),
        None => {}
    }
}
/// Batch the items in a sorted render phase, when GPU instance buffer building
/// isn't in use. This means comparing metadata needed to draw each phase item
/// and trying to combine the draws into a batch.
pub fn batch_and_prepare_sorted_render_phase<I, GBD>(
    batched_instance_buffer: ResMut<BatchedInstanceBuffer<GBD::BufferData>>,
    mut phases: ResMut<ViewSortedRenderPhases<I>>,
    param: StaticSystemParam<GBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GBD: GetBatchData,
{
    let system_param_item = param.into_inner();
    // We only process CPU-built batch data in this function.
    let batched_instance_buffer = batched_instance_buffer.into_inner();
    for phase in phases.values_mut() {
        // Delegate the adjacent-item merging to the shared helper; this
        // closure pushes each item's per-instance data and points the item's
        // batch range / extra index at the slot just written.
        super::batch_and_prepare_sorted_render_phase::<I, GBD>(phase, |item| {
            let (buffer_data, compare_data) =
                GBD::get_batch_data(&system_param_item, (item.entity(), item.main_entity()))?;
            let buffer_index = batched_instance_buffer.push(buffer_data);
            let index = buffer_index.index;
            let (batch_range, extra_index) = item.batch_range_and_extra_index_mut();
            *batch_range = index..index + 1;
            *extra_index = PhaseItemExtraIndex::maybe_dynamic_offset(buffer_index.dynamic_offset);
            compare_data
        });
    }
}
/// Creates batches for a render phase that uses bins, when GPU batch data
/// building isn't in use.
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffer<GFBD::BufferData>>,
    mut phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let gpu_array_buffer = gpu_array_buffer.into_inner();
    let system_param_item = param.into_inner();
    for phase in phases.values_mut() {
        // Prepare batchables.
        for bin in phase.batchable_meshes.values_mut() {
            let mut batch_set: SmallVec<[BinnedRenderPhaseBatch; 1]> = smallvec![];
            for main_entity in bin.entities().keys() {
                // Entities with no prepared batch data are silently skipped.
                let Some(buffer_data) =
                    GFBD::get_binned_batch_data(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let instance = gpu_array_buffer.push(buffer_data);
                // If the dynamic offset has changed, flush the batch.
                //
                // This is the only time we ever have more than one batch per
                // bin. Note that dynamic offsets are only used on platforms
                // with no storage buffers.
                if !batch_set.last().is_some_and(|batch| {
                    batch.instance_range.end == instance.index
                        && batch.extra_index
                            == PhaseItemExtraIndex::maybe_dynamic_offset(instance.dynamic_offset)
                }) {
                    batch_set.push(BinnedRenderPhaseBatch {
                        representative_entity: (Entity::PLACEHOLDER, *main_entity),
                        // Starts empty; extended to include this instance just
                        // below.
                        instance_range: instance.index..instance.index,
                        extra_index: PhaseItemExtraIndex::maybe_dynamic_offset(
                            instance.dynamic_offset,
                        ),
                    });
                }
                if let Some(batch) = batch_set.last_mut() {
                    batch.instance_range.end = instance.index + 1;
                }
            }
            match phase.batch_sets {
                BinnedRenderPhaseBatchSets::DynamicUniforms(ref mut batch_sets) => {
                    batch_sets.push(batch_set);
                }
                BinnedRenderPhaseBatchSets::Direct(_)
                | BinnedRenderPhaseBatchSets::MultidrawIndirect { .. } => {
                    // Reaching here indicates a misconfiguration: these batch
                    // set kinds belong to the GPU-preprocessing path.
                    error!(
                        "Dynamic uniform batch sets should be used when GPU preprocessing is off"
                    );
                }
            }
        }
        // Prepare unbatchables.
        for unbatchables in phase.unbatchable_meshes.values_mut() {
            for main_entity in unbatchables.entities.keys() {
                let Some(buffer_data) =
                    GFBD::get_binned_batch_data(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let instance = gpu_array_buffer.push(buffer_data);
                unbatchables.buffer_indices.add(instance.into());
            }
        }
    }
}
/// Writes the instance buffer data to the GPU.
///
/// Uploads whatever the CPU batching systems pushed into the
/// [`BatchedInstanceBuffer`] this frame.
pub fn write_batched_instance_buffer<GBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    cpu_batched_instance_buffer: ResMut<BatchedInstanceBuffer<GBD::BufferData>>,
) where
    GBD: GetBatchData,
{
    let buffer = cpu_batched_instance_buffer.into_inner();
    buffer.write_buffer(&render_device, &render_queue);
}

View File

@@ -0,0 +1,37 @@
// Defines the common arrays used to access bindless resources.
//
// This needs to be kept up to date with the `BINDING_NUMBERS` table in
// `bindless.rs`.
//
// You access these by indexing into the bindless index table, and from there
// indexing into the appropriate binding array. For example, to access the base
// color texture of a `StandardMaterial` in bindless mode, write
// `bindless_textures_2d[materials[slot].base_color_texture]`, where
// `materials` is the bindless index table and `slot` is the index into that
// table (which can be found in the `Mesh`).
#define_import_path bevy_render::bindless

// All declarations below only exist when the `BINDLESS` shader def is set;
// on platforms without bindless support this module is empty.
#ifdef BINDLESS

// Binding 0 is the bindless index table.

// Filtering samplers.
@group(#{MATERIAL_BIND_GROUP}) @binding(1) var bindless_samplers_filtering: binding_array<sampler>;
// Non-filtering samplers (nearest neighbor).
@group(#{MATERIAL_BIND_GROUP}) @binding(2) var bindless_samplers_non_filtering: binding_array<sampler>;
// Comparison samplers (typically for shadow mapping).
@group(#{MATERIAL_BIND_GROUP}) @binding(3) var bindless_samplers_comparison: binding_array<sampler>;
// 1D textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(4) var bindless_textures_1d: binding_array<texture_1d<f32>>;
// 2D textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(5) var bindless_textures_2d: binding_array<texture_2d<f32>>;
// 2D array textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(6) var bindless_textures_2d_array: binding_array<texture_2d_array<f32>>;
// 3D textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(7) var bindless_textures_3d: binding_array<texture_3d<f32>>;
// Cubemap textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(8) var bindless_textures_cube: binding_array<texture_cube<f32>>;
// Cubemap array textures.
@group(#{MATERIAL_BIND_GROUP}) @binding(9) var bindless_textures_cube_array: binding_array<texture_cube_array<f32>>;
#endif  // BINDLESS

View File

@@ -0,0 +1,9 @@
// Fragment stage of the blit pipeline: copies the bound input texture to the
// current color attachment via a fullscreen triangle.
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput

// Source texture and sampler; bound by `BlitPipeline::create_bind_group`.
@group(0) @binding(0) var in_texture: texture_2d<f32>;
@group(0) @binding(1) var in_sampler: sampler;

@fragment
fn fs_main(in: FullscreenVertexOutput) -> @location(0) vec4<f32> {
    // Pass the sampled color through unchanged.
    return textureSample(in_texture, in_sampler, in.uv);
}

View File

@@ -0,0 +1,114 @@
use crate::render::FullscreenShader;
use bevy_app::{App, Plugin};
use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle};
use bevy_ecs::prelude::*;
use crate::render::{
render_resource::{
binding_types::{sampler, texture_2d},
*,
},
renderer::RenderDevice,
RenderApp, RenderStartup,
};
use bevy_shader::Shader;
use bevy_utils::default;
/// Adds support for specialized "blit pipelines", which can be used to write one texture to another.
pub struct BlitPlugin;

impl Plugin for BlitPlugin {
    fn build(&self, app: &mut App) {
        // Embed the blit shader in the binary so it needs no asset path.
        embedded_asset!(app, "blit.wgsl");
        // Headless/no-render configurations have no render sub-app.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            .allow_ambiguous_resource::<SpecializedRenderPipelines<BlitPipeline>>()
            .init_resource::<SpecializedRenderPipelines<BlitPipeline>>()
            .add_systems(RenderStartup, init_blit_pipeline);
    }
}
/// Render-world resource holding everything needed to specialize and bind a
/// blit pipeline. Created by [`init_blit_pipeline`] at `RenderStartup`.
#[derive(Resource)]
pub struct BlitPipeline {
    /// Bind group layout: source texture at binding 0, sampler at binding 1.
    pub layout: BindGroupLayout,
    /// Sampler created from `SamplerDescriptor::default()`, shared by all blits.
    pub sampler: Sampler,
    /// Shared fullscreen-triangle vertex stage.
    pub fullscreen_shader: FullscreenShader,
    /// Handle to the embedded `blit.wgsl` fragment shader.
    pub fragment_shader: Handle<Shader>,
}
/// `RenderStartup` system that builds the [`BlitPipeline`] resource: its bind
/// group layout, shared sampler, and shader handles.
pub fn init_blit_pipeline(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    fullscreen_shader: Res<FullscreenShader>,
    asset_server: Res<AssetServer>,
) {
    // Fragment-stage bindings: non-filterable source texture, then a
    // non-filtering sampler, assigned sequential binding numbers.
    let entries = BindGroupLayoutEntries::sequential(
        ShaderStages::FRAGMENT,
        (
            texture_2d(TextureSampleType::Float { filterable: false }),
            sampler(SamplerBindingType::NonFiltering),
        ),
    );
    let layout = render_device.create_bind_group_layout("blit_bind_group_layout", &entries);
    let sampler = render_device.create_sampler(&SamplerDescriptor::default());
    let fragment_shader = load_embedded_asset!(asset_server.as_ref(), "blit.wgsl");
    commands.insert_resource(BlitPipeline {
        layout,
        sampler,
        fullscreen_shader: fullscreen_shader.clone(),
        fragment_shader,
    });
}
impl BlitPipeline {
    /// Builds a bind group pairing `src_texture` with the pipeline's shared
    /// sampler, matching the layout created in [`init_blit_pipeline`].
    pub fn create_bind_group(
        &self,
        render_device: &RenderDevice,
        src_texture: &TextureView,
    ) -> BindGroup {
        let entries = BindGroupEntries::sequential((src_texture, &self.sampler));
        render_device.create_bind_group(None, &self.layout, &entries)
    }
}
/// Specialization key for [`BlitPipeline`]: one render pipeline is cached per
/// unique combination of target format, blend state, and sample count.
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
pub struct BlitPipelineKey {
    /// Texture format of the color target being written.
    pub texture_format: TextureFormat,
    /// Optional blend state for the color target.
    pub blend_state: Option<BlendState>,
    /// MSAA sample count of the target.
    pub samples: u32,
}

impl SpecializedRenderPipeline for BlitPipeline {
    type Key = BlitPipelineKey;
    fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor {
        RenderPipelineDescriptor {
            label: Some("blit pipeline".into()),
            layout: vec![self.layout.clone()],
            // Fullscreen-triangle vertex stage; the fragment stage samples the
            // bound source texture (see `blit.wgsl`).
            vertex: self.fullscreen_shader.to_vertex_state(),
            fragment: Some(FragmentState {
                shader: self.fragment_shader.clone(),
                targets: vec![Some(ColorTargetState {
                    format: key.texture_format,
                    blend: key.blend_state,
                    write_mask: ColorWrites::ALL,
                })],
                ..default()
            }),
            multisample: MultisampleState {
                count: key.samples,
                ..default()
            },
            ..default()
        }
    }
}

View File

@@ -0,0 +1,695 @@
use crate::render::{
batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
extract_component::{ExtractComponent, ExtractComponentPlugin},
extract_resource::{ExtractResource, ExtractResourcePlugin},
render_asset::RenderAssets,
render_graph::{CameraDriverNode, InternedRenderSubGraph, RenderGraph, RenderSubGraph},
render_resource::TextureView,
sync_world::{RenderEntity, SyncToRenderWorld},
texture::{GpuImage, ManualTextureViews},
view::{
ColorGrading, ExtractedView, ExtractedWindows, Hdr, Msaa, NoIndirectDrawing,
RenderVisibleEntities, RetainedViewEntity, ViewUniformOffset,
},
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use bevy_app::{App, Plugin, PostStartup, PostUpdate};
use bevy_asset::{AssetEvent, AssetEventSystems, AssetId, Assets};
use bevy_camera::{
primitives::Frustum,
visibility::{self, RenderLayers, VisibleEntities},
Camera, Camera2d, Camera3d, CameraMainTextureUsages, CameraOutputMode, CameraUpdateSystems,
ClearColor, ClearColorConfig, Exposure, ManualTextureViewHandle, NormalizedRenderTarget,
Projection, RenderTargetInfo, Viewport,
};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
change_detection::DetectChanges,
component::Component,
entity::{ContainsEntity, Entity},
error::BevyError,
lifecycle::HookContext,
message::MessageReader,
prelude::With,
query::{Has, QueryItem},
reflect::ReflectComponent,
resource::Resource,
schedule::IntoScheduleConfigs,
system::{Commands, Query, Res, ResMut},
world::DeferredWorld,
};
use bevy_image::Image;
use bevy_math::{uvec2, vec2, Mat4, URect, UVec2, UVec4, Vec2};
use bevy_platform::collections::{HashMap, HashSet};
use bevy_reflect::prelude::*;
use bevy_transform::components::GlobalTransform;
use bevy_window::{PrimaryWindow, Window, WindowCreated, WindowResized, WindowScaleFactorChanged};
use tracing::warn;
use wgpu::TextureFormat;
/// Registers camera required-components, extraction of cameras into the render
/// world, camera sorting, and the camera driver render-graph node.
#[derive(Default)]
pub struct CameraPlugin;

impl Plugin for CameraPlugin {
    fn build(&self, app: &mut App) {
        app.register_required_components::<Camera, Msaa>()
            .register_required_components::<Camera, SyncToRenderWorld>()
            .register_required_components::<Camera3d, ColorGrading>()
            .register_required_components::<Camera3d, Exposure>()
            .add_plugins((
                ExtractResourcePlugin::<ClearColor>::default(),
                ExtractComponentPlugin::<CameraMainTextureUsages>::default(),
            ))
            .add_systems(PostStartup, camera_system.in_set(CameraUpdateSystems))
            .add_systems(
                PostUpdate,
                // Ordered before asset events are handled and before frusta
                // are updated.
                camera_system
                    .in_set(CameraUpdateSystems)
                    .before(AssetEventSystems)
                    .before(visibility::update_frusta),
            );
        // Warn when a `Camera` is added without a render graph configured.
        app.world_mut()
            .register_component_hooks::<Camera>()
            .on_add(warn_on_no_render_graph);
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .init_resource::<SortedCameras>()
                .add_systems(ExtractSchedule, extract_cameras)
                .add_systems(Render, sort_cameras.in_set(RenderSystems::ManageViews));
            // The driver node dispatches each camera's configured sub-graph.
            let camera_driver_node = CameraDriverNode::new(render_app.world_mut());
            let mut render_graph = render_app.world_mut().resource_mut::<RenderGraph>();
            render_graph.add_node(crate::render::graph::CameraDriverLabel, camera_driver_node);
        }
    }
}
/// Component hook fired when a [`Camera`] is added: warns if the entity has no
/// [`CameraRenderGraph`], since such a camera will not render anything.
fn warn_on_no_render_graph(world: DeferredWorld, HookContext { entity, caller, .. }: HookContext) {
    // A configured render graph means there is nothing to warn about.
    if world.entity(entity).contains::<CameraRenderGraph>() {
        return;
    }
    let caller_prefix = caller
        .map(|location| format!("{location}: "))
        .unwrap_or_default();
    warn!("{}Entity {entity} has a `Camera` component, but it doesn't have a render graph configured. Usually, adding a `Camera2d` or `Camera3d` component will work.
However, you may instead need to enable `bevy_core_pipeline`, or may want to manually add a `CameraRenderGraph` component to create a custom render graph.", caller_prefix);
}
impl ExtractResource for ClearColor {
    type Source = Self;

    /// Copies the main-world clear color into the render world by cloning it.
    fn extract_resource(source: &Self::Source) -> Self {
        Self::clone(source)
    }
}
impl ExtractComponent for CameraMainTextureUsages {
    type QueryData = &'static Self;
    type QueryFilter = ();
    type Out = Self;

    /// Copies the texture-usage component verbatim into the render world.
    fn extract_component(item: QueryItem<Self::QueryData>) -> Option<Self::Out> {
        let usages = *item;
        Some(usages)
    }
}
impl ExtractComponent for Camera2d {
    type QueryData = &'static Self;
    type QueryFilter = With<Camera>;
    type Out = Self;

    /// Clones `Camera2d` into the render world for entities that also have a `Camera`.
    fn extract_component(item: QueryItem<Self::QueryData>) -> Option<Self::Out> {
        Some(Self::clone(item))
    }
}
impl ExtractComponent for Camera3d {
    type QueryData = &'static Self;
    type QueryFilter = With<Camera>;
    type Out = Self;

    /// Clones `Camera3d` into the render world for entities that also have a `Camera`.
    fn extract_component(item: QueryItem<Self::QueryData>) -> Option<Self::Out> {
        Some(Self::clone(item))
    }
}
/// Configures the [`RenderGraph`] name assigned to be run for a given [`Camera`] entity.
///
/// Wraps an interned sub-graph label; see [`CameraRenderGraph::new`] and
/// [`CameraRenderGraph::set`].
#[derive(Component, Debug, Deref, DerefMut, Reflect, Clone)]
#[reflect(opaque)]
#[reflect(Component, Debug, Clone)]
pub struct CameraRenderGraph(InternedRenderSubGraph);
impl CameraRenderGraph {
    /// Creates a new [`CameraRenderGraph`] from any sub-graph label.
    #[inline]
    pub fn new<T: RenderSubGraph>(name: T) -> Self {
        let interned = name.intern();
        Self(interned)
    }

    /// Replaces the configured graph name.
    #[inline]
    pub fn set<T: RenderSubGraph>(&mut self, name: T) {
        *self = Self::new(name);
    }
}
/// Render-world helpers for [`NormalizedRenderTarget`]: resolving the backing
/// texture view/format, computing target info, and change detection.
pub trait NormalizedRenderTargetExt {
    /// Retrieves the [`TextureView`] of this render target, if it exists.
    fn get_texture_view<'a>(
        &self,
        windows: &'a ExtractedWindows,
        images: &'a RenderAssets<GpuImage>,
        manual_texture_views: &'a ManualTextureViews,
    ) -> Option<&'a TextureView>;
    /// Retrieves the [`TextureFormat`] of this render target, if it exists.
    fn get_texture_format<'a>(
        &self,
        windows: &'a ExtractedWindows,
        images: &'a RenderAssets<GpuImage>,
        manual_texture_views: &'a ManualTextureViews,
    ) -> Option<TextureFormat>;
    /// Computes the [`RenderTargetInfo`] (physical size and scale factor) for
    /// this target, or an error when its backing resource cannot be found.
    fn get_render_target_info<'a>(
        &self,
        resolutions: impl IntoIterator<Item = (Entity, &'a Window)>,
        images: &Assets<Image>,
        manual_texture_views: &ManualTextureViews,
    ) -> Result<RenderTargetInfo, MissingRenderTargetInfoError>;
    /// Check if this render target is contained in the given changed windows or images.
    fn is_changed(
        &self,
        changed_window_ids: &HashSet<Entity>,
        changed_image_handles: &HashSet<&AssetId<Image>>,
    ) -> bool;
}
impl NormalizedRenderTargetExt for NormalizedRenderTarget {
    /// Retrieves the [`TextureView`] of this render target, if it exists.
    fn get_texture_view<'a>(
        &self,
        windows: &'a ExtractedWindows,
        images: &'a RenderAssets<GpuImage>,
        manual_texture_views: &'a ManualTextureViews,
    ) -> Option<&'a TextureView> {
        match self {
            // Windows render to their swap chain texture, which may not be
            // acquired for the current frame.
            NormalizedRenderTarget::Window(window_ref) => windows
                .get(&window_ref.entity())
                .and_then(|window| window.swap_chain_texture_view.as_ref()),
            NormalizedRenderTarget::Image(image_target) => images
                .get(&image_target.handle)
                .map(|image| &image.texture_view),
            NormalizedRenderTarget::TextureView(id) => {
                manual_texture_views.get(id).map(|tex| &tex.texture_view)
            }
            NormalizedRenderTarget::None { .. } => None,
        }
    }
    /// Retrieves the [`TextureFormat`] of this render target, if it exists.
    fn get_texture_format<'a>(
        &self,
        windows: &'a ExtractedWindows,
        images: &'a RenderAssets<GpuImage>,
        manual_texture_views: &'a ManualTextureViews,
    ) -> Option<TextureFormat> {
        match self {
            NormalizedRenderTarget::Window(window_ref) => windows
                .get(&window_ref.entity())
                .and_then(|window| window.swap_chain_texture_format),
            NormalizedRenderTarget::Image(image_target) => images
                .get(&image_target.handle)
                .map(|image| image.texture_format),
            NormalizedRenderTarget::TextureView(id) => {
                manual_texture_views.get(id).map(|tex| tex.format)
            }
            NormalizedRenderTarget::None { .. } => None,
        }
    }
    /// Computes the physical size and scale factor of this render target, or an
    /// error if the backing window/image/texture view cannot be found.
    fn get_render_target_info<'a>(
        &self,
        resolutions: impl IntoIterator<Item = (Entity, &'a Window)>,
        images: &Assets<Image>,
        manual_texture_views: &ManualTextureViews,
    ) -> Result<RenderTargetInfo, MissingRenderTargetInfoError> {
        match self {
            NormalizedRenderTarget::Window(window_ref) => resolutions
                .into_iter()
                .find(|(entity, _)| *entity == window_ref.entity())
                .map(|(_, window)| RenderTargetInfo {
                    physical_size: window.physical_size(),
                    scale_factor: window.resolution.scale_factor(),
                })
                .ok_or(MissingRenderTargetInfoError::Window {
                    window: window_ref.entity(),
                }),
            NormalizedRenderTarget::Image(image_target) => images
                .get(&image_target.handle)
                .map(|image| RenderTargetInfo {
                    physical_size: image.size(),
                    scale_factor: image_target.scale_factor.0,
                })
                .ok_or(MissingRenderTargetInfoError::Image {
                    image: image_target.handle.id(),
                }),
            NormalizedRenderTarget::TextureView(id) => manual_texture_views
                .get(id)
                .map(|tex| RenderTargetInfo {
                    physical_size: tex.size,
                    // Manual texture views carry no DPI information.
                    scale_factor: 1.0,
                })
                .ok_or(MissingRenderTargetInfoError::TextureView { texture_view: *id }),
            NormalizedRenderTarget::None { width, height } => Ok(RenderTargetInfo {
                physical_size: uvec2(*width, *height),
                scale_factor: 1.0,
            }),
        }
    }
    /// Check if this render target is contained in the given changed windows or images.
    fn is_changed(
        &self,
        changed_window_ids: &HashSet<Entity>,
        changed_image_handles: &HashSet<&AssetId<Image>>,
    ) -> bool {
        match self {
            NormalizedRenderTarget::Window(window_ref) => {
                changed_window_ids.contains(&window_ref.entity())
            }
            NormalizedRenderTarget::Image(image_target) => {
                changed_image_handles.contains(&image_target.handle.id())
            }
            // Manual texture views have no change tracking; conservatively
            // treat them as always changed.
            NormalizedRenderTarget::TextureView(_) => true,
            NormalizedRenderTarget::None { .. } => false,
        }
    }
}
/// Error returned by [`NormalizedRenderTargetExt::get_render_target_info`] when
/// the resource backing a render target cannot be found.
#[derive(Debug, thiserror::Error)]
pub enum MissingRenderTargetInfoError {
    #[error("RenderTarget::Window missing ({window:?}): Make sure the provided entity has a Window component.")]
    Window { window: Entity },
    #[error("RenderTarget::Image missing ({image:?}): Make sure the Image's usages include RenderAssetUsages::MAIN_WORLD.")]
    Image { image: AssetId<Image> },
    #[error("RenderTarget::TextureView missing ({texture_view:?}): make sure the texture view handle was not removed.")]
    TextureView {
        texture_view: ManualTextureViewHandle,
    },
}
/// System in charge of updating a [`Camera`] when its window or projection changes.
///
/// The system detects window creation, resize, and scale factor change events to update the camera
/// [`Projection`] if needed.
///
/// ## World Resources
///
/// [`Res<Assets<Image>>`](Assets<Image>) -- For cameras that render to an image, this resource is used to
/// inspect information about the render target. This system will not access any other image assets.
///
/// [`OrthographicProjection`]: bevy_camera::OrthographicProjection
/// [`PerspectiveProjection`]: bevy_camera::PerspectiveProjection
pub fn camera_system(
    mut window_resized_reader: MessageReader<WindowResized>,
    mut window_created_reader: MessageReader<WindowCreated>,
    mut window_scale_factor_changed_reader: MessageReader<WindowScaleFactorChanged>,
    mut image_asset_event_reader: MessageReader<AssetEvent<Image>>,
    primary_window: Query<Entity, With<PrimaryWindow>>,
    windows: Query<(Entity, &Window)>,
    images: Res<Assets<Image>>,
    manual_texture_views: Res<ManualTextureViews>,
    mut cameras: Query<(&mut Camera, &mut Projection)>,
) -> Result<(), BevyError> {
    // Needed to resolve render targets that refer to the primary window.
    let primary_window = primary_window.iter().next();
    // Collect all windows that were created, resized, or re-scaled this frame.
    let mut changed_window_ids = <HashSet<_>>::default();
    changed_window_ids.extend(window_created_reader.read().map(|event| event.window));
    changed_window_ids.extend(window_resized_reader.read().map(|event| event.window));
    let scale_factor_changed_window_ids: HashSet<_> = window_scale_factor_changed_reader
        .read()
        .map(|event| event.window)
        .collect();
    changed_window_ids.extend(scale_factor_changed_window_ids.clone());
    // Collect image render targets whose underlying asset was added or modified.
    let changed_image_handles: HashSet<&AssetId<Image>> = image_asset_event_reader
        .read()
        .filter_map(|event| match event {
            AssetEvent::Modified { id } | AssetEvent::Added { id } => Some(id),
            _ => None,
        })
        .collect();
    for (mut camera, mut camera_projection) in &mut cameras {
        let mut viewport_size = camera
            .viewport
            .as_ref()
            .map(|viewport| viewport.physical_size);
        // Only recompute when the target, camera, projection, viewport size, or
        // sub-camera view changed since the last run.
        if let Some(normalized_target) = &camera.target.normalize(primary_window)
            && (normalized_target.is_changed(&changed_window_ids, &changed_image_handles)
                || camera.is_added()
                || camera_projection.is_changed()
                || camera.computed.old_viewport_size != viewport_size
                || camera.computed.old_sub_camera_view != camera.sub_camera_view)
        {
            let new_computed_target_info = normalized_target.get_render_target_info(
                windows,
                &images,
                &manual_texture_views,
            )?;
            // Check for the scale factor changing, and resize the viewport if needed.
            // This can happen when the window is moved between monitors with different DPIs.
            // Without this, the viewport will take a smaller portion of the window moved to
            // a higher DPI monitor.
            if normalized_target.is_changed(&scale_factor_changed_window_ids, &HashSet::default())
                && let Some(old_scale_factor) = camera
                    .computed
                    .target_info
                    .as_ref()
                    .map(|info| info.scale_factor)
            {
                let resize_factor = new_computed_target_info.scale_factor / old_scale_factor;
                if let Some(ref mut viewport) = camera.viewport {
                    let resize = |vec: UVec2| (vec.as_vec2() * resize_factor).as_uvec2();
                    viewport.physical_position = resize(viewport.physical_position);
                    viewport.physical_size = resize(viewport.physical_size);
                    viewport_size = Some(viewport.physical_size);
                }
            }
            // This check is needed because when changing WindowMode to Fullscreen, the viewport may have invalid
            // arguments due to a sudden change on the window size to a lower value.
            // If the size of the window is lower, the viewport will match that lower value.
            if let Some(viewport) = &mut camera.viewport {
                viewport.clamp_to_size(new_computed_target_info.physical_size);
            }
            camera.computed.target_info = Some(new_computed_target_info);
            // Only update the projection for a non-degenerate viewport.
            if let Some(size) = camera.logical_viewport_size()
                && size.x != 0.0
                && size.y != 0.0
            {
                camera_projection.update(size.x, size.y);
                camera.computed.clip_from_view = match &camera.sub_camera_view {
                    Some(sub_view) => camera_projection.get_clip_from_view_for_sub(sub_view),
                    None => camera_projection.get_clip_from_view(),
                }
            }
        }
        // Record the values used this frame so change detection above works next frame.
        if camera.computed.old_viewport_size != viewport_size {
            camera.computed.old_viewport_size = viewport_size;
        }
        if camera.computed.old_sub_camera_view != camera.sub_camera_view {
            camera.computed.old_sub_camera_view = camera.sub_camera_view;
        }
    }
    Ok(())
}
/// The render-world representation of a [`Camera`], inserted by
/// [`extract_cameras`] during extraction.
#[derive(Component, Debug)]
pub struct ExtractedCamera {
    /// The normalized render target this camera draws to, if any.
    pub target: Option<NormalizedRenderTarget>,
    pub physical_viewport_size: Option<UVec2>,
    pub physical_target_size: Option<UVec2>,
    pub viewport: Option<Viewport>,
    /// The render sub-graph to run for this camera.
    pub render_graph: InternedRenderSubGraph,
    pub order: isize,
    pub output_mode: CameraOutputMode,
    pub msaa_writeback: bool,
    pub clear_color: ClearColorConfig,
    /// Index of this camera among cameras sharing the same render target;
    /// assigned by [`sort_cameras`] (zero until then).
    pub sorted_camera_index_for_target: usize,
    pub exposure: f32,
    pub hdr: bool,
}
/// Extracts active cameras from the main world into the render world, inserting
/// [`ExtractedCamera`], [`ExtractedView`], visible-entity data, and mirrored
/// optional components on the corresponding render-world entities.
///
/// Inactive cameras and cameras with a zero-sized target have the extracted
/// components removed instead.
pub fn extract_cameras(
    mut commands: Commands,
    query: Extract<
        Query<(
            Entity,
            RenderEntity,
            &Camera,
            &CameraRenderGraph,
            &GlobalTransform,
            &VisibleEntities,
            &Frustum,
            Has<Hdr>,
            Option<&ColorGrading>,
            Option<&Exposure>,
            Option<&TemporalJitter>,
            Option<&MipBias>,
            Option<&RenderLayers>,
            Option<&Projection>,
            Has<NoIndirectDrawing>,
        )>,
    >,
    primary_window: Extract<Query<Entity, With<PrimaryWindow>>>,
    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
    mapper: Extract<Query<&RenderEntity>>,
) {
    let primary_window = primary_window.iter().next();
    // Every component this system may insert; removed wholesale when a camera
    // is inactive or its target is invalid.
    type ExtractedCameraComponents = (
        ExtractedCamera,
        ExtractedView,
        RenderVisibleEntities,
        TemporalJitter,
        MipBias,
        RenderLayers,
        Projection,
        NoIndirectDrawing,
        ViewUniformOffset,
    );
    for (
        main_entity,
        render_entity,
        camera,
        camera_render_graph,
        transform,
        visible_entities,
        frustum,
        hdr,
        color_grading,
        exposure,
        temporal_jitter,
        mip_bias,
        render_layers,
        projection,
        no_indirect_drawing,
    ) in query.iter()
    {
        if !camera.is_active {
            commands
                .entity(render_entity)
                .remove::<ExtractedCameraComponents>();
            continue;
        }
        // Clone the camera's grading when present; only build a default when absent.
        let color_grading = color_grading.cloned().unwrap_or_default();
        if let (
            Some(URect {
                min: viewport_origin,
                ..
            }),
            Some(viewport_size),
            Some(target_size),
        ) = (
            camera.physical_viewport_rect(),
            camera.physical_viewport_size(),
            camera.physical_target_size(),
        ) {
            // A zero-sized target cannot be rendered to.
            if target_size.x == 0 || target_size.y == 0 {
                commands
                    .entity(render_entity)
                    .remove::<ExtractedCameraComponents>();
                continue;
            }
            // Map main-world visible entities to their render-world counterparts.
            let render_visible_entities = RenderVisibleEntities {
                entities: visible_entities
                    .entities
                    .iter()
                    .map(|(type_id, entities)| {
                        let entities = entities
                            .iter()
                            .map(|entity| {
                                let render_entity = mapper
                                    .get(*entity)
                                    .cloned()
                                    .map(|entity| entity.id())
                                    .unwrap_or(Entity::PLACEHOLDER);
                                (render_entity, (*entity).into())
                            })
                            .collect();
                        (*type_id, entities)
                    })
                    .collect(),
            };
            let mut commands = commands.entity(render_entity);
            commands.insert((
                ExtractedCamera {
                    target: camera.target.normalize(primary_window),
                    viewport: camera.viewport.clone(),
                    physical_viewport_size: Some(viewport_size),
                    physical_target_size: Some(target_size),
                    render_graph: camera_render_graph.0,
                    order: camera.order,
                    output_mode: camera.output_mode,
                    msaa_writeback: camera.msaa_writeback,
                    clear_color: camera.clear_color,
                    // this will be set in sort_cameras
                    sorted_camera_index_for_target: 0,
                    exposure: exposure
                        .map(Exposure::exposure)
                        .unwrap_or_else(|| Exposure::default().exposure()),
                    hdr,
                },
                ExtractedView {
                    retained_view_entity: RetainedViewEntity::new(main_entity.into(), None, 0),
                    clip_from_view: camera.clip_from_view(),
                    world_from_view: *transform,
                    clip_from_world: None,
                    hdr,
                    viewport: UVec4::new(
                        viewport_origin.x,
                        viewport_origin.y,
                        viewport_size.x,
                        viewport_size.y,
                    ),
                    color_grading,
                },
                render_visible_entities,
                *frustum,
            ));
            // Mirror optional components: insert when present on the main-world
            // camera, remove otherwise so stale data never lingers.
            if let Some(temporal_jitter) = temporal_jitter {
                commands.insert(temporal_jitter.clone());
            } else {
                commands.remove::<TemporalJitter>();
            }
            if let Some(mip_bias) = mip_bias {
                commands.insert(mip_bias.clone());
            } else {
                commands.remove::<MipBias>();
            }
            if let Some(render_layers) = render_layers {
                commands.insert(render_layers.clone());
            } else {
                commands.remove::<RenderLayers>();
            }
            if let Some(projection) = projection {
                commands.insert(projection.clone());
            } else {
                commands.remove::<Projection>();
            }
            // Indirect drawing is disabled either explicitly per-camera or when
            // the GPU preprocessing support level cannot do culling.
            if no_indirect_drawing
                || !matches!(
                    gpu_preprocessing_support.max_supported_mode,
                    GpuPreprocessingMode::Culling
                )
            {
                commands.insert(NoIndirectDrawing);
            } else {
                commands.remove::<NoIndirectDrawing>();
            }
        }
    }
}
/// Cameras sorted by their order field. This is updated in the [`sort_cameras`] system.
#[derive(Resource, Default)]
pub struct SortedCameras(pub Vec<SortedCamera>);
/// A single entry of [`SortedCameras`]: the sort key data for one camera.
pub struct SortedCamera {
    pub entity: Entity,
    pub order: isize,
    pub target: Option<NormalizedRenderTarget>,
    pub hdr: bool,
}
/// Sorts cameras by `(order, target)`, assigns each camera its index among
/// cameras sharing the same `(target, hdr)` pair, and warns when two cameras
/// have an identical `(order, target)` (an ambiguous render order).
pub fn sort_cameras(
    mut sorted_cameras: ResMut<SortedCameras>,
    mut cameras: Query<(Entity, &mut ExtractedCamera)>,
) {
    sorted_cameras.0.clear();
    for (entity, camera) in cameras.iter() {
        sorted_cameras.0.push(SortedCamera {
            entity,
            order: camera.order,
            target: camera.target.clone(),
            hdr: camera.hdr,
        });
    }
    // sort by order and ensure within an order, RenderTargets of the same type are packed together
    sorted_cameras
        .0
        .sort_by(|c1, c2| (c1.order, &c1.target).cmp(&(c2.order, &c2.target)));
    let mut previous_order_target = None;
    let mut ambiguities = <HashSet<_>>::default();
    let mut target_counts = <HashMap<_, _>>::default();
    for sorted_camera in &mut sorted_cameras.0 {
        let new_order_target = (sorted_camera.order, sorted_camera.target.clone());
        // Because the list is sorted, equal (order, target) pairs are adjacent.
        if let Some(previous_order_target) = previous_order_target
            && previous_order_target == new_order_target
        {
            ambiguities.insert(new_order_target.clone());
        }
        if let Some(target) = &sorted_camera.target {
            // Running count per (target, hdr) pair becomes this camera's index
            // within its target.
            let count = target_counts
                .entry((target.clone(), sorted_camera.hdr))
                .or_insert(0usize);
            let (_, mut camera) = cameras.get_mut(sorted_camera.entity).unwrap();
            camera.sorted_camera_index_for_target = *count;
            *count += 1;
        }
        previous_order_target = Some(new_order_target);
    }
    if !ambiguities.is_empty() {
        warn!(
            "Camera order ambiguities detected for active cameras with the following priorities: {:?}. \
            To fix this, ensure there is exactly one Camera entity spawned with a given order for a given RenderTarget. \
            Ambiguities should be resolved because either (1) multiple active cameras were spawned accidentally, which will \
            result in rendering multiple instances of the scene or (2) for cases where multiple active cameras is intentional, \
            ambiguities could result in unpredictable render results.",
            ambiguities
        );
    }
}
/// A subpixel offset to jitter a perspective camera's frustum by.
///
/// Useful for temporal rendering techniques.
///
/// Extracted to the render world by [`extract_cameras`] when present on a camera.
#[derive(Component, Clone, Default, Reflect)]
#[reflect(Default, Component, Clone)]
pub struct TemporalJitter {
    /// Offset is in range [-0.5, 0.5].
    pub offset: Vec2,
}
impl TemporalJitter {
    /// Applies `self.offset` to the given projection matrix by shifting its
    /// z-axis x/y terms, scaling the offset by the view size.
    pub fn jitter_projection(&self, clip_from_view: &mut Mat4, view_size: Vec2) {
        // https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/docs/techniques/media/super-resolution-temporal/jitter-space.svg
        let mut jitter = (self.offset * vec2(2.0, -2.0)) / view_size;
        // orthographic (w_axis.w == 1.0): additionally scale by the projection's
        // x/y scale so the jitter stays subpixel.
        if clip_from_view.w_axis.w == 1.0 {
            jitter *= vec2(clip_from_view.x_axis.x, clip_from_view.y_axis.y) * 0.5;
        }
        clip_from_view.z_axis.x += jitter.x;
        clip_from_view.z_axis.y += jitter.y;
    }
}
/// Camera component specifying a mip bias to apply when sampling from material textures.
///
/// Often used in conjunction with antialiasing post-process effects to reduce textures blurriness.
///
/// Extracted to the render world by [`extract_cameras`] when present on a camera.
#[derive(Component, Reflect, Clone)]
#[reflect(Default, Component)]
pub struct MipBias(pub f32);
impl Default for MipBias {
fn default() -> Self {
Self(-1.0)
}
}

View File

@@ -0,0 +1,47 @@
#define_import_path bevy_render::color_operations
#import bevy_render::maths::FRAC_PI_3
// Converts HSV to RGB.
//
// Input: H ∈ [0, 2π), S ∈ [0, 1], V ∈ [0, 1].
// Output: R ∈ [0, 1], G ∈ [0, 1], B ∈ [0, 1].
//
// <https://en.wikipedia.org/wiki/HSL_and_HSV#HSV_to_RGB_alternative>
fn hsv_to_rgb(hsv: vec3<f32>) -> vec3<f32> {
    // `k` evaluates each channel's position on the hue wheel (R, G, and B are
    // offset by 5, 3, and 1 respectively per the alternative formulation).
    let n = vec3(5.0, 3.0, 1.0);
    let k = (n + hsv.x / FRAC_PI_3) % 6.0;
    return hsv.z - hsv.z * hsv.y * max(vec3(0.0), min(k, min(4.0 - k, vec3(1.0))));
}
// Converts RGB to HSV.
//
// Input: R ∈ [0, 1], G ∈ [0, 1], B ∈ [0, 1].
// Output: H ∈ [0, 2π), S ∈ [0, 1], V ∈ [0, 1].
//
// <https://en.wikipedia.org/wiki/HSL_and_HSV#From_RGB>
fn rgb_to_hsv(rgb: vec3<f32>) -> vec3<f32> {
    let x_max = max(rgb.r, max(rgb.g, rgb.b)); // i.e. V
    let x_min = min(rgb.r, min(rgb.g, rgb.b));
    let c = x_max - x_min; // chroma
    // Pick the hue sector from whichever channel is the maximum: xy hold the
    // two other channels, z holds the sector's base angle (in units of 60°).
    var swizzle = vec3<f32>(0.0);
    if (x_max == rgb.r) {
        swizzle = vec3(rgb.gb, 0.0);
    } else if (x_max == rgb.g) {
        swizzle = vec3(rgb.br, 2.0);
    } else {
        swizzle = vec3(rgb.rg, 4.0);
    }
    let h = FRAC_PI_3 * (((swizzle.x - swizzle.y) / c + swizzle.z) % 6.0);
    // Avoid division by zero.
    var s = 0.0;
    if (x_max > 0.0) {
        s = c / x_max;
    }
    return vec3(h, s, x_max);
}

View File

@@ -0,0 +1,106 @@
use crate::render::core_2d::Opaque2d;
use bevy_ecs::{prelude::World, query::QueryItem};
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
render_phase::{TrackedRenderPass, ViewBinnedRenderPhases},
render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::{ExtractedView, ViewDepthTexture, ViewTarget},
};
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
use super::AlphaMask2d;
/// A [`bevy_render::render_graph::Node`] that runs the
/// [`Opaque2d`] [`ViewBinnedRenderPhases`] and [`AlphaMask2d`] [`ViewBinnedRenderPhases`]
/// for each extracted 2D camera view.
#[derive(Default)]
pub struct MainOpaquePass2dNode;
impl ViewNode for MainOpaquePass2dNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ExtractedView,
        &'static ViewTarget,
        &'static ViewDepthTexture,
    );
    /// Records the opaque and alpha-mask 2D phases for this view into a command
    /// buffer, produced via an asynchronous generation task.
    fn run<'w>(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        (camera, view, target, depth): QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // If either phase resource is missing there is nothing to draw.
        let (Some(opaque_phases), Some(alpha_mask_phases)) = (
            world.get_resource::<ViewBinnedRenderPhases<Opaque2d>>(),
            world.get_resource::<ViewBinnedRenderPhases<AlphaMask2d>>(),
        ) else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        let color_attachments = [Some(target.get_color_attachment())];
        let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store));
        let view_entity = graph.view_entity();
        // Likewise, skip views that have no phases recorded for them.
        let (Some(opaque_phase), Some(alpha_mask_phase)) = (
            opaque_phases.get(&view.retained_view_entity),
            alpha_mask_phases.get(&view.retained_view_entity),
        ) else {
            return Ok(());
        };
        render_context.add_command_buffer_generation_task(move |render_device| {
            #[cfg(feature = "trace")]
            let _main_opaque_pass_2d_span = info_span!("main_opaque_pass_2d").entered();
            // Command encoder setup
            let mut command_encoder =
                render_device.create_command_encoder(&CommandEncoderDescriptor {
                    label: Some("main_opaque_pass_2d_command_encoder"),
                });
            // Render pass setup
            let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
                label: Some("main_opaque_pass_2d"),
                color_attachments: &color_attachments,
                depth_stencil_attachment,
                timestamp_writes: None,
                occlusion_query_set: None,
            });
            let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
            let pass_span = diagnostics.pass_span(&mut render_pass, "main_opaque_pass_2d");
            if let Some(viewport) = camera.viewport.as_ref() {
                render_pass.set_camera_viewport(viewport);
            }
            // Opaque draws
            if !opaque_phase.is_empty() {
                #[cfg(feature = "trace")]
                let _opaque_main_pass_2d_span = info_span!("opaque_main_pass_2d").entered();
                if let Err(err) = opaque_phase.render(&mut render_pass, world, view_entity) {
                    error!("Error encountered while rendering the 2d opaque phase {err:?}");
                }
            }
            // Alpha mask draws
            if !alpha_mask_phase.is_empty() {
                #[cfg(feature = "trace")]
                let _alpha_mask_main_pass_2d_span = info_span!("alpha_mask_main_pass_2d").entered();
                if let Err(err) = alpha_mask_phase.render(&mut render_pass, world, view_entity) {
                    error!("Error encountered while rendering the 2d alpha mask phase {err:?}");
                }
            }
            pass_span.end(&mut render_pass);
            // The pass must be dropped (ended) before the encoder can finish.
            drop(render_pass);
            command_encoder.finish()
        });
        Ok(())
    }
}

View File

@@ -0,0 +1,120 @@
use crate::render::core_2d::Transparent2d;
use bevy_ecs::prelude::*;
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
render_phase::{TrackedRenderPass, ViewSortedRenderPhases},
render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::{ExtractedView, ViewDepthTexture, ViewTarget},
};
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
/// A render graph node that runs the [`Transparent2d`] [`ViewSortedRenderPhases`]
/// for each extracted 2D camera view.
///
/// A unit struct for consistency with [`MainOpaquePass2dNode`]; `Self {}`
/// construction remains valid, so this stays backward compatible.
#[derive(Default)]
pub struct MainTransparentPass2dNode;
impl ViewNode for MainTransparentPass2dNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ExtractedView,
        &'static ViewTarget,
        &'static ViewDepthTexture,
    );
    /// Records the transparent 2D phase for this view into a command buffer,
    /// produced via an asynchronous generation task.
    fn run<'w>(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        (camera, view, target, depth): bevy_ecs::query::QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // Skip if the phase resource or this view's phase doesn't exist.
        let Some(transparent_phases) =
            world.get_resource::<ViewSortedRenderPhases<Transparent2d>>()
        else {
            return Ok(());
        };
        let view_entity = graph.view_entity();
        let Some(transparent_phase) = transparent_phases.get(&view.retained_view_entity) else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        let color_attachments = [Some(target.get_color_attachment())];
        // NOTE: For the transparent pass we load the depth buffer. There should be no
        // need to write to it, but store is set to `true` as a workaround for issue #3776,
        // https://github.com/bevyengine/bevy/issues/3776
        // so that wgpu does not clear the depth buffer.
        // As the opaque and alpha mask passes run first, opaque meshes can occlude
        // transparent ones.
        let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store));
        render_context.add_command_buffer_generation_task(move |render_device| {
            // Command encoder setup
            let mut command_encoder =
                render_device.create_command_encoder(&CommandEncoderDescriptor {
                    label: Some("main_transparent_pass_2d_command_encoder"),
                });
            // This needs to run at least once to clear the background color, even if there are no items to render
            {
                #[cfg(feature = "trace")]
                let _main_pass_2d = info_span!("main_transparent_pass_2d").entered();
                let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
                    label: Some("main_transparent_pass_2d"),
                    color_attachments: &color_attachments,
                    depth_stencil_attachment,
                    timestamp_writes: None,
                    occlusion_query_set: None,
                });
                let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
                let pass_span = diagnostics.pass_span(&mut render_pass, "main_transparent_pass_2d");
                if let Some(viewport) = camera.viewport.as_ref() {
                    render_pass.set_camera_viewport(viewport);
                }
                if !transparent_phase.items.is_empty() {
                    #[cfg(feature = "trace")]
                    let _transparent_main_pass_2d_span =
                        info_span!("transparent_main_pass_2d").entered();
                    if let Err(err) = transparent_phase.render(&mut render_pass, world, view_entity)
                    {
                        error!(
                            "Error encountered while rendering the transparent 2D phase {err:?}"
                        );
                    }
                }
                pass_span.end(&mut render_pass);
            }
            // WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't
            // reset for the next render pass so add an empty render pass without a custom viewport
            #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
            if camera.viewport.is_some() {
                #[cfg(feature = "trace")]
                let _reset_viewport_pass_2d = info_span!("reset_viewport_pass_2d").entered();
                let pass_descriptor = RenderPassDescriptor {
                    label: Some("reset_viewport_pass_2d"),
                    color_attachments: &[Some(target.get_color_attachment())],
                    depth_stencil_attachment: None,
                    timestamp_writes: None,
                    occlusion_query_set: None,
                };
                command_encoder.begin_render_pass(&pass_descriptor);
            }
            command_encoder.finish()
        });
        Ok(())
    }
}

View File

@@ -0,0 +1,508 @@
mod main_opaque_pass_2d_node;
mod main_transparent_pass_2d_node;
/// Labels for the 2D core render sub-graph and its nodes.
pub mod graph {
    use crate::render::render_graph::{RenderLabel, RenderSubGraph};
    /// The label of the 2D core render sub-graph.
    #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderSubGraph)]
    pub struct Core2d;
    pub mod input {
        pub const VIEW_ENTITY: &str = "view_entity";
    }
    /// Labels of the nodes that make up the [`Core2d`] graph.
    #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
    pub enum Node2d {
        MsaaWriteback,
        StartMainPass,
        MainOpaquePass,
        MainTransparentPass,
        EndMainPass,
        Wireframe,
        StartMainPassPostProcessing,
        Bloom,
        PostProcessing,
        Tonemapping,
        Fxaa,
        Smaa,
        Upscaling,
        ContrastAdaptiveSharpening,
        EndMainPassPostProcessing,
    }
}
use core::ops::Range;
use bevy_asset::UntypedAssetId;
use bevy_camera::{Camera, Camera2d};
use bevy_image::ToExtents;
use bevy_platform::collections::{HashMap, HashSet};
use crate::render::{
batching::gpu_preprocessing::GpuPreprocessingMode,
camera::CameraRenderGraph,
render_phase::PhaseItemBatchSetKey,
view::{ExtractedView, RetainedViewEntity},
};
pub use main_opaque_pass_2d_node::*;
pub use main_transparent_pass_2d_node::*;
use crate::render::{
tonemapping::{DebandDither, Tonemapping, TonemappingNode},
upscaling::UpscalingNode,
};
use bevy_app::{App, Plugin};
use bevy_ecs::prelude::*;
use bevy_math::FloatOrd;
use crate::render::{
camera::ExtractedCamera,
extract_component::ExtractComponentPlugin,
render_graph::{EmptyNode, RenderGraphExt, ViewNodeRunner},
render_phase::{
sort_phase_system, BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId,
DrawFunctions, PhaseItem, PhaseItemExtraIndex, SortedPhaseItem, ViewBinnedRenderPhases,
ViewSortedRenderPhases,
},
render_resource::{
BindGroupId, CachedRenderPipelineId, TextureDescriptor, TextureDimension, TextureFormat,
TextureUsages,
},
renderer::RenderDevice,
sync_world::MainEntity,
texture::TextureCache,
view::{Msaa, ViewDepthTexture},
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use self::graph::{Core2d, Node2d};
/// The depth [`TextureFormat`] used by the 2D core pipeline's depth textures.
pub const CORE_2D_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth32Float;
/// Sets up the 2D render phases, depth textures, and the [`Core2d`] render graph.
pub struct Core2dPlugin;
impl Plugin for Core2dPlugin {
    fn build(&self, app: &mut App) {
        // Every `Camera2d` gets dithering settings, the `Core2d` render graph,
        // and tonemapping (disabled by default for 2D).
        app.register_required_components::<Camera2d, DebandDither>()
            .register_required_components_with::<Camera2d, CameraRenderGraph>(|| {
                CameraRenderGraph::new(Core2d)
            })
            .register_required_components_with::<Camera2d, Tonemapping>(|| Tonemapping::None)
            .add_plugins(ExtractComponentPlugin::<Camera2d>::default());
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        // Phase resources and the systems that populate/sort them.
        render_app
            .init_resource::<DrawFunctions<Opaque2d>>()
            .init_resource::<DrawFunctions<AlphaMask2d>>()
            .init_resource::<DrawFunctions<Transparent2d>>()
            .init_resource::<ViewSortedRenderPhases<Transparent2d>>()
            .init_resource::<ViewBinnedRenderPhases<Opaque2d>>()
            .init_resource::<ViewBinnedRenderPhases<AlphaMask2d>>()
            .add_systems(ExtractSchedule, extract_core_2d_camera_phases)
            .add_systems(
                Render,
                (
                    sort_phase_system::<Transparent2d>.in_set(RenderSystems::PhaseSort),
                    prepare_core_2d_depth_textures.in_set(RenderSystems::PrepareResources),
                ),
            );
        // Build the `Core2d` sub-graph: main passes followed by post-processing,
        // connected in the edge order listed below.
        render_app
            .add_render_sub_graph(Core2d)
            .add_render_graph_node::<EmptyNode>(Core2d, Node2d::StartMainPass)
            .add_render_graph_node::<ViewNodeRunner<MainOpaquePass2dNode>>(
                Core2d,
                Node2d::MainOpaquePass,
            )
            .add_render_graph_node::<ViewNodeRunner<MainTransparentPass2dNode>>(
                Core2d,
                Node2d::MainTransparentPass,
            )
            .add_render_graph_node::<EmptyNode>(Core2d, Node2d::EndMainPass)
            .add_render_graph_node::<EmptyNode>(Core2d, Node2d::StartMainPassPostProcessing)
            .add_render_graph_node::<ViewNodeRunner<TonemappingNode>>(Core2d, Node2d::Tonemapping)
            .add_render_graph_node::<EmptyNode>(Core2d, Node2d::EndMainPassPostProcessing)
            .add_render_graph_node::<ViewNodeRunner<UpscalingNode>>(Core2d, Node2d::Upscaling)
            .add_render_graph_edges(
                Core2d,
                (
                    Node2d::StartMainPass,
                    Node2d::MainOpaquePass,
                    Node2d::MainTransparentPass,
                    Node2d::EndMainPass,
                    Node2d::StartMainPassPostProcessing,
                    Node2d::Tonemapping,
                    Node2d::EndMainPassPostProcessing,
                    Node2d::Upscaling,
                ),
            );
    }
}
/// Opaque 2D [`BinnedPhaseItem`]s, rendered by [`MainOpaquePass2dNode`].
pub struct Opaque2d {
    /// Determines which objects can be placed into a *batch set*.
    ///
    /// Objects in a single batch set can potentially be multi-drawn together,
    /// if it's enabled and the current platform supports it.
    pub batch_set_key: BatchSetKey2d,
    /// The key, which determines which can be batched.
    pub bin_key: Opaque2dBinKey,
    /// An entity from which data will be fetched, including the mesh if
    /// applicable.
    pub representative_entity: (Entity, MainEntity),
    /// The ranges of instances.
    pub batch_range: Range<u32>,
    /// An extra index, which is either a dynamic offset or an index in the
    /// indirect parameters list.
    pub extra_index: PhaseItemExtraIndex,
}
/// Data that must be identical in order to batch phase items together.
///
/// Ordered and hashable so items can be grouped into bins and sorted.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Opaque2dBinKey {
    /// The identifier of the render pipeline.
    pub pipeline: CachedRenderPipelineId,
    /// The function used to draw.
    pub draw_function: DrawFunctionId,
    /// The asset that this phase item is associated with.
    ///
    /// Normally, this is the ID of the mesh, but for non-mesh items it might be
    /// the ID of another type of asset.
    pub asset_id: UntypedAssetId,
    /// The ID of a bind group specific to the material.
    pub material_bind_group_id: Option<BindGroupId>,
}
// Trivial field accessors; `#[inline]` is applied uniformly (the original
// annotated only some of them) so all stay zero-cost across crate boundaries.
impl PhaseItem for Opaque2d {
    #[inline]
    fn entity(&self) -> Entity {
        self.representative_entity.0
    }
    #[inline]
    fn main_entity(&self) -> MainEntity {
        self.representative_entity.1
    }
    #[inline]
    fn draw_function(&self) -> DrawFunctionId {
        self.bin_key.draw_function
    }
    #[inline]
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }
    #[inline]
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }
    #[inline]
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index.clone()
    }
    #[inline]
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
impl BinnedPhaseItem for Opaque2d {
    // Since 2D meshes presently can't be multidrawn, the batch set key is
    // irrelevant.
    type BatchSetKey = BatchSetKey2d;
    type BinKey = Opaque2dBinKey;

    /// Assembles a phase item from its bin and batch information.
    // `#[inline]` and `Self { .. }` added for consistency with the
    // `Opaque3dDeferred` implementation of this trait.
    #[inline]
    fn new(
        batch_set_key: Self::BatchSetKey,
        bin_key: Self::BinKey,
        representative_entity: (Entity, MainEntity),
        batch_range: Range<u32>,
        extra_index: PhaseItemExtraIndex,
    ) -> Self {
        Self {
            batch_set_key,
            bin_key,
            representative_entity,
            batch_range,
            extra_index,
        }
    }
}
/// 2D meshes aren't currently multi-drawn together, so this batch set key only
/// stores whether the mesh is indexed.
///
/// Compared when grouping items into batch sets; see [`PhaseItemBatchSetKey`].
#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct BatchSetKey2d {
    /// True if the mesh is indexed.
    pub indexed: bool,
}
impl PhaseItemBatchSetKey for BatchSetKey2d {
    /// Whether the meshes in this batch set use an index buffer.
    // `#[inline]` added for consistency with the other trivial accessors in
    // this file.
    #[inline]
    fn indexed(&self) -> bool {
        self.indexed
    }
}
impl CachedRenderPipelinePhaseItem for Opaque2d {
    /// The cached pipeline id recorded in this item's bin key.
    #[inline]
    fn cached_pipeline(&self) -> CachedRenderPipelineId {
        self.bin_key.pipeline
    }
}
/// Alpha mask 2D [`BinnedPhaseItem`]s.
///
/// One item corresponds to one binned draw in the 2D alpha-mask pass.
pub struct AlphaMask2d {
    /// Determines which objects can be placed into a *batch set*.
    ///
    /// Objects in a single batch set can potentially be multi-drawn together,
    /// if it's enabled and the current platform supports it.
    pub batch_set_key: BatchSetKey2d,
    /// The key, which determines which can be batched.
    pub bin_key: AlphaMask2dBinKey,
    /// An entity from which data will be fetched, including the mesh if
    /// applicable.
    pub representative_entity: (Entity, MainEntity),
    /// The ranges of instances.
    pub batch_range: Range<u32>,
    /// An extra index, which is either a dynamic offset or an index in the
    /// indirect parameters list.
    pub extra_index: PhaseItemExtraIndex,
}
/// Data that must be identical in order to batch phase items together.
///
/// Items whose keys compare equal land in the same bin.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct AlphaMask2dBinKey {
    /// The identifier of the render pipeline.
    pub pipeline: CachedRenderPipelineId,
    /// The function used to draw.
    pub draw_function: DrawFunctionId,
    /// The asset that this phase item is associated with.
    ///
    /// Normally, this is the ID of the mesh, but for non-mesh items it might be
    /// the ID of another type of asset.
    pub asset_id: UntypedAssetId,
    /// The ID of a bind group specific to the material.
    pub material_bind_group_id: Option<BindGroupId>,
}
impl PhaseItem for AlphaMask2d {
    #[inline]
    fn entity(&self) -> Entity {
        self.representative_entity.0
    }

    #[inline]
    fn main_entity(&self) -> MainEntity {
        self.representative_entity.1
    }

    #[inline]
    fn draw_function(&self) -> DrawFunctionId {
        self.bin_key.draw_function
    }

    #[inline]
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }

    #[inline]
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }

    // `#[inline]` added to the two accessors below for consistency with the
    // other trivial accessors in this impl and with the `Transparent2d` impl.
    #[inline]
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index.clone()
    }

    #[inline]
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
impl BinnedPhaseItem for AlphaMask2d {
    // Since 2D meshes presently can't be multidrawn, the batch set key is
    // irrelevant.
    type BatchSetKey = BatchSetKey2d;
    type BinKey = AlphaMask2dBinKey;

    /// Assembles a phase item from its bin and batch information.
    // `#[inline]` and `Self { .. }` added for consistency with the
    // `Opaque3dDeferred` implementation of this trait.
    #[inline]
    fn new(
        batch_set_key: Self::BatchSetKey,
        bin_key: Self::BinKey,
        representative_entity: (Entity, MainEntity),
        batch_range: Range<u32>,
        extra_index: PhaseItemExtraIndex,
    ) -> Self {
        Self {
            batch_set_key,
            bin_key,
            representative_entity,
            batch_range,
            extra_index,
        }
    }
}
impl CachedRenderPipelinePhaseItem for AlphaMask2d {
    /// The cached pipeline id recorded in this item's bin key.
    #[inline]
    fn cached_pipeline(&self) -> CachedRenderPipelineId {
        self.bin_key.pipeline
    }
}
/// Transparent 2D [`SortedPhaseItem`]s.
pub struct Transparent2d {
    /// The key used to order this item relative to the others in the phase
    /// (see [`SortedPhaseItem::sort`]).
    pub sort_key: FloatOrd,
    /// The render-world and main-world entities this item draws from.
    pub entity: (Entity, MainEntity),
    /// The identifier of the render pipeline.
    pub pipeline: CachedRenderPipelineId,
    /// The function used to draw.
    pub draw_function: DrawFunctionId,
    /// The range of instances to draw.
    pub batch_range: Range<u32>,
    /// Index of this item's extracted data (assumed; the producer isn't
    /// visible in this file — TODO confirm).
    pub extracted_index: usize,
    /// An extra index, which is either a dynamic offset or an index in the
    /// indirect parameters list.
    pub extra_index: PhaseItemExtraIndex,
    /// Whether the mesh in question is indexed (uses an index buffer in
    /// addition to its vertex buffer).
    pub indexed: bool,
}
impl PhaseItem for Transparent2d {
    /// The render-world entity this item draws from.
    #[inline]
    fn entity(&self) -> Entity {
        self.entity.0
    }

    /// The corresponding main-world entity.
    #[inline]
    fn main_entity(&self) -> MainEntity {
        self.entity.1
    }

    #[inline]
    fn draw_function(&self) -> DrawFunctionId {
        self.draw_function
    }

    #[inline]
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }

    #[inline]
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }

    #[inline]
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index.clone()
    }

    #[inline]
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
impl SortedPhaseItem for Transparent2d {
    type SortKey = FloatOrd;

    #[inline]
    fn sort_key(&self) -> Self::SortKey {
        self.sort_key
    }

    /// Sorts the whole phase in ascending `sort_key` order.
    #[inline]
    fn sort(items: &mut [Self]) {
        // radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`.
        radsort::sort_by_key(items, |item| item.sort_key().0);
    }

    /// Whether this item's mesh uses an index buffer.
    fn indexed(&self) -> bool {
        self.indexed
    }
}
impl CachedRenderPipelinePhaseItem for Transparent2d {
    /// The cached pipeline id this item was queued with.
    #[inline]
    fn cached_pipeline(&self) -> CachedRenderPipelineId {
        self.pipeline
    }
}
/// Sets up the three 2D render phases for every active 2D camera each frame,
/// and discards phases whose views are no longer live.
pub fn extract_core_2d_camera_phases(
    mut transparent_2d_phases: ResMut<ViewSortedRenderPhases<Transparent2d>>,
    mut opaque_2d_phases: ResMut<ViewBinnedRenderPhases<Opaque2d>>,
    mut alpha_mask_2d_phases: ResMut<ViewBinnedRenderPhases<AlphaMask2d>>,
    cameras_2d: Extract<Query<(Entity, &Camera), With<Camera2d>>>,
    mut live_entities: Local<HashSet<RetainedViewEntity>>,
) {
    // Rebuild the set of live views from scratch each frame.
    live_entities.clear();

    for (main_entity, camera) in &cameras_2d {
        // Inactive cameras contribute no phases this frame.
        if !camera.is_active {
            continue;
        }

        // This is the main 2D camera, so we use the first subview index (0).
        let view_key = RetainedViewEntity::new(main_entity.into(), None, 0);
        live_entities.insert(view_key);

        transparent_2d_phases.insert_or_clear(view_key);
        opaque_2d_phases.prepare_for_new_frame(view_key, GpuPreprocessingMode::None);
        alpha_mask_2d_phases.prepare_for_new_frame(view_key, GpuPreprocessingMode::None);
    }

    // Clear out all dead views.
    transparent_2d_phases.retain(|view_key, _| live_entities.contains(view_key));
    opaque_2d_phases.retain(|view_key, _| live_entities.contains(view_key));
    alpha_mask_2d_phases.retain(|view_key, _| live_entities.contains(view_key));
}
/// Allocates (or reuses) a depth texture for each 2D view that has both an
/// opaque and a transparent phase, and attaches it as a [`ViewDepthTexture`].
pub fn prepare_core_2d_depth_textures(
    mut commands: Commands,
    mut texture_cache: ResMut<TextureCache>,
    render_device: Res<RenderDevice>,
    transparent_2d_phases: Res<ViewSortedRenderPhases<Transparent2d>>,
    opaque_2d_phases: Res<ViewBinnedRenderPhases<Opaque2d>>,
    views_2d: Query<(Entity, &ExtractedCamera, &ExtractedView, &Msaa), (With<Camera2d>,)>,
) {
    // One depth texture per render target, so cameras that share a target
    // also share the texture.
    let mut textures = <HashMap<_, _>>::default();

    for (view, camera, extracted_view, msaa) in &views_2d {
        let retained_view = extracted_view.retained_view_entity;
        // Only views that carry both 2D phases need a depth texture.
        if !(opaque_2d_phases.contains_key(&retained_view)
            && transparent_2d_phases.contains_key(&retained_view))
        {
            continue;
        }
        let Some(physical_target_size) = camera.physical_target_size else {
            continue;
        };

        let cached_texture = textures
            .entry(camera.target.clone())
            .or_insert_with(|| {
                // The size of the depth texture matches the physical target.
                texture_cache.get(
                    &render_device,
                    TextureDescriptor {
                        label: Some("view_depth_texture"),
                        size: physical_target_size.to_extents(),
                        mip_level_count: 1,
                        sample_count: msaa.samples(),
                        dimension: TextureDimension::D2,
                        format: CORE_2D_DEPTH_FORMAT,
                        usage: TextureUsages::RENDER_ATTACHMENT,
                        view_formats: &[],
                    },
                )
            })
            .clone();

        commands
            .entity(view)
            .insert(ViewDepthTexture::new(cached_texture, Some(0.0)));
    }
}

View File

@@ -0,0 +1,142 @@
use crate::render::{
core_3d::Opaque3d,
skybox::{SkyboxBindGroup, SkyboxPipelineId},
};
use bevy_camera::{MainPassResolutionOverride, Viewport};
use bevy_ecs::{prelude::World, query::QueryItem};
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
render_phase::{TrackedRenderPass, ViewBinnedRenderPhases},
render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::{ExtractedView, ViewDepthTexture, ViewTarget, ViewUniformOffset},
};
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
use super::AlphaMask3d;
/// A [`bevy_render::render_graph::Node`] that runs the [`Opaque3d`] and [`AlphaMask3d`]
/// [`ViewBinnedRenderPhases`]s.
///
/// Also draws the view's skybox, when one is configured.
#[derive(Default)]
pub struct MainOpaquePass3dNode;
impl ViewNode for MainOpaquePass3dNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ExtractedView,
        &'static ViewTarget,
        &'static ViewDepthTexture,
        Option<&'static SkyboxPipelineId>,
        Option<&'static SkyboxBindGroup>,
        &'static ViewUniformOffset,
        Option<&'static MainPassResolutionOverride>,
    );

    /// Records the opaque and alpha-mask phases (plus the optional skybox) for
    /// one view into its own command buffer, produced via a command-buffer
    /// generation task.
    fn run<'w>(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        (
            camera,
            extracted_view,
            target,
            depth,
            skybox_pipeline,
            skybox_bind_group,
            view_uniform_offset,
            resolution_override,
        ): QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // If either phase resource is absent, there is nothing to draw.
        let (Some(opaque_phases), Some(alpha_mask_phases)) = (
            world.get_resource::<ViewBinnedRenderPhases<Opaque3d>>(),
            world.get_resource::<ViewBinnedRenderPhases<AlphaMask3d>>(),
        ) else {
            return Ok(());
        };
        // Same if this particular view has no phases this frame.
        let (Some(opaque_phase), Some(alpha_mask_phase)) = (
            opaque_phases.get(&extracted_view.retained_view_entity),
            alpha_mask_phases.get(&extracted_view.retained_view_entity),
        ) else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        // Attachments are captured up front so the closure below can own them.
        let color_attachments = [Some(target.get_color_attachment())];
        let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store));
        let view_entity = graph.view_entity();
        render_context.add_command_buffer_generation_task(move |render_device| {
            #[cfg(feature = "trace")]
            let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();
            // Command encoder setup
            let mut command_encoder =
                render_device.create_command_encoder(&CommandEncoderDescriptor {
                    label: Some("main_opaque_pass_3d_command_encoder"),
                });
            // Render pass setup
            let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
                label: Some("main_opaque_pass_3d"),
                color_attachments: &color_attachments,
                depth_stencil_attachment,
                timestamp_writes: None,
                occlusion_query_set: None,
            });
            let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
            let pass_span = diagnostics.pass_span(&mut render_pass, "main_opaque_pass_3d");
            // Apply the camera viewport, honoring any resolution override.
            if let Some(viewport) =
                Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override)
            {
                render_pass.set_camera_viewport(&viewport);
            }
            // Opaque draws
            if !opaque_phase.is_empty() {
                #[cfg(feature = "trace")]
                let _opaque_main_pass_3d_span = info_span!("opaque_main_pass_3d").entered();
                if let Err(err) = opaque_phase.render(&mut render_pass, world, view_entity) {
                    error!("Error encountered while rendering the opaque phase {err:?}");
                }
            }
            // Alpha draws
            if !alpha_mask_phase.is_empty() {
                #[cfg(feature = "trace")]
                let _alpha_mask_main_pass_3d_span = info_span!("alpha_mask_main_pass_3d").entered();
                if let Err(err) = alpha_mask_phase.render(&mut render_pass, world, view_entity) {
                    error!("Error encountered while rendering the alpha mask phase {err:?}");
                }
            }
            // Skybox draw using a fullscreen triangle
            if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) =
                (skybox_pipeline, skybox_bind_group)
            {
                let pipeline_cache = world.resource::<PipelineCache>();
                // Skip silently if the skybox pipeline isn't compiled yet.
                if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) {
                    render_pass.set_render_pipeline(pipeline);
                    render_pass.set_bind_group(
                        0,
                        &skybox_bind_group.0,
                        &[view_uniform_offset.offset, skybox_bind_group.1],
                    );
                    render_pass.draw(0..3, 0..1);
                }
            }
            pass_span.end(&mut render_pass);
            // The pass must end before the encoder can be finished.
            drop(render_pass);
            command_encoder.finish()
        });
        Ok(())
    }
}

View File

@@ -0,0 +1,167 @@
use super::ViewTransmissionTexture;
use crate::render::core_3d::Transmissive3d;
use bevy_camera::{Camera3d, MainPassResolutionOverride, Viewport};
use bevy_ecs::{prelude::*, query::QueryItem};
use bevy_image::ToExtents;
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
render_phase::ViewSortedRenderPhases,
render_resource::{RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::{ExtractedView, ViewDepthTexture, ViewTarget},
};
use core::ops::Range;
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
/// A [`bevy_render::render_graph::Node`] that runs the [`Transmissive3d`]
/// [`ViewSortedRenderPhases`].
///
/// May render the phase in multiple steps, snapshotting the main texture into
/// the transmission texture between steps.
#[derive(Default)]
pub struct MainTransmissivePass3dNode;
impl ViewNode for MainTransmissivePass3dNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ExtractedView,
        &'static Camera3d,
        &'static ViewTarget,
        Option<&'static ViewTransmissionTexture>,
        &'static ViewDepthTexture,
        Option<&'static MainPassResolutionOverride>,
    );

    /// Draws the sorted transmissive phase for this view, either in one pass
    /// or in N steps that each copy the main texture into the transmission
    /// texture first.
    fn run(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        (camera, view, camera_3d, target, transmission, depth, resolution_override): QueryItem<
            Self::ViewQuery,
        >,
        world: &World,
    ) -> Result<(), NodeRunError> {
        let view_entity = graph.view_entity();
        // No phase resource, or no phase for this view: nothing to draw.
        let Some(transmissive_phases) =
            world.get_resource::<ViewSortedRenderPhases<Transmissive3d>>()
        else {
            return Ok(());
        };
        let Some(transmissive_phase) = transmissive_phases.get(&view.retained_view_entity) else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        // NOTE(review): this panics if the camera has no physical target size,
        // even when the phase is empty — presumably extracted cameras always
        // carry one; confirm.
        let physical_target_size = camera.physical_target_size.unwrap();
        let render_pass_descriptor = RenderPassDescriptor {
            label: Some("main_transmissive_pass_3d"),
            color_attachments: &[Some(target.get_color_attachment())],
            depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)),
            timestamp_writes: None,
            occlusion_query_set: None,
        };
        // Run the transmissive pass, sorted back-to-front
        // NOTE: Scoped to drop the mutable borrow of render_context
        #[cfg(feature = "trace")]
        let _main_transmissive_pass_3d_span = info_span!("main_transmissive_pass_3d").entered();
        if !transmissive_phase.items.is_empty() {
            let screen_space_specular_transmission_steps =
                camera_3d.screen_space_specular_transmission_steps;
            if screen_space_specular_transmission_steps > 0 {
                let transmission =
                    transmission.expect("`ViewTransmissionTexture` should exist at this point");
                // `transmissive_phase.items` are depth sorted, so we split them into N = `screen_space_specular_transmission_steps`
                // ranges, rendering them back-to-front in multiple steps, allowing multiple levels of transparency.
                //
                // Note: For the sake of simplicity, we currently split items evenly among steps. In the future, we
                // might want to use a more sophisticated heuristic (e.g. based on view bounds, or with an exponential
                // falloff so that nearby objects have more levels of transparency available to them)
                for range in split_range(
                    0..transmissive_phase.items.len(),
                    screen_space_specular_transmission_steps,
                ) {
                    // Copy the main texture to the transmission texture, allowing to use the color output of the
                    // previous step (or of the `Opaque3d` phase, for the first step) as a transmissive color input
                    render_context.command_encoder().copy_texture_to_texture(
                        target.main_texture().as_image_copy(),
                        transmission.texture.as_image_copy(),
                        physical_target_size.to_extents(),
                    );
                    let mut render_pass =
                        render_context.begin_tracked_render_pass(render_pass_descriptor.clone());
                    let pass_span =
                        diagnostics.pass_span(&mut render_pass, "main_transmissive_pass_3d");
                    // NOTE(review): this branch applies the raw camera viewport
                    // and ignores `resolution_override`, unlike the single-pass
                    // branch below — confirm whether that asymmetry is intended.
                    if let Some(viewport) = camera.viewport.as_ref() {
                        render_pass.set_camera_viewport(viewport);
                    }
                    // render items in range
                    if let Err(err) =
                        transmissive_phase.render_range(&mut render_pass, world, view_entity, range)
                    {
                        error!("Error encountered while rendering the transmissive phase {err:?}");
                    }
                    pass_span.end(&mut render_pass);
                }
            } else {
                // Single-step path: one pass over the whole phase.
                let mut render_pass =
                    render_context.begin_tracked_render_pass(render_pass_descriptor);
                let pass_span =
                    diagnostics.pass_span(&mut render_pass, "main_transmissive_pass_3d");
                if let Some(viewport) = Viewport::from_viewport_and_override(
                    camera.viewport.as_ref(),
                    resolution_override,
                ) {
                    render_pass.set_camera_viewport(&viewport);
                }
                if let Err(err) = transmissive_phase.render(&mut render_pass, world, view_entity) {
                    error!("Error encountered while rendering the transmissive phase {err:?}");
                }
                pass_span.end(&mut render_pass);
            }
        }
        Ok(())
    }
}
/// Splits a [`Range`] into at most `max_num_splits` contiguous, non-overlapping
/// sub-ranges that together cover the input exactly.
///
/// When the length doesn't divide evenly, the leftover elements are handed out
/// one at a time to the earliest sub-ranges, so sizes differ by at most one.
///
/// # Panics
///
/// Panics if `range` is empty or `max_num_splits` is zero.
fn split_range(range: Range<usize>, max_num_splits: usize) -> impl Iterator<Item = Range<usize>> {
    let total = range.end - range.start;
    assert!(total > 0, "to be split, a range must not be empty");
    assert!(max_num_splits > 0, "max_num_splits must be at least 1");
    // Never produce more pieces than there are elements.
    let num_pieces = max_num_splits.min(total);
    let base_len = total / num_pieces;
    let mut leftover = total % num_pieces;
    let mut cursor = range.start;
    (0..num_pieces).map(move |_| {
        // The first `leftover` pieces absorb one extra element each.
        let bonus = if leftover > 0 {
            leftover -= 1;
            1
        } else {
            0
        };
        let piece_end = (cursor + base_len + bonus).min(range.end);
        let piece = cursor..piece_end;
        cursor = piece_end;
        piece
    })
}

View File

@@ -0,0 +1,107 @@
use crate::render::core_3d::Transparent3d;
use bevy_camera::{MainPassResolutionOverride, Viewport};
use bevy_ecs::{prelude::*, query::QueryItem};
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
render_phase::ViewSortedRenderPhases,
render_resource::{RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::{ExtractedView, ViewDepthTexture, ViewTarget},
};
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
/// A [`bevy_render::render_graph::Node`] that runs the [`Transparent3d`]
/// [`ViewSortedRenderPhases`].
#[derive(Default)]
pub struct MainTransparentPass3dNode;
impl ViewNode for MainTransparentPass3dNode {
type ViewQuery = (
&'static ExtractedCamera,
&'static ExtractedView,
&'static ViewTarget,
&'static ViewDepthTexture,
Option<&'static MainPassResolutionOverride>,
);
fn run(
&self,
graph: &mut RenderGraphContext,
render_context: &mut RenderContext,
(camera, view, target, depth, resolution_override): QueryItem<Self::ViewQuery>,
world: &World,
) -> Result<(), NodeRunError> {
let view_entity = graph.view_entity();
let Some(transparent_phases) =
world.get_resource::<ViewSortedRenderPhases<Transparent3d>>()
else {
return Ok(());
};
let Some(transparent_phase) = transparent_phases.get(&view.retained_view_entity) else {
return Ok(());
};
if !transparent_phase.items.is_empty() {
// Run the transparent pass, sorted back-to-front
// NOTE: Scoped to drop the mutable borrow of render_context
#[cfg(feature = "trace")]
let _main_transparent_pass_3d_span = info_span!("main_transparent_pass_3d").entered();
let diagnostics = render_context.diagnostic_recorder();
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("main_transparent_pass_3d"),
color_attachments: &[Some(target.get_color_attachment())],
// NOTE: For the transparent pass we load the depth buffer. There should be no
// need to write to it, but store is set to `true` as a workaround for issue #3776,
// https://github.com/bevyengine/bevy/issues/3776
// so that wgpu does not clear the depth buffer.
// As the opaque and alpha mask passes run first, opaque meshes can occlude
// transparent ones.
depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)),
timestamp_writes: None,
occlusion_query_set: None,
});
let pass_span = diagnostics.pass_span(&mut render_pass, "main_transparent_pass_3d");
if let Some(viewport) =
Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override)
{
render_pass.set_camera_viewport(&viewport);
}
if let Err(err) = transparent_phase.render(&mut render_pass, world, view_entity) {
error!("Error encountered while rendering the transparent phase {err:?}");
}
pass_span.end(&mut render_pass);
}
// WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't
// reset for the next render pass so add an empty render pass without a custom viewport
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
if camera.viewport.is_some() {
#[cfg(feature = "trace")]
let _reset_viewport_pass_3d = info_span!("reset_viewport_pass_3d").entered();
let pass_descriptor = RenderPassDescriptor {
label: Some("reset_viewport_pass_3d"),
color_attachments: &[Some(target.get_color_attachment())],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
};
render_context
.command_encoder()
.begin_render_pass(&pass_descriptor);
}
Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,18 @@
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput

// Per-pixel material/lighting id, read as an unsigned-integer texture.
@group(0) @binding(0)
var material_id_texture: texture_2d<u32>;

struct FragmentOutput {
    @builtin(frag_depth) frag_depth: f32,
}

// Writes the per-pixel id from `material_id_texture` into the fragment depth
// output of a fullscreen pass.
@fragment
fn fragment(in: FullscreenVertexOutput) -> FragmentOutput {
    var out: FragmentOutput;
    // Depth is stored as unorm, so we are dividing the u8 by 255.0 here.
    out.frag_depth = f32(textureLoad(material_id_texture, vec2<i32>(in.position.xy), 0).x) / 255.0;
    return out;
}

View File

@@ -0,0 +1,193 @@
use crate::render::{
prepass::{DeferredPrepass, ViewPrepassTextures},
FullscreenShader,
};
use bevy_app::prelude::*;
use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer};
use bevy_ecs::prelude::*;
use bevy_image::ToExtents;
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_resource::{binding_types::texture_2d, *},
renderer::RenderDevice,
texture::{CachedTexture, TextureCache},
view::ViewTarget,
Render, RenderApp, RenderStartup, RenderSystems,
};
use super::DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT;
use bevy_ecs::query::QueryItem;
use crate::render::{
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
renderer::RenderContext,
};
use bevy_utils::default;
/// Registers the shader, pipeline, and per-view texture preparation needed to
/// copy the deferred lighting pass id into a depth texture.
pub struct CopyDeferredLightingIdPlugin;

impl Plugin for CopyDeferredLightingIdPlugin {
    fn build(&self, app: &mut App) {
        embedded_asset!(app, "copy_deferred_lighting_id.wgsl");
        // All remaining setup lives in the render sub-app; skip if it's absent.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            .add_systems(RenderStartup, init_copy_deferred_lighting_id_pipeline)
            .add_systems(
                Render,
                (prepare_deferred_lighting_id_textures.in_set(RenderSystems::PrepareResources),),
            );
    }
}
/// Render-graph node that copies the deferred lighting pass id into a depth
/// texture; see [`CopyDeferredLightingIdPlugin`].
#[derive(Default)]
pub struct CopyDeferredLightingIdNode;

impl CopyDeferredLightingIdNode {
    /// Stable name used to identify this node.
    pub const NAME: &'static str = "copy_deferred_lighting_id";
}
impl ViewNode for CopyDeferredLightingIdNode {
    type ViewQuery = (
        &'static ViewTarget,
        &'static ViewPrepassTextures,
        &'static DeferredLightingIdDepthTexture,
    );

    /// Copies the prepass lighting-pass-id texture into the view's
    /// lighting-id depth texture with a fullscreen draw.
    fn run(
        &self,
        _graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        (_view_target, view_prepass_textures, deferred_lighting_id_depth_texture): QueryItem<
            Self::ViewQuery,
        >,
        world: &World,
    ) -> Result<(), NodeRunError> {
        let copy_deferred_lighting_id_pipeline = world.resource::<CopyDeferredLightingIdPipeline>();
        let pipeline_cache = world.resource::<PipelineCache>();
        // The pipeline isn't available yet; skip this frame.
        let Some(pipeline) =
            pipeline_cache.get_render_pipeline(copy_deferred_lighting_id_pipeline.pipeline_id)
        else {
            return Ok(());
        };
        // Without the prepass lighting-id texture there is nothing to copy.
        let Some(deferred_lighting_pass_id_texture) =
            &view_prepass_textures.deferred_lighting_pass_id
        else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        let bind_group = render_context.render_device().create_bind_group(
            "copy_deferred_lighting_id_bind_group",
            &copy_deferred_lighting_id_pipeline.layout,
            &BindGroupEntries::single(&deferred_lighting_pass_id_texture.texture.default_view),
        );
        // Depth-only pass: clear to 0.0, then write ids via `frag_depth`.
        let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
            label: Some("copy_deferred_lighting_id"),
            color_attachments: &[],
            depth_stencil_attachment: Some(RenderPassDepthStencilAttachment {
                view: &deferred_lighting_id_depth_texture.texture.default_view,
                depth_ops: Some(Operations {
                    load: LoadOp::Clear(0.0),
                    store: StoreOp::Store,
                }),
                stencil_ops: None,
            }),
            timestamp_writes: None,
            occlusion_query_set: None,
        });
        let pass_span = diagnostics.pass_span(&mut render_pass, "copy_deferred_lighting_id");
        render_pass.set_render_pipeline(pipeline);
        render_pass.set_bind_group(0, &bind_group, &[]);
        // Fullscreen triangle.
        render_pass.draw(0..3, 0..1);
        pass_span.end(&mut render_pass);
        Ok(())
    }
}
/// Cached bind group layout and render pipeline for the lighting-id copy pass.
#[derive(Resource)]
struct CopyDeferredLightingIdPipeline {
    // Layout with a single uint-texture binding (the lighting pass id texture).
    layout: BindGroupLayout,
    // Id of the queued copy pipeline in the `PipelineCache`.
    pipeline_id: CachedRenderPipelineId,
}
/// Builds the bind group layout and queues the copy pipeline, then stores both
/// in the [`CopyDeferredLightingIdPipeline`] resource.
pub fn init_copy_deferred_lighting_id_pipeline(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    fullscreen_shader: Res<FullscreenShader>,
    asset_server: Res<AssetServer>,
    pipeline_cache: Res<PipelineCache>,
) {
    // Single binding: the lighting pass id texture, sampled as a uint texture
    // in the fragment stage.
    let bind_group_layout = render_device.create_bind_group_layout(
        "copy_deferred_lighting_id_bind_group_layout",
        &BindGroupLayoutEntries::single(
            ShaderStages::FRAGMENT,
            texture_2d(TextureSampleType::Uint),
        ),
    );

    let fragment_shader =
        load_embedded_asset!(asset_server.as_ref(), "copy_deferred_lighting_id.wgsl");

    // Fullscreen-triangle vertex stage plus a depth-only fragment stage whose
    // depth test always passes.
    let pipeline_id = pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
        label: Some("copy_deferred_lighting_id_pipeline".into()),
        layout: vec![bind_group_layout.clone()],
        vertex: fullscreen_shader.to_vertex_state(),
        fragment: Some(FragmentState {
            shader: fragment_shader,
            ..default()
        }),
        depth_stencil: Some(DepthStencilState {
            format: DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT,
            depth_write_enabled: true,
            depth_compare: CompareFunction::Always,
            stencil: StencilState::default(),
            bias: DepthBiasState::default(),
        }),
        ..default()
    });

    commands.insert_resource(CopyDeferredLightingIdPipeline {
        layout: bind_group_layout,
        pipeline_id,
    });
}
/// Per-view depth texture that receives the deferred lighting pass id
/// (written by [`CopyDeferredLightingIdNode`]).
#[derive(Component)]
pub struct DeferredLightingIdDepthTexture {
    /// Cached depth texture in [`DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT`].
    pub texture: CachedTexture,
}
/// Allocates the lighting-id depth texture for each deferred-prepass view and
/// attaches it as a [`DeferredLightingIdDepthTexture`] component.
fn prepare_deferred_lighting_id_textures(
    mut commands: Commands,
    mut texture_cache: ResMut<TextureCache>,
    render_device: Res<RenderDevice>,
    views: Query<(Entity, &ExtractedCamera), With<DeferredPrepass>>,
) {
    for (entity, camera) in &views {
        // Without a known target size no texture can be allocated.
        let Some(physical_target_size) = camera.physical_target_size else {
            continue;
        };
        let descriptor = TextureDescriptor {
            label: Some("deferred_lighting_id_depth_texture_a"),
            size: physical_target_size.to_extents(),
            mip_level_count: 1,
            sample_count: 1,
            dimension: TextureDimension::D2,
            format: DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT,
            usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::COPY_SRC,
            view_formats: &[],
        };
        let texture = texture_cache.get(&render_device, descriptor);
        commands
            .entity(entity)
            .insert(DeferredLightingIdDepthTexture { texture });
    }
}

View File

@@ -0,0 +1,186 @@
pub mod copy_lighting_id;
pub mod node;
use core::ops::Range;
use crate::render::prepass::{OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey};
use bevy_ecs::prelude::*;
use crate::render::sync_world::MainEntity;
use crate::render::{
render_phase::{
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem,
PhaseItemExtraIndex,
},
render_resource::{CachedRenderPipelineId, TextureFormat},
};
/// Texture format of the deferred prepass G-buffer.
pub const DEFERRED_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgba32Uint;
/// Format of the per-pixel deferred lighting pass id texture.
pub const DEFERRED_LIGHTING_PASS_ID_FORMAT: TextureFormat = TextureFormat::R8Uint;
/// Depth format the lighting pass id is copied into (see `copy_lighting_id`).
pub const DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth16Unorm;
/// Opaque phase of the 3D Deferred pass.
///
/// Sorted by pipeline, then by mesh to improve batching.
///
/// Used to render all 3D meshes with materials that have no transparency.
#[derive(PartialEq, Eq, Hash)]
pub struct Opaque3dDeferred {
    /// Determines which objects can be placed into a *batch set*.
    ///
    /// Objects in a single batch set can potentially be multi-drawn together,
    /// if it's enabled and the current platform supports it.
    pub batch_set_key: OpaqueNoLightmap3dBatchSetKey,
    /// Information that separates items into bins.
    pub bin_key: OpaqueNoLightmap3dBinKey,
    /// An entity from which rendering data is fetched.
    pub representative_entity: (Entity, MainEntity),
    /// The range of instances this item draws.
    pub batch_range: Range<u32>,
    /// An extra index, which is either a dynamic offset or an index in the
    /// indirect parameters list.
    pub extra_index: PhaseItemExtraIndex,
}
impl PhaseItem for Opaque3dDeferred {
    #[inline]
    fn entity(&self) -> Entity {
        self.representative_entity.0
    }

    // `#[inline]` added for consistency with the other trivial accessors in
    // this impl and the sibling `AlphaMask3dDeferred` impl.
    #[inline]
    fn main_entity(&self) -> MainEntity {
        self.representative_entity.1
    }

    #[inline]
    fn draw_function(&self) -> DrawFunctionId {
        self.batch_set_key.draw_function
    }

    #[inline]
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }

    #[inline]
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }

    #[inline]
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index.clone()
    }

    #[inline]
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
impl BinnedPhaseItem for Opaque3dDeferred {
    type BatchSetKey = OpaqueNoLightmap3dBatchSetKey;
    type BinKey = OpaqueNoLightmap3dBinKey;

    /// Assembles a phase item from its bin and batch information.
    #[inline]
    fn new(
        batch_set_key: Self::BatchSetKey,
        bin_key: Self::BinKey,
        representative_entity: (Entity, MainEntity),
        batch_range: Range<u32>,
        extra_index: PhaseItemExtraIndex,
    ) -> Self {
        Self {
            batch_set_key,
            bin_key,
            representative_entity,
            batch_range,
            extra_index,
        }
    }
}
impl CachedRenderPipelinePhaseItem for Opaque3dDeferred {
    /// The cached pipeline id recorded in this item's batch set key.
    #[inline]
    fn cached_pipeline(&self) -> CachedRenderPipelineId {
        self.batch_set_key.pipeline
    }
}
/// Alpha mask phase of the 3D Deferred pass.
///
/// Sorted by pipeline, then by mesh to improve batching.
///
/// Used to render all meshes with a material with an alpha mask.
pub struct AlphaMask3dDeferred {
    /// Determines which objects can be placed into a *batch set*.
    ///
    /// Objects in a single batch set can potentially be multi-drawn together,
    /// if it's enabled and the current platform supports it.
    pub batch_set_key: OpaqueNoLightmap3dBatchSetKey,
    /// Information that separates items into bins.
    pub bin_key: OpaqueNoLightmap3dBinKey,
    /// An entity from which rendering data is fetched.
    pub representative_entity: (Entity, MainEntity),
    /// The range of instances this item draws.
    pub batch_range: Range<u32>,
    /// An extra index, which is either a dynamic offset or an index in the
    /// indirect parameters list.
    pub extra_index: PhaseItemExtraIndex,
}
impl PhaseItem for AlphaMask3dDeferred {
    /// The render-world entity rendering data is fetched from.
    #[inline]
    fn entity(&self) -> Entity {
        self.representative_entity.0
    }

    /// The corresponding main-world entity.
    #[inline]
    fn main_entity(&self) -> MainEntity {
        self.representative_entity.1
    }

    #[inline]
    fn draw_function(&self) -> DrawFunctionId {
        self.batch_set_key.draw_function
    }

    #[inline]
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }

    #[inline]
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }

    #[inline]
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index.clone()
    }

    #[inline]
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
impl BinnedPhaseItem for AlphaMask3dDeferred {
    type BatchSetKey = OpaqueNoLightmap3dBatchSetKey;
    type BinKey = OpaqueNoLightmap3dBinKey;

    /// Assembles a phase item from its bin and batch information.
    // `#[inline]` added for consistency with `Opaque3dDeferred::new`.
    #[inline]
    fn new(
        batch_set_key: Self::BatchSetKey,
        bin_key: Self::BinKey,
        representative_entity: (Entity, MainEntity),
        batch_range: Range<u32>,
        extra_index: PhaseItemExtraIndex,
    ) -> Self {
        Self {
            batch_set_key,
            bin_key,
            representative_entity,
            batch_range,
            extra_index,
        }
    }
}
impl CachedRenderPipelinePhaseItem for AlphaMask3dDeferred {
    /// The cached pipeline id recorded in this item's batch set key.
    #[inline]
    fn cached_pipeline(&self) -> CachedRenderPipelineId {
        self.batch_set_key.pipeline
    }
}

View File

@@ -0,0 +1,273 @@
use bevy_camera::{MainPassResolutionOverride, Viewport};
use bevy_ecs::{prelude::*, query::QueryItem};
use crate::render::experimental::occlusion_culling::OcclusionCulling;
use crate::render::render_graph::ViewNode;
use crate::render::view::{ExtractedView, NoIndirectDrawing};
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext},
render_phase::{TrackedRenderPass, ViewBinnedRenderPhases},
render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp},
renderer::RenderContext,
view::ViewDepthTexture,
};
use tracing::error;
#[cfg(feature = "trace")]
use tracing::info_span;
use crate::render::prepass::ViewPrepassTextures;
use super::{AlphaMask3dDeferred, Opaque3dDeferred};
/// The phase of the deferred prepass that draws meshes that were visible last
/// frame.
///
/// If occlusion culling isn't in use, this prepass simply draws all meshes.
///
/// Like all prepass nodes, this is inserted before the main pass in the render
/// graph.
#[derive(Default)]
pub struct EarlyDeferredGBufferPrepassNode;
impl ViewNode for EarlyDeferredGBufferPrepassNode {
    // Reuse the late node's query type so both phases read identical view data.
    type ViewQuery = <LateDeferredGBufferPrepassNode as ViewNode>::ViewQuery;
    fn run<'w>(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        view_query: QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // `is_late = false`: draw the meshes that were visible last frame (or
        // simply everything, when occlusion culling is not in use).
        run_deferred_prepass(
            graph,
            render_context,
            view_query,
            false,
            world,
            "early deferred prepass",
        )
    }
}
/// The phase of the prepass that runs after occlusion culling against the
/// meshes that were visible last frame.
///
/// If occlusion culling isn't in use, this is a no-op.
///
/// Like all prepass nodes, this is inserted before the main pass in the render
/// graph.
#[derive(Default)]
pub struct LateDeferredGBufferPrepassNode;
impl ViewNode for LateDeferredGBufferPrepassNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ExtractedView,
        &'static ViewDepthTexture,
        &'static ViewPrepassTextures,
        Option<&'static MainPassResolutionOverride>,
        Has<OcclusionCulling>,
        Has<NoIndirectDrawing>,
    );
    fn run<'w>(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        view_query: QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // The late pass only exists when GPU occlusion culling is active, and
        // occlusion culling requires indirect drawing; otherwise it's a no-op.
        let (.., occlusion_culling, no_indirect_drawing) = view_query;
        if !occlusion_culling || no_indirect_drawing {
            return Ok(());
        }
        run_deferred_prepass(
            graph,
            render_context,
            view_query,
            true,
            world,
            "late deferred prepass",
        )
    }
}
/// Runs the deferred prepass that draws all meshes to the depth buffer and
/// G-buffers.
///
/// If occlusion culling isn't in use, and a prepass is enabled, then there's
/// only one prepass. If occlusion culling is in use, then any prepass is split
/// into two: an *early* prepass and a *late* prepass. The early prepass draws
/// what was visible last frame, and the last prepass performs occlusion culling
/// against a conservative hierarchical Z buffer before drawing unoccluded
/// meshes.
fn run_deferred_prepass<'w>(
    graph: &mut RenderGraphContext,
    render_context: &mut RenderContext<'w>,
    (camera, extracted_view, view_depth_texture, view_prepass_textures, resolution_override, _, _): QueryItem<
        'w,
        '_,
        <LateDeferredGBufferPrepassNode as ViewNode>::ViewQuery,
    >,
    is_late: bool,
    world: &'w World,
    label: &'static str,
) -> Result<(), NodeRunError> {
    // Bail out quietly when the binned phases haven't been built this frame,
    // or when this particular view has nothing queued: nothing to draw.
    let (Some(opaque_deferred_phases), Some(alpha_mask_deferred_phases)) = (
        world.get_resource::<ViewBinnedRenderPhases<Opaque3dDeferred>>(),
        world.get_resource::<ViewBinnedRenderPhases<AlphaMask3dDeferred>>(),
    ) else {
        return Ok(());
    };
    let (Some(opaque_deferred_phase), Some(alpha_mask_deferred_phase)) = (
        opaque_deferred_phases.get(&extracted_view.retained_view_entity),
        alpha_mask_deferred_phases.get(&extracted_view.retained_view_entity),
    ) else {
        return Ok(());
    };
    let diagnostic = render_context.diagnostic_recorder();
    // Attachment order must match the deferred prepass pipeline layout:
    // normals, motion vectors, packed G-buffer, deferred lighting pass ID.
    // A `None` entry keeps the slot aligned when a texture is absent.
    let mut color_attachments = vec![];
    color_attachments.push(
        view_prepass_textures
            .normal
            .as_ref()
            .map(|normals_texture| normals_texture.get_attachment()),
    );
    color_attachments.push(
        view_prepass_textures
            .motion_vectors
            .as_ref()
            .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()),
    );
    // If we clear the deferred texture with LoadOp::Clear(Default::default()) we get these errors:
    // Chrome: GL_INVALID_OPERATION: No defined conversion between clear value and attachment format.
    // Firefox: WebGL warning: clearBufferu?[fi]v: This attachment is of type FLOAT, but this function is of type UINT.
    // Appears to be unsupported: https://registry.khronos.org/webgl/specs/latest/2.0/#3.7.9
    // For webgl2 we fallback to manually clearing
    //
    // NOTE(vendoring): this cfg-gated code used `bevy_render::` paths, which do
    // not resolve in the vendored crate; they now point at `crate::render::`.
    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
    if !is_late {
        if let Some(deferred_texture) = &view_prepass_textures.deferred {
            render_context.command_encoder().clear_texture(
                &deferred_texture.texture.texture,
                &crate::render::render_resource::ImageSubresourceRange::default(),
            );
        }
    }
    color_attachments.push(
        view_prepass_textures
            .deferred
            .as_ref()
            .map(|deferred_texture| {
                if is_late {
                    deferred_texture.get_attachment()
                } else {
                    // On WebGL2 the texture was cleared manually above, so load
                    // the existing contents instead of issuing a clear here.
                    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
                    {
                        crate::render::render_resource::RenderPassColorAttachment {
                            view: &deferred_texture.texture.default_view,
                            resolve_target: None,
                            ops: crate::render::render_resource::Operations {
                                load: crate::render::render_resource::LoadOp::Load,
                                store: StoreOp::Store,
                            },
                            depth_slice: None,
                        }
                    }
                    #[cfg(any(
                        not(feature = "webgl"),
                        not(target_arch = "wasm32"),
                        feature = "webgpu"
                    ))]
                    deferred_texture.get_attachment()
                }
            }),
    );
    color_attachments.push(
        view_prepass_textures
            .deferred_lighting_pass_id
            .as_ref()
            .map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()),
    );
    // If all color attachments are none: clear the color attachment list so that no fragment shader is required
    if color_attachments.iter().all(Option::is_none) {
        color_attachments.clear();
    }
    let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store));
    let view_entity = graph.view_entity();
    // Record the pass on a parallel task; the closure owns everything it needs.
    render_context.add_command_buffer_generation_task(move |render_device| {
        #[cfg(feature = "trace")]
        let _deferred_span = info_span!("deferred_prepass").entered();
        // Command encoder setup
        let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor {
            label: Some("deferred_prepass_command_encoder"),
        });
        // Render pass setup
        let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
            label: Some(label),
            color_attachments: &color_attachments,
            depth_stencil_attachment,
            timestamp_writes: None,
            occlusion_query_set: None,
        });
        let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
        let pass_span = diagnostic.pass_span(&mut render_pass, label);
        if let Some(viewport) =
            Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override)
        {
            render_pass.set_camera_viewport(&viewport);
        }
        // Opaque draws
        if !opaque_deferred_phase.multidrawable_meshes.is_empty()
            || !opaque_deferred_phase.batchable_meshes.is_empty()
            || !opaque_deferred_phase.unbatchable_meshes.is_empty()
        {
            #[cfg(feature = "trace")]
            let _opaque_prepass_span = info_span!("opaque_deferred_prepass").entered();
            if let Err(err) = opaque_deferred_phase.render(&mut render_pass, world, view_entity) {
                error!("Error encountered while rendering the opaque deferred phase {err:?}");
            }
        }
        // Alpha masked draws
        if !alpha_mask_deferred_phase.is_empty() {
            #[cfg(feature = "trace")]
            let _alpha_mask_deferred_span = info_span!("alpha_mask_deferred_prepass").entered();
            if let Err(err) = alpha_mask_deferred_phase.render(&mut render_pass, world, view_entity)
            {
                error!("Error encountered while rendering the alpha mask deferred phase {err:?}");
            }
        }
        pass_span.end(&mut render_pass);
        drop(render_pass);
        // After rendering to the view depth texture, copy it to the prepass depth texture
        if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
            command_encoder.copy_texture_to_texture(
                view_depth_texture.texture.as_image_copy(),
                prepass_depth_texture.texture.texture.as_image_copy(),
                view_prepass_textures.size,
            );
        }
        command_encoder.finish()
    });
    Ok(())
}

View File

@@ -0,0 +1,709 @@
use std::{borrow::Cow, sync::Arc};
use core::{
ops::{DerefMut, Range},
sync::atomic::{AtomicBool, Ordering},
};
use std::thread::{self, ThreadId};
use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};
use bevy_ecs::resource::Resource;
use bevy_ecs::system::{Res, ResMut};
use bevy_platform::time::Instant;
use std::sync::Mutex;
use wgpu::{
Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode,
PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, RenderPass,
};
use crate::render::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};
use super::RecordDiagnostics;
// buffer offset must be divisible by 256, so this constant must be divisible by 32 (=256/8)
const MAX_TIMESTAMP_QUERIES: u32 = 256;
const MAX_PIPELINE_STATISTICS: u32 = 128;
// Each timestamp query resolves to a single little-endian u64 (8 bytes).
const TIMESTAMP_SIZE: u64 = 8;
// Each pipeline-statistics query resolves to 5 u64 counters (40 bytes).
const PIPELINE_STATISTICS_SIZE: u64 = 40;
struct DiagnosticsRecorderInternal {
    // Nanoseconds per GPU timestamp tick, queried from the queue at creation.
    timestamp_period_ns: f32,
    // Device features; decide which query sets can be allocated per frame.
    features: Features,
    // The frame currently being recorded. Behind a `Mutex` because recording
    // can happen from parallel command-encoding tasks.
    current_frame: Mutex<FrameData>,
    // Frames whose GPU work was submitted; awaiting buffer map-back.
    submitted_frames: Vec<FrameData>,
    // Frames whose callbacks have run; kept around for reuse.
    finished_frames: Vec<FrameData>,
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between
/// spans and indices to the corresponding entries in the [`QuerySet`].
#[derive(Resource)]
pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);
impl DiagnosticsRecorder {
    /// Creates the new `DiagnosticsRecorder`.
    pub fn new(
        adapter_info: &RenderAdapterInfo,
        device: &RenderDevice,
        queue: &RenderQueue,
    ) -> DiagnosticsRecorder {
        let features = device.features();
        #[cfg(feature = "tracing-tracy")]
        let tracy_gpu_context =
            super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);
        let _ = adapter_info; // Prevent unused variable warnings when tracing-tracy is not enabled
        DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {
            timestamp_period_ns: queue.get_timestamp_period(),
            features,
            current_frame: Mutex::new(FrameData::new(
                device,
                features,
                #[cfg(feature = "tracing-tracy")]
                tracy_gpu_context.clone(),
            )),
            submitted_frames: Vec::new(),
            finished_frames: Vec::new(),
            #[cfg(feature = "tracing-tracy")]
            tracy_gpu_context,
        }))
    }
    // Lock-free access to the current frame; only possible with `&mut self`.
    fn current_frame_mut(&mut self) -> &mut FrameData {
        self.0.current_frame.get_mut().expect("lock poisoned")
    }
    // Shared access via the mutex, used from the `RecordDiagnostics` impl.
    fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {
        self.0.current_frame.lock().expect("lock poisoned")
    }
    /// Begins recording diagnostics for a new frame.
    ///
    /// Also polls previously submitted frames: any frame whose GPU read-back
    /// has completed runs its callback and is moved to the reuse pool.
    pub fn begin_frame(&mut self) {
        let internal = &mut self.0;
        let mut idx = 0;
        // `swap_remove` doesn't advance `idx`, so the element swapped into
        // `idx` is checked on the next iteration.
        while idx < internal.submitted_frames.len() {
            let timestamp = internal.timestamp_period_ns;
            if internal.submitted_frames[idx].run_mapped_callback(timestamp) {
                let removed = internal.submitted_frames.swap_remove(idx);
                internal.finished_frames.push(removed);
            } else {
                idx += 1;
            }
        }
        self.current_frame_mut().begin();
    }
    /// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU.
    ///
    /// Should be called before [`DiagnosticsRecorder::finish_frame`].
    pub fn resolve(&mut self, encoder: &mut CommandEncoder) {
        self.current_frame_mut().resolve(encoder);
    }
    /// Finishes recording diagnostics for the current frame.
    ///
    /// The specified `callback` will be invoked when diagnostics become available.
    ///
    /// Should be called after [`DiagnosticsRecorder::resolve`],
    /// and **after** all commands buffers have been queued.
    pub fn finish_frame(
        &mut self,
        device: &RenderDevice,
        callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,
    ) {
        #[cfg(feature = "tracing-tracy")]
        let tracy_gpu_context = self.0.tracy_gpu_context.clone();
        let internal = &mut self.0;
        internal
            .current_frame
            .get_mut()
            .expect("lock poisoned")
            .finish(callback);
        // reuse one of the finished frames, if we can
        let new_frame = match internal.finished_frames.pop() {
            Some(frame) => frame,
            None => FrameData::new(
                device,
                internal.features,
                #[cfg(feature = "tracing-tracy")]
                tracy_gpu_context,
            ),
        };
        // Swap the fresh frame in and move the just-finished one to the
        // submitted list, where `begin_frame` will poll it for map-back.
        let old_frame = core::mem::replace(
            internal.current_frame.get_mut().expect("lock poisoned"),
            new_frame,
        );
        internal.submitted_frames.push(old_frame);
    }
}
// Span recording forwards onto the frame currently being recorded; each call
// takes the frame mutex exactly once.
impl RecordDiagnostics for DiagnosticsRecorder {
    fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {
        let mut frame = self.current_frame_lock();
        frame.begin_time_span(encoder, span_name);
    }
    fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
        let mut frame = self.current_frame_lock();
        frame.end_time_span(encoder);
    }
    fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {
        let mut frame = self.current_frame_lock();
        frame.begin_pass(pass, span_name);
    }
    fn end_pass_span<P: Pass>(&self, pass: &mut P) {
        let mut frame = self.current_frame_lock();
        frame.end_pass(pass);
    }
}
// One recorded diagnostic span. CPU instants are always captured; GPU query
// indices are only present when the corresponding device features exist and
// query capacity was not exhausted.
struct SpanRecord {
    // Thread that opened the span; spans form a per-thread stack.
    thread_id: ThreadId,
    // Index range into `FrameData::path_components` naming this span's path.
    path_range: Range<usize>,
    // `Some` for render/compute pass spans, `None` for plain time spans.
    pass_kind: Option<PassKind>,
    begin_timestamp_index: Option<u32>,
    end_timestamp_index: Option<u32>,
    begin_instant: Option<Instant>,
    end_instant: Option<Instant>,
    // Index of the pipeline-statistics query begun for this span, if any.
    pipeline_statistics_index: Option<u32>,
}
// Per-frame recording state: GPU query sets, the staging buffers they resolve
// into, and the spans recorded on the CPU side. Instances are recycled across
// frames by `DiagnosticsRecorder`.
struct FrameData {
    timestamps_query_set: Option<QuerySet>,
    num_timestamps: u32,
    supports_timestamps_inside_passes: bool,
    supports_timestamps_inside_encoders: bool,
    pipeline_statistics_query_set: Option<QuerySet>,
    num_pipeline_statistics: u32,
    // Total size of the shared resolve/read buffers, in bytes.
    buffer_size: u64,
    // Byte offset where pipeline-statistics results start (after timestamps).
    pipeline_statistics_buffer_offset: u64,
    // GPU-side buffer that query sets resolve into.
    resolve_buffer: Option<Buffer>,
    // Mappable copy of `resolve_buffer` for CPU read-back.
    read_buffer: Option<Buffer>,
    // Flat storage of span path segments, sliced by `SpanRecord::path_range`.
    path_components: Vec<Cow<'static, str>>,
    open_spans: Vec<SpanRecord>,
    closed_spans: Vec<SpanRecord>,
    // Set from the `map_async` callback once `read_buffer` is readable.
    is_mapped: Arc<AtomicBool>,
    callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
impl FrameData {
    /// Allocates query sets and staging buffers for one frame's diagnostics,
    /// sized according to which GPU `features` are available.
    fn new(
        device: &RenderDevice,
        features: Features,
        #[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext,
    ) -> FrameData {
        let wgpu_device = device.wgpu_device();
        let mut buffer_size = 0;
        // Timestamp query results occupy the start of the shared buffer.
        let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {
            buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;
            Some(wgpu_device.create_query_set(&QuerySetDescriptor {
                label: Some("timestamps_query_set"),
                ty: QueryType::Timestamp,
                count: MAX_TIMESTAMP_QUERIES,
            }))
        } else {
            None
        };
        // Pipeline-statistics results follow the timestamps in the same buffer.
        let pipeline_statistics_buffer_offset = buffer_size;
        let pipeline_statistics_query_set =
            if features.contains(Features::PIPELINE_STATISTICS_QUERY) {
                buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;
                Some(wgpu_device.create_query_set(&QuerySetDescriptor {
                    label: Some("pipeline_statistics_query_set"),
                    ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),
                    count: MAX_PIPELINE_STATISTICS,
                }))
            } else {
                None
            };
        // `resolve_buffer` receives query results on the GPU; `read_buffer`
        // is the mappable copy the CPU downloads from.
        let (resolve_buffer, read_buffer) = if buffer_size > 0 {
            let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {
                label: Some("render_statistics_resolve_buffer"),
                size: buffer_size,
                usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
                mapped_at_creation: false,
            });
            let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {
                label: Some("render_statistics_read_buffer"),
                size: buffer_size,
                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
                mapped_at_creation: false,
            });
            (Some(resolve_buffer), Some(read_buffer))
        } else {
            (None, None)
        };
        FrameData {
            timestamps_query_set,
            num_timestamps: 0,
            supports_timestamps_inside_passes: features
                .contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),
            supports_timestamps_inside_encoders: features
                .contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),
            pipeline_statistics_query_set,
            num_pipeline_statistics: 0,
            buffer_size,
            pipeline_statistics_buffer_offset,
            resolve_buffer,
            read_buffer,
            path_components: Vec::new(),
            open_spans: Vec::new(),
            closed_spans: Vec::new(),
            is_mapped: Arc::new(AtomicBool::new(false)),
            callback: None,
            #[cfg(feature = "tracing-tracy")]
            tracy_gpu_context,
        }
    }
    /// Resets per-frame counters and span storage so the instance can record
    /// a new frame (query sets and buffers are reused as-is).
    fn begin(&mut self) {
        self.num_timestamps = 0;
        self.num_pipeline_statistics = 0;
        self.path_components.clear();
        self.open_spans.clear();
        self.closed_spans.clear();
    }
    /// Writes a timestamp query and returns its index, or `None` when the
    /// device can't support it or query capacity is exhausted.
    fn write_timestamp(
        &mut self,
        encoder: &mut impl WriteTimestamp,
        is_inside_pass: bool,
    ) -> Option<u32> {
        // `encoder.write_timestamp` is unsupported on WebGPU.
        if !self.supports_timestamps_inside_encoders {
            return None;
        }
        if is_inside_pass && !self.supports_timestamps_inside_passes {
            return None;
        }
        if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {
            return None;
        }
        let set = self.timestamps_query_set.as_ref()?;
        let index = self.num_timestamps;
        encoder.write_timestamp(set, index);
        self.num_timestamps += 1;
        Some(index)
    }
    /// Begins a pipeline-statistics query and returns its index, or `None`
    /// when unsupported or capacity is exhausted.
    fn write_pipeline_statistics(
        &mut self,
        encoder: &mut impl WritePipelineStatistics,
    ) -> Option<u32> {
        if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {
            return None;
        }
        let set = self.pipeline_statistics_query_set.as_ref()?;
        let index = self.num_pipeline_statistics;
        encoder.begin_pipeline_statistics_query(set, index);
        self.num_pipeline_statistics += 1;
        Some(index)
    }
    /// Opens a nested span named `name` on the current thread.
    ///
    /// Open spans form a per-thread stack; a child span's path extends its
    /// parent's path components by one entry.
    fn open_span(
        &mut self,
        pass_kind: Option<PassKind>,
        name: Cow<'static, str>,
    ) -> &mut SpanRecord {
        let thread_id = thread::current().id();
        // The parent is the most recently opened span on this thread.
        let parent = self
            .open_spans
            .iter()
            .filter(|v| v.thread_id == thread_id)
            .next_back();
        let path_range = match &parent {
            // Parent's components sit at the tail of `path_components`, so the
            // child's range extends them by the name pushed below.
            Some(parent) if parent.path_range.end == self.path_components.len() => {
                parent.path_range.start..parent.path_range.end + 1
            }
            // Otherwise copy the parent's components to the tail first; the
            // range covers the copied prefix plus the name pushed below.
            Some(parent) => {
                self.path_components
                    .extend_from_within(parent.path_range.clone());
                self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1
            }
            // Root span: its path is just its own name.
            None => self.path_components.len()..self.path_components.len() + 1,
        };
        self.path_components.push(name);
        self.open_spans.push(SpanRecord {
            thread_id,
            path_range,
            pass_kind,
            begin_timestamp_index: None,
            end_timestamp_index: None,
            begin_instant: None,
            end_instant: None,
            pipeline_statistics_index: None,
        });
        self.open_spans.last_mut().unwrap()
    }
    /// Closes the most recently opened span on the current thread and moves
    /// it to `closed_spans`.
    fn close_span(&mut self) -> &mut SpanRecord {
        let thread_id = thread::current().id();
        let iter = self.open_spans.iter();
        let (index, _) = iter
            .enumerate()
            .filter(|(_, v)| v.thread_id == thread_id)
            .next_back()
            .unwrap();
        let span = self.open_spans.swap_remove(index);
        self.closed_spans.push(span);
        self.closed_spans.last_mut().unwrap()
    }
    /// Starts a plain time span: captures a CPU instant and, when possible,
    /// a GPU timestamp.
    fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {
        let begin_instant = Instant::now();
        let begin_timestamp_index = self.write_timestamp(encoder, false);
        let span = self.open_span(None, name);
        span.begin_instant = Some(begin_instant);
        span.begin_timestamp_index = begin_timestamp_index;
    }
    /// Ends the current time span on this thread.
    fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {
        let end_timestamp_index = self.write_timestamp(encoder, false);
        let span = self.close_span();
        span.end_timestamp_index = end_timestamp_index;
        span.end_instant = Some(Instant::now());
    }
    /// Starts a pass span: like a time span, but also begins a
    /// pipeline-statistics query where supported.
    fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {
        let begin_instant = Instant::now();
        let begin_timestamp_index = self.write_timestamp(pass, true);
        let pipeline_statistics_index = self.write_pipeline_statistics(pass);
        let span = self.open_span(Some(P::KIND), name);
        span.begin_instant = Some(begin_instant);
        span.begin_timestamp_index = begin_timestamp_index;
        span.pipeline_statistics_index = pipeline_statistics_index;
    }
    /// Ends the current pass span on this thread.
    fn end_pass(&mut self, pass: &mut impl Pass) {
        let end_timestamp_index = self.write_timestamp(pass, true);
        let span = self.close_span();
        span.end_timestamp_index = end_timestamp_index;
        // Only end the statistics query if one was actually begun.
        if span.pipeline_statistics_index.is_some() {
            pass.end_pipeline_statistics_query();
        }
        span.end_instant = Some(Instant::now());
    }
    /// Resolves the query sets into `resolve_buffer` and schedules the copy
    /// into the mappable `read_buffer`.
    fn resolve(&mut self, encoder: &mut CommandEncoder) {
        let Some(resolve_buffer) = &self.resolve_buffer else {
            return;
        };
        match &self.timestamps_query_set {
            Some(set) if self.num_timestamps > 0 => {
                encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);
            }
            _ => {}
        }
        match &self.pipeline_statistics_query_set {
            Some(set) if self.num_pipeline_statistics > 0 => {
                encoder.resolve_query_set(
                    set,
                    0..self.num_pipeline_statistics,
                    resolve_buffer,
                    self.pipeline_statistics_buffer_offset,
                );
            }
            _ => {}
        }
        let Some(read_buffer) = &self.read_buffer else {
            return;
        };
        encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);
    }
    /// Builds a `DiagnosticPath` of the form `render/<span path...>/<field>`.
    fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {
        DiagnosticPath::from_components(
            core::iter::once("render")
                .chain(self.path_components[range.clone()].iter().map(|v| &**v))
                .chain(core::iter::once(field)),
        )
    }
    /// Finishes the frame: either reports CPU-only timings immediately (when
    /// there is no GPU query buffer), or kicks off an async map of the read
    /// buffer and stores `callback` until the data arrives.
    fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {
        let Some(read_buffer) = &self.read_buffer else {
            // we still have cpu timings, so let's use them
            let mut diagnostics = Vec::new();
            for span in &self.closed_spans {
                if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
                        suffix: "ms",
                        value: (end - begin).as_secs_f64() * 1000.0,
                    });
                }
            }
            callback(RenderDiagnostics(diagnostics));
            return;
        };
        self.callback = Some(Box::new(callback));
        let is_mapped = self.is_mapped.clone();
        read_buffer.slice(..).map_async(MapMode::Read, move |res| {
            if let Err(e) = res {
                tracing::warn!("Failed to download render statistics buffer: {e}");
                return;
            }
            // Signal `run_mapped_callback` that the buffer is readable.
            is_mapped.store(true, Ordering::Release);
        });
    }
    // returns true if the frame is considered finished, false otherwise
    fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {
        let Some(read_buffer) = &self.read_buffer else {
            return true;
        };
        if !self.is_mapped.load(Ordering::Acquire) {
            // need to wait more
            return false;
        }
        let Some(callback) = self.callback.take() else {
            return true;
        };
        let data = read_buffer.slice(..).get_mapped_range();
        // Each timestamp is a little-endian u64 (8 bytes).
        let timestamps = data[..(self.num_timestamps * 8) as usize]
            .chunks(8)
            .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
            .collect::<Vec<u64>>();
        // Each statistics query yields 5 u64 counters (40 bytes).
        let start = self.pipeline_statistics_buffer_offset as usize;
        let len = (self.num_pipeline_statistics as usize) * 40;
        let pipeline_statistics = data[start..start + len]
            .chunks(8)
            .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
            .collect::<Vec<u64>>();
        let mut diagnostics = Vec::new();
        for span in &self.closed_spans {
            if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
                diagnostics.push(RenderDiagnostic {
                    path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
                    suffix: "ms",
                    value: (end - begin).as_secs_f64() * 1000.0,
                });
            }
            if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)
            {
                // Convert timestamp ticks to milliseconds.
                let begin = timestamps[begin as usize] as f64;
                let end = timestamps[end as usize] as f64;
                let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;
                #[cfg(feature = "tracing-tracy")]
                {
                    // Calling span_alloc() and end_zone() here instead of in open_span() and close_span() means that tracy does not know where each GPU command was recorded on the CPU timeline.
                    // Unfortunately we must do it this way, because tracy does not play nicely with multithreaded command recording. The start/end pairs would get all mixed up.
                    // The GPU spans themselves are still accurate though, and it's probably safe to assume that each GPU span in frame N belongs to the corresponding CPU render node span from frame N-1.
                    let name = &self.path_components[span.path_range.clone()].join("/");
                    let mut tracy_gpu_span =
                        self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();
                    tracy_gpu_span.end_zone();
                    tracy_gpu_span.upload_timestamp_start(begin as i64);
                    tracy_gpu_span.upload_timestamp_end(end as i64);
                }
                diagnostics.push(RenderDiagnostic {
                    path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),
                    suffix: "ms",
                    value,
                });
            }
            if let Some(index) = span.pipeline_statistics_index {
                // 5 counters per query; render passes use the first four,
                // compute passes the fifth.
                let index = (index as usize) * 5;
                if span.pass_kind == Some(PassKind::Render) {
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index] as f64,
                    });
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "clipper_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 1] as f64,
                    });
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),
                        suffix: "",
                        value: pipeline_statistics[index + 2] as f64,
                    });
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 3] as f64,
                    });
                }
                if span.pass_kind == Some(PassKind::Compute) {
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 4] as f64,
                    });
                }
            }
        }
        callback(RenderDiagnostics(diagnostics));
        // Unmap so the buffer can be reused for the next recorded frame.
        drop(data);
        read_buffer.unmap();
        self.is_mapped.store(false, Ordering::Release);
        true
    }
}
/// Resource which stores render diagnostics of the most recent frame.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnostics(Vec<RenderDiagnostic>);
/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`].
#[derive(Debug, Clone, Resource)]
pub struct RenderDiagnostic {
    // Path under which the value is stored, e.g. `render/<span>/elapsed_gpu`.
    pub path: DiagnosticPath,
    // Unit suffix used for display ("ms" for timings, "" for counters).
    pub suffix: &'static str,
    pub value: f64,
}
/// Stores render diagnostics before they can be synced with the main app.
///
/// This mutex is locked twice per frame:
/// 1. in `PreUpdate`, during [`sync_diagnostics`],
/// 2. after rendering has finished and statistics have been downloaded from GPU.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);
/// Drains the diagnostics recorded by the render world (if any have arrived)
/// and publishes them as measurements into the main world's `DiagnosticsStore`.
pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {
    let diagnostics = match mutex.0.lock().ok().and_then(|mut v| v.take()) {
        Some(diagnostics) => diagnostics,
        None => return,
    };
    let time = Instant::now();
    for diagnostic in &diagnostics.0 {
        // Register the diagnostic on first sight, then append the measurement.
        if store.get(&diagnostic.path).is_none() {
            store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));
        }
        let entry = store.get_mut(&diagnostic.path).unwrap();
        entry.add_measurement(DiagnosticMeasurement {
            time,
            value: diagnostic.value,
        });
    }
}
// Abstraction over wgpu encoders/passes that can record a timestamp query.
// The fully qualified calls below target wgpu's inherent methods, making it
// explicit that the trait method is not calling itself.
pub trait WriteTimestamp {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);
}
impl WriteTimestamp for CommandEncoder {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        CommandEncoder::write_timestamp(self, query_set, index);
    }
}
impl WriteTimestamp for RenderPass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        RenderPass::write_timestamp(self, query_set, index);
    }
}
impl WriteTimestamp for ComputePass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        ComputePass::write_timestamp(self, query_set, index);
    }
}
// Abstraction over wgpu pass types that support pipeline-statistics queries.
// Note: `CommandEncoder` deliberately has no impl — statistics queries only
// exist inside render/compute passes.
pub trait WritePipelineStatistics {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);
    fn end_pipeline_statistics_query(&mut self);
}
impl WritePipelineStatistics for RenderPass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        RenderPass::begin_pipeline_statistics_query(self, query_set, index);
    }
    fn end_pipeline_statistics_query(&mut self) {
        RenderPass::end_pipeline_statistics_query(self);
    }
}
impl WritePipelineStatistics for ComputePass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        ComputePass::begin_pipeline_statistics_query(self, query_set, index);
    }
    fn end_pipeline_statistics_query(&mut self) {
        ComputePass::end_pipeline_statistics_query(self);
    }
}
// A pass that supports both timestamps and pipeline statistics, tagged with
// its kind so diagnostics can pick the relevant statistics counters.
pub trait Pass: WritePipelineStatistics + WriteTimestamp {
    const KIND: PassKind;
}
impl Pass for RenderPass<'_> {
    const KIND: PassKind = PassKind::Render;
}
impl Pass for ComputePass<'_> {
    const KIND: PassKind = PassKind::Compute;
}
// Distinguishes render passes from compute passes in recorded spans.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PassKind {
    Render,
    Compute,
}

View File

@@ -0,0 +1,188 @@
//! Infrastructure for recording render diagnostics.
//!
//! For more info, see [`RenderDiagnosticsPlugin`].
pub(crate) mod internal;
#[cfg(feature = "tracing-tracy")]
mod tracy_gpu;
use std::{borrow::Cow, sync::Arc};
use core::marker::PhantomData;
use bevy_app::{App, Plugin, PreUpdate};
use crate::render::{renderer::RenderAdapterInfo, RenderApp};
use self::internal::{
sync_diagnostics, DiagnosticsRecorder, Pass, RenderDiagnosticsMutex, WriteTimestamp,
};
use crate::render::renderer::{RenderDevice, RenderQueue};
/// Enables collecting render diagnostics, such as CPU/GPU elapsed time per render pass,
/// as well as pipeline statistics (number of primitives, number of shader invocations, etc).
///
/// To access the diagnostics, you can use the [`DiagnosticsStore`](bevy_diagnostic::DiagnosticsStore) resource,
/// add [`LogDiagnosticsPlugin`](bevy_diagnostic::LogDiagnosticsPlugin), or use [Tracy](https://github.com/bevyengine/bevy/blob/main/docs/profiling.md#tracy-renderqueue).
///
/// To record diagnostics in your own passes:
/// 1. First, obtain the diagnostic recorder using [`RenderContext::diagnostic_recorder`](crate::renderer::RenderContext::diagnostic_recorder).
///
/// It won't do anything unless [`RenderDiagnosticsPlugin`] is present,
/// so you're free to omit `#[cfg]` clauses.
/// ```ignore
/// let diagnostics = render_context.diagnostic_recorder();
/// ```
/// 2. Begin the span inside a command encoder, or a render/compute pass encoder.
/// ```ignore
/// let time_span = diagnostics.time_span(render_context.command_encoder(), "shadows");
/// ```
/// 3. End the span, providing the same encoder.
/// ```ignore
/// time_span.end(render_context.command_encoder());
/// ```
///
/// # Supported platforms
/// Timestamp queries and pipeline statistics are currently supported only on Vulkan and DX12.
/// On other platforms (Metal, WebGPU, WebGL2) only CPU time will be recorded.
#[derive(Default)]
pub struct RenderDiagnosticsPlugin;
impl Plugin for RenderDiagnosticsPlugin {
    fn build(&self, app: &mut App) {
        // The mutex is shared (via Arc) between the main app, where
        // `sync_diagnostics` drains it, and the render app, which fills it.
        let render_diagnostics_mutex = RenderDiagnosticsMutex::default();
        app.insert_resource(render_diagnostics_mutex.clone())
            .add_systems(PreUpdate, sync_diagnostics);
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.insert_resource(render_diagnostics_mutex);
        }
    }
    fn finish(&self, app: &mut App) {
        // Deferred to `finish` because the recorder needs the GPU device and
        // queue, which only exist after renderer initialization.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        let adapter_info = render_app.world().resource::<RenderAdapterInfo>();
        let device = render_app.world().resource::<RenderDevice>();
        let queue = render_app.world().resource::<RenderQueue>();
        render_app.insert_resource(DiagnosticsRecorder::new(adapter_info, device, queue));
    }
}
/// Allows recording diagnostic spans.
pub trait RecordDiagnostics: Send + Sync {
    /// Begin a time span, which will record elapsed CPU and GPU time.
    ///
    /// Returns a guard, which will panic on drop unless you end the span.
    fn time_span<E, N>(&self, encoder: &mut E, name: N) -> TimeSpanGuard<'_, Self, E>
    where
        E: WriteTimestamp,
        N: Into<Cow<'static, str>>,
    {
        self.begin_time_span(encoder, name.into());
        // The guard's Drop panics; only `TimeSpanGuard::end` defuses it.
        TimeSpanGuard {
            recorder: self,
            marker: PhantomData,
        }
    }
    /// Begin a pass span, which will record elapsed CPU and GPU time,
    /// as well as pipeline statistics on supported platforms.
    ///
    /// Returns a guard, which will panic on drop unless you end the span.
    fn pass_span<P, N>(&self, pass: &mut P, name: N) -> PassSpanGuard<'_, Self, P>
    where
        P: Pass,
        N: Into<Cow<'static, str>>,
    {
        self.begin_pass_span(pass, name.into());
        // The guard's Drop panics; only `PassSpanGuard::end` defuses it.
        PassSpanGuard {
            recorder: self,
            marker: PhantomData,
        }
    }
    #[doc(hidden)]
    fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, name: Cow<'static, str>);
    #[doc(hidden)]
    fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E);
    #[doc(hidden)]
    fn begin_pass_span<P: Pass>(&self, pass: &mut P, name: Cow<'static, str>);
    #[doc(hidden)]
    fn end_pass_span<P: Pass>(&self, pass: &mut P);
}
/// Guard returned by [`RecordDiagnostics::time_span`].
///
/// Will panic on drop unless [`TimeSpanGuard::end`] is called.
pub struct TimeSpanGuard<'a, R: ?Sized, E> {
    // Recorder that opened the span; used to close it in `end`.
    recorder: &'a R,
    // Ties the guard to the encoder type without storing it.
    marker: PhantomData<E>,
}
impl<R: RecordDiagnostics + ?Sized, E: WriteTimestamp> TimeSpanGuard<'_, R, E> {
    /// End the span. You have to provide the same encoder which was used to begin the span.
    pub fn end(self, encoder: &mut E) {
        self.recorder.end_time_span(encoder);
        // Skip the Drop impl below: the span was ended properly.
        core::mem::forget(self);
    }
}
impl<R: ?Sized, E> Drop for TimeSpanGuard<'_, R, E> {
    fn drop(&mut self) {
        // Message previously referenced the stale name `TimeSpanScope`.
        panic!("TimeSpanGuard::end was never called")
    }
}
/// Guard returned by [`RecordDiagnostics::pass_span`].
///
/// Will panic on drop unless [`PassSpanGuard::end`] is called.
pub struct PassSpanGuard<'a, R: ?Sized, P> {
    // The recorder that opened the span; used by `end` to close it.
    recorder: &'a R,
    // Ties the guard to the pass type so `end` requires the same `P`
    // that was used to begin the span.
    marker: PhantomData<P>,
}
impl<R: RecordDiagnostics + ?Sized, P: Pass> PassSpanGuard<'_, R, P> {
    /// End the span. You have to provide the same pass which was used to begin the span.
    pub fn end(self, pass: &mut P) {
        self.recorder.end_pass_span(pass);
        // Skip the guard's Drop impl, which would otherwise panic.
        core::mem::forget(self);
    }
}
impl<R: ?Sized, P> Drop for PassSpanGuard<'_, R, P> {
    /// Panics to flag a span that was never closed via [`PassSpanGuard::end`].
    fn drop(&mut self) {
        // Don't panic while already unwinding from another panic: a second
        // panic in `drop` would abort the process and mask the original error.
        if !std::thread::panicking() {
            // Message fixed to name the actual type (was the stale name
            // `PassSpanScope`).
            panic!("PassSpanGuard::end was never called")
        }
    }
}
/// A missing recorder (`None`) is a no-op; otherwise every hook is forwarded
/// to the wrapped recorder.
impl<T: RecordDiagnostics> RecordDiagnostics for Option<Arc<T>> {
    fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, name: Cow<'static, str>) {
        let Some(recorder) = self else { return };
        recorder.begin_time_span(encoder, name);
    }
    fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
        let Some(recorder) = self else { return };
        recorder.end_time_span(encoder);
    }
    fn begin_pass_span<P: Pass>(&self, pass: &mut P, name: Cow<'static, str>) {
        let Some(recorder) = self else { return };
        recorder.begin_pass_span(pass, name);
    }
    fn end_pass_span<P: Pass>(&self, pass: &mut P) {
        let Some(recorder) = self else { return };
        recorder.end_pass_span(pass);
    }
}

View File

@@ -0,0 +1,69 @@
use crate::render::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue};
use tracy_client::{Client, GpuContext, GpuContextType};
use wgpu::{
Backend, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, MapMode, PollType,
QuerySetDescriptor, QueryType, QUERY_SIZE,
};
/// Creates a Tracy GPU profiling context for the given adapter, device, and
/// queue, calibrated with an initial GPU timestamp.
///
/// # Panics
///
/// Panics if no Tracy client is running, or if the GPU context cannot be
/// created.
pub fn new_tracy_gpu_context(
    adapter_info: &RenderAdapterInfo,
    device: &RenderDevice,
    queue: &RenderQueue,
) -> GpuContext {
    // Map the wgpu backend to the matching Tracy context type. Backends Tracy
    // has no dedicated support for are reported as `Invalid`.
    let tracy_gpu_backend = match adapter_info.backend {
        Backend::Vulkan => GpuContextType::Vulkan,
        Backend::Dx12 => GpuContextType::Direct3D12,
        Backend::Gl => GpuContextType::OpenGL,
        Backend::Metal | Backend::BrowserWebGpu | Backend::Noop => GpuContextType::Invalid,
    };
    // `expect` instead of bare `unwrap` so a failure points at the cause.
    let tracy_client =
        Client::running().expect("tracy client must be running to create a GPU context");
    tracy_client
        .new_gpu_context(
            Some("RenderQueue"),
            tracy_gpu_backend,
            initial_timestamp(device, queue),
            queue.get_timestamp_period(),
        )
        .expect("failed to create tracy GPU context")
}
// Code copied from https://github.com/Wumpf/wgpu-profiler/blob/f9de342a62cb75f50904a98d11dd2bbeb40ceab8/src/tracy.rs
/// Reads a single GPU timestamp synchronously and returns it; used as the
/// reference point that lets Tracy align the GPU timeline with the CPU one.
fn initial_timestamp(device: &RenderDevice, queue: &RenderQueue) -> i64 {
    // One-off query set holding a single timestamp query.
    let query_set = device.wgpu_device().create_query_set(&QuerySetDescriptor {
        label: None,
        ty: QueryType::Timestamp,
        count: 1,
    });
    // GPU-side buffer the query result is resolved into.
    let resolve_buffer = device.create_buffer(&BufferDescriptor {
        label: None,
        size: QUERY_SIZE as _,
        usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    });
    // CPU-mappable buffer the result is copied into for readback.
    let map_buffer = device.create_buffer(&BufferDescriptor {
        label: None,
        size: QUERY_SIZE as _,
        usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });
    let mut timestamp_encoder = device.create_command_encoder(&CommandEncoderDescriptor::default());
    timestamp_encoder.write_timestamp(&query_set, 0);
    timestamp_encoder.resolve_query_set(&query_set, 0..1, &resolve_buffer, 0);
    // Workaround for https://github.com/gfx-rs/wgpu/issues/6406
    // TODO when that bug is fixed, merge these encoders together again
    let mut copy_encoder = device.create_command_encoder(&CommandEncoderDescriptor::default());
    copy_encoder.copy_buffer_to_buffer(&resolve_buffer, 0, &map_buffer, 0, Some(QUERY_SIZE as _));
    queue.submit([timestamp_encoder.finish(), copy_encoder.finish()]);
    // Block until the submitted work finishes and the buffer is mapped.
    map_buffer.slice(..).map_async(MapMode::Read, |_| ());
    device
        .poll(PollType::Wait)
        .expect("Failed to poll device for map async");
    let view = map_buffer.slice(..).get_mapped_range();
    // The timestamp is a little-endian u64 on the wire; Tracy takes an i64.
    i64::from_le_bytes((*view).try_into().unwrap())
}

View File

@@ -0,0 +1,431 @@
use crate::render::{
render_resource::AsBindGroupError, ExtractSchedule, MainWorld, Render, RenderApp,
RenderSystems,
};
use bevy_app::{App, Plugin, SubApp};
use bevy_asset::RenderAssetUsages;
use bevy_asset::{Asset, AssetEvent, AssetId, Assets, UntypedAssetId};
use bevy_ecs::{
prelude::{Commands, IntoScheduleConfigs, MessageReader, Res, ResMut, Resource},
schedule::{ScheduleConfigs, SystemSet},
system::{ScheduleSystem, StaticSystemParam, SystemParam, SystemParamItem, SystemState},
world::{FromWorld, Mut},
};
use bevy_platform::collections::{HashMap, HashSet};
use crate::render::render_asset::RenderAssetBytesPerFrameLimiter;
use core::marker::PhantomData;
use thiserror::Error;
use tracing::{debug, error};
/// Errors that can occur while preparing an [`ErasedRenderAsset::SourceAsset`]
/// for the GPU.
#[derive(Debug, Error)]
pub enum PrepareAssetError<E: Send + Sync + 'static> {
    /// Preparation could not complete this frame; the source asset is handed
    /// back so it can be retried on the next update.
    #[error("Failed to prepare asset")]
    RetryNextUpdate(E),
    /// Building the asset's bind group failed.
    #[error("Failed to build bind group: {0}")]
    AsBindGroupError(AsBindGroupError),
}
/// The system set during which we extract modified assets to the render world.
///
/// Extraction systems for each [`ErasedRenderAsset`] run in this set during
/// the [`ExtractSchedule`].
#[derive(SystemSet, Clone, PartialEq, Eq, Debug, Hash)]
pub struct AssetExtractionSystems;
/// Deprecated alias for [`AssetExtractionSystems`].
#[deprecated(since = "0.17.0", note = "Renamed to `AssetExtractionSystems`.")]
pub type ExtractAssetsSet = AssetExtractionSystems;
/// Describes how an asset gets extracted and prepared for rendering.
///
/// In the [`ExtractSchedule`] step the [`ErasedRenderAsset::SourceAsset`] is transferred
/// from the "main world" into the "render world".
///
/// After that in the [`RenderSystems::PrepareAssets`] step the extracted asset
/// is transformed into its GPU-representation of type [`ErasedRenderAsset`].
pub trait ErasedRenderAsset: Send + Sync + 'static {
    /// The representation of the asset in the "main world".
    type SourceAsset: Asset + Clone;
    /// The target representation of the asset in the "render world".
    type ErasedAsset: Send + Sync + 'static + Sized;
    /// Specifies all ECS data required by [`ErasedRenderAsset::prepare_asset`].
    ///
    /// For convenience use the [`lifetimeless`](bevy_ecs::system::lifetimeless) [`SystemParam`].
    type Param: SystemParam;
    /// Whether or not to unload the asset after extracting it to the render world.
    ///
    /// If this returns exactly `RenderAssetUsages::RENDER_WORLD`, the source
    /// asset is removed from the main world on extraction instead of cloned.
    #[inline]
    fn asset_usage(_source_asset: &Self::SourceAsset) -> RenderAssetUsages {
        RenderAssetUsages::default()
    }
    /// Size of the data the asset will upload to the gpu. Specifying a return value
    /// will allow the asset to be throttled via [`RenderAssetBytesPerFrameLimiter`].
    ///
    /// Returning `None` (the default) exempts the asset from throttling.
    #[inline]
    #[expect(
        unused_variables,
        reason = "The parameters here are intentionally unused by the default implementation; however, putting underscores here will result in the underscores being copied by rust-analyzer's tab completion."
    )]
    fn byte_len(erased_asset: &Self::SourceAsset) -> Option<usize> {
        None
    }
    /// Prepares the [`ErasedRenderAsset::SourceAsset`] for the GPU by transforming it into a [`ErasedRenderAsset`].
    ///
    /// ECS data may be accessed via `param`.
    fn prepare_asset(
        source_asset: Self::SourceAsset,
        asset_id: AssetId<Self::SourceAsset>,
        param: &mut SystemParamItem<Self::Param>,
    ) -> Result<Self::ErasedAsset, PrepareAssetError<Self::SourceAsset>>;
    /// Called whenever the [`ErasedRenderAsset::SourceAsset`] has been removed.
    ///
    /// You can implement this method if you need to access ECS data (via
    /// `_param`) in order to perform cleanup tasks when the asset is removed.
    ///
    /// The default implementation does nothing.
    fn unload_asset(
        _source_asset: AssetId<Self::SourceAsset>,
        _param: &mut SystemParamItem<Self::Param>,
    ) {
    }
}
/// This plugin extracts the changed assets from the "app world" into the "render world"
/// and prepares them for the GPU. They can then be accessed from the [`ErasedRenderAssets`] resource.
///
/// Therefore it sets up the [`ExtractSchedule`] and
/// [`RenderSystems::PrepareAssets`] steps for the specified [`ErasedRenderAsset`].
///
/// The `AFTER` generic parameter can be used to specify that `A::prepare_asset` should not be run until
/// `prepare_assets::<AFTER>` has completed. This allows the `prepare_asset` function to depend on another
/// prepared [`ErasedRenderAsset`], for example `Mesh::prepare_asset` relies on `ErasedRenderAssets::<GpuImage>` for morph
/// targets, so the plugin is created as `ErasedRenderAssetPlugin::<RenderMesh, GpuImage>::default()`.
pub struct ErasedRenderAssetPlugin<
    A: ErasedRenderAsset,
    AFTER: ErasedRenderAssetDependency + 'static = (),
> {
    // The plugin carries no runtime state; the type parameters alone drive
    // which systems get registered. `fn() -> (A, AFTER)` keeps the plugin
    // `Send + Sync` regardless of `A`/`AFTER`.
    phantom: PhantomData<fn() -> (A, AFTER)>,
}
impl<A: ErasedRenderAsset, AFTER: ErasedRenderAssetDependency + 'static> Default
for ErasedRenderAssetPlugin<A, AFTER>
{
fn default() -> Self {
Self {
phantom: Default::default(),
}
}
}
impl<A: ErasedRenderAsset, AFTER: ErasedRenderAssetDependency + 'static> Plugin
    for ErasedRenderAssetPlugin<A, AFTER>
{
    fn build(&self, app: &mut App) {
        // The cached system state lives in the main world so extraction can
        // read asset events without recreating the state every frame.
        app.init_resource::<CachedExtractErasedRenderAssetSystemState<A>>();
    }
    fn finish(&self, app: &mut App) {
        // The render sub-app may be absent; in that case there is nothing to
        // register.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .init_resource::<ExtractedAssets<A>>()
                .init_resource::<ErasedRenderAssets<A::ErasedAsset>>()
                .init_resource::<PrepareNextFrameAssets<A>>()
                .add_systems(
                    ExtractSchedule,
                    extract_erased_render_asset::<A>.in_set(AssetExtractionSystems),
                );
            // `AFTER` decides whether preparation is ordered after another
            // asset type's prepare system (see `ErasedRenderAssetDependency`).
            AFTER::register_system(
                render_app,
                prepare_erased_assets::<A>.in_set(RenderSystems::PrepareAssets),
            );
        }
    }
}
// helper to allow specifying dependencies between render assets
pub trait ErasedRenderAssetDependency {
    /// Registers `system` in the [`Render`] schedule, optionally ordered
    /// after this dependency's own prepare system.
    fn register_system(render_app: &mut SubApp, system: ScheduleConfigs<ScheduleSystem>);
}
/// `()` means "no dependency": the system is registered with no extra ordering.
impl ErasedRenderAssetDependency for () {
    fn register_system(render_app: &mut SubApp, system: ScheduleConfigs<ScheduleSystem>) {
        render_app.add_systems(Render, system);
    }
}
/// Any [`ErasedRenderAsset`] can act as a dependency: the system then runs
/// after that asset type's `prepare_erased_assets` system.
impl<A: ErasedRenderAsset> ErasedRenderAssetDependency for A {
    fn register_system(render_app: &mut SubApp, system: ScheduleConfigs<ScheduleSystem>) {
        render_app.add_systems(Render, system.after(prepare_erased_assets::<A>));
    }
}
/// Temporarily stores the extracted and removed assets of the current frame.
///
/// Populated each frame by [`extract_erased_render_asset`] and consumed by
/// [`prepare_erased_assets`].
#[derive(Resource)]
pub struct ExtractedAssets<A: ErasedRenderAsset> {
    /// The assets extracted this frame.
    ///
    /// These are assets that were either added or modified this frame.
    pub extracted: Vec<(AssetId<A::SourceAsset>, A::SourceAsset)>,
    /// IDs of the assets that were removed this frame.
    ///
    /// These assets will not be present in [`ExtractedAssets::extracted`].
    pub removed: HashSet<AssetId<A::SourceAsset>>,
    /// IDs of the assets that were modified this frame.
    pub modified: HashSet<AssetId<A::SourceAsset>>,
    /// IDs of the assets that were added this frame.
    pub added: HashSet<AssetId<A::SourceAsset>>,
}
impl<A: ErasedRenderAsset> Default for ExtractedAssets<A> {
fn default() -> Self {
Self {
extracted: Default::default(),
removed: Default::default(),
modified: Default::default(),
added: Default::default(),
}
}
}
/// Stores all GPU representations ([`ErasedRenderAsset`])
/// of [`ErasedRenderAsset::SourceAsset`] as long as they exist.
///
/// Keyed by [`UntypedAssetId`], so different source asset types can share one
/// map for the same erased GPU type.
#[derive(Resource)]
pub struct ErasedRenderAssets<ERA>(HashMap<UntypedAssetId, ERA>);
impl<ERA> Default for ErasedRenderAssets<ERA> {
fn default() -> Self {
Self(Default::default())
}
}
impl<ERA> ErasedRenderAssets<ERA> {
    /// Returns the prepared asset for the given id, if any.
    pub fn get(&self, id: impl Into<UntypedAssetId>) -> Option<&ERA> {
        let key = id.into();
        self.0.get(&key)
    }
    /// Returns a mutable reference to the prepared asset for the given id, if any.
    pub fn get_mut(&mut self, id: impl Into<UntypedAssetId>) -> Option<&mut ERA> {
        let key = id.into();
        self.0.get_mut(&key)
    }
    /// Stores a prepared asset under the given id, returning any previous value.
    pub fn insert(&mut self, id: impl Into<UntypedAssetId>, value: ERA) -> Option<ERA> {
        let key = id.into();
        self.0.insert(key, value)
    }
    /// Removes and returns the prepared asset for the given id, if any.
    pub fn remove(&mut self, id: impl Into<UntypedAssetId>) -> Option<ERA> {
        let key = id.into();
        self.0.remove(&key)
    }
    /// Iterates over all `(id, asset)` pairs.
    pub fn iter(&self) -> impl Iterator<Item = (UntypedAssetId, &ERA)> {
        self.0.iter().map(|(id, asset)| (*id, asset))
    }
    /// Iterates over all `(id, asset)` pairs with mutable access to the assets.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (UntypedAssetId, &mut ERA)> {
        self.0.iter_mut().map(|(id, asset)| (*id, asset))
    }
}
/// Caches the [`SystemState`] used by [`extract_erased_render_asset`] to read
/// asset events and the asset collection from the main world, so it is not
/// rebuilt every frame.
#[derive(Resource)]
struct CachedExtractErasedRenderAssetSystemState<A: ErasedRenderAsset> {
    state: SystemState<(
        MessageReader<'static, 'static, AssetEvent<A::SourceAsset>>,
        ResMut<'static, Assets<A::SourceAsset>>,
    )>,
}
impl<A: ErasedRenderAsset> FromWorld for CachedExtractErasedRenderAssetSystemState<A> {
    /// Initializes the cached [`SystemState`] against the main world.
    fn from_world(world: &mut bevy_ecs::world::World) -> Self {
        let state = SystemState::new(world);
        Self { state }
    }
}
/// This system extracts all created or modified assets of the corresponding [`ErasedRenderAsset::SourceAsset`] type
/// into the "render world".
///
/// The result is inserted as the [`ExtractedAssets`] resource for
/// [`prepare_erased_assets`] to consume.
pub(crate) fn extract_erased_render_asset<A: ErasedRenderAsset>(
    mut commands: Commands,
    mut main_world: ResMut<MainWorld>,
) {
    main_world.resource_scope(
        |world, mut cached_state: Mut<CachedExtractErasedRenderAssetSystemState<A>>| {
            let (mut events, mut assets) = cached_state.state.get_mut(world);
            // First collapse this frame's asset events into per-id decisions,
            // before touching the assets themselves.
            let mut needs_extracting = <HashSet<_>>::default();
            let mut removed = <HashSet<_>>::default();
            let mut modified = <HashSet<_>>::default();
            for event in events.read() {
                #[expect(
                    clippy::match_same_arms,
                    reason = "LoadedWithDependencies is marked as a TODO, so it's likely this will no longer lint soon."
                )]
                match event {
                    AssetEvent::Added { id } => {
                        needs_extracting.insert(*id);
                    }
                    AssetEvent::Modified { id } => {
                        needs_extracting.insert(*id);
                        modified.insert(*id);
                    }
                    AssetEvent::Removed { .. } => {
                        // We don't care that the asset was removed from Assets<T> in the main world.
                        // An asset is only removed from ErasedRenderAssets<T> when its last handle is dropped (AssetEvent::Unused).
                    }
                    AssetEvent::Unused { id } => {
                        // `Unused` supersedes any pending extraction for this id.
                        needs_extracting.remove(id);
                        modified.remove(id);
                        removed.insert(*id);
                    }
                    AssetEvent::LoadedWithDependencies { .. } => {
                        // TODO: handle this
                    }
                }
            }
            let mut extracted_assets = Vec::new();
            let mut added = <HashSet<_>>::default();
            for id in needs_extracting.drain() {
                if let Some(asset) = assets.get(id) {
                    let asset_usage = A::asset_usage(asset);
                    if asset_usage.contains(RenderAssetUsages::RENDER_WORLD) {
                        if asset_usage == RenderAssetUsages::RENDER_WORLD {
                            // Render-world-only assets are moved out of the
                            // main world entirely, avoiding a clone.
                            if let Some(asset) = assets.remove(id) {
                                extracted_assets.push((id, asset));
                                added.insert(id);
                            }
                        } else {
                            // The asset is also kept in the main world, so clone.
                            extracted_assets.push((id, asset.clone()));
                            added.insert(id);
                        }
                    }
                }
            }
            commands.insert_resource(ExtractedAssets::<A> {
                extracted: extracted_assets,
                removed,
                modified,
                added,
            });
            cached_state.state.apply(world);
        },
    );
}
// TODO: consider storing inside system?
/// All assets that should be prepared next frame.
///
/// Assets land here when preparation returned
/// [`PrepareAssetError::RetryNextUpdate`] or the per-frame byte budget was
/// exhausted.
#[derive(Resource)]
pub struct PrepareNextFrameAssets<A: ErasedRenderAsset> {
    assets: Vec<(AssetId<A::SourceAsset>, A::SourceAsset)>,
}
impl<A: ErasedRenderAsset> Default for PrepareNextFrameAssets<A> {
    /// Starts with an empty retry queue.
    fn default() -> Self {
        Self { assets: Vec::new() }
    }
}
/// This system prepares all assets of the corresponding [`ErasedRenderAsset::SourceAsset`] type
/// which where extracted this frame for the GPU.
pub fn prepare_erased_assets<A: ErasedRenderAsset>(
mut extracted_assets: ResMut<ExtractedAssets<A>>,
mut render_assets: ResMut<ErasedRenderAssets<A::ErasedAsset>>,
mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>,
param: StaticSystemParam<<A as ErasedRenderAsset>::Param>,
bpf: Res<RenderAssetBytesPerFrameLimiter>,
) {
let mut wrote_asset_count = 0;
let mut param = param.into_inner();
let queued_assets = core::mem::take(&mut prepare_next_frame.assets);
for (id, extracted_asset) in queued_assets {
if extracted_assets.removed.contains(&id) || extracted_assets.added.contains(&id) {
// skip previous frame's assets that have been removed or updated
continue;
}
let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) {
// we could check if available bytes > byte_len here, but we want to make some
// forward progress even if the asset is larger than the max bytes per frame.
// this way we always write at least one (sized) asset per frame.
// in future we could also consider partial asset uploads.
if bpf.exhausted() {
prepare_next_frame.assets.push((id, extracted_asset));
continue;
}
size
} else {
0
};
match A::prepare_asset(extracted_asset, id, &mut param) {
Ok(prepared_asset) => {
render_assets.insert(id, prepared_asset);
bpf.write_bytes(write_bytes);
wrote_asset_count += 1;
}
Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => {
prepare_next_frame.assets.push((id, extracted_asset));
}
Err(PrepareAssetError::AsBindGroupError(e)) => {
error!(
"{} Bind group construction failed: {e}",
core::any::type_name::<A>()
);
}
}
}
for removed in extracted_assets.removed.drain() {
render_assets.remove(removed);
A::unload_asset(removed, &mut param);
}
for (id, extracted_asset) in extracted_assets.extracted.drain(..) {
// we remove previous here to ensure that if we are updating the asset then
// any users will not see the old asset after a new asset is extracted,
// even if the new asset is not yet ready or we are out of bytes to write.
render_assets.remove(id);
let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) {
if bpf.exhausted() {
prepare_next_frame.assets.push((id, extracted_asset));
continue;
}
size
} else {
0
};
match A::prepare_asset(extracted_asset, id, &mut param) {
Ok(prepared_asset) => {
render_assets.insert(id, prepared_asset);
bpf.write_bytes(write_bytes);
wrote_asset_count += 1;
}
Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => {
prepare_next_frame.assets.push((id, extracted_asset));
}
Err(PrepareAssetError::AsBindGroupError(e)) => {
error!(
"{} Bind group construction failed: {e}",
core::any::type_name::<A>()
);
}
}
}
if bpf.exhausted() && !prepare_next_frame.assets.is_empty() {
debug!(
"{} write budget exhausted with {} assets remaining (wrote {})",
core::any::type_name::<A>(),
prepare_next_frame.assets.len(),
wrote_asset_count
);
}
}

View File

@@ -0,0 +1,338 @@
// Input depth source (mip 0). The binding's type depends on the pass variant:
// meshlet visibility buffers store the value as r64uint/r32uint storage
// textures, while the regular path reads a (possibly multisampled) depth
// texture. See `load_mip_0` for how each variant is decoded.
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(0) var mip_0: texture_storage_2d<r64uint, read>;
#else
#ifdef MESHLET
@group(0) @binding(0) var mip_0: texture_storage_2d<r32uint, read>;
#else // MESHLET
#ifdef MULTISAMPLE
@group(0) @binding(0) var mip_0: texture_depth_multisampled_2d;
#else // MULTISAMPLE
@group(0) @binding(0) var mip_0: texture_depth_2d;
#endif // MULTISAMPLE
#endif // MESHLET
#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
// Output mip chain. mip_6 is `read_write` because the second dispatch reads
// it back as the source for mips 7..12 (see `reduce_load_mip_6`).
@group(0) @binding(1) var mip_1: texture_storage_2d<r32float, write>;
@group(0) @binding(2) var mip_2: texture_storage_2d<r32float, write>;
@group(0) @binding(3) var mip_3: texture_storage_2d<r32float, write>;
@group(0) @binding(4) var mip_4: texture_storage_2d<r32float, write>;
@group(0) @binding(5) var mip_5: texture_storage_2d<r32float, write>;
@group(0) @binding(6) var mip_6: texture_storage_2d<r32float, read_write>;
@group(0) @binding(7) var mip_7: texture_storage_2d<r32float, write>;
@group(0) @binding(8) var mip_8: texture_storage_2d<r32float, write>;
@group(0) @binding(9) var mip_9: texture_storage_2d<r32float, write>;
@group(0) @binding(10) var mip_10: texture_storage_2d<r32float, write>;
@group(0) @binding(11) var mip_11: texture_storage_2d<r32float, write>;
@group(0) @binding(12) var mip_12: texture_storage_2d<r32float, write>;
@group(0) @binding(13) var samplr: sampler;
// Push constant limiting how many mip levels are actually produced.
struct Constants { max_mip_level: u32 }
var<push_constant> constants: Constants;
/// Generates a hierarchical depth buffer.
/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528
// TODO:
// * Subgroup support
// * True single pass downsampling
// 16x16 workgroup scratch used to pass reduced values between mip stages.
var<workgroup> intermediate_memory: array<array<f32, 16>, 16>;
// First dispatch: each 256-thread workgroup reduces a 64x64 tile of mip 0
// into mips 1..6.
@compute
@workgroup_size(256, 1, 1)
fn downsample_depth_first(
    @builtin(workgroup_id) workgroup_id: vec3u,
    @builtin(local_invocation_index) local_invocation_index: u32,
) {
    // Remap the linear invocation index to swizzled 2D coordinates
    // (see `remap_for_wave_reduction`).
    let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
    let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
    let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
    downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index);
    downsample_mips_2_to_5(x, y, workgroup_id.xy, local_invocation_index);
}
// Second dispatch: a single workgroup reduces mip 6 into mips 7..12.
@compute
@workgroup_size(256, 1, 1)
fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) {
    let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
    let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
    let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
    downsample_mips_6_and_7(x, y);
    downsample_mips_8_to_11(x, y, local_invocation_index);
}
// Each thread reduces four 2x2 quads of mip 0 into four mip 1 texels, then
// the workgroup cooperatively reduces those into mip 2 via shared memory.
fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
    var v: vec4f;
    // Four quadrants of the workgroup's 64x64 tile, 32 texels apart.
    var tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u);
    var pix = vec2(workgroup_id * 32u) + vec2(x, y);
    v[0] = reduce_load_mip_0(tex);
    textureStore(mip_1, pix, vec4(v[0]));
    tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u);
    pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y);
    v[1] = reduce_load_mip_0(tex);
    textureStore(mip_1, pix, vec4(v[1]));
    tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u);
    pix = vec2(workgroup_id * 32u) + vec2(x, y + 16u);
    v[2] = reduce_load_mip_0(tex);
    textureStore(mip_1, pix, vec4(v[2]));
    tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u);
    pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y + 16u);
    v[3] = reduce_load_mip_0(tex);
    textureStore(mip_1, pix, vec4(v[3]));
    // Stop early if mip 2 isn't requested.
    if constants.max_mip_level <= 1u { return; }
    // Reduce each quadrant's mip 1 values into mip 2 through shared memory;
    // only the first 64 threads perform the reduction.
    for (var i = 0u; i < 4u; i++) {
        intermediate_memory[x][y] = v[i];
        workgroupBarrier();
        if local_invocation_index < 64u {
            v[i] = reduce_4(vec4(
                intermediate_memory[x * 2u + 0u][y * 2u + 0u],
                intermediate_memory[x * 2u + 1u][y * 2u + 0u],
                intermediate_memory[x * 2u + 0u][y * 2u + 1u],
                intermediate_memory[x * 2u + 1u][y * 2u + 1u],
            ));
            pix = (workgroup_id * 16u) + vec2(
                x + (i % 2u) * 8u,
                y + (i / 2u) * 8u,
            );
            textureStore(mip_2, pix, vec4(v[i]));
        }
        workgroupBarrier();
    }
    // Stash the mip 2 values in shared memory for `downsample_mips_2_to_5`.
    if local_invocation_index < 64u {
        intermediate_memory[x + 0u][y + 0u] = v[0];
        intermediate_memory[x + 8u][y + 0u] = v[1];
        intermediate_memory[x + 0u][y + 8u] = v[2];
        intermediate_memory[x + 8u][y + 8u] = v[3];
    }
}
// Reduces mips 3..6 from the mip 2 values left in shared memory, bailing out
// as soon as `max_mip_level` is reached. A barrier separates each stage since
// every stage reads what the previous one wrote.
fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
    if constants.max_mip_level <= 2u { return; }
    workgroupBarrier();
    downsample_mip_2(x, y, workgroup_id, local_invocation_index);
    if constants.max_mip_level <= 3u { return; }
    workgroupBarrier();
    downsample_mip_3(x, y, workgroup_id, local_invocation_index);
    if constants.max_mip_level <= 4u { return; }
    workgroupBarrier();
    downsample_mip_4(x, y, workgroup_id, local_invocation_index);
    if constants.max_mip_level <= 5u { return; }
    workgroupBarrier();
    downsample_mip_5(workgroup_id, local_invocation_index);
}
// One reduction stage: 64 active threads write an 8x8 region of mip 3 and
// repack results into shared memory with the SPD interleaved layout.
fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
    if local_invocation_index < 64u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 2u + 0u][y * 2u + 0u],
            intermediate_memory[x * 2u + 1u][y * 2u + 0u],
            intermediate_memory[x * 2u + 0u][y * 2u + 1u],
            intermediate_memory[x * 2u + 1u][y * 2u + 1u],
        ));
        textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v));
        intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
    }
}
// 16 active threads write a 4x4 region of mip 4.
fn downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
    if local_invocation_index < 16u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
            intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
            intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
            intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
        ));
        textureStore(mip_4, (workgroup_id * 4u) + vec2(x, y), vec4(v));
        intermediate_memory[x * 4u + y][y * 4u] = v;
    }
}
// 4 active threads write a 2x2 region of mip 5.
fn downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
    if local_invocation_index < 4u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
            intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
            intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
            intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
        ));
        textureStore(mip_5, (workgroup_id * 2u) + vec2(x, y), vec4(v));
        intermediate_memory[x + y * 2u][0u] = v;
    }
}
// A single thread writes the workgroup's final texel into mip 6.
fn downsample_mip_5(workgroup_id: vec2u, local_invocation_index: u32) {
    if local_invocation_index < 1u {
        let v = reduce_4(vec4(
            intermediate_memory[0u][0u],
            intermediate_memory[1u][0u],
            intermediate_memory[2u][0u],
            intermediate_memory[3u][0u],
        ));
        textureStore(mip_6, workgroup_id, vec4(v));
    }
}
// Second-dispatch counterpart of `downsample_mips_0_and_1`: each thread
// reduces four 2x2 quads of mip 6 into mip 7, then reduces those into mip 8.
fn downsample_mips_6_and_7(x: u32, y: u32) {
    var v: vec4f;
    var tex = vec2(x * 4u + 0u, y * 4u + 0u);
    var pix = vec2(x * 2u + 0u, y * 2u + 0u);
    v[0] = reduce_load_mip_6(tex);
    textureStore(mip_7, pix, vec4(v[0]));
    tex = vec2(x * 4u + 2u, y * 4u + 0u);
    pix = vec2(x * 2u + 1u, y * 2u + 0u);
    v[1] = reduce_load_mip_6(tex);
    textureStore(mip_7, pix, vec4(v[1]));
    tex = vec2(x * 4u + 0u, y * 4u + 2u);
    pix = vec2(x * 2u + 0u, y * 2u + 1u);
    v[2] = reduce_load_mip_6(tex);
    textureStore(mip_7, pix, vec4(v[2]));
    tex = vec2(x * 4u + 2u, y * 4u + 2u);
    pix = vec2(x * 2u + 1u, y * 2u + 1u);
    v[3] = reduce_load_mip_6(tex);
    textureStore(mip_7, pix, vec4(v[3]));
    if constants.max_mip_level <= 7u { return; }
    let vr = reduce_4(v);
    textureStore(mip_8, vec2(x, y), vec4(vr));
    intermediate_memory[x][y] = vr;
}
// Reduces mips 9..12 from the mip 8 values left in shared memory, mirroring
// `downsample_mips_2_to_5`.
fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) {
    if constants.max_mip_level <= 8u { return; }
    workgroupBarrier();
    downsample_mip_8(x, y, local_invocation_index);
    if constants.max_mip_level <= 9u { return; }
    workgroupBarrier();
    downsample_mip_9(x, y, local_invocation_index);
    if constants.max_mip_level <= 10u { return; }
    workgroupBarrier();
    downsample_mip_10(x, y, local_invocation_index);
    if constants.max_mip_level <= 11u { return; }
    workgroupBarrier();
    downsample_mip_11(local_invocation_index);
}
// 64 active threads write mip 9; same shared-memory layout as `downsample_mip_2`.
fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) {
    if local_invocation_index < 64u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 2u + 0u][y * 2u + 0u],
            intermediate_memory[x * 2u + 1u][y * 2u + 0u],
            intermediate_memory[x * 2u + 0u][y * 2u + 1u],
            intermediate_memory[x * 2u + 1u][y * 2u + 1u],
        ));
        textureStore(mip_9, vec2(x, y), vec4(v));
        intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
    }
}
// 16 active threads write mip 10; same layout as `downsample_mip_3`.
fn downsample_mip_9(x: u32, y: u32, local_invocation_index: u32) {
    if local_invocation_index < 16u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
            intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
            intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
            intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
        ));
        textureStore(mip_10, vec2(x, y), vec4(v));
        intermediate_memory[x * 4u + y][y * 4u] = v;
    }
}
// 4 active threads write mip 11; same layout as `downsample_mip_4`.
fn downsample_mip_10(x: u32, y: u32, local_invocation_index: u32) {
    if local_invocation_index < 4u {
        let v = reduce_4(vec4(
            intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
            intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
            intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
            intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
        ));
        textureStore(mip_11, vec2(x, y), vec4(v));
        intermediate_memory[x + y * 2u][0u] = v;
    }
}
// A single thread writes the final 1x1 mip 12 texel.
fn downsample_mip_11(local_invocation_index: u32) {
    if local_invocation_index < 1u {
        let v = reduce_4(vec4(
            intermediate_memory[0u][0u],
            intermediate_memory[1u][0u],
            intermediate_memory[2u][0u],
            intermediate_memory[3u][0u],
        ));
        textureStore(mip_12, vec2(0u, 0u), vec4(v));
    }
}
// Swizzles a linear 0..64 index into a 2D coordinate via bit interleaving —
// presumably the wave-friendly layout from FidelityFX SPD (see the reference
// at the top of this file).
fn remap_for_wave_reduction(a: u32) -> vec2u {
    return vec2(
        insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
        insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u),
    );
}
// Loads a 2x2 quad from mip 0 and reduces it to a single value.
fn reduce_load_mip_0(tex: vec2u) -> f32 {
    let a = load_mip_0(tex.x, tex.y);
    let b = load_mip_0(tex.x + 1u, tex.y);
    let c = load_mip_0(tex.x, tex.y + 1u);
    let d = load_mip_0(tex.x + 1u, tex.y + 1u);
    return reduce_4(vec4(a, b, c, d));
}
// Loads a 2x2 quad from mip 6 (written by the first dispatch) and reduces it.
fn reduce_load_mip_6(tex: vec2u) -> f32 {
    return reduce_4(vec4(
        textureLoad(mip_6, tex + vec2(0u, 0u)).r,
        textureLoad(mip_6, tex + vec2(0u, 1u)).r,
        textureLoad(mip_6, tex + vec2(1u, 0u)).r,
        textureLoad(mip_6, tex + vec2(1u, 1u)).r,
    ));
}
// Reads one depth value from mip 0, decoding per pass variant.
fn load_mip_0(x: u32, y: u32) -> f32 {
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
    // Depth is packed into the high 32 bits of the u64 visibility value.
    let visibility = textureLoad(mip_0, vec2(x, y)).r;
    return bitcast<f32>(u32(visibility >> 32u));
#else // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
#ifdef MESHLET
    // The u32 visibility value is the depth bits directly.
    let visibility = textureLoad(mip_0, vec2(x, y)).r;
    return bitcast<f32>(visibility);
#else // MESHLET
    // Downsample the top level.
#ifdef MULTISAMPLE
    // The top level is multisampled, so we need to loop over all the samples
    // and reduce them to 1.
    var result = textureLoad(mip_0, vec2(x, y), 0);
    let sample_count = i32(textureNumSamples(mip_0));
    for (var sample = 1; sample < sample_count; sample += 1) {
        result = min(result, textureLoad(mip_0, vec2(x, y), sample));
    }
    return result;
#else // MULTISAMPLE
    return textureLoad(mip_0, vec2(x, y), 0);
#endif // MULTISAMPLE
#endif // MESHLET
#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
}
// Reduces four samples with `min`.
fn reduce_4(v: vec4f) -> f32 {
    return min(min(v.x, v.y), min(v.z, v.w));
}

View File

@@ -0,0 +1,783 @@
//! Downsampling of textures to produce mipmap levels.
//!
//! Currently, this module only supports generation of hierarchical Z buffers
//! for occlusion culling. It's marked experimental because the shader is
//! designed only for power-of-two texture sizes and is slightly incorrect for
//! non-power-of-two depth buffer sizes.
use core::array;
use crate::render::core_3d::{
graph::{Core3d, Node3d},
prepare_core_3d_depth_textures,
};
use bevy_app::{App, Plugin};
use bevy_asset::{embedded_asset, load_embedded_asset, Handle};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
component::Component,
entity::Entity,
prelude::{resource_exists, Without},
query::{Or, QueryState, With},
resource::Resource,
schedule::IntoScheduleConfigs as _,
system::{lifetimeless::Read, Commands, Local, Query, Res, ResMut},
world::{FromWorld, World},
};
use bevy_math::{uvec2, UVec2, Vec4Swizzles as _};
use crate::render::{batching::gpu_preprocessing::GpuPreprocessingSupport, RenderStartup};
use crate::render::{
experimental::occlusion_culling::{
OcclusionCulling, OcclusionCullingSubview, OcclusionCullingSubviewEntities,
},
render_graph::{Node, NodeRunError, RenderGraphContext, RenderGraphExt},
render_resource::{
binding_types::{sampler, texture_2d, texture_2d_multisampled, texture_storage_2d},
BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries,
CachedComputePipelineId, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor,
Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler, SamplerBindingType,
SamplerDescriptor, ShaderStages, SpecializedComputePipeline, SpecializedComputePipelines,
StorageTextureAccess, TextureAspect, TextureDescriptor, TextureDimension, TextureFormat,
TextureSampleType, TextureUsages, TextureView, TextureViewDescriptor, TextureViewDimension,
},
renderer::{RenderContext, RenderDevice},
texture::TextureCache,
view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture},
Render, RenderApp, RenderSystems,
};
use bevy_shader::Shader;
use bevy_utils::default;
use bitflags::bitflags;
use tracing::debug;
/// Identifies the `downsample_depth.wgsl` shader.
///
/// Stored as a render-world resource so pipeline specialization can reference
/// the embedded shader handle.
#[derive(Resource, Deref)]
pub struct DownsampleDepthShader(Handle<Shader>);
/// The maximum number of mip levels that we can produce.
///
/// 2^12 is 4096, so that's the maximum size of the depth buffer that we
/// support.
pub const DEPTH_PYRAMID_MIP_COUNT: usize = 12;
/// A plugin that allows Bevy to repeatedly downsample textures to create
/// mipmaps.
///
/// Currently, this is only used for hierarchical Z buffer generation for the
/// purposes of occlusion culling.
// Unit struct: all configuration lives in the systems and resources the
// plugin registers in `build`.
pub struct MipGenerationPlugin;
impl Plugin for MipGenerationPlugin {
    fn build(&self, app: &mut App) {
        embedded_asset!(app, "downsample_depth.wgsl");
        let downsample_depth_shader = load_embedded_asset!(app, "downsample_depth.wgsl");
        // Everything else lives in the render world; skip if it's absent.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            .insert_resource(DownsampleDepthShader(downsample_depth_shader))
            .init_resource::<SpecializedComputePipelines<DownsampleDepthPipeline>>()
            // The same node type is registered twice: once for the early pass
            // and once for the late pass (see `DownsampleDepthNode` docs).
            .add_render_graph_node::<DownsampleDepthNode>(Core3d, Node3d::EarlyDownsampleDepth)
            .add_render_graph_node::<DownsampleDepthNode>(Core3d, Node3d::LateDownsampleDepth)
            // Early downsample runs between the early and late prepasses.
            .add_render_graph_edges(
                Core3d,
                (
                    Node3d::EarlyPrepass,
                    Node3d::EarlyDeferredPrepass,
                    Node3d::EarlyDownsampleDepth,
                    Node3d::LatePrepass,
                    Node3d::LateDeferredPrepass,
                ),
            )
            // Late downsample runs during main-pass post-processing.
            .add_render_graph_edges(
                Core3d,
                (
                    Node3d::StartMainPassPostProcessing,
                    Node3d::LateDownsampleDepth,
                    Node3d::EndMainPassPostProcessing,
                ),
            )
            .add_systems(RenderStartup, init_depth_pyramid_dummy_texture)
            .add_systems(
                Render,
                create_downsample_depth_pipelines.in_set(RenderSystems::Prepare),
            )
            .add_systems(
                Render,
                (
                    prepare_view_depth_pyramids,
                    prepare_downsample_depth_view_bind_groups,
                )
                    .chain()
                    .in_set(RenderSystems::PrepareResources)
                    // Bind groups need the pipelines and the depth textures to
                    // exist first.
                    .run_if(resource_exists::<DownsampleDepthPipelines>)
                    .after(prepare_core_3d_depth_textures),
            );
    }
}
/// The nodes that produce a hierarchical Z-buffer, also known as a depth
/// pyramid.
///
/// This runs the single-pass downsampling (SPD) shader with the *min* filter in
/// order to generate a series of mipmaps for the Z buffer. The resulting
/// hierarchical Z-buffer can be used for occlusion culling.
///
/// There are two instances of this node. The *early* downsample depth pass is
/// the first hierarchical Z-buffer stage, which runs after the early prepass
/// and before the late prepass. It prepares the Z-buffer for the bounding box
/// tests that the late mesh preprocessing stage will perform. The *late*
/// downsample depth pass runs at the end of the main phase. It prepares the
/// Z-buffer for the occlusion culling that the early mesh preprocessing phase
/// of the *next* frame will perform.
///
/// This node won't do anything if occlusion culling isn't on.
pub struct DownsampleDepthNode {
    /// The query that we use to find views that need occlusion culling for
    /// their Z-buffer.
    ///
    /// `Read<T>` is read-only query access to `T`.
    main_view_query: QueryState<(
        Read<ViewDepthPyramid>,
        Read<ViewDownsampleDepthBindGroup>,
        Read<ViewDepthTexture>,
        Option<Read<OcclusionCullingSubviewEntities>>,
    )>,
    /// The query that we use to find shadow maps that need occlusion culling.
    shadow_view_query: QueryState<(
        Read<ViewDepthPyramid>,
        Read<ViewDownsampleDepthBindGroup>,
        Read<OcclusionCullingSubview>,
    )>,
}
impl FromWorld for DownsampleDepthNode {
fn from_world(world: &mut World) -> Self {
Self {
main_view_query: QueryState::new(world),
shadow_view_query: QueryState::new(world),
}
}
}
impl Node for DownsampleDepthNode {
    fn update(&mut self, world: &mut World) {
        // Keep the cached query states in sync with any archetypes that
        // appeared since the last frame.
        self.main_view_query.update_archetypes(world);
        self.shadow_view_query.update_archetypes(world);
    }
    fn run<'w>(
        &self,
        render_graph_context: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        // Views without the occlusion-culling components simply don't match
        // the query, making this node a no-op for them.
        let Ok((
            view_depth_pyramid,
            view_downsample_depth_bind_group,
            view_depth_texture,
            maybe_view_light_entities,
        )) = self
            .main_view_query
            .get_manual(world, render_graph_context.view_entity())
        else {
            return Ok(());
        };
        // Downsample depth for the main Z-buffer.
        downsample_depth(
            render_graph_context,
            render_context,
            world,
            view_depth_pyramid,
            view_downsample_depth_bind_group,
            uvec2(
                view_depth_texture.texture.width(),
                view_depth_texture.texture.height(),
            ),
            view_depth_texture.texture.sample_count(),
        )?;
        // Downsample depth for shadow maps that have occlusion culling enabled.
        if let Some(view_light_entities) = maybe_view_light_entities {
            for &view_light_entity in &view_light_entities.0 {
                let Ok((view_depth_pyramid, view_downsample_depth_bind_group, occlusion_culling)) =
                    self.shadow_view_query.get_manual(world, view_light_entity)
                else {
                    continue;
                };
                // Shadow maps are square (a single stored dimension) and
                // never multisampled, hence the splat and sample count of 1.
                downsample_depth(
                    render_graph_context,
                    render_context,
                    world,
                    view_depth_pyramid,
                    view_downsample_depth_bind_group,
                    UVec2::splat(occlusion_culling.depth_texture_size),
                    1,
                )?;
            }
        }
        Ok(())
    }
}
/// Produces a depth pyramid from the current depth buffer for a single view.
/// The resulting depth pyramid can be used for occlusion testing.
///
/// This is a no-op if the appropriate pipelines haven't been specialized yet
/// or haven't finished compiling.
fn downsample_depth<'w>(
    render_graph_context: &mut RenderGraphContext,
    render_context: &mut RenderContext<'w>,
    world: &'w World,
    view_depth_pyramid: &ViewDepthPyramid,
    view_downsample_depth_bind_group: &ViewDownsampleDepthBindGroup,
    view_size: UVec2,
    sample_count: u32,
) -> Result<(), NodeRunError> {
    let downsample_depth_pipelines = world.resource::<DownsampleDepthPipelines>();
    let pipeline_cache = world.resource::<PipelineCache>();
    // Despite the name "single-pass downsampling", we actually need two
    // passes because of the lack of `coherent` buffers in WGPU/WGSL.
    // Between each pass, there's an implicit synchronization barrier.
    // Fetch the appropriate pipeline ID, depending on whether the depth
    // buffer is multisampled or not.
    let (Some(first_downsample_depth_pipeline_id), Some(second_downsample_depth_pipeline_id)) =
        (if sample_count > 1 {
            (
                downsample_depth_pipelines.first_multisample.pipeline_id,
                downsample_depth_pipelines.second_multisample.pipeline_id,
            )
        } else {
            (
                downsample_depth_pipelines.first.pipeline_id,
                downsample_depth_pipelines.second.pipeline_id,
            )
        })
    else {
        return Ok(());
    };
    // Fetch the pipelines for the two passes. They may still be compiling,
    // in which case we skip this frame.
    let (Some(first_downsample_depth_pipeline), Some(second_downsample_depth_pipeline)) = (
        pipeline_cache.get_compute_pipeline(first_downsample_depth_pipeline_id),
        pipeline_cache.get_compute_pipeline(second_downsample_depth_pipeline_id),
    ) else {
        return Ok(());
    };
    // Run the depth downsampling, labeling the compute pass after this
    // node's render-graph label for debugging.
    view_depth_pyramid.downsample_depth(
        &format!("{:?}", render_graph_context.label()),
        render_context,
        view_size,
        view_downsample_depth_bind_group,
        first_downsample_depth_pipeline,
        second_downsample_depth_pipeline,
    );
    Ok(())
}
/// A single depth downsample pipeline.
#[derive(Resource)]
pub struct DownsampleDepthPipeline {
    /// The bind group layout for this pipeline.
    bind_group_layout: BindGroupLayout,
    /// A handle that identifies the compiled shader.
    ///
    /// `None` until the pipeline has been specialized (see
    /// `create_downsample_depth_pipelines`).
    pipeline_id: Option<CachedComputePipelineId>,
    /// The shader asset handle.
    shader: Handle<Shader>,
}
impl DownsampleDepthPipeline {
/// Creates a new [`DownsampleDepthPipeline`] from a bind group layout and the downsample
/// shader.
///
/// This doesn't actually specialize the pipeline; that must be done
/// afterward.
fn new(bind_group_layout: BindGroupLayout, shader: Handle<Shader>) -> DownsampleDepthPipeline {
DownsampleDepthPipeline {
bind_group_layout,
pipeline_id: None,
shader,
}
}
}
/// Stores all depth buffer downsampling pipelines.
///
/// All four pipelines share their bind group layout pairwise: the first and
/// second passes of each variant use the same layout.
#[derive(Resource)]
pub struct DownsampleDepthPipelines {
    /// The first pass of the pipeline, when the depth buffer is *not*
    /// multisampled.
    first: DownsampleDepthPipeline,
    /// The second pass of the pipeline, when the depth buffer is *not*
    /// multisampled.
    second: DownsampleDepthPipeline,
    /// The first pass of the pipeline, when the depth buffer is multisampled.
    first_multisample: DownsampleDepthPipeline,
    /// The second pass of the pipeline, when the depth buffer is multisampled.
    second_multisample: DownsampleDepthPipeline,
    /// The sampler that the depth downsampling shader uses to sample the depth
    /// buffer.
    sampler: Sampler,
}
/// Creates the [`DownsampleDepthPipelines`] if downsampling is supported on the
/// current platform.
///
/// Runs every frame in `RenderSystems::Prepare` but does its work at most
/// once, guarded by the `has_run` local.
fn create_downsample_depth_pipelines(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    pipeline_cache: Res<PipelineCache>,
    mut specialized_compute_pipelines: ResMut<SpecializedComputePipelines<DownsampleDepthPipeline>>,
    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
    downsample_depth_shader: Res<DownsampleDepthShader>,
    mut has_run: Local<bool>,
) {
    // Only run once.
    // We can't use a `resource_exists` or similar run condition here because
    // this function might fail to create downsample depth pipelines if the
    // current platform doesn't support compute shaders.
    if *has_run {
        return;
    }
    *has_run = true;
    if !gpu_preprocessing_support.is_culling_supported() {
        debug!("Downsample depth is not supported on this platform.");
        return;
    }
    // Create the bind group layouts. The bind group layouts are identical
    // between the first and second passes, so the only thing we need to
    // treat specially is the type of the first mip level (non-multisampled
    // or multisampled).
    let standard_bind_group_layout =
        create_downsample_depth_bind_group_layout(&render_device, false);
    let multisampled_bind_group_layout =
        create_downsample_depth_bind_group_layout(&render_device, true);
    // Create the depth pyramid sampler. This is shared among all shaders.
    let sampler = render_device.create_sampler(&SamplerDescriptor {
        label: Some("depth pyramid sampler"),
        ..SamplerDescriptor::default()
    });
    // Initialize the pipelines. All four share the same shader; they differ
    // only in layout and specialization key.
    let mut downsample_depth_pipelines = DownsampleDepthPipelines {
        first: DownsampleDepthPipeline::new(
            standard_bind_group_layout.clone(),
            downsample_depth_shader.0.clone(),
        ),
        second: DownsampleDepthPipeline::new(
            standard_bind_group_layout.clone(),
            downsample_depth_shader.0.clone(),
        ),
        first_multisample: DownsampleDepthPipeline::new(
            multisampled_bind_group_layout.clone(),
            downsample_depth_shader.0.clone(),
        ),
        second_multisample: DownsampleDepthPipeline::new(
            multisampled_bind_group_layout.clone(),
            downsample_depth_shader.0.clone(),
        ),
        sampler,
    };
    // Specialize each pipeline with the appropriate
    // `DownsampleDepthPipelineKey`.
    downsample_depth_pipelines.first.pipeline_id = Some(specialized_compute_pipelines.specialize(
        &pipeline_cache,
        &downsample_depth_pipelines.first,
        DownsampleDepthPipelineKey::empty(),
    ));
    downsample_depth_pipelines.second.pipeline_id = Some(specialized_compute_pipelines.specialize(
        &pipeline_cache,
        &downsample_depth_pipelines.second,
        DownsampleDepthPipelineKey::SECOND_PHASE,
    ));
    downsample_depth_pipelines.first_multisample.pipeline_id =
        Some(specialized_compute_pipelines.specialize(
            &pipeline_cache,
            &downsample_depth_pipelines.first_multisample,
            DownsampleDepthPipelineKey::MULTISAMPLE,
        ));
    downsample_depth_pipelines.second_multisample.pipeline_id =
        Some(specialized_compute_pipelines.specialize(
            &pipeline_cache,
            &downsample_depth_pipelines.second_multisample,
            DownsampleDepthPipelineKey::SECOND_PHASE | DownsampleDepthPipelineKey::MULTISAMPLE,
        ));
    // The presence of this resource is what gates the per-view preparation
    // systems (see `MipGenerationPlugin::build`).
    commands.insert_resource(downsample_depth_pipelines);
}
/// Creates a single bind group layout for the downsample depth pass.
///
/// Binding order is sequential and must match both the bind group built in
/// `ViewDepthPyramid::create_bind_group` and the shader: the source depth
/// texture, then one storage binding per pyramid mip level
/// ([`DEPTH_PYRAMID_MIP_COUNT`] of them), then the sampler.
fn create_downsample_depth_bind_group_layout(
    render_device: &RenderDevice,
    is_multisampled: bool,
) -> BindGroupLayout {
    render_device.create_bind_group_layout(
        if is_multisampled {
            "downsample multisample depth bind group layout"
        } else {
            "downsample depth bind group layout"
        },
        &BindGroupLayoutEntries::sequential(
            ShaderStages::COMPUTE,
            (
                // We only care about the multisample status of the depth buffer
                // for the first mip level. After the first mip level is
                // sampled, we drop to a single sample.
                if is_multisampled {
                    texture_2d_multisampled(TextureSampleType::Depth)
                } else {
                    texture_2d(TextureSampleType::Depth)
                },
                // All the mip levels follow:
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                // This intermediate mip is ReadWrite so that the second
                // downsampling phase can read it back as its source.
                // NOTE(review): matches the two-dispatch flow in
                // `ViewDepthPyramid::downsample_depth` — confirm against the
                // WGSL shader's bindings.
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::ReadWrite),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly),
                sampler(SamplerBindingType::NonFiltering),
            ),
        ),
    )
}
bitflags! {
    /// Uniquely identifies a configuration of the downsample depth shader.
    ///
    /// Note that meshlets maintain their downsample depth shaders on their own
    /// and don't use this infrastructure; thus there's no flag for meshlets in
    /// here, even though the shader has defines for it.
    // Stored in a `u8`; only the low two bits are used.
    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
    pub struct DownsampleDepthPipelineKey: u8 {
        /// True if the depth buffer is multisampled.
        const MULTISAMPLE = 1;
        /// True if this shader is the second phase of the downsample depth
        /// process; false if this shader is the first phase.
        const SECOND_PHASE = 2;
    }
}
impl SpecializedComputePipeline for DownsampleDepthPipeline {
    type Key = DownsampleDepthPipelineKey;
    /// Builds the compute pipeline descriptor for the given key.
    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
        let multisampled = key.contains(DownsampleDepthPipelineKey::MULTISAMPLE);
        let second_phase = key.contains(DownsampleDepthPipelineKey::SECOND_PHASE);
        // Only multisampling needs a shader define; the phase is selected
        // purely via the entry point.
        let shader_defs = if multisampled {
            vec!["MULTISAMPLE".into()]
        } else {
            vec![]
        };
        let multisample_label = if multisampled { " multisample" } else { "" };
        let phase_label = if second_phase {
            " second phase"
        } else {
            " first phase"
        };
        let entry_point = if second_phase {
            "downsample_depth_second"
        } else {
            "downsample_depth_first"
        };
        ComputePipelineDescriptor {
            label: Some(
                format!("downsample depth{multisample_label}{phase_label} pipeline").into(),
            ),
            layout: vec![self.bind_group_layout.clone()],
            // The mip count is passed as a single `u32` push constant.
            push_constant_ranges: vec![PushConstantRange {
                stages: ShaderStages::COMPUTE,
                range: 0..4,
            }],
            shader: self.shader.clone(),
            shader_defs,
            entry_point: Some(entry_point.into()),
            ..default()
        }
    }
}
/// Stores a placeholder texture that can be bound to a depth pyramid binding if
/// no depth pyramid is needed.
///
/// Created once at render startup by [`init_depth_pyramid_dummy_texture`].
#[derive(Resource, Deref, DerefMut)]
pub struct DepthPyramidDummyTexture(TextureView);
/// Render-startup system that creates the [`DepthPyramidDummyTexture`]
/// resource.
pub fn init_depth_pyramid_dummy_texture(mut commands: Commands, render_device: Res<RenderDevice>) {
    // Build the placeholder view once and expose it as a resource.
    let dummy_view = create_depth_pyramid_dummy_texture(
        &render_device,
        "depth pyramid dummy texture",
        "depth pyramid dummy texture view",
    );
    commands.insert_resource(DepthPyramidDummyTexture(dummy_view));
}
/// Creates a placeholder texture that can be bound to a depth pyramid binding
/// if no depth pyramid is needed.
pub fn create_depth_pyramid_dummy_texture(
    render_device: &RenderDevice,
    texture_label: &'static str,
    texture_view_label: &'static str,
) -> TextureView {
    // A minimal (default-extent), single-mip, single-sample R32Float storage
    // texture — just enough to satisfy the binding.
    let texture = render_device.create_texture(&TextureDescriptor {
        label: Some(texture_label),
        size: Extent3d::default(),
        mip_level_count: 1,
        sample_count: 1,
        dimension: TextureDimension::D2,
        format: TextureFormat::R32Float,
        usage: TextureUsages::STORAGE_BINDING,
        view_formats: &[],
    });
    texture.create_view(&TextureViewDescriptor {
        label: Some(texture_view_label),
        format: Some(TextureFormat::R32Float),
        dimension: Some(TextureViewDimension::D2),
        usage: None,
        aspect: TextureAspect::All,
        base_mip_level: 0,
        mip_level_count: Some(1),
        base_array_layer: 0,
        array_layer_count: Some(1),
    })
}
/// Stores a hierarchical Z-buffer for a view, which is a series of mipmaps
/// useful for efficient occlusion culling.
///
/// This will only be present on a view when occlusion culling is enabled.
#[derive(Component)]
pub struct ViewDepthPyramid {
    /// A texture view containing the entire depth texture.
    pub all_mips: TextureView,
    /// A series of texture views containing one mip level each.
    ///
    /// Entries past `mip_count` are clones of the dummy placeholder view
    /// (see [`DepthPyramidDummyTexture`]), so the array is always fully
    /// populated.
    pub mips: [TextureView; DEPTH_PYRAMID_MIP_COUNT],
    /// The total number of mipmap levels.
    ///
    /// This is the base-2 logarithm of the greatest dimension of the depth
    /// buffer, rounded up.
    pub mip_count: u32,
}
impl ViewDepthPyramid {
    /// Allocates a new depth pyramid for a depth buffer with the given size.
    pub fn new(
        render_device: &RenderDevice,
        texture_cache: &mut TextureCache,
        depth_pyramid_dummy_texture: &TextureView,
        size: UVec2,
        texture_label: &'static str,
        texture_view_label: &'static str,
    ) -> ViewDepthPyramid {
        // Calculate the size of the depth pyramid. The top pyramid level is
        // half the depth buffer size (rounded up), since level 0 is already
        // the first downsampled mip.
        let depth_pyramid_size = Extent3d {
            width: size.x.div_ceil(2),
            height: size.y.div_ceil(2),
            depth_or_array_layers: 1,
        };
        // Calculate the number of mip levels we need.
        let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2);
        // Create the depth pyramid. STORAGE_BINDING lets the downsample
        // shader write it; TEXTURE_BINDING lets culling shaders sample it.
        let depth_pyramid = texture_cache.get(
            render_device,
            TextureDescriptor {
                label: Some(texture_label),
                size: depth_pyramid_size,
                mip_level_count: depth_pyramid_mip_count,
                sample_count: 1,
                dimension: TextureDimension::D2,
                format: TextureFormat::R32Float,
                usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
                view_formats: &[],
            },
        );
        // Create individual views for each level of the depth pyramid. Levels
        // past `depth_pyramid_mip_count` fall back to the dummy view so that
        // the bind group always has all twelve entries.
        let depth_pyramid_mips = array::from_fn(|i| {
            if (i as u32) < depth_pyramid_mip_count {
                depth_pyramid.texture.create_view(&TextureViewDescriptor {
                    label: Some(texture_view_label),
                    format: Some(TextureFormat::R32Float),
                    dimension: Some(TextureViewDimension::D2),
                    usage: None,
                    aspect: TextureAspect::All,
                    base_mip_level: i as u32,
                    mip_level_count: Some(1),
                    base_array_layer: 0,
                    array_layer_count: Some(1),
                })
            } else {
                (*depth_pyramid_dummy_texture).clone()
            }
        });
        // Create the view for the depth pyramid as a whole.
        let depth_pyramid_all_mips = depth_pyramid.default_view.clone();
        Self {
            all_mips: depth_pyramid_all_mips,
            mips: depth_pyramid_mips,
            mip_count: depth_pyramid_mip_count,
        }
    }
    /// Creates a bind group that allows the depth buffer to be attached to the
    /// `downsample_depth.wgsl` shader.
    ///
    /// The entry order must match `create_downsample_depth_bind_group_layout`:
    /// source image, the twelve mip views, then the sampler.
    pub fn create_bind_group<'a, R>(
        &'a self,
        render_device: &RenderDevice,
        label: &'static str,
        bind_group_layout: &BindGroupLayout,
        source_image: R,
        sampler: &'a Sampler,
    ) -> BindGroup
    where
        R: IntoBinding<'a>,
    {
        render_device.create_bind_group(
            label,
            bind_group_layout,
            &BindGroupEntries::sequential((
                source_image,
                &self.mips[0],
                &self.mips[1],
                &self.mips[2],
                &self.mips[3],
                &self.mips[4],
                &self.mips[5],
                &self.mips[6],
                &self.mips[7],
                &self.mips[8],
                &self.mips[9],
                &self.mips[10],
                &self.mips[11],
                sampler,
            )),
        )
    }
    /// Invokes the shaders to generate the hierarchical Z-buffer.
    ///
    /// This is intended to be invoked as part of a render node.
    pub fn downsample_depth(
        &self,
        label: &str,
        render_context: &mut RenderContext,
        view_size: UVec2,
        downsample_depth_bind_group: &BindGroup,
        downsample_depth_first_pipeline: &ComputePipeline,
        downsample_depth_second_pipeline: &ComputePipeline,
    ) {
        let command_encoder = render_context.command_encoder();
        let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
            label: Some(label),
            timestamp_writes: None,
        });
        downsample_pass.set_pipeline(downsample_depth_first_pipeline);
        // Pass the mip count as a push constant, for simplicity.
        downsample_pass.set_push_constants(0, &self.mip_count.to_le_bytes());
        downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]);
        // Each workgroup covers a 64×64 pixel tile of the view.
        downsample_pass.dispatch_workgroups(view_size.x.div_ceil(64), view_size.y.div_ceil(64), 1);
        // The second phase (a single workgroup) is only needed when the
        // pyramid has more levels than the first phase produces.
        if self.mip_count >= 7 {
            downsample_pass.set_pipeline(downsample_depth_second_pipeline);
            downsample_pass.dispatch_workgroups(1, 1, 1);
        }
    }
}
/// Creates depth pyramids for views that have occlusion culling enabled.
///
/// Indirect drawing is required for GPU occlusion culling, so views that
/// opted out via `NoIndirectDrawing` are skipped.
pub fn prepare_view_depth_pyramids(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    mut texture_cache: ResMut<TextureCache>,
    depth_pyramid_dummy_texture: Res<DepthPyramidDummyTexture>,
    views: Query<(Entity, &ExtractedView), (With<OcclusionCulling>, Without<NoIndirectDrawing>)>,
) {
    for (view_entity, view) in &views {
        commands.entity(view_entity).insert(ViewDepthPyramid::new(
            &render_device,
            &mut texture_cache,
            &depth_pyramid_dummy_texture,
            // `viewport.zw()` is the viewport's width and height.
            view.viewport.zw(),
            "view depth pyramid texture",
            "view depth pyramid texture view",
        ));
    }
}
/// The bind group that we use to attach the depth buffer and depth pyramid for
/// a view to the `downsample_depth.wgsl` shader.
///
/// This will only be present for a view if occlusion culling is enabled.
/// Built each frame by `prepare_downsample_depth_view_bind_groups`.
#[derive(Component, Deref, DerefMut)]
pub struct ViewDownsampleDepthBindGroup(BindGroup);
/// Creates the [`ViewDownsampleDepthBindGroup`]s for all views with occlusion
/// culling enabled.
///
/// Handles both main views (which carry a [`ViewDepthTexture`]) and shadow
/// map subviews (which carry an [`OcclusionCullingSubview`]); the query
/// filter guarantees that at least one of the two is present.
fn prepare_downsample_depth_view_bind_groups(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    downsample_depth_pipelines: Res<DownsampleDepthPipelines>,
    view_depth_textures: Query<
        (
            Entity,
            &ViewDepthPyramid,
            Option<&ViewDepthTexture>,
            Option<&OcclusionCullingSubview>,
        ),
        Or<(With<ViewDepthTexture>, With<OcclusionCullingSubview>)>,
    >,
) {
    for (view_entity, view_depth_pyramid, view_depth_texture, shadow_occlusion_culling) in
        &view_depth_textures
    {
        // Only main-view depth textures can be multisampled; shadow map
        // subviews have no `ViewDepthTexture` and count as single-sampled.
        let is_multisampled = view_depth_texture
            .is_some_and(|view_depth_texture| view_depth_texture.texture.sample_count() > 1);
        commands
            .entity(view_entity)
            .insert(ViewDownsampleDepthBindGroup(
                view_depth_pyramid.create_bind_group(
                    &render_device,
                    if is_multisampled {
                        "downsample multisample depth bind group"
                    } else {
                        "downsample depth bind group"
                    },
                    // The first-phase layout is used here; the second phase
                    // shares the same layout (see
                    // `create_downsample_depth_pipelines`).
                    if is_multisampled {
                        &downsample_depth_pipelines
                            .first_multisample
                            .bind_group_layout
                    } else {
                        &downsample_depth_pipelines.first.bind_group_layout
                    },
                    match (view_depth_texture, shadow_occlusion_culling) {
                        (Some(view_depth_texture), _) => view_depth_texture.view(),
                        (None, Some(shadow_occlusion_culling)) => {
                            &shadow_occlusion_culling.depth_texture_view
                        }
                        // Impossible: the `Or` query filter above requires
                        // at least one of the two components.
                        (None, None) => unreachable!(
                            "query filter requires `ViewDepthTexture` or `OcclusionCullingSubview`"
                        ),
                    },
                    &downsample_depth_pipelines.sampler,
                ),
            ));
    }
}

View File

@@ -0,0 +1,8 @@
//! Experimental rendering features.
//!
//! Experimental features are features with known problems, missing features,
//! compatibility issues, low performance, and/or future breaking changes, but
//! are included nonetheless for testing purposes.
/// Repeated texture downsampling, currently used to build hierarchical
/// Z-buffers for occlusion culling.
pub mod mip_generation;
/// GPU two-phase occlusion culling.
pub mod occlusion_culling;

View File

@@ -0,0 +1,69 @@
// Types needed for GPU mesh uniform building.
#define_import_path bevy_pbr::mesh_preprocess_types
// Per-frame data that the CPU supplies to the GPU.
struct MeshInput {
    // The model transform.
    world_from_local: mat3x4<f32>,
    // The lightmap UV rect, packed into 64 bits.
    lightmap_uv_rect: vec2<u32>,
    // Various flags.
    flags: u32,
    // NOTE(review): the following index fields' exact semantics aren't
    // visible here — the names suggest offsets into the previous frame's
    // input list, the shared vertex/index buffers, and the skin list;
    // confirm against `gpu_preprocessing.rs`.
    previous_input_index: u32,
    first_vertex_index: u32,
    first_index_index: u32,
    index_count: u32,
    current_skin_index: u32,
    // Low 16 bits: index of the material inside the bind group data.
    // High 16 bits: index of the lightmap in the binding array.
    material_and_lightmap_bind_group_slot: u32,
    timestamp: u32,
    // User supplied index to identify the mesh instance
    tag: u32,
    // Padding; keeps the struct layout consistent with the CPU-side type.
    pad: u32,
}
// The `wgpu` indirect parameters structure. This is a union of two structures.
// For more information, see the corresponding comment in
// `gpu_preprocessing.rs`.
struct IndirectParametersIndexed {
    // `vertex_count` or `index_count`.
    index_count: u32,
    // `instance_count` in both structures.
    instance_count: u32,
    // `first_vertex` or `first_index`.
    first_index: u32,
    // `base_vertex` or `first_instance`.
    base_vertex: u32,
    // A read-only copy of `instance_index`.
    first_instance: u32,
}
// Indirect draw parameters for non-indexed meshes; field meanings mirror
// `wgpu`'s non-indexed indirect draw arguments.
struct IndirectParametersNonIndexed {
    vertex_count: u32,
    instance_count: u32,
    base_vertex: u32,
    first_instance: u32,
}
// Per-batch metadata written by the CPU.
struct IndirectParametersCpuMetadata {
    base_output_index: u32,
    batch_set_index: u32,
}
// Per-batch metadata accumulated on the GPU.
struct IndirectParametersGpuMetadata {
    mesh_index: u32,
// The instance counts are atomics only while the preprocessing shaders are
// still accumulating them; readers compile without the define and see
// plain `u32`s.
#ifdef WRITE_INDIRECT_PARAMETERS_METADATA
    early_instance_count: atomic<u32>,
    late_instance_count: atomic<u32>,
#else   // WRITE_INDIRECT_PARAMETERS_METADATA
    early_instance_count: u32,
    late_instance_count: u32,
#endif  // WRITE_INDIRECT_PARAMETERS_METADATA
}
// A set of indirect draws that are filled in by the GPU.
struct IndirectBatchSet {
    indirect_parameters_count: atomic<u32>,
    indirect_parameters_base: u32,
}

View File

@@ -0,0 +1,104 @@
//! GPU occlusion culling.
//!
//! See [`OcclusionCulling`] for a detailed description of occlusion culling in
//! Bevy.
use bevy_app::{App, Plugin};
use bevy_ecs::{component::Component, entity::Entity, prelude::ReflectComponent};
use bevy_reflect::{prelude::ReflectDefault, Reflect};
use bevy_shader::load_shader_library;
use crate::render::{extract_component::ExtractComponent, render_resource::TextureView};
/// Enables GPU occlusion culling.
///
/// See [`OcclusionCulling`] for a detailed description of occlusion culling in
/// Bevy.
pub struct OcclusionCullingPlugin;
impl Plugin for OcclusionCullingPlugin {
    fn build(&self, app: &mut App) {
        // Register the shared WGSL types used by the mesh preprocessing
        // shaders; this plugin adds no systems of its own.
        load_shader_library!(app, "mesh_preprocess_types.wgsl");
    }
}
/// Add this component to a view in order to enable experimental GPU occlusion
/// culling.
///
/// *Bevy's occlusion culling is currently marked as experimental.* There are
/// known issues whereby, in rare circumstances, occlusion culling can result in
/// meshes being culled that shouldn't be (i.e. meshes that turn invisible).
/// Please try it out and report issues.
///
/// *Occlusion culling* allows Bevy to avoid rendering objects that are fully
/// behind other opaque or alpha tested objects. This is different from, and
/// complements, depth fragment rejection as the `DepthPrepass` enables. While
/// depth rejection allows Bevy to avoid rendering *pixels* that are behind
/// other objects, the GPU still has to examine those pixels to reject them,
/// which requires transforming the vertices of the objects and performing
/// skinning if the objects were skinned. Occlusion culling allows the GPU to go
/// a step further, avoiding even transforming the vertices of objects that it
/// can quickly prove to be behind other objects.
///
/// Occlusion culling inherently has some overhead, because Bevy must examine
/// the objects' bounding boxes, and create an acceleration structure
/// (hierarchical Z-buffer) to perform the occlusion tests. Therefore, occlusion
/// culling is disabled by default. Only enable it if you measure it to be a
/// speedup on your scene. Note that, because Bevy's occlusion culling runs on
/// the GPU and is quite efficient, it's rare for occlusion culling to result in
/// a significant slowdown.
///
/// Occlusion culling currently requires a `DepthPrepass`. If no depth prepass
/// is present on the view, the [`OcclusionCulling`] component will be ignored.
/// Additionally, occlusion culling is currently incompatible with deferred
/// shading; including both `DeferredPrepass` and [`OcclusionCulling`] results
/// in unspecified behavior.
///
/// The algorithm that Bevy uses is known as [*two-phase occlusion culling*].
/// When you enable occlusion culling, Bevy splits the depth prepass into two:
/// an *early* depth prepass and a *late* depth prepass. The early depth prepass
/// renders all the meshes that were visible last frame to produce a
/// conservative approximation of the depth buffer. Then, after producing an
/// acceleration structure known as a hierarchical Z-buffer or depth pyramid,
/// Bevy tests the bounding boxes of all meshes against that depth buffer. Those
/// that can be quickly proven to be behind the geometry rendered during the
/// early depth prepass are skipped entirely. The other potentially-visible
/// meshes are rendered during the late prepass, and finally all the visible
/// meshes are rendered as usual during the opaque, transparent, etc. passes.
///
/// Unlike other occlusion culling systems you may be familiar with, Bevy's
/// occlusion culling is fully dynamic and requires no baking step. The CPU
/// overhead is minimal. Large skinned meshes and other dynamic objects can
/// occlude other objects.
///
/// [*two-phase occlusion culling*]:
/// https://medium.com/@mil_kru/two-pass-occlusion-culling-4100edcad501
// Zero-sized marker component; extracted to the render world unchanged.
#[derive(Component, ExtractComponent, Clone, Copy, Default, Reflect)]
#[reflect(Component, Default, Clone)]
pub struct OcclusionCulling;
/// A render-world component that contains resources necessary to perform
/// occlusion culling on any view other than a camera.
///
/// Bevy automatically places this component on views created for shadow
/// mapping. You don't ordinarily need to add this component yourself.
#[derive(Clone, Component)]
pub struct OcclusionCullingSubview {
    /// A texture view of the Z-buffer.
    pub depth_texture_view: TextureView,
    /// The size of the texture along both dimensions.
    ///
    /// Because [`OcclusionCullingSubview`] is only currently used for shadow
    /// maps, they're guaranteed to have sizes equal to a power of two, so we
    /// don't have to store the two dimensions individually here.
    pub depth_texture_size: u32,
}
/// A render-world component placed on each camera that stores references to all
/// entities other than cameras that need occlusion culling.
///
/// Bevy automatically places this component on cameras that are drawing
/// shadows, when those shadows come from lights with occlusion culling enabled.
/// You don't ordinarily need to add this component yourself.
///
/// Each entity in the list is expected to carry an
/// [`OcclusionCullingSubview`].
#[derive(Clone, Component)]
pub struct OcclusionCullingSubviewEntities(pub Vec<Entity>);

View File

@@ -0,0 +1,236 @@
use crate::render::{
render_resource::{encase::internal::WriteInto, DynamicUniformBuffer, ShaderType},
renderer::{RenderDevice, RenderQueue},
sync_component::SyncComponentPlugin,
sync_world::RenderEntity,
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use bevy_app::{App, Plugin};
use bevy_camera::visibility::ViewVisibility;
use bevy_ecs::{
bundle::NoBundleEffect,
component::Component,
prelude::*,
query::{QueryFilter, QueryItem, ReadOnlyQueryData},
};
use core::{marker::PhantomData, ops::Deref};
pub use macros::ExtractComponent;
/// Stores the index of a uniform inside of [`ComponentUniforms`].
#[derive(Component)]
pub struct DynamicUniformIndex<C: Component> {
    // The value returned by the uniform buffer writer for this entity's
    // component; used as the dynamic offset index when binding.
    index: u32,
    // Ties this index to the component type it was written for.
    marker: PhantomData<C>,
}
impl<C: Component> DynamicUniformIndex<C> {
    /// Returns the index of this entity's uniform inside the
    /// [`ComponentUniforms`] buffer.
    #[inline]
    pub fn index(&self) -> u32 {
        self.index
    }
}
/// Describes how a component gets extracted for rendering.
///
/// Therefore the component is transferred from the "app world" into the "render world"
/// in the [`ExtractSchedule`] step.
pub trait ExtractComponent: Component {
    /// ECS [`ReadOnlyQueryData`] to fetch the components to extract.
    type QueryData: ReadOnlyQueryData;
    /// Filters the entities with additional constraints.
    type QueryFilter: QueryFilter;
    /// The output from extraction.
    ///
    /// Returning `None` based on the queried item will remove the component from the entity in
    /// the render world. This can be used, for example, to conditionally extract camera settings
    /// in order to disable a rendering feature on the basis of those settings, without removing
    /// the component from the entity in the main world.
    ///
    /// The output may be different from the queried component.
    /// This can be useful for example if only a subset of the fields are useful
    /// in the render world.
    ///
    /// `Out` has a [`Bundle`] trait bound instead of a [`Component`] trait bound in order to allow use cases
    /// such as tuples of components as output.
    type Out: Bundle<Effect: NoBundleEffect>;
    // TODO: https://github.com/rust-lang/rust/issues/29661
    // type Out: Component = Self;
    /// Defines how the component is transferred into the "render world".
    ///
    /// Called once per matching entity during extraction.
    fn extract_component(item: QueryItem<'_, '_, Self::QueryData>) -> Option<Self::Out>;
}
/// This plugin prepares the components of the corresponding type for the GPU
/// by transforming them into uniforms.
///
/// They can then be accessed from the [`ComponentUniforms`] resource.
/// For referencing the newly created uniforms a [`DynamicUniformIndex`] is inserted
/// for every processed entity.
///
/// Therefore it sets up the [`RenderSystems::Prepare`] step
/// for the specified [`ExtractComponent`].
// `PhantomData<fn() -> C>` keeps the plugin `Send + Sync` regardless of `C`.
pub struct UniformComponentPlugin<C>(PhantomData<fn() -> C>);
impl<C> Default for UniformComponentPlugin<C> {
fn default() -> Self {
Self(PhantomData)
}
}
impl<C: Component + ShaderType + WriteInto + Clone> Plugin for UniformComponentPlugin<C> {
    fn build(&self, app: &mut App) {
        // All work happens in the render app; if it doesn't exist (e.g.
        // headless), the plugin is a no-op.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .insert_resource(ComponentUniforms::<C>::default())
                .add_systems(
                    Render,
                    prepare_uniform_components::<C>.in_set(RenderSystems::PrepareResources),
                );
        }
    }
}
/// Stores all uniforms of the component type.
///
/// Refilled every frame by [`prepare_uniform_components`].
#[derive(Resource)]
pub struct ComponentUniforms<C: Component + ShaderType> {
    // The GPU-side dynamic uniform buffer holding one entry per entity.
    uniforms: DynamicUniformBuffer<C>,
}
impl<C: Component + ShaderType> Deref for ComponentUniforms<C> {
    type Target = DynamicUniformBuffer<C>;
    /// Allows the uniform buffer's methods to be called directly on the
    /// resource.
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.uniforms
    }
}
impl<C: Component + ShaderType> ComponentUniforms<C> {
    /// Returns the underlying dynamic uniform buffer.
    #[inline]
    pub fn uniforms(&self) -> &DynamicUniformBuffer<C> {
        &self.uniforms
    }
}
impl<C: Component + ShaderType> Default for ComponentUniforms<C> {
fn default() -> Self {
Self {
uniforms: Default::default(),
}
}
}
/// This system prepares all components of the corresponding component type.
/// They are transformed into uniforms and stored in the [`ComponentUniforms`] resource.
fn prepare_uniform_components<C>(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut component_uniforms: ResMut<ComponentUniforms<C>>,
    components: Query<(Entity, &C)>,
) where
    C: Component + ShaderType + WriteInto + Clone,
{
    let components_iter = components.iter();
    let count = components_iter.len();
    // Size the GPU buffer for `count` entries up front; if no writer is
    // available (e.g. nothing to write this frame), bail out.
    let Some(mut writer) =
        component_uniforms
            .uniforms
            .get_writer(count, &render_device, &render_queue)
    else {
        return;
    };
    // Write each component into the uniform buffer, pairing each entity with
    // the index the writer returns so shaders can locate its data.
    let entities = components_iter
        .map(|(entity, component)| {
            (
                entity,
                DynamicUniformIndex::<C> {
                    index: writer.write(component),
                    marker: PhantomData,
                },
            )
        })
        .collect::<Vec<_>>();
    // The `try_` variant doesn't panic if an entity disappeared meanwhile.
    commands.try_insert_batch(entities);
}
/// This plugin extracts the components into the render world for synced entities.
///
/// To do so, it sets up the [`ExtractSchedule`] step for the specified [`ExtractComponent`].
pub struct ExtractComponentPlugin<C, F = ()> {
    // When true, only entities whose `ViewVisibility` is set are extracted.
    only_extract_visible: bool,
    marker: PhantomData<fn() -> (C, F)>,
}
impl<C, F> Default for ExtractComponentPlugin<C, F> {
    fn default() -> Self {
        Self {
            only_extract_visible: false,
            marker: PhantomData,
        }
    }
}
impl<C, F> ExtractComponentPlugin<C, F> {
    /// Creates a plugin that extracts only entities whose `ViewVisibility` is set
    /// (see `extract_visible_components`).
    pub fn extract_visible() -> Self {
        Self {
            only_extract_visible: true,
            marker: PhantomData,
        }
    }
}
impl<C: ExtractComponent> Plugin for ExtractComponentPlugin<C> {
    fn build(&self, app: &mut App) {
        // Ensure main-world entities carrying `C` get a render-world
        // counterpart before extraction runs.
        app.add_plugins(SyncComponentPlugin::<C>::default());
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            // Register exactly one of the two extraction systems, chosen at
            // plugin-construction time.
            if self.only_extract_visible {
                render_app.add_systems(ExtractSchedule, extract_visible_components::<C>);
            } else {
                render_app.add_systems(ExtractSchedule, extract_components::<C>);
            }
        }
    }
}
/// This system extracts all components of the corresponding [`ExtractComponent`], for entities that are synced via [`crate::sync_world::SyncToRenderWorld`].
fn extract_components<C: ExtractComponent>(
    mut commands: Commands,
    mut previous_len: Local<usize>,
    query: Extract<Query<(RenderEntity, C::QueryData), C::QueryFilter>>,
) {
    // Pre-size with last frame's count to amortize reallocation.
    let mut values = Vec::with_capacity(*previous_len);
    for (entity, query_item) in &query {
        if let Some(component) = C::extract_component(query_item) {
            values.push((entity, component));
        } else {
            // `extract_component` declined this entity: drop any stale output
            // component left over from a previous frame.
            commands.entity(entity).remove::<C::Out>();
        }
    }
    *previous_len = values.len();
    commands.try_insert_batch(values);
}
/// This system extracts all components of the corresponding [`ExtractComponent`], for entities that are visible and synced via [`crate::sync_world::SyncToRenderWorld`].
fn extract_visible_components<C: ExtractComponent>(
    mut commands: Commands,
    mut previous_len: Local<usize>,
    query: Extract<Query<(RenderEntity, &ViewVisibility, C::QueryData), C::QueryFilter>>,
) {
    // Pre-size with last frame's count to amortize reallocation.
    let mut values = Vec::with_capacity(*previous_len);
    for (entity, view_visibility, query_item) in &query {
        if view_visibility.get() {
            if let Some(component) = C::extract_component(query_item) {
                values.push((entity, component));
            } else {
                commands.entity(entity).remove::<C::Out>();
            }
        }
        // Note: invisible entities are skipped entirely here, so a previously
        // extracted `C::Out` is left as-is for them.
    }
    *previous_len = values.len();
    commands.try_insert_batch(values);
}

View File

@@ -0,0 +1,137 @@
//! Convenience logic for turning components from the main world into extracted
//! instances in the render world.
//!
//! This is essentially the same as the `extract_component` module, but
//! higher-performance because it avoids the ECS overhead.
use core::marker::PhantomData;
use bevy_app::{App, Plugin};
use bevy_camera::visibility::ViewVisibility;
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
prelude::Entity,
query::{QueryFilter, QueryItem, ReadOnlyQueryData},
resource::Resource,
system::{Query, ResMut},
};
use crate::render::sync_world::MainEntityHashMap;
use crate::render::{Extract, ExtractSchedule, RenderApp};
/// Describes how to extract data needed for rendering from a component or
/// components.
///
/// Before rendering, any applicable components will be transferred from the
/// main world to the render world in the [`ExtractSchedule`] step.
///
/// This is essentially the same as
/// [`ExtractComponent`](crate::extract_component::ExtractComponent), but
/// higher-performance because it avoids the ECS overhead.
pub trait ExtractInstance: Send + Sync + Sized + 'static {
    /// ECS [`ReadOnlyQueryData`] to fetch the components to extract.
    type QueryData: ReadOnlyQueryData;
    /// Filters the entities with additional constraints.
    type QueryFilter: QueryFilter;
    /// Defines how the component is transferred into the "render world".
    ///
    /// Returning `None` skips the entity for this frame.
    fn extract(item: QueryItem<'_, '_, Self::QueryData>) -> Option<Self>;
}
/// This plugin extracts one or more components into the "render world" as
/// extracted instances.
///
/// Therefore it sets up the [`ExtractSchedule`] step for the specified
/// [`ExtractedInstances`].
#[derive(Default)]
pub struct ExtractInstancesPlugin<EI>
where
    EI: ExtractInstance,
{
    // When true, only entities whose `ViewVisibility` is set are extracted.
    only_extract_visible: bool,
    marker: PhantomData<fn() -> EI>,
}
/// Stores all extract instances of a type in the render world.
///
/// Keyed by the *main world* entity (see `MainEntityHashMap`).
#[derive(Resource, Deref, DerefMut)]
pub struct ExtractedInstances<EI>(MainEntityHashMap<EI>)
where
    EI: ExtractInstance;
impl<EI> Default for ExtractedInstances<EI>
where
    EI: ExtractInstance,
{
    fn default() -> Self {
        Self(Default::default())
    }
}
impl<EI> ExtractInstancesPlugin<EI>
where
    EI: ExtractInstance,
{
    /// Creates a new [`ExtractInstancesPlugin`] that unconditionally extracts to
    /// the render world, whether the entity is visible or not.
    pub fn new() -> Self {
        Self {
            only_extract_visible: false,
            marker: PhantomData,
        }
    }
    /// Creates a new [`ExtractInstancesPlugin`] that extracts to the render world
    /// if and only if the entity it's attached to is visible.
    ///
    /// Visibility is checked via `ViewVisibility`.
    pub fn extract_visible() -> Self {
        Self {
            only_extract_visible: true,
            marker: PhantomData,
        }
    }
}
impl<EI> Plugin for ExtractInstancesPlugin<EI>
where
    EI: ExtractInstance,
{
    fn build(&self, app: &mut App) {
        // No-op when the render sub-app doesn't exist (headless apps).
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.init_resource::<ExtractedInstances<EI>>();
            if self.only_extract_visible {
                render_app.add_systems(ExtractSchedule, extract_visible::<EI>);
            } else {
                render_app.add_systems(ExtractSchedule, extract_all::<EI>);
            }
        }
    }
}
/// Extracts an instance for every matching main-world entity, visible or not.
fn extract_all<EI>(
    mut extracted_instances: ResMut<ExtractedInstances<EI>>,
    query: Extract<Query<(Entity, EI::QueryData), EI::QueryFilter>>,
) where
    EI: ExtractInstance,
{
    // Rebuild the map from scratch each frame: drop last frame's data, then
    // collect an instance for every entity whose `extract` yields one.
    extracted_instances.clear();
    let fresh = query
        .iter()
        .filter_map(|(entity, data)| EI::extract(data).map(|instance| (entity.into(), instance)));
    extracted_instances.extend(fresh);
}
// Like `extract_all`, but only extracts entities whose `ViewVisibility` is set.
fn extract_visible<EI>(
    mut extracted_instances: ResMut<ExtractedInstances<EI>>,
    query: Extract<Query<(Entity, &ViewVisibility, EI::QueryData), EI::QueryFilter>>,
) where
    EI: ExtractInstance,
{
    // Rebuilt from scratch each frame; invisible entities are simply absent.
    extracted_instances.clear();
    for (entity, view_visibility, other) in &query {
        if view_visibility.get()
            && let Some(extract_instance) = EI::extract(other)
        {
            extracted_instances.insert(entity.into(), extract_instance);
        }
    }
}

View File

@@ -0,0 +1,177 @@
use crate::render::MainWorld;
use bevy_ecs::{
component::Tick,
prelude::*,
query::FilteredAccessSet,
system::{
ReadOnlySystemParam, SystemMeta, SystemParam, SystemParamItem, SystemParamValidationError,
SystemState,
},
world::unsafe_world_cell::UnsafeWorldCell,
};
use core::ops::{Deref, DerefMut};
/// A helper for accessing [`MainWorld`] content using a system parameter.
///
/// A [`SystemParam`] adapter which applies the contained `SystemParam` to the [`World`]
/// contained in [`MainWorld`]. This parameter only works for systems run
/// during the [`ExtractSchedule`](crate::ExtractSchedule).
///
/// This requires that the contained [`SystemParam`] does not mutate the world, as it
/// uses a read-only reference to [`MainWorld`] internally.
///
/// ## Context
///
/// [`ExtractSchedule`] is used to extract (move) data from the simulation world ([`MainWorld`]) to the
/// render world. The render world drives rendering each frame (generally to a `Window`).
/// This design is used to allow performing calculations related to rendering a prior frame at the same
/// time as the next frame is simulated, which increases throughput (FPS).
///
/// [`Extract`] is used to get data from the main world during [`ExtractSchedule`].
///
/// ## Examples
///
/// ```
/// use bevy_ecs::prelude::*;
/// use crate::render::Extract;
/// use crate::render::sync_world::RenderEntity;
/// # #[derive(Component)]
/// // Do make sure to sync the cloud entities before extracting them.
/// # struct Cloud;
/// fn extract_clouds(mut commands: Commands, clouds: Extract<Query<RenderEntity, With<Cloud>>>) {
/// for cloud in &clouds {
/// commands.entity(cloud).insert(Cloud);
/// }
/// }
/// ```
///
/// [`ExtractSchedule`]: crate::ExtractSchedule
/// [Window]: bevy_window::Window
pub struct Extract<'w, 's, P>
where
    P: ReadOnlySystemParam + 'static,
{
    // The parameter item resolved against the *main* world, not the render world.
    item: SystemParamItem<'w, 's, P>,
}
#[doc(hidden)]
pub struct ExtractState<P: SystemParam + 'static> {
    // State for `P`, initialized against the main world in `init_state`.
    state: SystemState<P>,
    // State for the `Res<MainWorld>` lookup in the *render* world.
    main_world_state: <Res<'static, MainWorld> as SystemParam>::State,
}
// SAFETY: The only `World` access (`Res<MainWorld>`) is read-only.
unsafe impl<P> ReadOnlySystemParam for Extract<'_, '_, P> where P: ReadOnlySystemParam {}
// SAFETY: The only `World` access is properly registered by `Res<MainWorld>::init_state`.
// This call will also ensure that there are no conflicts with prior params.
unsafe impl<P> SystemParam for Extract<'_, '_, P>
where
    P: ReadOnlySystemParam,
{
    type State = ExtractState<P>;
    type Item<'w, 's> = Extract<'w, 's, P>;
    fn init_state(world: &mut World) -> Self::State {
        // `world` here is the render world; the inner param's state is
        // initialized against the main world stored in the `MainWorld` resource.
        let mut main_world = world.resource_mut::<MainWorld>();
        ExtractState {
            state: SystemState::new(&mut main_world),
            main_world_state: Res::<MainWorld>::init_state(world),
        }
    }
    fn init_access(
        state: &Self::State,
        system_meta: &mut SystemMeta,
        component_access_set: &mut FilteredAccessSet,
        world: &mut World,
    ) {
        // Only the `Res<MainWorld>` access is registered against the render
        // world; the wrapped param only ever touches the main world.
        Res::<MainWorld>::init_access(
            &state.main_world_state,
            system_meta,
            component_access_set,
            world,
        );
    }
    #[inline]
    unsafe fn validate_param(
        state: &mut Self::State,
        _system_meta: &SystemMeta,
        world: UnsafeWorldCell,
    ) -> Result<(), SystemParamValidationError> {
        // SAFETY: Read-only access to world data registered in `init_state`.
        let result = unsafe { world.get_resource_by_id(state.main_world_state) };
        let Some(main_world) = result else {
            return Err(SystemParamValidationError::invalid::<Self>(
                "`MainWorld` resource does not exist",
            ));
        };
        // SAFETY: Type is guaranteed by `SystemState`.
        let main_world: &World = unsafe { main_world.deref() };
        // SAFETY: We provide the main world on which this system state was initialized on.
        unsafe {
            SystemState::<P>::validate_param(
                &mut state.state,
                main_world.as_unsafe_world_cell_readonly(),
            )
        }
    }
    #[inline]
    unsafe fn get_param<'w, 's>(
        state: &'s mut Self::State,
        system_meta: &SystemMeta,
        world: UnsafeWorldCell<'w>,
        change_tick: Tick,
    ) -> Self::Item<'w, 's> {
        // SAFETY:
        // - The caller ensures that `world` is the same one that `init_state` was called with.
        // - The caller ensures that no other `SystemParam`s will conflict with the accesses we have registered.
        let main_world = unsafe {
            Res::<MainWorld>::get_param(
                &mut state.main_world_state,
                system_meta,
                world,
                change_tick,
            )
        };
        // Resolve the wrapped param against the main world and hand it out.
        let item = state.state.get(main_world.into_inner());
        Extract { item }
    }
}
// Transparent access: an `Extract<P>` is used exactly like the wrapped `P` item.
impl<'w, 's, P> Deref for Extract<'w, 's, P>
where
    P: ReadOnlySystemParam,
{
    type Target = SystemParamItem<'w, 's, P>;
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.item
    }
}
impl<'w, 's, P> DerefMut for Extract<'w, 's, P>
where
    P: ReadOnlySystemParam,
{
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.item
    }
}
// Forward iteration to the wrapped item so `for x in &extract_param` works
// exactly like iterating the inner param (e.g. a `Query`).
impl<'a, 'w, 's, P> IntoIterator for &'a Extract<'w, 's, P>
where
    P: ReadOnlySystemParam,
    &'a SystemParamItem<'w, 's, P>: IntoIterator,
{
    type Item = <&'a SystemParamItem<'w, 's, P> as IntoIterator>::Item;
    type IntoIter = <&'a SystemParamItem<'w, 's, P> as IntoIterator>::IntoIter;
    fn into_iter(self) -> Self::IntoIter {
        (&self.item).into_iter()
    }
}

View File

@@ -0,0 +1,70 @@
use core::marker::PhantomData;
use bevy_app::{App, Plugin};
use bevy_ecs::prelude::*;
pub use macros::ExtractResource;
use bevy_utils::once;
use crate::render::{Extract, ExtractSchedule, RenderApp};
/// Describes how a resource gets extracted for rendering.
///
/// Therefore the resource is transferred from the "main world" into the "render world"
/// in the [`ExtractSchedule`] step.
pub trait ExtractResource: Resource {
type Source: Resource;
/// Defines how the resource is transferred into the "render world".
fn extract_resource(source: &Self::Source) -> Self;
}
/// This plugin extracts the resources into the "render world".
///
/// Therefore it sets up the [`ExtractSchedule`] step
/// for the specified [`Resource`].
pub struct ExtractResourcePlugin<R: ExtractResource>(PhantomData<R>);
impl<R: ExtractResource> Default for ExtractResourcePlugin<R> {
    fn default() -> Self {
        Self(PhantomData)
    }
}
impl<R: ExtractResource> Plugin for ExtractResourcePlugin<R> {
    fn build(&self, app: &mut App) {
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.add_systems(ExtractSchedule, extract_resource::<R>);
        } else {
            // Log (once) instead of panicking so headless apps keep working.
            once!(tracing::error!(
                "Render app did not exist when trying to add `extract_resource` for <{}>.",
                core::any::type_name::<R>()
            ));
        }
    }
}
/// This system extracts the resource of the corresponding [`Resource`] type
pub fn extract_resource<R: ExtractResource>(
    mut commands: Commands,
    main_resource: Extract<Option<Res<R::Source>>>,
    target_resource: Option<ResMut<R>>,
) {
    // If the source resource is absent in the main world, do nothing.
    if let Some(main_resource) = main_resource.as_ref() {
        if let Some(mut target_resource) = target_resource {
            // Target already exists: only re-extract when the source changed.
            if main_resource.is_changed() {
                *target_resource = R::extract_resource(main_resource);
            }
        } else {
            // Target missing: (re-)create it via `Commands`. The warning below
            // only fires in debug builds, and only when the source existed
            // before (i.e. the render-world copy disappeared unexpectedly).
            #[cfg(debug_assertions)]
            if !main_resource.is_added() {
                once!(tracing::warn!(
                    "Removing resource {} from render world not expected, adding using `Commands`.
This may decrease performance",
                    core::any::type_name::<R>()
                ));
            }
            commands.insert_resource(R::extract_resource(main_resource));
        }
    }
}

View File

@@ -0,0 +1,34 @@
#define_import_path bevy_core_pipeline::fullscreen_vertex_shader
struct FullscreenVertexOutput {
    @builtin(position)
    position: vec4<f32>,
    @location(0)
    uv: vec2<f32>,
};
// This vertex shader produces the following, when drawn using indices 0..3:
//
//  1 |  0-----x.....2
//  0 |  |  s  |  . ´
// -1 |  x_____x´
// -2 |  :  .´
// -3 |  1´
//    +---------------
//      -1  0  1  2  3
//
// The axes are clip-space x and y. The region marked s is the visible region.
// The digits in the corners of the right-angled triangle are the vertex
// indices.
//
// The top-left has UV 0,0, the bottom-left has 0,2, and the top-right has 2,0.
// This means that the UV gets interpolated to 1,1 at the bottom-right corner
// of the clip-space rectangle that is at 1,-1 in clip space.
@vertex
fn fullscreen_vertex_shader(@builtin(vertex_index) vertex_index: u32) -> FullscreenVertexOutput {
    // See the explanation above for how this works
    // vertex 0 -> uv (0,0), vertex 1 -> uv (0,2), vertex 2 -> uv (2,0).
    let uv = vec2<f32>(f32(vertex_index >> 1u), f32(vertex_index & 1u)) * 2.0;
    // Map UV into clip space, flipping y so the single triangle covers the screen.
    let clip_position = vec4<f32>(uv * vec2<f32>(2.0, -2.0) + vec2<f32>(-1.0, 1.0), 0.0, 1.0);
    return FullscreenVertexOutput(clip_position, uv);
}

View File

@@ -0,0 +1,41 @@
use bevy_asset::{load_embedded_asset, Handle};
use bevy_ecs::{resource::Resource, world::FromWorld};
use crate::render::render_resource::VertexState;
use bevy_shader::Shader;
/// A shader that renders to the whole screen. Useful for post-processing.
#[derive(Resource, Clone)]
pub struct FullscreenShader(Handle<Shader>);
impl FromWorld for FullscreenShader {
    fn from_world(world: &mut bevy_ecs::world::World) -> Self {
        // The shader source is embedded into the binary at build time.
        Self(load_embedded_asset!(world, "fullscreen.wgsl"))
    }
}
impl FullscreenShader {
    /// Gets the raw shader handle.
    pub fn shader(&self) -> Handle<Shader> {
        self.0.clone()
    }
    /// Creates a [`VertexState`] that uses the [`FullscreenShader`] to output a
    /// ```wgsl
    /// struct FullscreenVertexOutput {
    ///     @builtin(position)
    ///     position: vec4<f32>;
    ///     @location(0)
    ///     uv: vec2<f32>;
    /// };
    /// ```
    /// from the vertex shader.
    /// The draw call should render one triangle: `render_pass.draw(0..3, 0..1);`
    pub fn to_vertex_state(&self) -> VertexState {
        VertexState {
            shader: self.0.clone(),
            // No shader defs and no vertex buffers: positions are derived from
            // the vertex index alone.
            shader_defs: Vec::new(),
            // Must match the entry-point name in `fullscreen.wgsl`.
            entry_point: Some("fullscreen_vertex_shader".into()),
            buffers: Vec::new(),
        }
    }
}

View File

@@ -0,0 +1,79 @@
use crate::render::{
extract_resource::ExtractResource,
render_resource::{ShaderType, UniformBuffer},
renderer::{RenderDevice, RenderQueue},
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use bevy_app::{App, Plugin};
use bevy_diagnostic::FrameCount;
use bevy_ecs::prelude::*;
use bevy_reflect::prelude::*;
use bevy_shader::load_shader_library;
use bevy_time::Time;
/// Plugin that manages the [`GlobalsUniform`] shader globals (time, delta time,
/// frame count) and uploads them to the GPU every frame.
pub struct GlobalsPlugin;
impl Plugin for GlobalsPlugin {
    fn build(&self, app: &mut App) {
        load_shader_library!(app, "globals.wgsl");
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .init_resource::<GlobalsBuffer>()
                .init_resource::<Time>()
                // Mirror `FrameCount` and `Time` into the render world, then
                // write the uniform buffer during resource preparation.
                .add_systems(ExtractSchedule, (extract_frame_count, extract_time))
                .add_systems(
                    Render,
                    prepare_globals_buffer.in_set(RenderSystems::PrepareResources),
                );
        }
    }
}
// Copies the main world's `FrameCount` into the render world each frame.
fn extract_frame_count(mut commands: Commands, frame_count: Extract<Res<FrameCount>>) {
    commands.insert_resource(**frame_count);
}
// Copies the main world's `Time` into the render world each frame.
fn extract_time(mut commands: Commands, time: Extract<Res<Time>>) {
    commands.insert_resource(**time);
}
/// Contains global values useful when writing shaders.
/// Currently only contains values related to time.
#[derive(Default, Clone, Resource, ExtractResource, Reflect, ShaderType)]
#[reflect(Resource, Default, Clone)]
pub struct GlobalsUniform {
    /// The time since startup in seconds.
    /// Wraps to 0 after 1 hour.
    time: f32,
    /// The delta time since the previous frame in seconds
    delta_time: f32,
    /// Frame count since the start of the app.
    /// It wraps to zero when it reaches the maximum value of a u32.
    frame_count: u32,
    /// WebGL2 structs must be 16 byte aligned.
    // NOTE(review): the WGSL mirror (`globals.wgsl`) gates its padding field on
    // `SIXTEEN_BYTE_ALIGNMENT` — confirm the two sides stay in sync.
    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
    _wasm_padding: f32,
}
/// The buffer containing the [`GlobalsUniform`]
#[derive(Resource, Default)]
pub struct GlobalsBuffer {
    pub buffer: UniformBuffer<GlobalsUniform>,
}
// Refreshes the CPU-side `GlobalsUniform` from `Time`/`FrameCount` and uploads
// it to the GPU. Runs in `RenderSystems::PrepareResources`.
fn prepare_globals_buffer(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut globals_buffer: ResMut<GlobalsBuffer>,
    time: Res<Time>,
    frame_count: Res<FrameCount>,
) {
    let buffer = globals_buffer.buffer.get_mut();
    buffer.time = time.elapsed_secs_wrapped();
    buffer.delta_time = time.delta_secs();
    buffer.frame_count = frame_count.0;
    globals_buffer
        .buffer
        .write_buffer(&render_device, &render_queue);
}

View File

@@ -0,0 +1,16 @@
#define_import_path bevy_render::globals
// NOTE(review): field order and types must match the Rust `GlobalsUniform`
// struct — confirm whenever either side changes.
struct Globals {
    // The time since startup in seconds
    // Wraps to 0 after 1 hour.
    time: f32,
    // The delta time since the previous frame in seconds
    delta_time: f32,
    // Frame count since the start of the app.
    // It wraps to zero when it reaches the maximum value of a u32.
    frame_count: u32,
#ifdef SIXTEEN_BYTE_ALIGNMENT
    // WebGL2 structs must be 16 byte aligned.
    _webgl2_padding: f32
#endif
};

View File

@@ -0,0 +1,59 @@
use crate::render::{
render_resource::{GpuArrayBuffer, GpuArrayBufferable},
renderer::{RenderDevice, RenderQueue},
Render, RenderApp, RenderSystems,
};
use bevy_app::{App, Plugin};
use bevy_ecs::{
prelude::{Component, Entity},
schedule::IntoScheduleConfigs,
system::{Commands, Query, Res, ResMut},
};
use core::marker::PhantomData;
/// This plugin prepares the components of the corresponding type for the GPU
/// by storing them in a [`GpuArrayBuffer`].
pub struct GpuComponentArrayBufferPlugin<C: Component + GpuArrayBufferable>(PhantomData<C>);
impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {
    fn build(&self, app: &mut App) {
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.add_systems(
                Render,
                prepare_gpu_component_array_buffers::<C>.in_set(RenderSystems::PrepareResources),
            );
        }
    }
    fn finish(&self, app: &mut App) {
        // Buffer construction needs the `RenderDevice`, which is looked up here
        // in `finish` rather than in `build`.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.insert_resource(GpuArrayBuffer::<C>::new(
                render_app.world().resource::<RenderDevice>(),
            ));
        }
    }
}
impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {
    fn default() -> Self {
        Self(PhantomData::<C>)
    }
}
// Collects every `C` component into the shared `GpuArrayBuffer<C>`, records the
// per-entity index returned by `push`, and uploads the buffer to the GPU.
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,
    components: Query<(Entity, &C)>,
) {
    // Rebuilt from scratch every frame.
    gpu_array_buffer.clear();
    let entities = components
        .iter()
        // `push` requires an owned value, hence the clone.
        .map(|(entity, component)| (entity, gpu_array_buffer.push(component.clone())))
        .collect::<Vec<_>>();
    commands.try_insert_batch(entities);
    gpu_array_buffer.write_buffer(&render_device, &render_queue);
}

View File

@@ -0,0 +1,414 @@
use crate::render::{
extract_component::ExtractComponentPlugin,
render_asset::RenderAssets,
render_resource::{
Buffer, BufferUsages, CommandEncoder, Extent3d, TexelCopyBufferLayout, Texture,
TextureFormat,
},
renderer::{render_system, RenderDevice},
storage::{GpuShaderStorageBuffer, ShaderStorageBuffer},
sync_world::MainEntity,
texture::GpuImage,
ExtractSchedule, MainWorld, Render, RenderApp, RenderSystems,
};
use async_channel::{Receiver, Sender};
use bevy_app::{App, Plugin};
use bevy_asset::Handle;
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::schedule::IntoScheduleConfigs;
use bevy_ecs::{
change_detection::ResMut,
entity::Entity,
event::EntityEvent,
prelude::{Component, Resource, World},
system::{Query, Res},
};
use bevy_image::{Image, TextureFormatPixelInfo};
use bevy_platform::collections::HashMap;
use bevy_reflect::Reflect;
use macros::ExtractComponent;
use encase::internal::ReadFrom;
use encase::private::Reader;
use encase::ShaderType;
use tracing::warn;
/// A plugin that enables reading back gpu buffers and textures to the cpu.
pub struct GpuReadbackPlugin {
    /// Describes the number of frames a buffer can be unused before it is removed from the pool in
    /// order to avoid unnecessary reallocations.
    max_unused_frames: usize,
}
impl Default for GpuReadbackPlugin {
    fn default() -> Self {
        Self {
            // Keep idle staging buffers around for 10 frames by default.
            max_unused_frames: 10,
        }
    }
}
impl Plugin for GpuReadbackPlugin {
    fn build(&self, app: &mut App) {
        app.add_plugins(ExtractComponentPlugin::<Readback>::default());
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .init_resource::<GpuReadbackBufferPool>()
                .init_resource::<GpuReadbacks>()
                .insert_resource(GpuReadbackMaxUnusedFrames(self.max_unused_frames))
                // Completed readbacks are delivered to the main world during
                // extraction; ordering vs. other extract systems is irrelevant.
                .add_systems(ExtractSchedule, sync_readbacks.ambiguous_with_all())
                .add_systems(
                    Render,
                    (
                        prepare_buffers.in_set(RenderSystems::PrepareResources),
                        // Buffers can only be mapped once the copy commands have
                        // been submitted, hence `.after(render_system)`.
                        map_buffers
                            .after(render_system)
                            .in_set(RenderSystems::Render),
                    ),
                );
        }
    }
}
/// A component that registers the wrapped handle for gpu readback, either a texture or a buffer.
///
/// Data is read asynchronously and will be triggered on the entity via the [`ReadbackComplete`] event
/// when complete. If this component is not removed, the readback will be attempted every frame
#[derive(Component, ExtractComponent, Clone, Debug)]
pub enum Readback {
    /// Read back the full contents of the given texture.
    Texture(Handle<Image>),
    /// Read back a storage buffer, either in full or restricted to a byte range.
    Buffer {
        buffer: Handle<ShaderStorageBuffer>,
        /// Optional `(start_offset, size)` in bytes; `None` reads the whole buffer.
        start_offset_and_size: Option<(u64, u64)>,
    },
}
impl Readback {
    /// Create a readback component for a texture using the given handle.
    pub fn texture(image: Handle<Image>) -> Self {
        Self::Texture(image)
    }
    /// Create a readback component for a full buffer using the given handle.
    pub fn buffer(buffer: Handle<ShaderStorageBuffer>) -> Self {
        Self::Buffer {
            buffer,
            start_offset_and_size: None,
        }
    }
    /// Create a readback component for a buffer range using the given handle, a start offset in bytes
    /// and a number of bytes to read.
    pub fn buffer_range(buffer: Handle<ShaderStorageBuffer>, start_offset: u64, size: u64) -> Self {
        Self::Buffer {
            buffer,
            start_offset_and_size: Some((start_offset, size)),
        }
    }
}
/// An event that is triggered when a gpu readback is complete.
///
/// The event contains the data as a `Vec<u8>`, which can be interpreted as the raw bytes of the
/// requested buffer or texture.
#[derive(EntityEvent, Deref, DerefMut, Reflect, Debug)]
#[reflect(Debug)]
pub struct ReadbackComplete {
    /// The entity whose [`Readback`] component produced this data.
    pub entity: Entity,
    /// The raw bytes copied back from the GPU.
    #[deref]
    pub data: Vec<u8>,
}
impl ReadbackComplete {
    /// Convert the raw bytes of the event to a shader type.
    ///
    /// # Panics
    /// Panics if a `Reader` cannot be constructed over the data (e.g. the byte
    /// length does not fit `T`).
    pub fn to_shader_type<T: ShaderType + ReadFrom + Default>(&self) -> T {
        let mut val = T::default();
        let mut reader = Reader::new::<T>(&self.data, 0).expect("Failed to create Reader");
        T::read_from(&mut val, &mut reader);
        val
    }
}
// Maximum number of frames a pooled staging buffer may sit unused before it is freed.
#[derive(Resource)]
struct GpuReadbackMaxUnusedFrames(usize);
// One pooled staging buffer plus its bookkeeping.
struct GpuReadbackBuffer {
    buffer: Buffer,
    // Whether the buffer is currently lent out for an in-flight readback.
    taken: bool,
    // Number of consecutive frames this buffer has gone unused.
    frames_unused: usize,
}
#[derive(Resource, Default)]
struct GpuReadbackBufferPool {
    // Map of buffer size to list of buffers, with a flag for whether the buffer is taken and how
    // many frames it has been unused for.
    // TODO: We could ideally write all readback data to one big buffer per frame, the assumption
    // here is that very few entities will actually be read back at once, and their size is
    // unlikely to change.
    buffers: HashMap<u64, Vec<GpuReadbackBuffer>>,
}
impl GpuReadbackBufferPool {
    // Hands out a mappable staging buffer of exactly `size` bytes, reusing a
    // free pooled buffer of that size when one exists.
    fn get(&mut self, render_device: &RenderDevice, size: u64) -> Buffer {
        let buffers = self.buffers.entry(size).or_default();
        // find an untaken buffer for this size
        if let Some(buf) = buffers.iter_mut().find(|x| !x.taken) {
            buf.taken = true;
            buf.frames_unused = 0;
            return buf.buffer.clone();
        }
        // Nothing free: allocate a fresh COPY_DST | MAP_READ staging buffer.
        let buffer = render_device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Readback Buffer"),
            size,
            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });
        buffers.push(GpuReadbackBuffer {
            buffer: buffer.clone(),
            taken: true,
            frames_unused: 0,
        });
        buffer
    }
    // Returns the buffer to the pool so it can be used in a future frame
    fn return_buffer(&mut self, buffer: &Buffer) {
        let size = buffer.size();
        let buffers = self
            .buffers
            .get_mut(&size)
            .expect("Returned buffer of untracked size");
        if let Some(buf) = buffers.iter_mut().find(|x| x.buffer.id() == buffer.id()) {
            buf.taken = false;
        } else {
            warn!("Returned buffer that was not allocated");
        }
    }
    // Ages unused buffers and frees those idle for `max_unused_frames` or more.
    fn update(&mut self, max_unused_frames: usize) {
        for (_, buffers) in &mut self.buffers {
            // Tick all the buffers
            for buf in &mut *buffers {
                if !buf.taken {
                    buf.frames_unused += 1;
                }
            }
            // Remove buffers that haven't been used for `max_unused_frames`
            buffers.retain(|x| x.frames_unused < max_unused_frames);
        }
        // Remove empty buffer sizes
        self.buffers.retain(|_, buffers| !buffers.is_empty());
    }
}
// Where a readback copies from: a texture (with its buffer copy layout) or a
// (sub)range of a GPU buffer.
enum ReadbackSource {
    Texture {
        texture: Texture,
        layout: TexelCopyBufferLayout,
        size: Extent3d,
    },
    Buffer {
        buffer: Buffer,
        // Optional `(start_offset, size)` byte range; `None` means the whole buffer.
        start_offset_and_size: Option<(u64, u64)>,
    },
}
#[derive(Resource, Default)]
struct GpuReadbacks {
    // Readbacks requested this frame; copy commands not yet submitted.
    requested: Vec<GpuReadback>,
    // Readbacks whose buffer map has been kicked off; awaiting the async result.
    mapped: Vec<GpuReadback>,
}
// A single in-flight readback: its source, staging buffer, and the channel that
// hands the bytes back to `sync_readbacks`.
struct GpuReadback {
    pub entity: Entity,
    pub src: ReadbackSource,
    pub buffer: Buffer,
    pub rx: Receiver<(Entity, Buffer, Vec<u8>)>,
    pub tx: Sender<(Entity, Buffer, Vec<u8>)>,
}
// Drains finished readbacks: triggers `ReadbackComplete` on the main-world
// entity, returns the staging buffer to the pool, and ages/evicts idle buffers.
fn sync_readbacks(
    mut main_world: ResMut<MainWorld>,
    mut buffer_pool: ResMut<GpuReadbackBufferPool>,
    mut readbacks: ResMut<GpuReadbacks>,
    max_unused_frames: Res<GpuReadbackMaxUnusedFrames>,
) {
    readbacks.mapped.retain(|readback| {
        if let Ok((entity, buffer, data)) = readback.rx.try_recv() {
            main_world.trigger(ReadbackComplete { data, entity });
            buffer_pool.return_buffer(&buffer);
            false
        } else {
            // Not ready yet; keep it around and poll again next frame.
            true
        }
    });
    buffer_pool.update(max_unused_frames.0);
}
// For every entity with a `Readback` component, allocates (or reuses) a
// suitably sized staging buffer and queues a readback request. The actual copy
// commands are recorded later in `submit_readback_commands`.
fn prepare_buffers(
    render_device: Res<RenderDevice>,
    mut readbacks: ResMut<GpuReadbacks>,
    mut buffer_pool: ResMut<GpuReadbackBufferPool>,
    gpu_images: Res<RenderAssets<GpuImage>>,
    ssbos: Res<RenderAssets<GpuShaderStorageBuffer>>,
    handles: Query<(&MainEntity, &Readback)>,
) {
    for (entity, readback) in handles.iter() {
        match readback {
            Readback::Texture(image) => {
                // Skip silently when the GPU image isn't ready yet or the
                // format has no well-defined pixel size.
                if let Some(gpu_image) = gpu_images.get(image)
                    && let Ok(pixel_size) = gpu_image.texture_format.pixel_size()
                {
                    let layout = layout_data(gpu_image.size, gpu_image.texture_format);
                    // Rows must be padded to the copy alignment, so the staging
                    // buffer is sized from the aligned row length.
                    let buffer = buffer_pool.get(
                        &render_device,
                        get_aligned_size(gpu_image.size, pixel_size as u32) as u64,
                    );
                    let (tx, rx) = async_channel::bounded(1);
                    readbacks.requested.push(GpuReadback {
                        entity: entity.id(),
                        src: ReadbackSource::Texture {
                            texture: gpu_image.texture.clone(),
                            layout,
                            size: gpu_image.size,
                        },
                        buffer,
                        rx,
                        tx,
                    });
                }
            }
            Readback::Buffer {
                buffer,
                start_offset_and_size,
            } => {
                if let Some(ssbo) = ssbos.get(buffer) {
                    let full_size = ssbo.buffer.size();
                    // Validate the requested range eagerly; reading past the
                    // end is a programming error, hence the panic.
                    let size = start_offset_and_size
                        .map(|(start, size)| {
                            let end = start + size;
                            if end > full_size {
                                panic!(
                                    "Tried to read past the end of the buffer (start: {start}, \
                                    size: {size}, buffer size: {full_size})."
                                );
                            }
                            size
                        })
                        .unwrap_or(full_size);
                    let buffer = buffer_pool.get(&render_device, size);
                    let (tx, rx) = async_channel::bounded(1);
                    readbacks.requested.push(GpuReadback {
                        entity: entity.id(),
                        src: ReadbackSource::Buffer {
                            start_offset_and_size: *start_offset_and_size,
                            buffer: ssbo.buffer.clone(),
                        },
                        buffer,
                        rx,
                        tx,
                    });
                }
            }
        }
    }
}
// Records the GPU→staging-buffer copy commands for every requested readback.
pub(crate) fn submit_readback_commands(world: &World, command_encoder: &mut CommandEncoder) {
    let readbacks = world.resource::<GpuReadbacks>();
    for readback in &readbacks.requested {
        match &readback.src {
            ReadbackSource::Texture {
                texture,
                layout,
                size,
            } => {
                command_encoder.copy_texture_to_buffer(
                    texture.as_image_copy(),
                    wgpu::TexelCopyBufferInfo {
                        buffer: &readback.buffer,
                        layout: *layout,
                    },
                    *size,
                );
            }
            ReadbackSource::Buffer {
                buffer,
                start_offset_and_size,
            } => {
                // Default to copying the whole buffer from offset 0.
                let (src_start, size) = start_offset_and_size.unwrap_or((0, buffer.size()));
                command_encoder.copy_buffer_to_buffer(buffer, src_start, &readback.buffer, 0, size);
            }
        }
    }
}
/// Move requested readbacks to mapped readbacks after commands have been submitted in render system
fn map_buffers(mut readbacks: ResMut<GpuReadbacks>) {
    // Take ownership of the pending list so we can push into `mapped` while
    // iterating (both fields live behind the same `ResMut`).
    let pending = core::mem::take(&mut readbacks.requested);
    for readback in pending {
        let entity = readback.entity;
        let buffer = readback.buffer.clone();
        let tx = readback.tx.clone();
        // Kick off the async map; the callback fires once the GPU is done.
        readback.buffer.slice(..).map_async(wgpu::MapMode::Read, move |outcome| {
            outcome.expect("Failed to map buffer");
            // Copy the mapped bytes out, then release the mapping before the
            // buffer travels back through the channel.
            let bytes = {
                let view = buffer.slice(..).get_mapped_range();
                Vec::from(&*view)
            };
            buffer.unmap();
            if let Err(e) = tx.try_send((entity, buffer, bytes)) {
                warn!("Failed to send readback result: {}", e);
            }
        });
        readbacks.mapped.push(readback);
    }
}
// Utils
/// Round up a given value to be a multiple of [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`].
pub(crate) const fn align_byte_size(value: u32) -> u32 {
    RenderDevice::align_copy_bytes_per_row(value as usize) as u32
}
/// Get the size of a image when the size of each row has been rounded up to [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`].
pub(crate) const fn get_aligned_size(extent: Extent3d, pixel_size: u32) -> u32 {
    // rows (height) × padded bytes-per-row × layers.
    extent.height * align_byte_size(extent.width * pixel_size) * extent.depth_or_array_layers
}
/// Get a [`TexelCopyBufferLayout`] aligned such that the image can be copied into a buffer.
pub(crate) fn layout_data(extent: Extent3d, format: TextureFormat) -> TexelCopyBufferLayout {
    TexelCopyBufferLayout {
        // `bytes_per_row` is only set for multi-row / multi-layer copies.
        bytes_per_row: if extent.height > 1 || extent.depth_or_array_layers > 1 {
            if let Ok(pixel_size) = format.pixel_size() {
                // 1 = 1 row
                Some(get_aligned_size(
                    Extent3d {
                        width: extent.width,
                        height: 1,
                        depth_or_array_layers: 1,
                    },
                    pixel_size as u32,
                ))
            } else {
                // Format has no fixed pixel size; leave unset.
                None
            }
        } else {
            None
        },
        rows_per_image: if extent.depth_or_array_layers > 1 {
            // Rows are counted in block rows for block-compressed formats.
            let (_, block_dimension_y) = format.block_dimensions();
            Some(extent.height / block_dimension_y)
        } else {
            None
        },
        offset: 0,
    }
}

View File

@@ -0,0 +1,186 @@
#define_import_path bevy_render::maths
// Common mathematical constants. Note: these are f32, so literals written with
// more than ~7 significant digits are rounded to f32 precision.
const PI: f32 = 3.141592653589793;      // π
const PI_2: f32 = 6.283185307179586;    // 2π
const HALF_PI: f32 = 1.57079632679;     // π/2
const FRAC_PI_3: f32 = 1.0471975512;    // π/3
const E: f32 = 2.718281828459045;       // exp(1)
// Expands a 2D affine transform (3 columns × 2 rows) into a full 3x3 matrix by
// appending the homogeneous row [0, 0, 1].
fn affine2_to_square(affine: mat3x2<f32>) -> mat3x3<f32> {
    return mat3x3<f32>(
        vec3<f32>(affine[0].xy, 0.0),
        vec3<f32>(affine[1].xy, 0.0),
        vec3<f32>(affine[2].xy, 1.0),
    );
}
// Expands a 3D affine transform packed as a mat3x4 (rows of the result) into a
// full column-major 4x4 matrix — hence the transpose after appending the
// homogeneous row.
fn affine3_to_square(affine: mat3x4<f32>) -> mat4x4<f32> {
    return transpose(mat4x4<f32>(
        affine[0],
        affine[1],
        affine[2],
        vec4<f32>(0.0, 0.0, 0.0, 1.0),
    ));
}
// Unpacks a 3x3 matrix that was tightly packed into a mat2x4 plus one extra
// float (9 floats total), re-slicing the 8+1 values into three columns.
fn mat2x4_f32_to_mat3x3_unpack(
    a: mat2x4<f32>,
    b: f32,
) -> mat3x3<f32> {
    return mat3x3<f32>(
        a[0].xyz,
        vec3<f32>(a[0].w, a[1].xy),
        vec3<f32>(a[1].zw, b),
    );
}
// Extracts the square portion of an affine matrix: i.e. discards the
// translation.
fn affine3_to_mat3x3(affine: mat4x3<f32>) -> mat3x3<f32> {
    return mat3x3<f32>(affine[0].xyz, affine[1].xyz, affine[2].xyz);
}
// Returns the inverse of a 3x3 matrix.
//
// Uses the adjugate method: the cross products of column pairs form the rows
// of the adjugate, and dot(matrix[2], cross(matrix[0], matrix[1])) is the
// determinant (scalar triple product).
fn inverse_mat3x3(matrix: mat3x3<f32>) -> mat3x3<f32> {
    let tmp0 = cross(matrix[1], matrix[2]);
    let tmp1 = cross(matrix[2], matrix[0]);
    let tmp2 = cross(matrix[0], matrix[1]);
    let inv_det = 1.0 / dot(matrix[2], tmp2);
    // The scaled cross products are rows of the inverse, hence the transpose.
    return transpose(mat3x3<f32>(tmp0 * inv_det, tmp1 * inv_det, tmp2 * inv_det));
}
// Returns the inverse of an affine matrix.
//
// The inverse of [M | t] is [M⁻¹ | -M⁻¹t]: invert the linear part, then
// transform the negated translation by it.
//
// https://en.wikipedia.org/wiki/Affine_transformation#Groups
fn inverse_affine3(affine: mat4x3<f32>) -> mat4x3<f32> {
    let matrix3 = affine3_to_mat3x3(affine);
    let inv_matrix3 = inverse_mat3x3(matrix3);
    return mat4x3<f32>(inv_matrix3[0], inv_matrix3[1], inv_matrix3[2], -(inv_matrix3 * affine[3]));
}
// Extracts the upper 3x3 portion of a 4x4 matrix.
fn mat4x4_to_mat3x3(m: mat4x4<f32>) -> mat3x3<f32> {
    let x_axis = m[0].xyz;
    let y_axis = m[1].xyz;
    let z_axis = m[2].xyz;
    return mat3x3<f32>(x_axis, y_axis, z_axis);
}
// Copy the sign bit from B onto A.
// copysign allows proper handling of negative zero to match the rust implementation of orthonormalize
//
// 0x7FFFFFFF masks off the sign bit of `a` (keeping magnitude); 0x80000000
// keeps only the sign bit of `b`; OR-ing combines them bit-exactly.
fn copysign(a: f32, b: f32) -> f32 {
    return bitcast<f32>((bitcast<u32>(a) & 0x7FFFFFFF) | (bitcast<u32>(b) & 0x80000000));
}
// Constructs a right-handed orthonormal basis from a given unit Z vector.
//
// NOTE: requires unit-length (normalized) input to function properly.
//
// https://jcgt.org/published/0006/01/01/paper.pdf
// this method of constructing a basis from a vec3 is also used by `glam::Vec3::any_orthonormal_pair`
// the construction of the orthonormal basis up and right vectors here needs to precisely match the rust
// implementation in bevy_light/spot_light.rs:spot_light_world_from_view
//
// Do not restyle or "simplify" these expressions: the exact op order must
// stay bit-compatible with the Rust implementation referenced above.
fn orthonormalize(z_basis: vec3<f32>) -> mat3x3<f32> {
    // Branchless sign handling (including -0.0) via copysign.
    let sign = copysign(1.0, z_basis.z);
    let a = -1.0 / (sign + z_basis.z);
    let b = z_basis.x * z_basis.y * a;
    let x_basis = vec3(1.0 + sign * z_basis.x * z_basis.x * a, sign * b, -sign * z_basis.x);
    let y_basis = vec3(b, sign + z_basis.y * z_basis.y * a, -z_basis.y);
    return mat3x3(x_basis, y_basis, z_basis);
}
// Returns true if any part of a sphere is on the positive side of a plane.
//
// `plane` is in Hessian normal form (xyz = normal, w = distance), so
// dot(plane, sphere_center) is the signed distance of the center from the
// plane; adding the radius tests the nearest point of the sphere.
//
// `sphere_center.w` should be 1.0.
//
// This is used for frustum culling.
fn sphere_intersects_plane_half_space(
    plane: vec4<f32>,
    sphere_center: vec4<f32>,
    sphere_radius: f32
) -> bool {
    return dot(plane, sphere_center) + sphere_radius > 0.0;
}
// Returns the distances along the ray to its intersections with a sphere
// centered at the origin.
//
// r: distance from the sphere center to the ray origin
// mu: cosine of the zenith angle
// sphere_radius: radius of the sphere
//
// Returns vec2(t0, t1). If there is no intersection, returns vec2(-1.0).
//
// Standard quadratic solution with the parameterization
// t² + 2·r·mu·t + (r² - R²) = 0, so t = -r·mu ± sqrt(discriminant).
fn ray_sphere_intersect(r: f32, mu: f32, sphere_radius: f32) -> vec2<f32> {
    let discriminant = r * r * (mu * mu - 1.0) + sphere_radius * sphere_radius;
    // No intersection
    if discriminant < 0.0 {
        return vec2(-1.0);
    }
    let q = -r * mu;
    let sqrt_discriminant = sqrt(discriminant);
    // Return both intersection distances (near, far).
    return vec2(
        q - sqrt_discriminant,
        q + sqrt_discriminant
    );
}
// pow() but safe for NaNs/negatives: raise the magnitude to `power`, then
// restore each component's original sign.
fn powsafe(color: vec3<f32>, power: f32) -> vec3<f32> {
    let magnitude = pow(abs(color), vec3(power));
    return magnitude * sign(color);
}
// Projects `lhs` onto `rhs`: rhs · (lhs·rhs) / |rhs|², computed with a
// reciprocal multiply.
// https://en.wikipedia.org/wiki/Vector_projection#Vector_projection_2
fn project_onto(lhs: vec3<f32>, rhs: vec3<f32>) -> vec3<f32> {
    let inv_rhs_len_sq = 1.0 / dot(rhs, rhs);
    return rhs * dot(lhs, rhs) * inv_rhs_len_sq;
}
// Below are fast approximations of common irrational and trig functions. These
// are likely most useful when raymarching, for example, where complete numeric
// accuracy can be sacrificed for greater sample count.

// Slightly less accurate than fast_acos_4, but much simpler.
//
// Linear approximation of acos near |x|, scaled by sqrt(1 - x); the result is
// mirrored (PI - res) for negative inputs since acos(-x) = PI - acos(x).
fn fast_acos(in_x: f32) -> f32 {
    let x = abs(in_x);
    var res = -0.156583 * x + HALF_PI;
    res *= sqrt(1.0 - x);
    return select(PI - res, res, in_x >= 0.0);
}
// 4th order polynomial approximation
// 4 VGRP, 16 ALU Full Rate
// 7 * 10^-5 radians precision
// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed.
fn fast_acos_4(x: f32) -> f32 {
    let x1 = abs(x);
    let x2 = x1 * x1;
    let x3 = x2 * x1;
    var s: f32;
    // Horner-style evaluation of the cubic polynomial in |x|.
    s = -0.2121144 * x1 + 1.5707288;
    s = 0.0742610 * x2 + s;
    s = -0.0187293 * x3 + s;
    s = sqrt(1.0 - x1) * s;
    // acos function mirroring: acos(-x) = PI - acos(x).
    return select(PI - s, s, x >= 0.0);
}
// Fast polynomial approximation of atan2(y, x).
//
// Computes atan of the ratio min/max of |x|, |y| (always in [0, 1]), then
// unfolds the result back into the correct octant/quadrant with selects.
fn fast_atan2(y: f32, x: f32) -> f32 {
    var t0 = max(abs(x), abs(y));
    var t1 = min(abs(x), abs(y));
    var t3 = t1 / t0;
    var t4 = t3 * t3;
    // Polynomial approximation of atan on [0, 1].
    t0 = 0.0872929;
    t0 = t0 * t4 - 0.301895;
    t0 = t0 * t4 + 1.0;
    t3 = t0 * t3;
    // Mirror across the 45° line when |y| > |x|.
    t3 = select(t3, (0.5 * PI) - t3, abs(y) > abs(x));
    // Reflect into the left half-plane when x < 0.
    t3 = select(t3, PI - t3, x < 0);
    // Negate for the lower half-plane.
    t3 = select(-t3, t3, y > 0);
    return t3;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
pub mod allocator;
use crate::render::{
render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets},
render_resource::TextureView,
texture::GpuImage,
RenderApp,
};
use allocator::MeshAllocatorPlugin;
use bevy_app::{App, Plugin, PostUpdate};
use bevy_asset::{AssetId, RenderAssetUsages};
use bevy_ecs::{
prelude::*,
system::{
lifetimeless::{SRes, SResMut},
SystemParamItem,
},
};
use bevy_mesh::morph::{MeshMorphWeights, MorphWeights};
use bevy_mesh::*;
use wgpu::IndexFormat;
/// Makes sure that [`Mesh`]es are extracted and prepared for the GPU.
/// Does *not* add the [`Mesh`] as an asset. Use [`MeshPlugin`] for that.
pub struct MeshRenderAssetPlugin;

impl Plugin for MeshRenderAssetPlugin {
    fn build(&self, app: &mut App) {
        app
            // 'Mesh' must be prepared after 'Image' as meshes rely on the morph target image being ready
            .add_plugins(RenderAssetPlugin::<RenderMesh, GpuImage>::default())
            .add_plugins(MeshAllocatorPlugin);
        // The render sub-app may be absent (e.g. headless); skip gracefully.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app.init_resource::<MeshVertexBufferLayouts>();
    }
}
/// [Inherit weights](inherit_weights) from glTF mesh parent entity to direct
/// bevy mesh child entities (ie: glTF primitive).
pub struct MorphPlugin;

impl Plugin for MorphPlugin {
    fn build(&self, app: &mut App) {
        // Runs in PostUpdate so weights set during Update are propagated the
        // same frame, before extraction.
        app.add_systems(PostUpdate, inherit_weights.in_set(InheritWeightSystems));
    }
}
/// Bevy meshes are gltf primitives, [`MorphWeights`] on the bevy node entity
/// should be inherited by children meshes.
///
/// Only direct children are updated, to fulfill the expectations of glTF spec.
pub fn inherit_weights(
    // Parents: entities with weights but no mesh, only when weights changed.
    morph_nodes: Query<(&Children, &MorphWeights), (Without<Mesh3d>, Changed<MorphWeights>)>,
    // Children: mesh entities carrying per-primitive weights.
    mut morph_primitives: Query<&mut MeshMorphWeights, With<Mesh3d>>,
) {
    for (children, parent_weights) in &morph_nodes {
        let mut iter = morph_primitives.iter_many_mut(children);
        while let Some(mut child_weight) = iter.fetch_next() {
            // Full overwrite: clear then copy the parent's weights.
            child_weight.clear_weights();
            child_weight.extend_weights(parent_weights.weights());
        }
    }
}
/// The render world representation of a [`Mesh`].
#[derive(Debug, Clone)]
pub struct RenderMesh {
    /// The number of vertices in the mesh.
    pub vertex_count: u32,
    /// Morph targets for the mesh, if present.
    ///
    /// This is a view into the morph-target texture of the prepared
    /// [`GpuImage`] (see `RenderAsset::prepare_asset` below).
    pub morph_targets: Option<TextureView>,
    /// Information about the mesh data buffers, including whether the mesh uses
    /// indices or not.
    pub buffer_info: RenderMeshBufferInfo,
    /// Precomputed pipeline key bits for this mesh.
    ///
    /// Encodes at least the primitive topology and the MORPH_TARGETS flag.
    pub key_bits: BaseMeshPipelineKey,
    /// A reference to the vertex buffer layout.
    ///
    /// Combined with [`RenderMesh::buffer_info`], this specifies the complete
    /// layout of the buffers associated with this mesh.
    pub layout: MeshVertexBufferLayoutRef,
}
impl RenderMesh {
    /// Returns the primitive topology of this mesh (triangles, triangle strips,
    /// etc.), decoded from the precomputed pipeline key bits.
    #[inline]
    pub fn primitive_topology(&self) -> PrimitiveTopology {
        self.key_bits.primitive_topology()
    }

    /// Returns true if this mesh uses an index buffer or false otherwise.
    #[inline]
    pub fn indexed(&self) -> bool {
        match self.buffer_info {
            RenderMeshBufferInfo::Indexed { .. } => true,
            RenderMeshBufferInfo::NonIndexed => false,
        }
    }
}
/// The index/vertex buffer info of a [`RenderMesh`].
#[derive(Debug, Clone)]
pub enum RenderMeshBufferInfo {
    /// The mesh is drawn with an index buffer.
    Indexed {
        // Number of indices to draw.
        count: u32,
        // Width of each index (u16 or u32).
        index_format: IndexFormat,
    },
    /// The mesh is drawn from the vertex buffer alone.
    NonIndexed,
}
impl RenderAsset for RenderMesh {
    type SourceAsset = Mesh;

    type Param = (
        SRes<RenderAssets<GpuImage>>,
        SResMut<MeshVertexBufferLayouts>,
    );

    #[inline]
    fn asset_usage(mesh: &Self::SourceAsset) -> RenderAssetUsages {
        mesh.asset_usage
    }

    /// Estimated GPU byte size: sum of all vertex-attribute formats times the
    /// vertex count, plus the raw index buffer bytes (0 if non-indexed).
    fn byte_len(mesh: &Self::SourceAsset) -> Option<usize> {
        let mut vertex_size = 0;
        for attribute_data in mesh.attributes() {
            let vertex_format = attribute_data.0.format;
            vertex_size += vertex_format.size() as usize;
        }
        let vertex_count = mesh.count_vertices();
        let index_bytes = mesh.get_index_buffer_bytes().map(<[_]>::len).unwrap_or(0);
        Some(vertex_size * vertex_count + index_bytes)
    }

    /// Converts the extracted mesh into a [`RenderMesh`].
    fn prepare_asset(
        mesh: Self::SourceAsset,
        _: AssetId<Self::SourceAsset>,
        (images, mesh_vertex_buffer_layouts): &mut SystemParamItem<Self::Param>,
        _: Option<&Self>,
    ) -> Result<Self, PrepareAssetError<Self::SourceAsset>> {
        let morph_targets = match mesh.morph_targets() {
            Some(mt) => {
                // The morph-target image may not be prepared yet; hand the
                // mesh back so preparation is retried next update.
                let Some(target_image) = images.get(mt) else {
                    return Err(PrepareAssetError::RetryNextUpdate(mesh));
                };
                Some(target_image.texture_view.clone())
            }
            None => None,
        };
        let buffer_info = match mesh.indices() {
            Some(indices) => RenderMeshBufferInfo::Indexed {
                count: indices.len() as u32,
                index_format: indices.into(),
            },
            None => RenderMeshBufferInfo::NonIndexed,
        };
        // Interns the layout in the shared registry and keeps a cheap ref.
        let mesh_vertex_buffer_layout =
            mesh.get_mesh_vertex_buffer_layout(mesh_vertex_buffer_layouts);
        let mut key_bits = BaseMeshPipelineKey::from_primitive_topology(mesh.primitive_topology());
        key_bits.set(
            BaseMeshPipelineKey::MORPH_TARGETS,
            mesh.morph_targets().is_some(),
        );
        Ok(RenderMesh {
            vertex_count: mesh.count_vertices() as u32,
            buffer_info,
            key_bits,
            layout: mesh_vertex_buffer_layout,
            morph_targets,
        })
    }
}

View File

@@ -0,0 +1,317 @@
#![expect(missing_docs, reason = "Not all docs are written yet, see #3492.")]
#![expect(unsafe_code, reason = "Unsafe code is used to improve performance.")]
#![cfg_attr(
any(docsrs, docsrs_dep),
expect(
internal_features,
reason = "rustdoc_internals is needed for fake_variadic"
)
)]
#![cfg_attr(any(docsrs, docsrs_dep), feature(doc_cfg, rustdoc_internals))]
// Copyright (c) 2019-2024 Bevy Contributors
// SPDX-License-Identifier: MIT OR Apache-2.0
//
// This code is vendored from Bevy: https://github.com/bevyengine/bevy
// Original repository: https://github.com/bevyengine/bevy
// Vendored from commit: 566358363126dd69f6e457e47f306c68f8041d2a (v0.17.2)
// Adapted for Marathon engine.
//
// This module contains vendored code from:
// - bevy_render 0.17.2 (core rendering)
// - bevy_core_pipeline 0.17.2 (render pipelines)
// - bevy_pbr 0.17.2 (materials and lighting)
//
// External dependencies (NOT vendored):
// - bevy_ecs, bevy_app, bevy_asset, bevy_transform, bevy_window, etc.
// Re-export macro from resource_macros
pub use crate::define_atomic_id;
// Re-export derive macros from macros
pub use macros::{AsBindGroup, RenderLabel, RenderSubGraph};
#[cfg(target_pointer_width = "16")]
compile_error!("bevy_render cannot compile for a 16-bit platform.");
// ============================================================================
// bevy_render modules
// ============================================================================
pub mod alpha;
pub mod batching;
pub mod camera;
pub mod diagnostic;
pub mod erased_render_asset;
pub mod experimental;
pub mod extract_component;
pub mod extract_instances;
mod extract_param;
pub mod extract_resource;
pub mod globals;
pub mod gpu_component_array_buffer;
pub mod gpu_readback;
pub mod mesh;
#[cfg(not(target_arch = "wasm32"))]
pub mod pipelined_rendering;
pub mod render_asset;
pub mod render_graph;
pub mod render_phase;
pub mod render_resource;
pub mod renderer;
pub mod settings;
pub mod storage;
pub mod sync_component;
pub mod sync_world;
pub mod texture;
pub mod view;
// ============================================================================
// bevy_core_pipeline modules
// ============================================================================
pub mod blit;
pub mod core_2d;
pub mod core_3d;
pub mod deferred;
pub mod oit;
pub mod prepass;
pub mod tonemapping;
pub mod upscaling;
pub mod skybox;
pub use skybox::Skybox;
mod fullscreen_vertex_shader;
pub use fullscreen_vertex_shader::FullscreenShader;
// ============================================================================
// bevy_pbr module
// ============================================================================
pub mod pbr;
// Re-export commonly used types from pbr for convenience
pub use pbr::StandardMaterial;
// These light and shadow types come from bevy_light
pub use bevy_light::{
AmbientLight, DirectionalLight, DirectionalLightShadowMap,
NotShadowCaster, NotShadowReceiver, PointLight, PointLightShadowMap,
SpotLight, TransmittedShadowReceiver,
};
// ============================================================================
// Re-exports from bevy_render for convenience
// ============================================================================
pub use alpha::AlphaMode;
pub use camera::CameraRenderGraph;
// These camera types come from bevy_camera, not vendored code
pub use bevy_camera::{Camera, Camera2d, Camera3d, OrthographicProjection, PerspectiveProjection, Projection, ScalingMode};
pub use extract_component::{ExtractComponent, ExtractComponentPlugin};
pub use extract_resource::{ExtractResource, ExtractResourcePlugin};
pub use bevy_mesh::{Mesh3d, Meshable};
// MeshMaterial3d is from pbr module
pub use pbr::MeshMaterial3d;
pub use render_asset::{RenderAssetPlugin, prepare_assets};
// These come from bevy_asset
pub use bevy_asset::RenderAssetUsages;
pub use render_graph::RenderGraph;
pub use render_phase::{
BinnedRenderPhase, CachedRenderPipelinePhaseItem, DrawFunctions, PhaseItem, RenderCommand,
RenderCommandState, SortedRenderPhase, TrackedRenderPass,
};
pub use render_resource::{
BindGroup, BindGroupEntries, BindGroupLayout, Buffer, BufferUsages, BufferVec,
ComputePipeline, PipelineCache, RenderPipeline, Sampler,
Texture, TextureFormat, TextureUsages,
};
// These shader types come from bevy_shader
pub use bevy_shader::{ShaderDefVal, ShaderRef};
pub use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue};
pub use settings::{RenderCreation, WgpuSettings};
pub use texture::GpuImage;
// These come from bevy_image
pub use bevy_image::{BevyDefault, Image, ImageFormat, ImageSampler, TextureFormatPixelInfo};
pub use view::{
ColorGrading, ExtractedView, Msaa, ViewTarget,
};
pub use bevy_camera::Exposure;
// These come from bevy_camera
pub use bevy_camera::visibility::{RenderLayers, VisibleEntities};
// Tonemapping comes from the vendored tonemapping module
pub use tonemapping::Tonemapping;
// ============================================================================
// Prelude module (from bevy_render)
// ============================================================================
/// Commonly used render types, re-exported for glob import.
pub mod prelude {
    #[doc(hidden)]
    pub use crate::render::{
        alpha::AlphaMode,
        camera::NormalizedRenderTargetExt as _,
        texture::ManualTextureViews,
        view::Msaa,
        ExtractSchedule,
    };
}
// ============================================================================
// Additional re-exports from bevy_render
// ============================================================================
pub use extract_param::Extract;
pub use sync_world::{RenderEntity, SyncToRenderWorld};
// Re-export main plugin types
// Note: RenderPlugin is defined in bevy_render's lib.rs and wasn't vendored
// MainWorld is defined above in this file
// Re-export schedule types
#[cfg(not(target_arch = "wasm32"))]
pub use pipelined_rendering::PipelinedRenderingPlugin;
// Re-export RenderSystems and other core types
use bevy_ecs::schedule::{ScheduleLabel, SystemSet};
use bitflags::bitflags;
/// The systems sets of the default rendering schedule.
///
/// These sets partition the [`Render`] schedule into phases; systems opt in
/// with `.in_set(RenderSystems::…)`.
// NOTE(review): variant ordering/semantics are inherited from the vendored
// upstream; see bevy_render's RenderSystems docs for the authoritative order.
#[derive(Debug, Hash, PartialEq, Eq, Clone, SystemSet)]
pub enum RenderSystems {
    // Apply the commands recorded during extraction.
    ExtractCommands,
    // Prepare render assets (see `prepare_assets`).
    PrepareAssets,
    PrepareMeshes,
    ManageViews,
    // Queue drawable entities into render phases.
    Queue,
    QueueMeshes,
    QueueSweep,
    // Sort queued phase items.
    PhaseSort,
    Prepare,
    // GPU resource preparation sub-phases (e.g. `prepare_oit_buffers` runs in
    // PrepareResources).
    PrepareResources,
    PrepareResourcesCollectPhaseBuffers,
    PrepareResourcesFlush,
    PrepareBindGroups,
    // Record and submit GPU work.
    Render,
    Cleanup,
    PostCleanup,
}
bitflags! {
    /// Debugging flags that can optionally be set when constructing the renderer.
    #[derive(Clone, Copy, PartialEq, Default, Debug)]
    pub struct RenderDebugFlags: u8 {
        // Permit CPU readback/copies of GPU indirect-parameter buffers (for
        // debugging GPU-driven rendering).
        const ALLOW_COPIES_FROM_INDIRECT_PARAMETERS = 1;
    }
}
/// The schedule that extracts data from the main world into the render world.
///
/// (The previous doc comment, "The startup schedule of the RenderApp",
/// described [`RenderStartup`], not this label.)
#[derive(ScheduleLabel, Debug, Hash, PartialEq, Eq, Clone, Default)]
pub struct ExtractSchedule;
/// The startup schedule of the [`RenderApp`]
#[derive(ScheduleLabel, Debug, Hash, PartialEq, Eq, Clone, Default)]
pub struct RenderStartup;

/// The main render schedule.
#[derive(ScheduleLabel, Debug, Hash, PartialEq, Eq, Clone, Default)]
pub struct Render;

/// A label for the rendering sub-app.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, bevy_app::AppLabel)]
pub struct RenderApp;
use bevy_ecs::world::World;
use bevy_ecs::resource::Resource;
use core::ops::{Deref, DerefMut};
/// The main app's [`World`], made available to the render world as a resource.
///
/// See [`Extract`] for more details.
#[derive(Resource, Default)]
pub struct MainWorld(World);

// Deref/DerefMut let extraction systems use the wrapper as a `World` directly.
impl Deref for MainWorld {
    type Target = World;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for MainWorld {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
/// Graph module for camera driver label
pub mod graph {
    use crate::render::render_graph::RenderLabel;

    /// Label of the render-graph node that drives per-camera rendering.
    #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
    pub struct CameraDriverLabel;
}
/// Get the Adreno GPU model number if the adapter is an Adreno GPU.
///
/// Returns `None` off Android or when the adapter name lacks the
/// `"Adreno (TM) "` prefix. Only the leading digits are parsed, so suffixed
/// models like "Adreno 642L" yield `642`.
pub fn get_adreno_model(adapter_info: &renderer::RenderAdapterInfo) -> Option<u32> {
    if !cfg!(target_os = "android") {
        return None;
    }
    let model_str = adapter_info.name.strip_prefix("Adreno (TM) ")?;
    // Accumulate decimal digits until the first non-digit character.
    let mut model: u32 = 0;
    for digit in model_str.chars().map_while(|c| c.to_digit(10)) {
        model = model * 10 + digit;
    }
    Some(model)
}
/// Get the Mali driver version if the adapter is a Mali GPU.
pub fn get_mali_driver_version(adapter_info: &renderer::RenderAdapterInfo) -> Option<u32> {
if !cfg!(target_os = "android") {
return None;
}
if !adapter_info.name.contains("Mali") {
return None;
}
let driver_info = &adapter_info.driver_info;
if let Some(start_pos) = driver_info.find("v1.r")
&& let Some(end_pos) = driver_info[start_pos..].find('p')
{
let start_idx = start_pos + 4; // Skip "v1.r"
let end_idx = start_pos + end_pos;
driver_info[start_idx..end_idx].parse().ok()
} else {
None
}
}
// ============================================================================
// bevy_core_pipeline plugin and re-exports
// ============================================================================
use bevy_app::{App, Plugin};
use bevy_asset::embedded_asset;
/// Installs the vendored bevy_core_pipeline plugins (2D/3D cores, deferred
/// lighting-id copy, blit, tonemapping, upscaling, OIT, mip generation) and
/// registers the fullscreen vertex shader.
#[derive(Default)]
pub struct CorePipelinePlugin;

impl Plugin for CorePipelinePlugin {
    fn build(&self, app: &mut App) {
        embedded_asset!(app, "fullscreen_vertex_shader/fullscreen.wgsl");

        app.add_plugins((core_2d::Core2dPlugin, core_3d::Core3dPlugin, deferred::copy_lighting_id::CopyDeferredLightingIdPlugin))
            .add_plugins((
                blit::BlitPlugin,
                tonemapping::TonemappingPlugin,
                upscaling::UpscalingPlugin,
                oit::OrderIndependentTransparencyPlugin,
                experimental::mip_generation::MipGenerationPlugin,
            ));

        // FullscreenShader is only initialized on non-wasm targets.
        #[cfg(not(target_arch = "wasm32"))]
        {
            let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
                return;
            };
            render_app.init_resource::<FullscreenShader>();
        }
    }
}

View File

@@ -0,0 +1,297 @@
//! Order Independent Transparency (OIT) for 3d rendering. See [`OrderIndependentTransparencyPlugin`] for more details.
use bevy_app::prelude::*;
use bevy_camera::{Camera, Camera3d};
use bevy_ecs::{component::*, lifecycle::ComponentHook, prelude::*};
use bevy_math::UVec2;
use bevy_platform::collections::HashSet;
use bevy_platform::time::Instant;
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use crate::render::{
camera::ExtractedCamera,
extract_component::{ExtractComponent, ExtractComponentPlugin},
render_graph::{RenderGraphExt, ViewNodeRunner},
render_resource::{BufferUsages, BufferVec, DynamicUniformBuffer, ShaderType, TextureUsages},
renderer::{RenderDevice, RenderQueue},
view::Msaa,
Render, RenderApp, RenderStartup, RenderSystems,
};
use bevy_shader::load_shader_library;
use bevy_window::PrimaryWindow;
use resolve::{
node::{OitResolveNode, OitResolvePass},
OitResolvePlugin,
};
use tracing::{trace, warn};
use crate::render::core_3d::graph::{Core3d, Node3d};
/// Module that defines the necessary systems to resolve the OIT buffer and render it to the screen.
pub mod resolve;
/// Used to identify which camera will use OIT to render transparent meshes
/// and to configure OIT.
// TODO consider supporting multiple OIT techniques like WBOIT, Moment Based OIT,
// depth peeling, stochastic transparency, ray tracing etc.
// This should probably be done by adding an enum to this component.
// We use the same struct to pass on the settings to the drawing shader.
//
// NOTE: `ShaderType` is derived, so the field layout here must stay in sync
// with the uniform consumed by the OIT shaders.
#[derive(Clone, Copy, ExtractComponent, Reflect, ShaderType)]
#[reflect(Clone, Default)]
pub struct OrderIndependentTransparencySettings {
    /// Controls how many layers will be used to compute the blending.
    /// The more layers you use the more memory it will use but it will also give better results.
    /// 8 is generally recommended, going above 32 is probably not worth it in the vast majority of cases
    pub layer_count: i32,
    /// Threshold for which fragments will be added to the blending layers.
    /// This can be tweaked to optimize quality / layers count. Higher values will
    /// allow lower number of layers and a better performance, compromising quality.
    pub alpha_threshold: f32,
}
impl Default for OrderIndependentTransparencySettings {
fn default() -> Self {
Self {
layer_count: 8,
alpha_threshold: 0.0,
}
}
}
// OrderIndependentTransparencySettings is also a Component. We explicitly implement the trait so
// we can hook on_add to issue a warning in case `layer_count` is seemingly too high.
impl Component for OrderIndependentTransparencySettings {
    // Sparse-set storage: this component is added/removed rarely and to few
    // entities (cameras).
    const STORAGE_TYPE: StorageType = StorageType::SparseSet;
    type Mutability = Mutable;

    // Warn at insertion time, pointing at the caller's location when known.
    fn on_add() -> Option<ComponentHook> {
        Some(|world, context| {
            if let Some(value) = world.get::<OrderIndependentTransparencySettings>(context.entity)
                && value.layer_count > 32
            {
                warn!("{}OrderIndependentTransparencySettings layer_count set to {} might be too high.",
                    context.caller.map(|location|format!("{location}: ")).unwrap_or_default(),
                    value.layer_count
                );
            }
        })
    }
}
/// A plugin that adds support for Order Independent Transparency (OIT).
/// This can correctly render some scenes that would otherwise have artifacts due to alpha blending, but uses more memory.
///
/// To enable OIT for a camera you need to add the [`OrderIndependentTransparencySettings`] component to it.
///
/// If you want to use OIT for your custom material you need to call `oit_draw(position, color)` in your fragment shader.
/// You also need to make sure that your fragment shader doesn't output any colors.
///
/// # Implementation details
/// This implementation uses 2 passes.
///
/// The first pass writes the depth and color of all the fragments to a big buffer.
/// The buffer contains N layers for each pixel, where N can be set with [`OrderIndependentTransparencySettings::layer_count`].
/// This pass is essentially a forward pass.
///
/// The second pass is a single fullscreen triangle pass that sorts all the fragments then blends them together
/// and outputs the result to the screen.
pub struct OrderIndependentTransparencyPlugin;

impl Plugin for OrderIndependentTransparencyPlugin {
    fn build(&self, app: &mut App) {
        load_shader_library!(app, "oit_draw.wgsl");

        app.add_plugins((
            ExtractComponentPlugin::<OrderIndependentTransparencySettings>::default(),
            OitResolvePlugin,
        ))
        // OIT is incompatible with MSAA; check every frame in the main world.
        .add_systems(Update, check_msaa)
        .add_systems(Last, configure_depth_texture_usages);

        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .add_systems(RenderStartup, init_oit_buffers)
            .add_systems(
                Render,
                prepare_oit_buffers.in_set(RenderSystems::PrepareResources),
            );

        // Resolve pass runs between the main transparent pass and end-of-pass.
        render_app
            .add_render_graph_node::<ViewNodeRunner<OitResolveNode>>(Core3d, OitResolvePass)
            .add_render_graph_edges(
                Core3d,
                (
                    Node3d::MainTransparentPass,
                    OitResolvePass,
                    Node3d::EndMainPass,
                ),
            );
    }
}
// WARN This should only happen for cameras with the [`OrderIndependentTransparencySettings`] component
// but when multiple cameras are present on the same window
// bevy reuses the same depth texture so we need to set this on all cameras with the same render target.
fn configure_depth_texture_usages(
    p: Query<Entity, With<PrimaryWindow>>,
    cameras: Query<(&Camera, Has<OrderIndependentTransparencySettings>)>,
    mut new_cameras: Query<(&mut Camera3d, &Camera), Added<Camera3d>>,
) {
    // Only newly added Camera3d entities ever need their usages patched.
    if new_cameras.is_empty() {
        return;
    }

    // Find all the render target that potentially uses OIT
    let primary_window = p.single().ok();
    let mut render_target_has_oit = <HashSet<_>>::default();
    for (camera, has_oit) in &cameras {
        if has_oit {
            render_target_has_oit.insert(camera.target.normalize(primary_window));
        }
    }

    // Update the depth texture usage for cameras with a render target that has OIT
    for (mut camera_3d, camera) in &mut new_cameras {
        if render_target_has_oit.contains(&camera.target.normalize(primary_window)) {
            // The resolve pass samples the depth texture, so it must be
            // bindable in addition to being an attachment.
            let mut usages = TextureUsages::from(camera_3d.depth_texture_usages);
            usages |= TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING;
            camera_3d.depth_texture_usages = usages.into();
        }
    }
}
/// Panics if any OIT-enabled camera has MSAA turned on: the per-pixel OIT
/// fragment lists do not support multisampled targets.
fn check_msaa(cameras: Query<&Msaa, With<OrderIndependentTransparencySettings>>) {
    cameras.iter().for_each(|msaa| {
        if msaa.samples() > 1 {
            panic!("MSAA is not supported when using OrderIndependentTransparency");
        }
    });
}
/// Holds the buffers that contain the data of all OIT layers.
/// We use one big buffer for the entire app. Each camera will reuse it so it will
/// always be the size of the biggest OIT enabled camera.
#[derive(Resource)]
pub struct OitBuffers {
    /// The OIT layers containing depth and color for each fragments.
    /// This is essentially used as a 3d array where xy is the screen coordinate and z is
    /// the list of fragments rendered with OIT.
    ///
    /// Each element packs color and depth/alpha into a `vec2<u32>` (see
    /// `oit_draw.wgsl`).
    pub layers: BufferVec<UVec2>,
    /// Buffer containing the index of the last layer that was written for each fragment.
    pub layer_ids: BufferVec<i32>,
    /// Per-camera OIT settings uniform, indexed via dynamic offsets.
    pub settings: DynamicUniformBuffer<OrderIndependentTransparencySettings>,
}
/// Creates the shared [`OitBuffers`] resource at render-app startup.
pub fn init_oit_buffers(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
) {
    // initialize buffers with something so there's a valid binding

    let mut layers = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
    layers.set_label(Some("oit_layers"));
    layers.reserve(1, &render_device);
    layers.write_buffer(&render_device, &render_queue);

    let mut layer_ids = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
    layer_ids.set_label(Some("oit_layer_ids"));
    layer_ids.reserve(1, &render_device);
    layer_ids.write_buffer(&render_device, &render_queue);

    // Uniform contents are written later, in `prepare_oit_buffers`.
    let mut settings = DynamicUniformBuffer::default();
    settings.set_label(Some("oit_settings"));

    commands.insert_resource(OitBuffers {
        layers,
        layer_ids,
        settings,
    });
}
/// Per-camera dynamic offset into [`OitBuffers::settings`], inserted by
/// `prepare_oit_buffers`.
#[derive(Component)]
pub struct OrderIndependentTransparencySettingsOffset {
    // Byte offset to bind for this camera's settings uniform.
    pub offset: u32,
}
/// This creates or resizes the oit buffers for each camera.
/// It will always create one big buffer that's as big as the biggest buffer needed.
/// Cameras with smaller viewports or less layers will simply use the big buffer and ignore the rest.
pub fn prepare_oit_buffers(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    // NOTE(review): a tuple filter means BOTH components must have changed the
    // same frame. If the intent is "either changed" (e.g. a resize alone),
    // this should be `Or<(Changed<..>, Changed<..>)>` — confirm against
    // upstream bevy_core_pipeline.
    cameras: Query<
        (&ExtractedCamera, &OrderIndependentTransparencySettings),
        (
            Changed<ExtractedCamera>,
            Changed<OrderIndependentTransparencySettings>,
        ),
    >,
    camera_oit_uniforms: Query<(Entity, &OrderIndependentTransparencySettings)>,
    mut buffers: ResMut<OitBuffers>,
) {
    // Get the max buffer size for any OIT enabled camera
    let mut max_layer_ids_size = usize::MIN;
    let mut max_layers_size = usize::MIN;
    for (camera, settings) in &cameras {
        let Some(size) = camera.physical_target_size else {
            continue;
        };
        let layer_count = settings.layer_count as usize;
        // One layer-id per pixel; `layer_count` layer entries per pixel.
        let size = (size.x * size.y) as usize;
        max_layer_ids_size = max_layer_ids_size.max(size);
        max_layers_size = max_layers_size.max(size * layer_count);
    }

    // Create or update the layers buffer based on the max size
    if buffers.layers.capacity() < max_layers_size {
        let start = Instant::now();
        buffers.layers.reserve(max_layers_size, &render_device);
        // NOTE(review): `reserve` grows `capacity()` up to `max_layers_size`,
        // which makes `remaining` 0 here so no zeroed elements are pushed —
        // confirm whether this should be based on `len()` instead.
        let remaining = max_layers_size - buffers.layers.capacity();
        for _ in 0..remaining {
            buffers.layers.push(UVec2::ZERO);
        }
        buffers.layers.write_buffer(&render_device, &render_queue);
        trace!(
            "OIT layers buffer updated in {:.01}ms with total size {} MiB",
            start.elapsed().as_millis(),
            buffers.layers.capacity() * size_of::<UVec2>() / 1024 / 1024,
        );
    }

    // Create or update the layer_ids buffer based on the max size
    if buffers.layer_ids.capacity() < max_layer_ids_size {
        let start = Instant::now();
        buffers
            .layer_ids
            .reserve(max_layer_ids_size, &render_device);
        let remaining = max_layer_ids_size - buffers.layer_ids.capacity();
        for _ in 0..remaining {
            buffers.layer_ids.push(0);
        }
        buffers
            .layer_ids
            .write_buffer(&render_device, &render_queue);
        trace!(
            "OIT layer ids buffer updated in {:.01}ms with total size {} MiB",
            start.elapsed().as_millis(),
            // Fix: `layer_ids` stores `i32`, not `UVec2`; using
            // `size_of::<UVec2>()` here overstated the reported size by 2x.
            buffers.layer_ids.capacity() * size_of::<i32>() / 1024 / 1024,
        );
    }

    // Write one settings uniform per OIT camera and record its dynamic offset.
    if let Some(mut writer) = buffers.settings.get_writer(
        camera_oit_uniforms.iter().len(),
        &render_device,
        &render_queue,
    ) {
        for (entity, settings) in &camera_oit_uniforms {
            let offset = writer.write(settings);
            commands
                .entity(entity)
                .insert(OrderIndependentTransparencySettingsOffset { offset });
        }
    }
}

View File

@@ -0,0 +1,48 @@
#define_import_path bevy_core_pipeline::oit
#import bevy_pbr::mesh_view_bindings::{view, oit_layers, oit_layer_ids, oit_settings}
#ifdef OIT_ENABLED
// Add the fragment to the oit buffer.
//
// The layers buffer is laid out as `layer_count` screen-sized slices: the
// entry for pixel p in layer L lives at `p + L * buffer_size`.
fn oit_draw(position: vec4f, color: vec4f) {
    // Don't add fully transparent fragments to the list
    // because we don't want to have to sort them in the resolve pass
    if color.a < oit_settings.alpha_threshold {
        return;
    }
    // get the index of the current fragment relative to the screen size
    // (view.viewport.z is the viewport width in pixels)
    let screen_index = i32(floor(position.x) + floor(position.y) * view.viewport.z);
    // get the size of the buffer.
    // It's always the size of the screen
    let buffer_size = i32(view.viewport.z * view.viewport.w);

    // gets the layer index of the current fragment
    // (atomic: many fragments of the same pixel may race here)
    var layer_id = atomicAdd(&oit_layer_ids[screen_index], 1);
    // exit early if we've reached the maximum amount of fragments per layer
    if layer_id >= oit_settings.layers_count {
        // force to store the oit_layers_count to make sure we don't
        // accidentally increase the index above the maximum value
        atomicStore(&oit_layer_ids[screen_index], oit_settings.layers_count);
        // TODO for tail blending we should return the color here
        return;
    }

    // get the layer_index from the screen
    let layer_index = screen_index + layer_id * buffer_size;
    // Pack color as shared-exponent RGB9E5 and depth+alpha as 24+8 bits.
    let rgb9e5_color = bevy_pbr::rgb9e5::vec3_to_rgb9e5_(color.rgb);
    let depth_alpha = pack_24bit_depth_8bit_alpha(position.z, color.a);
    oit_layers[layer_index] = vec2(rgb9e5_color, depth_alpha);
}
#endif // OIT_ENABLED
// Packs a [0, 1] depth into the low 24 bits and a [0, 1] alpha into the high
// 8 bits of a u32. Both are clamped (saturate) and rounded to nearest (+0.5).
fn pack_24bit_depth_8bit_alpha(depth: f32, alpha: f32) -> u32 {
    let depth_bits = u32(saturate(depth) * f32(0xFFFFFFu) + 0.5);
    let alpha_bits = u32(saturate(alpha) * f32(0xFFu) + 0.5);
    return (depth_bits & 0xFFFFFFu) | ((alpha_bits & 0xFFu) << 24u);
}
// Inverse of pack_24bit_depth_8bit_alpha: low 24 bits hold depth, high 8 bits
// hold alpha; both are rescaled back to [0, 1].
fn unpack_24bit_depth_8bit_alpha(packed: u32) -> vec2<f32> {
    let depth_bits = packed & 0xFFFFFFu;
    let alpha_bits = (packed >> 24u) & 0xFFu;
    let depth = f32(depth_bits) / f32(0xFFFFFFu);
    let alpha = f32(alpha_bits) / f32(0xFFu);
    return vec2(depth, alpha);
}

View File

@@ -0,0 +1,255 @@
use super::OitBuffers;
use crate::render::{oit::OrderIndependentTransparencySettings, FullscreenShader};
use bevy_app::Plugin;
use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer};
use bevy_derive::Deref;
use bevy_ecs::{
entity::{EntityHashMap, EntityHashSet},
prelude::*,
};
use bevy_image::BevyDefault as _;
use crate::render::{
render_resource::{
binding_types::{storage_buffer_sized, texture_depth_2d, uniform_buffer},
BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, BlendComponent,
BlendState, CachedRenderPipelineId, ColorTargetState, ColorWrites, DownlevelFlags,
FragmentState, PipelineCache, RenderPipelineDescriptor, ShaderStages, TextureFormat,
},
renderer::{RenderAdapter, RenderDevice},
view::{ExtractedView, ViewTarget, ViewUniform, ViewUniforms},
Render, RenderApp, RenderSystems,
};
use bevy_shader::ShaderDefVal;
use bevy_utils::default;
use tracing::warn;
/// Contains the render node used to run the resolve pass.
pub mod node;
/// Minimum required value of `wgpu::Limits::max_storage_buffers_per_shader_stage`.
pub const OIT_REQUIRED_STORAGE_BUFFERS: u32 = 2;
/// Plugin needed to resolve the Order Independent Transparency (OIT) buffer to the screen.
pub struct OitResolvePlugin;
impl Plugin for OitResolvePlugin {
    fn build(&self, app: &mut bevy_app::App) {
        // Register the resolve shader so it can later be fetched with
        // `load_embedded_asset!` when specializing the pipeline.
        embedded_asset!(app, "oit_resolve.wgsl");
    }
    fn finish(&self, app: &mut bevy_app::App) {
        // GPU-dependent setup lives in `finish`: the render sub-app and the
        // adapter/device resources queried below only exist by this point.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        // Skip all OIT resolve setup (with a warning) on hardware that lacks
        // the required capabilities; the rest of the renderer keeps working.
        if !is_oit_supported(
            render_app.world().resource::<RenderAdapter>(),
            render_app.world().resource::<RenderDevice>(),
            true,
        ) {
            return;
        }
        render_app
            .add_systems(
                Render,
                (
                    queue_oit_resolve_pipeline.in_set(RenderSystems::Queue),
                    prepare_oit_resolve_bind_group.in_set(RenderSystems::PrepareBindGroups),
                ),
            )
            .init_resource::<OitResolvePipeline>();
    }
}
/// Reports whether the current adapter/device pair can run the OIT resolve pass.
///
/// When `warn` is `true`, a warning describing the missing capability is logged
/// before returning `false`.
pub fn is_oit_supported(adapter: &RenderAdapter, device: &RenderDevice, warn: bool) -> bool {
    // OIT writes to storage buffers from the fragment stage.
    let has_fragment_writable_storage = adapter
        .get_downlevel_capabilities()
        .flags
        .contains(DownlevelFlags::FRAGMENT_WRITABLE_STORAGE);
    if !has_fragment_writable_storage {
        if warn {
            warn!("OrderIndependentTransparencyPlugin not loaded. GPU lacks support: DownlevelFlags::FRAGMENT_WRITABLE_STORAGE.");
        }
        return false;
    }
    // Two storage buffers are needed: the layers buffer and the layer-id buffer.
    let max_storage_buffers_per_shader_stage = device.limits().max_storage_buffers_per_shader_stage;
    if max_storage_buffers_per_shader_stage < OIT_REQUIRED_STORAGE_BUFFERS {
        if warn {
            warn!(
                max_storage_buffers_per_shader_stage,
                OIT_REQUIRED_STORAGE_BUFFERS,
                "OrderIndependentTransparencyPlugin not loaded. RenderDevice lacks support: max_storage_buffers_per_shader_stage < OIT_REQUIRED_STORAGE_BUFFERS."
            );
        }
        return false;
    }
    true
}
/// Bind group for the OIT resolve pass.
///
/// Inserted as a resource by [`prepare_oit_resolve_bind_group`] and consumed by
/// the resolve render node.
#[derive(Resource, Deref)]
pub struct OitResolveBindGroup(pub BindGroup);
/// Bind group layouts used for the OIT resolve pass.
#[derive(Resource)]
pub struct OitResolvePipeline {
    /// View bind group layout (view uniform + layers/layer-id storage buffers).
    pub view_bind_group_layout: BindGroupLayout,
    /// Depth bind group layout (a single depth texture).
    pub oit_depth_bind_group_layout: BindGroupLayout,
}
impl FromWorld for OitResolvePipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.resource::<RenderDevice>();
        // Entry order must match the @binding indices in oit_resolve.wgsl:
        // 0 = view uniform, 1 = layers buffer, 2 = layer-id buffer.
        let view_bind_group_layout = render_device.create_bind_group_layout(
            "oit_resolve_bind_group_layout",
            &BindGroupLayoutEntries::sequential(
                ShaderStages::FRAGMENT,
                (
                    // `true` = dynamic offset, so one uniform buffer serves all views.
                    uniform_buffer::<ViewUniform>(true),
                    // layers
                    storage_buffer_sized(false, None),
                    // layer ids
                    storage_buffer_sized(false, None),
                ),
            ),
        );
        // The depth texture lives in its own group (group 1) because it is
        // re-created per view, unlike the shared buffers above.
        let oit_depth_bind_group_layout = render_device.create_bind_group_layout(
            "oit_depth_bind_group_layout",
            &BindGroupLayoutEntries::single(ShaderStages::FRAGMENT, texture_depth_2d()),
        );
        OitResolvePipeline {
            view_bind_group_layout,
            oit_depth_bind_group_layout,
        }
    }
}
/// Per-view component holding the cached id of the specialized resolve pipeline.
#[derive(Component, Deref, Clone, Copy)]
pub struct OitResolvePipelineId(pub CachedRenderPipelineId);
/// This key is used to cache the pipeline id and to specialize the render pipeline descriptor.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct OitResolvePipelineKey {
    // Selects the HDR or SDR render-target format.
    hdr: bool,
    // Baked into the shader as the LAYER_COUNT def; changing it forces respecialization.
    layer_count: i32,
}
/// Queues (or reuses) a specialized OIT resolve pipeline for every view that has
/// [`OrderIndependentTransparencySettings`], inserting an [`OitResolvePipelineId`]
/// component on the view entity.
pub fn queue_oit_resolve_pipeline(
    mut commands: Commands,
    pipeline_cache: Res<PipelineCache>,
    resolve_pipeline: Res<OitResolvePipeline>,
    views: Query<
        (
            Entity,
            &ExtractedView,
            &OrderIndependentTransparencySettings,
        ),
        With<OrderIndependentTransparencySettings>,
    >,
    fullscreen_shader: Res<FullscreenShader>,
    asset_server: Res<AssetServer>,
    // Store the key with the id to make the clean up logic easier.
    // This also means it will always replace the entry if the key changes so nothing to clean up.
    mut cached_pipeline_id: Local<EntityHashMap<(OitResolvePipelineKey, CachedRenderPipelineId)>>,
) {
    let mut current_view_entities = EntityHashSet::default();
    for (e, view, oit_settings) in &views {
        current_view_entities.insert(e);
        let key = OitResolvePipelineKey {
            hdr: view.hdr,
            layer_count: oit_settings.layer_count,
        };
        // Fast path: the key is unchanged, so reuse the cached pipeline id.
        if let Some((cached_key, id)) = cached_pipeline_id.get(&e)
            && *cached_key == key
        {
            commands.entity(e).insert(OitResolvePipelineId(*id));
            continue;
        }
        // New view or changed key: specialize and queue a fresh pipeline.
        let desc = specialize_oit_resolve_pipeline(
            key,
            &resolve_pipeline,
            &fullscreen_shader,
            &asset_server,
        );
        let pipeline_id = pipeline_cache.queue_render_pipeline(desc);
        commands.entity(e).insert(OitResolvePipelineId(pipeline_id));
        cached_pipeline_id.insert(e, (key, pipeline_id));
    }
    // Clear cache for views that don't exist anymore.
    // `retain` does this in one pass, without collecting the keys into a
    // temporary `Vec` first as the previous implementation did.
    cached_pipeline_id.retain(|e, _| current_view_entities.contains(e));
}
/// Builds the resolve pipeline descriptor for a given key.
///
/// The key's `hdr` flag picks the color-target format; its `layer_count` is
/// injected as the `LAYER_COUNT` shader def, which sizes the per-fragment
/// layer array in `oit_resolve.wgsl`.
fn specialize_oit_resolve_pipeline(
    key: OitResolvePipelineKey,
    resolve_pipeline: &OitResolvePipeline,
    fullscreen_shader: &FullscreenShader,
    asset_server: &AssetServer,
) -> RenderPipelineDescriptor {
    let format = if key.hdr {
        ViewTarget::TEXTURE_FORMAT_HDR
    } else {
        TextureFormat::bevy_default()
    };
    RenderPipelineDescriptor {
        label: Some("oit_resolve_pipeline".into()),
        // Group 0 = view/buffers, group 1 = depth; must match the bind group
        // indices set by the resolve node.
        layout: vec![
            resolve_pipeline.view_bind_group_layout.clone(),
            resolve_pipeline.oit_depth_bind_group_layout.clone(),
        ],
        fragment: Some(FragmentState {
            shader: load_embedded_asset!(asset_server, "oit_resolve.wgsl"),
            shader_defs: vec![ShaderDefVal::UInt(
                "LAYER_COUNT".into(),
                key.layer_count as u32,
            )],
            targets: vec![Some(ColorTargetState {
                format,
                // Standard premultiplied-alpha OVER blending onto the view target.
                blend: Some(BlendState {
                    color: BlendComponent::OVER,
                    alpha: BlendComponent::OVER,
                }),
                write_mask: ColorWrites::ALL,
            })],
            ..default()
        }),
        // Fullscreen vertex stage shared with other post-process passes.
        vertex: fullscreen_shader.to_vertex_state(),
        ..default()
    }
}
/// Creates the [`OitResolveBindGroup`] resource binding the view uniforms and
/// the OIT layers/layer-id storage buffers.
///
/// Does nothing if any of the three bindings is not yet available (e.g. before
/// the buffers have been written this frame).
pub fn prepare_oit_resolve_bind_group(
    mut commands: Commands,
    resolve_pipeline: Res<OitResolvePipeline>,
    render_device: Res<RenderDevice>,
    view_uniforms: Res<ViewUniforms>,
    buffers: Res<OitBuffers>,
) {
    if let (Some(binding), Some(layers_binding), Some(layer_ids_binding)) = (
        view_uniforms.uniforms.binding(),
        buffers.layers.binding(),
        buffers.layer_ids.binding(),
    ) {
        let bind_group = render_device.create_bind_group(
            "oit_resolve_bind_group",
            &resolve_pipeline.view_bind_group_layout,
            // `binding` is owned here, so it can be moved directly; the previous
            // `binding.clone()` was a redundant clone.
            &BindGroupEntries::sequential((binding, layers_binding, layer_ids_binding)),
        );
        commands.insert_resource(OitResolveBindGroup(bind_group));
    }
}

View File

@@ -0,0 +1,88 @@
use bevy_camera::{MainPassResolutionOverride, Viewport};
use bevy_ecs::{prelude::*, query::QueryItem};
use crate::render::{
camera::ExtractedCamera,
diagnostic::RecordDiagnostics,
render_graph::{NodeRunError, RenderGraphContext, RenderLabel, ViewNode},
render_resource::{BindGroupEntries, PipelineCache, RenderPassDescriptor},
renderer::RenderContext,
view::{ViewDepthTexture, ViewTarget, ViewUniformOffset},
};
use super::{OitResolveBindGroup, OitResolvePipeline, OitResolvePipelineId};
/// Render label for the OIT resolve pass.
#[derive(RenderLabel, Debug, Clone, Hash, PartialEq, Eq)]
pub struct OitResolvePass;
/// The node that executes the OIT resolve pass.
///
/// Stateless: everything it needs is read from the view entity and world resources.
#[derive(Default)]
pub struct OitResolveNode;
impl ViewNode for OitResolveNode {
    type ViewQuery = (
        &'static ExtractedCamera,
        &'static ViewTarget,
        &'static ViewUniformOffset,
        &'static OitResolvePipelineId,
        &'static ViewDepthTexture,
        // Optional: only present when the main pass renders at a different resolution.
        Option<&'static MainPassResolutionOverride>,
    );
    fn run(
        &self,
        _graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        (camera, view_target, view_uniform, oit_resolve_pipeline_id, depth, resolution_override): QueryItem<
            Self::ViewQuery,
        >,
        world: &World,
    ) -> Result<(), NodeRunError> {
        // The pipeline resource is absent when OIT isn't supported; no-op then.
        let Some(resolve_pipeline) = world.get_resource::<OitResolvePipeline>() else {
            return Ok(());
        };
        // resolve oit
        // sorts the layers and renders the final blended color to the screen
        {
            let pipeline_cache = world.resource::<PipelineCache>();
            let bind_group = world.resource::<OitResolveBindGroup>();
            // The specialized pipeline may still be compiling; skip this frame.
            let Some(pipeline) = pipeline_cache.get_render_pipeline(oit_resolve_pipeline_id.0)
            else {
                return Ok(());
            };
            let diagnostics = render_context.diagnostic_recorder();
            // The depth bind group is created per run because the view's depth
            // texture can change between frames.
            let depth_bind_group = render_context.render_device().create_bind_group(
                "oit_resolve_depth_bind_group",
                &resolve_pipeline.oit_depth_bind_group_layout,
                &BindGroupEntries::single(depth.view()),
            );
            let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
                label: Some("oit_resolve"),
                color_attachments: &[Some(view_target.get_color_attachment())],
                depth_stencil_attachment: None,
                timestamp_writes: None,
                occlusion_query_set: None,
            });
            let pass_span = diagnostics.pass_span(&mut render_pass, "oit_resolve");
            if let Some(viewport) =
                Viewport::from_viewport_and_override(camera.viewport.as_ref(), resolution_override)
            {
                render_pass.set_camera_viewport(&viewport);
            }
            render_pass.set_render_pipeline(pipeline);
            // Group 0 = view + OIT buffers (dynamic view offset); group 1 = depth.
            render_pass.set_bind_group(0, bind_group, &[view_uniform.offset]);
            render_pass.set_bind_group(1, &depth_bind_group, &[]);
            // Three vertices: the vertex stage comes from FullscreenShader,
            // covering the whole viewport.
            render_pass.draw(0..3, 0..1);
            pass_span.end(&mut render_pass);
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,117 @@
#import bevy_render::view::View
@group(0) @binding(0) var<uniform> view: View;
@group(0) @binding(1) var<storage, read_write> layers: array<vec2<u32>>;
@group(0) @binding(2) var<storage, read_write> layer_ids: array<atomic<i32>>;
@group(1) @binding(0) var depth: texture_depth_2d;
struct OitFragment {
color: vec3<f32>,
alpha: f32,
depth: f32,
}
// Contains all the colors and depth for this specific fragment
var<private> fragment_list: array<OitFragment, #{LAYER_COUNT}>;
struct FullscreenVertexOutput {
@builtin(position) position: vec4<f32>,
@location(0) uv: vec2<f32>,
};
// Resolve entry point: reads this pixel's OIT layer list, sorts it, blends it,
// and writes the final transparent color for the fullscreen pass.
@fragment
fn fragment(in: FullscreenVertexOutput) -> @location(0) vec4<f32> {
    // Layers for one pixel are strided by the full screen size in the buffer.
    let buffer_size = i32(view.viewport.z * view.viewport.w);
    let screen_index = i32(floor(in.position.x) + floor(in.position.y) * view.viewport.z);
    let counter = atomicLoad(&layer_ids[screen_index]);
    if counter == 0 {
        // No transparent fragments landed on this pixel; just reset state.
        reset_indices(screen_index);
        // https://github.com/gfx-rs/wgpu/issues/4416
        if true {
            discard;
        }
        return vec4(0.0);
    } else {
        // Load depth for manual depth testing.
        // This is necessary because early z doesn't seem to trigger in the transparent pass.
        // This should be done during the draw pass so those fragments simply don't exist in the list,
        // but this requires a bigger refactor
        let d = textureLoad(depth, vec2<i32>(in.position.xy), 0);
        let result = sort(screen_index, buffer_size, d);
        // Reset the counter so next frame starts from an empty list.
        reset_indices(screen_index);
        return result.color;
    }
}
// Resets all indices to 0.
// This means we don't have to clear the entire layers buffer
fn reset_indices(screen_index: i32) {
    atomicStore(&layer_ids[screen_index], 0);
    // Clear only the first layer slot; deeper slots are ignored once the
    // counter is zero.
    layers[screen_index] = vec2(0u);
}
// Result of sorting and blending one pixel's fragment list.
struct SortResult {
    // Final blended (premultiplied-alpha) color for the pixel.
    color: vec4f,
    // Depth of the front-most fragment after sorting.
    depth: f32,
}
// Gathers this pixel's OIT fragments, sorts them by depth, and blends them
// front to back, stopping at fragments occluded by opaque geometry.
fn sort(screen_index: i32, buffer_size: i32, opaque_depth: f32) -> SortResult {
    var counter = atomicLoad(&layer_ids[screen_index]);
    // fill list
    for (var i = 0; i < counter; i += 1) {
        let fragment = layers[screen_index + buffer_size * i];
        // unpack color/alpha/depth
        let color = bevy_pbr::rgb9e5::rgb9e5_to_vec3_(fragment.x);
        let depth_alpha = bevy_core_pipeline::oit::unpack_24bit_depth_8bit_alpha(fragment.y);
        fragment_list[i].color = color;
        fragment_list[i].alpha = depth_alpha.y;
        fragment_list[i].depth = depth_alpha.x;
    }
    // bubble sort the list based on the depth
    // NOTE(review): the outer bound starts at `counter` and the blend loop below
    // uses `i <= counter`, so index `counter` (one past the filled range) is read.
    // Private-address-space arrays are zero-initialized, so that element has
    // depth 0 and alpha 0 and appears harmless — but confirm this off-by-one is
    // intended (it matches the upstream vendored code).
    for (var i = counter; i >= 0; i -= 1) {
        for (var j = 0; j < i; j += 1) {
            if fragment_list[j].depth < fragment_list[j + 1].depth {
                // swap
                let temp = fragment_list[j + 1];
                fragment_list[j + 1] = fragment_list[j];
                fragment_list[j] = temp;
            }
        }
    }
    // resolve blend
    var final_color = vec4(0.0);
    for (var i = 0; i <= counter; i += 1) {
        // depth testing
        // This needs to happen here because we can only stop iterating if the fragment is
        // occluded by something opaque and the fragments need to be sorted first
        // (larger depth = closer with reverse-z, so the list is front to back).
        if fragment_list[i].depth < opaque_depth {
            break;
        }
        let color = fragment_list[i].color;
        let alpha = fragment_list[i].alpha;
        // Premultiply before compositing with the OVER operator.
        var base_color = vec4(color.rgb * alpha, alpha);
        final_color = blend(final_color, base_color);
        // Fully opaque accumulation: nothing behind can contribute.
        if final_color.a == 1.0 {
            break;
        }
    }
    var result: SortResult;
    result.color = final_color;
    result.depth = fragment_list[0].depth;
    return result;
}
// OVER operator using premultiplied alpha
// see: https://en.wikipedia.org/wiki/Alpha_compositing
fn blend(color_a: vec4<f32>, color_b: vec4<f32>) -> vec4<f32> {
    // `color_a` is the front accumulation; `color_b` is composited behind it.
    let remaining = 1.0 - color_a.a;
    let rgb = color_a.rgb + remaining * color_b.rgb;
    let a = color_a.a + remaining * color_b.a;
    return vec4(rgb, a);
}

View File

@@ -0,0 +1,65 @@
#import bevy_pbr::{
mesh_view_types::{Lights, DirectionalLight},
atmosphere::{
types::{Atmosphere, AtmosphereSettings},
bindings::{atmosphere, settings, view, lights, aerial_view_lut_out},
functions::{
sample_transmittance_lut, sample_atmosphere, rayleigh, henyey_greenstein,
sample_multiscattering_lut, AtmosphereSample, sample_local_inscattering,
uv_to_ndc, max_atmosphere_distance, uv_to_ray_direction,
MIDPOINT_RATIO, get_view_position
},
}
}
@group(0) @binding(13) var aerial_view_lut_out: texture_storage_3d<rgba16float, write>;
// Builds the aerial-view LUT: for each screen-space texel, marches along the
// view ray and writes per-slice accumulated inscattering (in log space).
@compute
@workgroup_size(16, 16, 1)
fn main(@builtin(global_invocation_id) idx: vec3<u32>) {
    // NOTE(review): `>` lets idx == size through; this relies on out-of-bounds
    // textureStore being dropped — confirm `>=` wasn't intended (matches upstream).
    if any(idx.xy > settings.aerial_view_lut_size.xy) { return; }
    let uv = (vec2<f32>(idx.xy) + 0.5) / vec2<f32>(settings.aerial_view_lut_size.xy);
    let ray_dir = uv_to_ray_direction(uv);
    let world_pos = get_view_position();
    let r = length(world_pos);
    let t_max = settings.aerial_view_lut_max_distance;
    var prev_t = 0.0;
    var total_inscattering = vec3(0.0);
    var throughput = vec3(1.0);
    // Outer loop: depth slices of the 3D LUT; inner loop: raymarch samples per slice.
    for (var slice_i: u32 = 0; slice_i < settings.aerial_view_lut_size.z; slice_i++) {
        for (var step_i: u32 = 0; step_i < settings.aerial_view_lut_samples; step_i++) {
            // Sample position along the ray, offset by MIDPOINT_RATIO within the step.
            let t_i = t_max * (f32(slice_i) + ((f32(step_i) + MIDPOINT_RATIO) / f32(settings.aerial_view_lut_samples))) / f32(settings.aerial_view_lut_size.z);
            let dt = (t_i - prev_t);
            prev_t = t_i;
            let sample_pos = world_pos + ray_dir * t_i;
            let local_r = length(sample_pos);
            let local_up = normalize(sample_pos);
            let local_atmosphere = sample_atmosphere(local_r);
            let sample_optical_depth = local_atmosphere.extinction * dt;
            let sample_transmittance = exp(-sample_optical_depth);
            // evaluate one segment of the integral
            var inscattering = sample_local_inscattering(local_atmosphere, ray_dir, sample_pos);
            // Analytical integration of the single scattering term in the radiance transfer equation
            let s_int = (inscattering - inscattering * sample_transmittance) / local_atmosphere.extinction;
            total_inscattering += throughput * s_int;
            throughput *= sample_transmittance;
            // Early out once the ray is effectively fully attenuated.
            if all(throughput < vec3(0.001)) {
                break;
            }
        }
        // Store in log space to allow linear interpolation of exponential values between slices
        let log_inscattering = log(max(total_inscattering, vec3(1e-6)));
        textureStore(aerial_view_lut_out, vec3(vec2<u32>(idx.xy), slice_i), vec4(log_inscattering, 0.0));
    }
}

View File

@@ -0,0 +1,22 @@
#define_import_path bevy_pbr::atmosphere::bindings
#import bevy_render::view::View;
#import bevy_pbr::{
mesh_view_types::Lights,
atmosphere::types::{Atmosphere, AtmosphereSettings, AtmosphereTransforms}
}
@group(0) @binding(0) var<uniform> atmosphere: Atmosphere;
@group(0) @binding(1) var<uniform> settings: AtmosphereSettings;
@group(0) @binding(2) var<uniform> atmosphere_transforms: AtmosphereTransforms;
@group(0) @binding(3) var<uniform> view: View;
@group(0) @binding(4) var<uniform> lights: Lights;
@group(0) @binding(5) var transmittance_lut: texture_2d<f32>;
@group(0) @binding(6) var transmittance_lut_sampler: sampler;
@group(0) @binding(7) var multiscattering_lut: texture_2d<f32>;
@group(0) @binding(8) var multiscattering_lut_sampler: sampler;
@group(0) @binding(9) var sky_view_lut: texture_2d<f32>;
@group(0) @binding(10) var sky_view_lut_sampler: sampler;
@group(0) @binding(11) var aerial_view_lut: texture_3d<f32>;
@group(0) @binding(12) var aerial_view_lut_sampler: sampler;

View File

@@ -0,0 +1,139 @@
// Copyright (c) 2017 Eric Bruneton
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. Neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//
// Precomputed Atmospheric Scattering
// Copyright (c) 2008 INRIA
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. Neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
#define_import_path bevy_pbr::atmosphere::bruneton_functions
#import bevy_pbr::atmosphere::{
types::Atmosphere,
bindings::atmosphere,
}
// Mapping from view height (r) and zenith cos angle (mu) to UV coordinates in the transmittance LUT
// Assuming r between ground and top atmosphere boundary, and mu= cos(zenith_angle)
// Chosen to increase precision near the ground and to work around a discontinuity at the horizon
// See Bruneton and Neyret 2008, "Precomputed Atmospheric Scattering" section 4
fn transmittance_lut_r_mu_to_uv(r: f32, mu: f32) -> vec2<f32> {
    // Distance along a horizontal ray from the ground to the top atmosphere boundary
    let H = sqrt(atmosphere.top_radius * atmosphere.top_radius - atmosphere.bottom_radius * atmosphere.bottom_radius);
    // Distance from a point at height r to the horizon
    // ignore the case where r <= atmosphere.bottom_radius
    let rho = sqrt(max(r * r - atmosphere.bottom_radius * atmosphere.bottom_radius, 0.0));
    // Distance from a point at height r to the top atmosphere boundary at zenith angle mu
    let d = distance_to_top_atmosphere_boundary(r, mu);
    // Minimum and maximum distance to the top atmosphere boundary from a point at height r
    let d_min = atmosphere.top_radius - r; // length of the ray straight up to the top atmosphere boundary
    let d_max = rho + H; // length of the ray to the top atmosphere boundary and grazing the horizon
    // u parameterizes mu via the boundary distance; v parameterizes height via
    // the horizon distance. Both are in [0, 1] under the stated assumptions.
    let u = (d - d_min) / (d_max - d_min);
    let v = rho / H;
    return vec2<f32>(u, v);
}
// Inverse of the mapping above, mapping from UV coordinates in the transmittance LUT to view height (r) and zenith cos angle (mu)
fn transmittance_lut_uv_to_r_mu(uv: vec2<f32>) -> vec2<f32> {
    // Distance to top atmosphere boundary for a horizontal ray at ground level
    let H = sqrt(atmosphere.top_radius * atmosphere.top_radius - atmosphere.bottom_radius * atmosphere.bottom_radius);
    // Distance to the horizon, from which we can compute r:
    let rho = H * uv.y;
    let r = sqrt(rho * rho + atmosphere.bottom_radius * atmosphere.bottom_radius);
    // Distance to the top atmosphere boundary for the ray (r,mu), and its minimum
    // and maximum values over all mu- obtained for (r,1) and (r,mu_horizon) -
    // from which we can recover mu:
    let d_min = atmosphere.top_radius - r;
    let d_max = rho + H;
    let d = d_min + uv.x * (d_max - d_min);
    var mu: f32;
    // d == 0 means the point is exactly at the top boundary looking straight up.
    if d == 0.0 {
        mu = 1.0;
    } else {
        mu = (H * H - rho * rho - d * d) / (2.0 * r * d);
    }
    // Guard against floating-point drift outside the valid cosine range.
    mu = clamp(mu, -1.0, 1.0);
    return vec2<f32>(r, mu);
}
/// Simplified ray-sphere intersection
/// where:
/// Ray origin, o = [0,0,r] with r <= atmosphere.top_radius
/// mu is the cosine of spherical coordinate theta (-1.0 <= mu <= 1.0)
/// so ray direction in spherical coordinates is [1,acos(mu),0] which needs to be converted to cartesian
/// Direction of ray, u = [0,sqrt(1-mu*mu),mu]
/// Center of sphere, c = [0,0,0]
/// Radius of sphere, r = atmosphere.top_radius
/// This function solves the quadratic equation for line-sphere intersection simplified under these assumptions
fn distance_to_top_atmosphere_boundary(r: f32, mu: f32) -> f32 {
    // ignore the case where r > atmosphere.top_radius
    // (the discriminant is clamped to zero so the sqrt stays real).
    let discriminant = max(r * r * (mu * mu - 1.0) + atmosphere.top_radius * atmosphere.top_radius, 0.0);
    let distance = sqrt(discriminant) - r * mu;
    return max(distance, 0.0);
}
/// Simplified ray-sphere intersection
/// as above for intersections with the ground
fn distance_to_bottom_atmosphere_boundary(r: f32, mu: f32) -> f32 {
    // Clamp the discriminant so grazing rays don't produce NaN from sqrt.
    let discriminant = max(r * r * (mu * mu - 1.0) + atmosphere.bottom_radius * atmosphere.bottom_radius, 0.0);
    let distance = -r * mu - sqrt(discriminant);
    return max(distance, 0.0);
}
// A ray at height r with zenith cosine mu hits the planet surface iff it points
// below the horizontal (mu < 0) and the line-sphere discriminant is non-negative.
fn ray_intersects_ground(r: f32, mu: f32) -> bool {
    if mu >= 0.0 {
        return false;
    }
    let discriminant = r * r * (mu * mu - 1.0) + atmosphere.bottom_radius * atmosphere.bottom_radius;
    return discriminant >= 0.0;
}

View File

@@ -0,0 +1,332 @@
use crate::render::pbr::{
resources::{
AtmosphereSamplers, AtmosphereTextures, AtmosphereTransform, AtmosphereTransforms,
AtmosphereTransformsOffset,
},
GpuAtmosphereSettings, GpuLights, LightMeta, ViewLightsUniformOffset,
};
use bevy_asset::{load_embedded_asset, AssetServer, Assets, Handle, RenderAssetUsages};
use bevy_ecs::{
component::Component,
entity::Entity,
query::{QueryState, With, Without},
resource::Resource,
system::{lifetimeless::Read, Commands, Query, Res, ResMut},
world::{FromWorld, World},
};
use bevy_image::Image;
use bevy_light::{AtmosphereEnvironmentMapLight, GeneratedEnvironmentMapLight};
use bevy_math::{Quat, UVec2};
use crate::render::{
extract_component::{ComponentUniforms, DynamicUniformIndex, ExtractComponent},
render_asset::RenderAssets,
render_graph::{Node, NodeRunError, RenderGraphContext},
render_resource::{binding_types::*, *},
renderer::{RenderContext, RenderDevice},
texture::{CachedTexture, GpuImage},
view::{ViewUniform, ViewUniformOffset, ViewUniforms},
};
use bevy_utils::default;
use tracing::warn;
use super::Atmosphere;
// Render world representation of an environment map light for the atmosphere
#[derive(Component, ExtractComponent, Clone)]
pub struct AtmosphereEnvironmentMap {
    // Handle to the cubemap image the probe pass writes into.
    pub environment_map: Handle<Image>,
    // Face size of the cubemap, validated to power-of-two dimensions.
    pub size: UVec2,
}
/// Textures used by the atmosphere probe pass: the writable environment view
/// plus the four atmosphere LUTs borrowed from a rendered view.
#[derive(Component)]
pub struct AtmosphereProbeTextures {
    pub environment: TextureView,
    pub transmittance_lut: CachedTexture,
    pub multiscattering_lut: CachedTexture,
    pub sky_view_lut: CachedTexture,
    pub aerial_view_lut: CachedTexture,
}
/// Bind group for one probe, created by `prepare_atmosphere_probe_bind_groups`.
#[derive(Component)]
pub(crate) struct AtmosphereProbeBindGroups {
    pub environment: BindGroup,
}
/// Bind group layout shared by all atmosphere probes.
#[derive(Resource)]
pub struct AtmosphereProbeLayouts {
    pub environment: BindGroupLayout,
}
/// Cached compute pipeline id for the environment cubemap pass.
#[derive(Resource)]
pub struct AtmosphereProbePipeline {
    pub environment: CachedComputePipelineId,
}
/// Creates the bind group layout for the environment probe compute pass and
/// stores it as [`AtmosphereProbeLayouts`].
///
/// Entry order must match the @binding indices in the atmosphere shader
/// bindings (uniforms 0-4, LUT/sampler pairs 5-12, output at 13).
pub fn init_atmosphere_probe_layout(mut commands: Commands, render_device: Res<RenderDevice>) {
    let environment = render_device.create_bind_group_layout(
        "environment_bind_group_layout",
        &BindGroupLayoutEntries::sequential(
            ShaderStages::COMPUTE,
            (
                // All uniforms use dynamic offsets (the `true` argument).
                uniform_buffer::<Atmosphere>(true),
                uniform_buffer::<GpuAtmosphereSettings>(true),
                uniform_buffer::<AtmosphereTransform>(true),
                uniform_buffer::<ViewUniform>(true),
                uniform_buffer::<GpuLights>(true),
                texture_2d(TextureSampleType::Float { filterable: true }), //transmittance lut and sampler
                sampler(SamplerBindingType::Filtering),
                texture_2d(TextureSampleType::Float { filterable: true }), //multiscattering lut and sampler
                sampler(SamplerBindingType::Filtering),
                texture_2d(TextureSampleType::Float { filterable: true }), //sky view lut and sampler
                sampler(SamplerBindingType::Filtering),
                texture_3d(TextureSampleType::Float { filterable: true }), //aerial view lut and sampler
                sampler(SamplerBindingType::Filtering),
                texture_storage_2d_array(
                    // output 2D array texture
                    TextureFormat::Rgba16Float,
                    StorageTextureAccess::WriteOnly,
                ),
            ),
        ),
    );
    commands.insert_resource(AtmosphereProbeLayouts { environment });
}
/// Builds the per-probe bind group from the shared uniforms, the probe's LUT
/// textures, and its writable environment view.
///
/// NOTE(review): the `.unwrap()`s below panic if any uniform buffer has not
/// been written yet this frame — presumably system ordering guarantees they
/// exist by PrepareBindGroups; confirm against the plugin's system schedule.
pub(super) fn prepare_atmosphere_probe_bind_groups(
    probes: Query<(Entity, &AtmosphereProbeTextures), With<AtmosphereEnvironmentMap>>,
    render_device: Res<RenderDevice>,
    layouts: Res<AtmosphereProbeLayouts>,
    samplers: Res<AtmosphereSamplers>,
    view_uniforms: Res<ViewUniforms>,
    lights_uniforms: Res<LightMeta>,
    atmosphere_transforms: Res<AtmosphereTransforms>,
    atmosphere_uniforms: Res<ComponentUniforms<Atmosphere>>,
    settings_uniforms: Res<ComponentUniforms<GpuAtmosphereSettings>>,
    mut commands: Commands,
) {
    for (entity, textures) in &probes {
        // Entry order must match `init_atmosphere_probe_layout`.
        let environment = render_device.create_bind_group(
            "environment_bind_group",
            &layouts.environment,
            &BindGroupEntries::sequential((
                atmosphere_uniforms.binding().unwrap(),
                settings_uniforms.binding().unwrap(),
                atmosphere_transforms.uniforms().binding().unwrap(),
                view_uniforms.uniforms.binding().unwrap(),
                lights_uniforms.view_gpu_lights.binding().unwrap(),
                &textures.transmittance_lut.default_view,
                &samplers.transmittance_lut,
                &textures.multiscattering_lut.default_view,
                &samplers.multiscattering_lut,
                &textures.sky_view_lut.default_view,
                &samplers.sky_view_lut,
                &textures.aerial_view_lut.default_view,
                &samplers.aerial_view_lut,
                &textures.environment,
            )),
        );
        commands
            .entity(entity)
            .insert(AtmosphereProbeBindGroups { environment });
    }
}
/// Creates [`AtmosphereProbeTextures`] for probes that don't have them yet,
/// pairing the probe's own environment cubemap view with the atmosphere LUTs
/// borrowed from the first rendered view.
pub(super) fn prepare_probe_textures(
    view_textures: Query<&AtmosphereTextures, With<Atmosphere>>,
    probes: Query<
        (Entity, &AtmosphereEnvironmentMap),
        (
            With<AtmosphereEnvironmentMap>,
            Without<AtmosphereProbeTextures>,
        ),
    >,
    gpu_images: Res<RenderAssets<GpuImage>>,
    mut commands: Commands,
) {
    for (probe, render_env_map) in &probes {
        // The GPU image may not be prepared yet (asset still being uploaded).
        // Skip instead of panicking — the `Without<AtmosphereProbeTextures>`
        // filter makes this system retry the entity on the next frame.
        let Some(environment) = gpu_images.get(&render_env_map.environment_map) else {
            continue;
        };
        // create a cube view
        let environment_view = environment.texture.create_view(&TextureViewDescriptor {
            dimension: Some(TextureViewDimension::D2Array),
            ..Default::default()
        });
        // Get the first view entity's textures to borrow
        if let Some(view_textures) = view_textures.iter().next() {
            commands.entity(probe).insert(AtmosphereProbeTextures {
                environment: environment_view,
                transmittance_lut: view_textures.transmittance_lut.clone(),
                multiscattering_lut: view_textures.multiscattering_lut.clone(),
                sky_view_lut: view_textures.sky_view_lut.clone(),
                aerial_view_lut: view_textures.aerial_view_lut.clone(),
            });
        }
    }
}
/// Queues compilation of the environment cubemap compute pipeline and stores
/// its cached id as [`AtmosphereProbePipeline`].
pub fn init_atmosphere_probe_pipeline(
    pipeline_cache: Res<PipelineCache>,
    layouts: Res<AtmosphereProbeLayouts>,
    asset_server: Res<AssetServer>,
    mut commands: Commands,
) {
    let descriptor = ComputePipelineDescriptor {
        label: Some("environment_pipeline".into()),
        layout: vec![layouts.environment.clone()],
        shader: load_embedded_asset!(asset_server.as_ref(), "environment.wgsl"),
        ..default()
    };
    let environment = pipeline_cache.queue_compute_pipeline(descriptor);
    commands.insert_resource(AtmosphereProbePipeline { environment });
}
// Ensure power-of-two dimensions to avoid edge update issues on cubemap faces
pub fn validate_environment_map_size(size: UVec2) -> UVec2 {
    // Round each axis up to the next power of two (minimum 1).
    let new_size = UVec2::new(
        size.x.max(1).next_power_of_two(),
        size.y.max(1).next_power_of_two(),
    );
    if new_size == size {
        return size;
    }
    warn!(
        "Non-power-of-two AtmosphereEnvironmentMapLight size {}, correcting to {new_size}",
        size
    );
    new_size
}
/// For every [`AtmosphereEnvironmentMapLight`] without render-world components,
/// allocates the backing cubemap image and inserts both the
/// [`AtmosphereEnvironmentMap`] and a [`GeneratedEnvironmentMapLight`].
pub fn prepare_atmosphere_probe_components(
    probes: Query<(Entity, &AtmosphereEnvironmentMapLight), (Without<AtmosphereEnvironmentMap>,)>,
    mut commands: Commands,
    mut images: ResMut<Assets<Image>>,
) {
    for (entity, env_map_light) in &probes {
        // Create a cubemap image in the main world that we can reference
        let new_size = validate_environment_map_size(env_map_light.size);
        let mut environment_image = Image::new_fill(
            Extent3d {
                width: new_size.x,
                height: new_size.y,
                // One layer per cubemap face.
                depth_or_array_layers: 6,
            },
            TextureDimension::D2,
            // 8 zero bytes = one Rgba16Float texel (4 channels x 16 bits).
            &[0; 8],
            TextureFormat::Rgba16Float,
            RenderAssetUsages::all(),
        );
        // Sampled as a cube, even though the storage-write pass sees a 2D array.
        environment_image.texture_view_descriptor = Some(TextureViewDescriptor {
            dimension: Some(TextureViewDimension::Cube),
            ..Default::default()
        });
        // STORAGE_BINDING lets the probe compute pass write it;
        // TEXTURE_BINDING/COPY_SRC let lighting sample and copy it.
        environment_image.texture_descriptor.usage = TextureUsages::TEXTURE_BINDING
            | TextureUsages::STORAGE_BINDING
            | TextureUsages::COPY_SRC;
        // Add the image to assets to get a handle
        let environment_handle = images.add(environment_image);
        commands.entity(entity).insert(AtmosphereEnvironmentMap {
            environment_map: environment_handle.clone(),
            size: new_size,
        });
        commands
            .entity(entity)
            .insert(GeneratedEnvironmentMapLight {
                environment_map: environment_handle,
                intensity: env_map_light.intensity,
                rotation: Quat::IDENTITY,
                affects_lightmapped_mesh_diffuse: env_map_light.affects_lightmapped_mesh_diffuse,
            });
    }
}
/// Render-graph node that dispatches the compute pass filling each probe's
/// environment cubemap from the atmosphere LUTs.
pub(super) struct EnvironmentNode {
    // Dynamic-uniform offsets for the view the graph is running for.
    main_view_query: QueryState<(
        Read<DynamicUniformIndex<Atmosphere>>,
        Read<DynamicUniformIndex<GpuAtmosphereSettings>>,
        Read<AtmosphereTransformsOffset>,
        Read<ViewUniformOffset>,
        Read<ViewLightsUniformOffset>,
    )>,
    // All probes with prepared bind groups.
    probe_query: QueryState<(
        Read<AtmosphereProbeBindGroups>,
        Read<AtmosphereEnvironmentMap>,
    )>,
}
impl FromWorld for EnvironmentNode {
    fn from_world(world: &mut World) -> Self {
        Self {
            main_view_query: QueryState::new(world),
            probe_query: QueryState::new(world),
        }
    }
}
impl Node for EnvironmentNode {
    fn update(&mut self, world: &mut World) {
        // Keep the manual query states in sync with the world's archetypes.
        self.main_view_query.update_archetypes(world);
        self.probe_query.update_archetypes(world);
    }

    /// Dispatches one compute pass per probe, writing the atmosphere's sky into
    /// the probe's environment cubemap.
    fn run(
        &self,
        graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        world: &World,
    ) -> Result<(), NodeRunError> {
        let pipeline_cache = world.resource::<PipelineCache>();
        let pipelines = world.resource::<AtmosphereProbePipeline>();
        let view_entity = graph.view_entity();

        // The compute pipeline may still be compiling; try again next frame.
        let Some(environment_pipeline) = pipeline_cache.get_compute_pipeline(pipelines.environment)
        else {
            return Ok(());
        };

        // All dynamic-uniform offsets for the current view. (The original used a
        // needless one-element tuple pattern around this `Ok`.)
        let Ok((
            atmosphere_uniforms_offset,
            settings_uniforms_offset,
            atmosphere_transforms_offset,
            view_uniforms_offset,
            lights_uniforms_offset,
        )) = self.main_view_query.get_manual(world, view_entity)
        else {
            return Ok(());
        };

        for (bind_groups, env_map_light) in self.probe_query.iter_manual(world) {
            let mut pass =
                render_context
                    .command_encoder()
                    .begin_compute_pass(&ComputePassDescriptor {
                        label: Some("environment_pass"),
                        timestamp_writes: None,
                    });
            pass.set_pipeline(environment_pipeline);
            // Offsets in the same order as the dynamic uniforms declared in the
            // probe bind group layout.
            pass.set_bind_group(
                0,
                &bind_groups.environment,
                &[
                    atmosphere_uniforms_offset.index(),
                    settings_uniforms_offset.index(),
                    atmosphere_transforms_offset.index(),
                    view_uniforms_offset.offset,
                    lights_uniforms_offset.offset,
                ],
            );
            // Round up instead of truncating: `size / 8` under-dispatched for
            // (power-of-two) sizes below 8. The shader bounds-checks against the
            // output dimensions, so overshooting is safe.
            pass.dispatch_workgroups(
                env_map_light.size.x.div_ceil(8),
                env_map_light.size.y.div_ceil(8),
                6, // 6 cubemap faces
            );
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,39 @@
#import bevy_pbr::{
atmosphere::{
functions::{direction_world_to_atmosphere, sample_sky_view_lut, get_view_position},
},
utils::sample_cube_dir
}
// Output cubemap: one rgba16float layer per face, written by this pass.
@group(0) @binding(13) var output: texture_storage_2d_array<rgba16float, write>;

// For each texel of each cubemap face: reconstruct the world-space view ray,
// sample the sky-view LUT, and store the inscattered radiance.
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let dimensions = textureDimensions(output);
    let slice_index = global_id.z;
    // Guard out-of-bounds threads (the dispatch may be rounded up) and
    // anything beyond the 6 cubemap faces.
    if (global_id.x >= dimensions.x || global_id.y >= dimensions.y || slice_index >= 6u) {
        return;
    }
    // Calculate normalized UV coordinates for this pixel
    let uv = vec2<f32>(
        (f32(global_id.x) + 0.5) / f32(dimensions.x),
        (f32(global_id.y) + 0.5) / f32(dimensions.y)
    );
    var ray_dir_ws = sample_cube_dir(uv, slice_index);
    // invert the z direction to account for cubemaps being lefthanded
    ray_dir_ws.z = -ray_dir_ws.z;
    let world_pos = get_view_position();
    let r = length(world_pos);
    let up = normalize(world_pos);
    let ray_dir_as = direction_world_to_atmosphere(ray_dir_ws.xyz, up);
    let inscattering = sample_sky_view_lut(r, ray_dir_as);
    let color = vec4<f32>(inscattering, 1.0);
    textureStore(output, vec2<i32>(global_id.xy), i32(slice_index), color);
}

View File

@@ -0,0 +1,528 @@
#define_import_path bevy_pbr::atmosphere::functions
#import bevy_render::maths::{PI, HALF_PI, PI_2, fast_acos, fast_acos_4, fast_atan2, ray_sphere_intersect}
#import bevy_pbr::atmosphere::{
types::Atmosphere,
bindings::{
atmosphere, settings, view, lights, transmittance_lut, transmittance_lut_sampler,
multiscattering_lut, multiscattering_lut_sampler, sky_view_lut, sky_view_lut_sampler,
aerial_view_lut, aerial_view_lut_sampler, atmosphere_transforms
},
bruneton_functions::{
transmittance_lut_r_mu_to_uv, transmittance_lut_uv_to_r_mu,
ray_intersects_ground, distance_to_top_atmosphere_boundary,
distance_to_bottom_atmosphere_boundary
},
}
// NOTE FOR CONVENTIONS:
// r:
// radius, or distance from planet center
//
// altitude:
// distance from planet **surface**
//
// mu:
// cosine of the zenith angle of a ray with
// respect to the planet normal
//
// atmosphere space:
// abbreviated as "as" (contrast with vs, cs, ws), this space is similar
// to view space, but with the camera positioned horizontally on the planet
// surface, so the horizon is a horizontal line centered vertically in the
// frame. This enables the non-linear latitude parametrization the paper uses
// to concentrate detail near the horizon
// CONSTANTS
const FRAC_PI: f32 = 0.3183098862; // 1 / π
const FRAC_2_PI: f32 = 0.15915494309; // 1 / (2π)
const FRAC_3_16_PI: f32 = 0.0596831036594607509; // 3 / (16π)
const FRAC_4_PI: f32 = 0.07957747154594767; // 1 / (4π)
const ROOT_2: f32 = 1.41421356; // √2
// Minimum height above the planet surface the view position is nudged to (see
// get_view_position); avoids sqrt cancellation / zero-length segments when
// r == bottom_radius.
const EPSILON: f32 = 1.0; // 1 meter
// During raymarching, each segment is sampled at a single point. This constant determines
// where in the segment that sample is taken (0.0 = start, 0.5 = middle, 1.0 = end).
// We use 0.3 to sample closer to the start of each segment, which better approximates
// the exponential falloff of atmospheric density.
const MIDPOINT_RATIO: f32 = 0.3;
// LUT UV PARAMETERIZATIONS

// Maps a [0,1] unit coordinate into the sub-texel range so samples land on
// texel centers rather than the texture edge (presumably to avoid edge-clamp
// interpolation artifacts — confirm against the LUT generation passes).
fn unit_to_sub_uvs(val: vec2<f32>, resolution: vec2<f32>) -> vec2<f32> {
    return (val + 0.5f / resolution) * (resolution / (resolution + 1.0f));
}

// Inverse of unit_to_sub_uvs.
fn sub_uvs_to_unit(val: vec2<f32>, resolution: vec2<f32>) -> vec2<f32> {
    return (val - 0.5f / resolution) * (resolution / (resolution - 1.0f));
}

// Multiscattering LUT mapping: u encodes mu (cos of the sun zenith angle),
// v encodes altitude normalized between the bottom and top of the atmosphere.
fn multiscattering_lut_r_mu_to_uv(r: f32, mu: f32) -> vec2<f32> {
    let u = 0.5 + 0.5 * mu;
    let v = saturate((r - atmosphere.bottom_radius) / (atmosphere.top_radius - atmosphere.bottom_radius)); //TODO
    return unit_to_sub_uvs(vec2(u, v), vec2<f32>(settings.multiscattering_lut_size));
}

// Inverse of multiscattering_lut_r_mu_to_uv: recovers (r, mu) from LUT uv.
fn multiscattering_lut_uv_to_r_mu(uv: vec2<f32>) -> vec2<f32> {
    let adj_uv = sub_uvs_to_unit(uv, vec2<f32>(settings.multiscattering_lut_size));
    let r = mix(atmosphere.bottom_radius, atmosphere.top_radius, adj_uv.y);
    let mu = adj_uv.x * 2 - 1;
    return vec2(r, mu);
}

// Sky-view LUT mapping: u encodes azimuth, v encodes latitude relative to the
// horizon with a sqrt warp that concentrates texels near the horizon line.
fn sky_view_lut_r_mu_azimuth_to_uv(r: f32, mu: f32, azimuth: f32) -> vec2<f32> {
    let u = (azimuth * FRAC_2_PI) + 0.5;
    // Distance to the horizon, and the zenith angle at which it appears.
    let v_horizon = sqrt(r * r - atmosphere.bottom_radius * atmosphere.bottom_radius);
    let cos_beta = v_horizon / r;
    // Using fast_acos_4 for better precision at small angles
    // to avoid artifacts at the horizon
    let beta = fast_acos_4(cos_beta);
    let horizon_zenith = PI - beta;
    let view_zenith = fast_acos_4(mu);
    // Apply non-linear transformation to compress more texels
    // near the horizon where high-frequency details matter most
    // l is latitude in [-π/2, π/2] and v is texture coordinate in [0,1]
    let l = view_zenith - horizon_zenith;
    let abs_l = abs(l);
    let v = 0.5 + 0.5 * sign(l) * sqrt(abs_l / HALF_PI);
    return unit_to_sub_uvs(vec2(u, v), vec2<f32>(settings.sky_view_lut_size));
}

// Inverse of sky_view_lut_r_mu_azimuth_to_uv: recovers (zenith, azimuth).
fn sky_view_lut_uv_to_zenith_azimuth(r: f32, uv: vec2<f32>) -> vec2<f32> {
    let adj_uv = sub_uvs_to_unit(vec2(uv.x, 1.0 - uv.y), vec2<f32>(settings.sky_view_lut_size));
    let azimuth = (adj_uv.x - 0.5) * PI_2;
    // Horizon parameters
    let v_horizon = sqrt(r * r - atmosphere.bottom_radius * atmosphere.bottom_radius);
    let cos_beta = v_horizon / r;
    let beta = fast_acos_4(cos_beta);
    let horizon_zenith = PI - beta;
    // Inverse of horizon-detail mapping to recover original latitude from texture coordinate
    let t = abs(2.0 * (adj_uv.y - 0.5));
    let l = sign(adj_uv.y - 0.5) * HALF_PI * t * t;
    return vec2(horizon_zenith - l, azimuth);
}
// LUT SAMPLING

// Transmittance from a point at radius r along direction mu to the top of the
// atmosphere.
fn sample_transmittance_lut(r: f32, mu: f32) -> vec3<f32> {
    let uv = transmittance_lut_r_mu_to_uv(r, mu);
    return textureSampleLevel(transmittance_lut, transmittance_lut_sampler, uv, 0.0).rgb;
}

// NOTICE: This function is copyrighted by Eric Bruneton and INRIA, and falls
// under the license reproduced in bruneton_functions.wgsl (variant of MIT license)
//
// FIXME: this function should be in bruneton_functions.wgsl, but because naga_oil doesn't
// support cyclic imports it's stuck here
//
// Transmittance over the segment [0, t] of a ray (r, mu), computed as the
// ratio of two full-path LUT lookups. The ray is mirrored when it hits the
// ground so both lookups stay within the LUT's valid domain.
fn sample_transmittance_lut_segment(r: f32, mu: f32, t: f32) -> vec3<f32> {
    let r_t = get_local_r(r, mu, t);
    let mu_t = clamp((r * mu + t) / r_t, -1.0, 1.0);
    if ray_intersects_ground(r, mu) {
        return min(
            sample_transmittance_lut(r_t, -mu_t) / sample_transmittance_lut(r, -mu),
            vec3(1.0)
        );
    } else {
        return min(
            sample_transmittance_lut(r, mu) / sample_transmittance_lut(r_t, mu_t), vec3(1.0)
        );
    }
}

// Multiscattering contribution for a point at radius r with sun zenith
// cosine mu.
fn sample_multiscattering_lut(r: f32, mu: f32) -> vec3<f32> {
    let uv = multiscattering_lut_r_mu_to_uv(r, mu);
    return textureSampleLevel(multiscattering_lut, multiscattering_lut_sampler, uv, 0.0).rgb;
}

// Inscattered sky luminance for an atmosphere-space view ray at radius r.
fn sample_sky_view_lut(r: f32, ray_dir_as: vec3<f32>) -> vec3<f32> {
    let mu = ray_dir_as.y;
    let azimuth = fast_atan2(ray_dir_as.x, -ray_dir_as.z);
    let uv = sky_view_lut_r_mu_azimuth_to_uv(r, mu, azimuth);
    return textureSampleLevel(sky_view_lut, sky_view_lut_sampler, uv, 0.0).rgb;
}

// Distance in meters from the camera to the point at the given NDC position.
fn ndc_to_camera_dist(ndc: vec3<f32>) -> f32 {
    let view_pos = view.view_from_clip * vec4(ndc, 1.0);
    let t = length(view_pos.xyz / view_pos.w) * settings.scene_units_to_m;
    return t;
}

// RGB channels: total inscattered light along the camera ray to the current sample.
// A channel: average transmittance across all wavelengths to the current sample.
// NOTE(review): only the RGB inscattering is returned by this helper; the A
// channel described above is not read here.
fn sample_aerial_view_lut(uv: vec2<f32>, t: f32) -> vec3<f32> {
    let t_max = settings.aerial_view_lut_max_distance;
    let num_slices = f32(settings.aerial_view_lut_size.z);
    // Each texel stores the value of the scattering integral over the whole slice,
    // which requires us to offset the w coordinate by half a slice. For
    // example, if we wanted the value of the integral at the boundary between slices,
    // we'd need to sample at the center of the previous slice, and vice-versa for
    // sampling in the center of a slice.
    let uvw = vec3(uv, saturate(t / t_max - 0.5 / num_slices));
    let sample = textureSampleLevel(aerial_view_lut, aerial_view_lut_sampler, uvw, 0.0);
    // Since sampling anywhere between w=0 and w=t_slice will clamp to the first slice,
    // we need to do a linear step over the first slice towards zero at the camera's
    // position to recover the correct integral value.
    let t_slice = t_max / num_slices;
    let fade = saturate(t / t_slice);
    // Recover the values from log space
    return exp(sample.rgb) * fade;
}
// PHASE FUNCTIONS

// -(L . V) == (L . -V). -V here is our ray direction, which points away from the view
// instead of towards it (which would be the *view direction*, V)

// evaluates the rayleigh phase function, which describes the likelihood
// of a rayleigh scattering event scattering light from the light direction towards the view
fn rayleigh(neg_LdotV: f32) -> f32 {
    return FRAC_3_16_PI * (1 + (neg_LdotV * neg_LdotV));
}

// evaluates the henyey-greenstein phase function, which describes the likelihood
// of a mie scattering event scattering light from the light direction towards the view
fn henyey_greenstein(neg_LdotV: f32) -> f32 {
    // g is the asymmetry parameter, domain (-1, 1); positive g scatters
    // predominantly forward.
    let g = atmosphere.mie_asymmetry;
    let denom = 1.0 + g * g - 2.0 * g * neg_LdotV;
    return FRAC_4_PI * (1.0 - g * g) / (denom * sqrt(denom));
}

// ATMOSPHERE SAMPLING

struct AtmosphereSample {
    /// units: m^-1
    rayleigh_scattering: vec3<f32>,
    /// units: m^-1
    mie_scattering: f32,
    /// the sum of scattering and absorption. Since the phase function doesn't
    /// matter for this, we combine rayleigh and mie extinction to a single
    /// value.
    ///
    /// units: m^-1
    extinction: vec3<f32>
}

/// Samples atmosphere optical densities at a given radius
fn sample_atmosphere(r: f32) -> AtmosphereSample {
    // Clamp to the atmosphere shell so altitude is always in [0, top - bottom].
    let altitude = clamp(r, atmosphere.bottom_radius, atmosphere.top_radius) - atmosphere.bottom_radius;
    // atmosphere values at altitude
    // Rayleigh and mie densities fall off exponentially with altitude; ozone
    // density is a tent function centered on the ozone layer altitude.
    let mie_density = exp(-atmosphere.mie_density_exp_scale * altitude);
    let rayleigh_density = exp(-atmosphere.rayleigh_density_exp_scale * altitude);
    var ozone_density: f32 = max(0.0, 1.0 - (abs(altitude - atmosphere.ozone_layer_altitude) / (atmosphere.ozone_layer_width * 0.5)));
    let mie_scattering = mie_density * atmosphere.mie_scattering;
    let mie_absorption = mie_density * atmosphere.mie_absorption;
    let mie_extinction = mie_scattering + mie_absorption;
    let rayleigh_scattering = rayleigh_density * atmosphere.rayleigh_scattering;
    // no rayleigh absorption
    // rayleigh extinction is the sum of scattering and absorption
    // ozone doesn't contribute to scattering
    let ozone_absorption = ozone_density * atmosphere.ozone_absorption;
    var sample: AtmosphereSample;
    sample.rayleigh_scattering = rayleigh_scattering;
    sample.mie_scattering = mie_scattering;
    sample.extinction = rayleigh_scattering + mie_extinction + ozone_absorption;
    return sample;
}
/// evaluates L_scat, equation 3 in the paper, which gives the total single-order scattering towards the view at a single point
fn sample_local_inscattering(local_atmosphere: AtmosphereSample, ray_dir: vec3<f32>, world_pos: vec3<f32>) -> vec3<f32> {
    let local_r = length(world_pos);
    let local_up = normalize(world_pos);
    var inscattering = vec3(0.0);
    // Accumulate the contribution of every directional light.
    for (var light_i: u32 = 0u; light_i < lights.n_directional_lights; light_i++) {
        let light = &lights.directional_lights[light_i];
        let mu_light = dot((*light).direction_to_light, local_up);
        // -(L . V) == (L . -V). -V here is our ray direction, which points away from the view
        // instead of towards it (as is the convention for V)
        let neg_LdotV = dot((*light).direction_to_light, ray_dir);
        // Phase functions give the proportion of light
        // scattered towards the camera for each scattering type
        let rayleigh_phase = rayleigh(neg_LdotV);
        let mie_phase = henyey_greenstein(neg_LdotV);
        let scattering_coeff = local_atmosphere.rayleigh_scattering * rayleigh_phase + local_atmosphere.mie_scattering * mie_phase;
        let transmittance_to_light = sample_transmittance_lut(local_r, mu_light);
        // Zero out the direct term when the sun is below the local horizon.
        let shadow_factor = transmittance_to_light * f32(!ray_intersects_ground(local_r, mu_light));
        // Transmittance from scattering event to light source
        let scattering_factor = shadow_factor * scattering_coeff;
        // Additive factor from the multiscattering LUT
        let psi_ms = sample_multiscattering_lut(local_r, mu_light);
        let multiscattering_factor = psi_ms * (local_atmosphere.rayleigh_scattering + local_atmosphere.mie_scattering);
        inscattering += (*light).color.rgb * (scattering_factor + multiscattering_factor);
    }
    return inscattering;
}

// Radiance of the sun disk itself along a world-space view ray, with the disk
// edge antialiased via a screen-space-derivative (fwidth) smoothstep.
fn sample_sun_radiance(ray_dir_ws: vec3<f32>) -> vec3<f32> {
    let view_pos = get_view_position();
    let r = length(view_pos);
    let up = normalize(view_pos);
    let mu_view = dot(ray_dir_ws, up);
    // Hide the sun when the view ray hits the planet surface.
    let shadow_factor = f32(!ray_intersects_ground(r, mu_view));
    var sun_radiance = vec3(0.0);
    for (var light_i: u32 = 0u; light_i < lights.n_directional_lights; light_i++) {
        let light = &lights.directional_lights[light_i];
        let neg_LdotV = dot((*light).direction_to_light, ray_dir_ws);
        let angle_to_sun = fast_acos(clamp(neg_LdotV, -1.0, 1.0));
        // Roughly half a pixel's worth of angle, used to soften the disk edge.
        let w = max(0.5 * fwidth(angle_to_sun), 1e-6);
        let sun_angular_size = (*light).sun_disk_angular_size;
        let sun_intensity = (*light).sun_disk_intensity;
        if sun_angular_size > 0.0 && sun_intensity > 0.0 {
            let factor = 1 - smoothstep(sun_angular_size * 0.5 - w, sun_angular_size * 0.5 + w, angle_to_sun);
            // Small-disk solid angle: π * (angular_diameter / 2)^2.
            let sun_solid_angle = (sun_angular_size * sun_angular_size) * 0.25 * PI;
            sun_radiance += ((*light).color.rgb / sun_solid_angle) * sun_intensity * factor * shadow_factor;
        }
    }
    return sun_radiance;
}
// TRANSFORM UTILITIES

// Distance until the ray (r, mu) leaves the atmosphere: to the ground if the
// ray hits it, otherwise to the top atmosphere boundary.
fn max_atmosphere_distance(r: f32, mu: f32) -> f32 {
    let t_top = distance_to_top_atmosphere_boundary(r, mu);
    let t_bottom = distance_to_bottom_atmosphere_boundary(r, mu);
    let hits = ray_intersects_ground(r, mu);
    return mix(t_top, t_bottom, f32(hits));
}

/// Returns the observer's position in the atmosphere
/// (meters, planet-centered: the scene origin sits on the planet surface).
fn get_view_position() -> vec3<f32> {
    var world_pos = view.world_position * settings.scene_units_to_m + vec3(0.0, atmosphere.bottom_radius, 0.0);
    // If the camera is underground, clamp it to the ground surface along the local up.
    let r = length(world_pos);
    // Nudge r above ground to avoid sqrt cancellation, zero-length segments where
    // r is equal to bottom_radius, which show up as black pixels
    let min_radius = atmosphere.bottom_radius + EPSILON;
    if r < min_radius {
        let up = normalize(world_pos);
        world_pos = up * min_radius;
    }
    return world_pos;
}

// We assume the `up` vector at the view position is the y axis, since the world is locally flat/level.
// t = distance along view ray in atmosphere space
// NOTE: this means that if your world is actually spherical, this will be wrong.
fn get_local_up(r: f32, t: f32, ray_dir: vec3<f32>) -> vec3<f32> {
    return normalize(vec3(0.0, r, 0.0) + t * ray_dir);
}

// Given a ray starting at radius r, with mu = cos(zenith angle),
// and a t = distance along the ray, gives the new radius at point t
fn get_local_r(r: f32, mu: f32, t: f32) -> f32 {
    return sqrt(t * t + 2.0 * r * mu * t + r * r);
}

// Convert uv [0.0 .. 1.0] coordinate to ndc space xy [-1.0 .. 1.0]
fn uv_to_ndc(uv: vec2<f32>) -> vec2<f32> {
    return uv * vec2(2.0, -2.0) + vec2(-1.0, 1.0);
}

/// Convert ndc space xy coordinate [-1.0 .. 1.0] to uv [0.0 .. 1.0]
fn ndc_to_uv(ndc: vec2<f32>) -> vec2<f32> {
    return ndc * vec2(0.5, -0.5) + vec2(0.5);
}

/// Converts a direction in world space to atmosphere space
fn direction_world_to_atmosphere(dir_ws: vec3<f32>, up: vec3<f32>) -> vec3<f32> {
    // Camera forward in world space (-Z in view to world transform)
    let forward_ws = (view.world_from_view * vec4(0.0, 0.0, -1.0, 0.0)).xyz;
    // Build a horizontal basis from the camera forward and the local up.
    let tangent_z = normalize(up * dot(forward_ws, up) - forward_ws);
    let tangent_x = cross(up, tangent_z);
    return vec3(
        dot(dir_ws, tangent_x),
        dot(dir_ws, up),
        dot(dir_ws, tangent_z),
    );
}

/// Converts a direction in atmosphere space to world space
fn direction_atmosphere_to_world(dir_as: vec3<f32>) -> vec3<f32> {
    // w = 0.0: directions are unaffected by the transform's translation.
    let dir_ws = atmosphere_transforms.world_from_atmosphere * vec4(dir_as, 0.0);
    return dir_ws.xyz;
}

// Modified from skybox.wgsl. For this pass we don't need to apply a separate sky transform or consider camera viewport.
// Returns a normalized ray direction in world space.
fn uv_to_ray_direction(uv: vec2<f32>) -> vec3<f32> {
    // Using world positions of the fragment and camera to calculate a ray direction
    // breaks down at large translations. This code only needs to know the ray direction.
    // The ray direction is along the direction from the camera to the fragment position.
    // In view space, the camera is at the origin, so the view space ray direction is
    // along the direction of the fragment position - (0,0,0) which is just the
    // fragment position.
    // Use the position on the near clipping plane to avoid -inf world position
    // because the far plane of an infinite reverse projection is at infinity.
    let view_position_homogeneous = view.view_from_clip * vec4(
        uv_to_ndc(uv),
        1.0,
        1.0,
    );
    let view_ray_direction = view_position_homogeneous.xyz / view_position_homogeneous.w;
    // Transforming the view space ray direction by the inverse view matrix, transforms the
    // direction to world space. Note that the w element is set to 0.0, as this is a
    // vector direction, not a position, That causes the matrix multiplication to ignore
    // the translations from the view matrix.
    let ray_direction = (view.world_from_view * vec4(view_ray_direction, 0.0)).xyz;
    return normalize(ray_direction);
}

// Converts (zenith, azimuth) spherical angles to a unit direction in
// atmosphere space (y = cos(zenith)).
fn zenith_azimuth_to_ray_dir(zenith: f32, azimuth: f32) -> vec3<f32> {
    let sin_zenith = sin(zenith);
    let mu = cos(zenith);
    let sin_azimuth = sin(azimuth);
    let cos_azimuth = cos(azimuth);
    return vec3(sin_azimuth * sin_zenith, mu, -cos_azimuth * sin_zenith);
}
// Entry/exit distances of a ray through the atmosphere, in meters along the ray.
struct RaymarchSegment {
    start: f32,
    end: f32,
}

// Computes the raymarching interval for a ray (r, mu), handling the three
// cases: observer below the surface, inside the atmosphere, or in space.
fn get_raymarch_segment(r: f32, mu: f32) -> RaymarchSegment {
    // Get both intersection points with atmosphere
    let atmosphere_intersections = ray_sphere_intersect(r, mu, atmosphere.top_radius);
    let ground_intersections = ray_sphere_intersect(r, mu, atmosphere.bottom_radius);
    // Zero-initialized by default; returned as-is when the ray misses the atmosphere.
    var segment: RaymarchSegment;
    if r < atmosphere.bottom_radius {
        // Inside planet - start from bottom of atmosphere
        segment.start = ground_intersections.y; // Use second intersection point with ground
        segment.end = atmosphere_intersections.y;
    } else if r < atmosphere.top_radius {
        // Inside atmosphere
        segment.start = 0.0;
        segment.end = select(atmosphere_intersections.y, ground_intersections.x, ray_intersects_ground(r, mu));
    } else {
        // Outside atmosphere
        if atmosphere_intersections.x < 0.0 {
            // No intersection with atmosphere
            return segment;
        }
        // Start at atmosphere entry, end at exit or ground
        segment.start = atmosphere_intersections.x;
        segment.end = select(atmosphere_intersections.y, ground_intersections.x, ray_intersects_ground(r, mu));
    }
    return segment;
}
// Result of a raymarch: inscattered luminance and total transmittance along the ray.
struct RaymarchResult {
    inscattering: vec3<f32>,
    transmittance: vec3<f32>,
}

// Numerically integrates single scattering (plus the multiscattering LUT term)
// along a ray through the atmosphere.
//
// pos: observer position (meters, planet-centered — see get_view_position)
// ray_dir: normalized ray direction
// t_max: maximum march distance, e.g. distance to scene geometry
// max_samples: upper bound on the number of raymarch samples
// uv: NOTE(review) — unused in this function body; presumably kept for
//     signature stability with callers — confirm.
// ground: whether to add sunlight reflected off the planet surface
fn raymarch_atmosphere(
    pos: vec3<f32>,
    ray_dir: vec3<f32>,
    t_max: f32,
    max_samples: u32,
    uv: vec2<f32>,
    ground: bool
) -> RaymarchResult {
    let r = length(pos);
    let up = normalize(pos);
    let mu = dot(ray_dir, up);
    // Optimization: Reduce sample count at close proximity to the scene
    let sample_count = mix(1.0, f32(max_samples), saturate(t_max * 0.01));
    let segment = get_raymarch_segment(r, mu);
    let t_start = segment.start;
    var t_end = segment.end;
    t_end = min(t_end, t_max);
    let t_total = t_end - t_start;
    var result: RaymarchResult;
    result.inscattering = vec3(0.0);
    result.transmittance = vec3(1.0);
    // Skip if invalid segment
    if t_total <= 0.0 {
        return result;
    }
    var prev_t = t_start;
    var optical_depth = vec3(0.0);
    for (var s = 0.0; s < sample_count; s += 1.0) {
        // Linear distribution from atmosphere entry to exit/ground
        let t_i = t_start + t_total * (s + MIDPOINT_RATIO) / sample_count;
        let dt_i = (t_i - prev_t);
        prev_t = t_i;
        let sample_pos = pos + ray_dir * t_i;
        let local_r = length(sample_pos);
        let local_up = normalize(sample_pos);
        let local_atmosphere = sample_atmosphere(local_r);
        let sample_optical_depth = local_atmosphere.extinction * dt_i;
        optical_depth += sample_optical_depth;
        let sample_transmittance = exp(-sample_optical_depth);
        let inscattering = sample_local_inscattering(
            local_atmosphere,
            ray_dir,
            sample_pos
        );
        // Closed-form integral of inscattering over the segment assuming
        // constant properties within it.
        // NOTE(review): divides by extinction; presumably extinction never
        // reaches exactly zero inside the atmosphere — confirm.
        let s_int = (inscattering - inscattering * sample_transmittance) / local_atmosphere.extinction;
        result.inscattering += result.transmittance * s_int;
        result.transmittance *= sample_transmittance;
        // Early out once the ray is effectively opaque.
        if all(result.transmittance < vec3(0.001)) {
            break;
        }
    }
    // include reflected luminance from planet ground
    if ground && ray_intersects_ground(r, mu) {
        for (var light_i: u32 = 0u; light_i < lights.n_directional_lights; light_i++) {
            let light = &lights.directional_lights[light_i];
            let light_dir = (*light).direction_to_light;
            let light_color = (*light).color.rgb;
            let transmittance_to_ground = exp(-optical_depth);
            // position on the sphere and get the sphere normal (up)
            let sphere_point = pos + ray_dir * t_end;
            let sphere_normal = normalize(sphere_point);
            let mu_light = dot(light_dir, sphere_normal);
            // NOTE(review): r = 0.0 is below the transmittance LUT's
            // [bottom_radius, top_radius] domain; presumably the uv
            // parameterization clamps — confirm intended behavior.
            let transmittance_to_light = sample_transmittance_lut(0.0, mu_light);
            let light_luminance = transmittance_to_light * max(mu_light, 0.0) * light_color;
            // Normalized Lambert BRDF
            let ground_luminance = transmittance_to_ground * atmosphere.ground_albedo / PI;
            result.inscattering += ground_luminance * light_luminance;
        }
    }
    return result;
}

View File

@@ -0,0 +1,510 @@
//! Procedural Atmospheric Scattering.
//!
//! This plugin implements [Hillaire's 2020 paper](https://sebh.github.io/publications/egsr2020.pdf)
//! on real-time atmospheric scattering. While it *will* work simply as a
//! procedural skybox, it also does much more. It supports dynamic time-of-
//! day, multiple directional lights, and since it's applied as a post-processing
//! effect *on top* of the existing skybox, a starry skybox would automatically
//! show based on the time of day. Scattering in front of terrain (similar
//! to distance fog, but more complex) is handled as well, and takes into
//! account the directional light color and direction.
//!
//! Adding the [`Atmosphere`] component to a 3d camera will enable the effect,
//! which by default is set to look similar to Earth's atmosphere. See the
//! documentation on the component itself for information regarding its fields.
//!
//! Performance-wise, the effect should be fairly cheap since the LUTs (Look
//! Up Tables) that encode most of the data are small, and take advantage of the
//! fact that the atmosphere is symmetric. Performance is also proportional to
//! the number of directional lights in the scene. In order to tune
//! performance more finely, the [`AtmosphereSettings`] camera component
//! manages the size of each LUT and the sample count for each ray.
//!
//! Given how similar it is to [`crate::volumetric_fog`], it might be expected
//! that these two modules would work together well. However for now using both
//! at once is untested, and might not be physically accurate. These may be
//! integrated into a single module in the future.
//!
//! On web platforms, atmosphere rendering will look slightly different. Specifically, when calculating how light travels
//! through the atmosphere, we use a simpler averaging technique instead of the more
//! complex blending operations. This difference will be resolved for WebGPU in a future release.
//!
//! [Shadertoy]: https://www.shadertoy.com/view/slSXRW
//!
//! [Unreal Engine Implementation]: https://github.com/sebh/UnrealEngineSkyAtmosphere
mod environment;
mod node;
pub mod resources;
use bevy_app::{App, Plugin, Update};
use bevy_asset::embedded_asset;
use bevy_camera::Camera3d;
use crate::render::core_3d::graph::Node3d;
use bevy_ecs::{
component::Component,
query::{Changed, QueryItem, With},
schedule::IntoScheduleConfigs,
system::{lifetimeless::Read, Query},
};
use bevy_math::{UVec2, UVec3, Vec3};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use crate::render::{
extract_component::UniformComponentPlugin,
render_resource::{DownlevelFlags, ShaderType, SpecializedRenderPipelines},
view::Hdr,
RenderStartup,
};
use crate::render::{
extract_component::{ExtractComponent, ExtractComponentPlugin},
render_graph::{RenderGraphExt, ViewNodeRunner},
render_resource::{TextureFormat, TextureUsages},
renderer::RenderAdapter,
Render, RenderApp, RenderSystems,
};
use crate::render::core_3d::graph::Core3d;
use bevy_shader::load_shader_library;
use environment::{
init_atmosphere_probe_layout, init_atmosphere_probe_pipeline,
prepare_atmosphere_probe_bind_groups, prepare_atmosphere_probe_components,
prepare_probe_textures, AtmosphereEnvironmentMap, EnvironmentNode,
};
use resources::{
prepare_atmosphere_transforms, queue_render_sky_pipelines, AtmosphereTransforms,
RenderSkyBindGroupLayouts,
};
use tracing::warn;
use self::{
node::{AtmosphereLutsNode, AtmosphereNode, RenderSkyNode},
resources::{
prepare_atmosphere_bind_groups, prepare_atmosphere_textures, AtmosphereBindGroupLayouts,
AtmosphereLutPipelines, AtmosphereSamplers,
},
};
#[doc(hidden)]
pub struct AtmospherePlugin;

impl Plugin for AtmospherePlugin {
    /// Registers the atmosphere shaders and the extraction/uniform plugins for
    /// the atmosphere components, plus the main-world probe preparation system.
    fn build(&self, app: &mut App) {
        // Shared WGSL libraries imported by the per-pass shaders below.
        load_shader_library!(app, "types.wgsl");
        load_shader_library!(app, "functions.wgsl");
        load_shader_library!(app, "bruneton_functions.wgsl");
        load_shader_library!(app, "bindings.wgsl");
        // One embedded shader per LUT/pass.
        embedded_asset!(app, "transmittance_lut.wgsl");
        embedded_asset!(app, "multiscattering_lut.wgsl");
        embedded_asset!(app, "sky_view_lut.wgsl");
        embedded_asset!(app, "aerial_view_lut.wgsl");
        embedded_asset!(app, "render_sky.wgsl");
        embedded_asset!(app, "environment.wgsl");
        app.add_plugins((
            ExtractComponentPlugin::<Atmosphere>::default(),
            ExtractComponentPlugin::<GpuAtmosphereSettings>::default(),
            ExtractComponentPlugin::<AtmosphereEnvironmentMap>::default(),
            UniformComponentPlugin::<Atmosphere>::default(),
            UniformComponentPlugin::<GpuAtmosphereSettings>::default(),
        ))
        .add_systems(Update, prepare_atmosphere_probe_components);
    }

    /// Wires up the render-world resources, systems, and render-graph nodes.
    /// Runs in `finish` because it needs the `RenderAdapter` to probe GPU
    /// capabilities; bails out (with a warning) on unsupported hardware.
    fn finish(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        let render_adapter = render_app.world().resource::<RenderAdapter>();
        // The LUTs are generated with compute shaders.
        if !render_adapter
            .get_downlevel_capabilities()
            .flags
            .contains(DownlevelFlags::COMPUTE_SHADERS)
        {
            warn!("AtmospherePlugin not loaded. GPU lacks support for compute shaders.");
            return;
        }
        // The LUT/environment passes write Rgba16Float storage textures.
        if !render_adapter
            .get_texture_format_features(TextureFormat::Rgba16Float)
            .allowed_usages
            .contains(TextureUsages::STORAGE_BINDING)
        {
            warn!("AtmospherePlugin not loaded. GPU lacks support: TextureFormat::Rgba16Float does not support TextureUsages::STORAGE_BINDING.");
            return;
        }
        render_app
            .init_resource::<AtmosphereBindGroupLayouts>()
            .init_resource::<RenderSkyBindGroupLayouts>()
            .init_resource::<AtmosphereSamplers>()
            .init_resource::<AtmosphereLutPipelines>()
            .init_resource::<AtmosphereTransforms>()
            .init_resource::<SpecializedRenderPipelines<RenderSkyBindGroupLayouts>>()
            .add_systems(
                RenderStartup,
                // Layout must exist before the pipeline that uses it.
                (init_atmosphere_probe_layout, init_atmosphere_probe_pipeline).chain(),
            )
            .add_systems(
                Render,
                (
                    configure_camera_depth_usages.in_set(RenderSystems::ManageViews),
                    queue_render_sky_pipelines.in_set(RenderSystems::Queue),
                    prepare_atmosphere_textures.in_set(RenderSystems::PrepareResources),
                    prepare_probe_textures
                        .in_set(RenderSystems::PrepareResources)
                        .after(prepare_atmosphere_textures),
                    prepare_atmosphere_probe_bind_groups.in_set(RenderSystems::PrepareBindGroups),
                    prepare_atmosphere_transforms.in_set(RenderSystems::PrepareResources),
                    prepare_atmosphere_bind_groups.in_set(RenderSystems::PrepareBindGroups),
                ),
            )
            .add_render_graph_node::<ViewNodeRunner<AtmosphereLutsNode>>(
                Core3d,
                AtmosphereNode::RenderLuts,
            )
            .add_render_graph_edges(
                Core3d,
                (
                    // END_PRE_PASSES -> RENDER_LUTS -> MAIN_PASS
                    Node3d::EndPrepasses,
                    AtmosphereNode::RenderLuts,
                    Node3d::StartMainPass,
                ),
            )
            .add_render_graph_node::<ViewNodeRunner<RenderSkyNode>>(
                Core3d,
                AtmosphereNode::RenderSky,
            )
            // NOTE(review): the Environment node is added without explicit
            // graph edges here — presumably ordering is implied elsewhere;
            // confirm it runs after the LUTs are rendered.
            .add_render_graph_node::<EnvironmentNode>(Core3d, AtmosphereNode::Environment)
            .add_render_graph_edges(
                Core3d,
                (
                    Node3d::MainOpaquePass,
                    AtmosphereNode::RenderSky,
                    Node3d::MainTransparentPass,
                ),
            );
    }
}
/// This component describes the atmosphere of a planet, and when added to a camera
/// will enable atmospheric scattering for that camera. This is only compatible with
/// HDR cameras.
///
/// Most atmospheric particles scatter and absorb light in two main ways:
///
/// Rayleigh scattering occurs among very small particles, like individual gas
/// molecules. It's wavelength dependent, and causes colors to separate out as
/// light travels through the atmosphere. These particles *don't* absorb light.
///
/// Mie scattering occurs among slightly larger particles, like dust and sea spray.
/// These particles *do* absorb light, but Mie scattering and absorption is
/// *wavelength independent*.
///
/// Ozone acts differently from the other two, and is special-cased because
/// it's very important to the look of Earth's atmosphere. It's wavelength
/// dependent, but only *absorbs* light. Also, while the density of particles
/// participating in Rayleigh and Mie scattering falls off roughly exponentially
/// from the planet's surface, ozone only exists in a band centered at a fairly
/// high altitude.
#[derive(Clone, Component, Reflect, ShaderType)]
// `ShaderType`: this struct is uploaded to the GPU as a uniform, so its field
// layout presumably must match the WGSL `Atmosphere` definition in
// `types.wgsl` — confirm when editing fields.
#[require(AtmosphereSettings, Hdr)]
#[reflect(Clone, Default)]
pub struct Atmosphere {
    /// Radius of the planet
    ///
    /// units: m
    pub bottom_radius: f32,
    /// Radius at which we consider the atmosphere to 'end' for our
    /// calculations (from center of planet)
    ///
    /// units: m
    pub top_radius: f32,
    /// An approximation of the average albedo (or color, roughly) of the
    /// planet's surface. This is used when calculating multiscattering.
    ///
    /// units: N/A
    pub ground_albedo: Vec3,
    /// The rate of falloff of rayleigh particulate with respect to altitude:
    /// optical density = exp(-rayleigh_density_exp_scale * altitude in meters).
    ///
    /// THIS VALUE MUST BE POSITIVE
    ///
    /// units: N/A
    pub rayleigh_density_exp_scale: f32,
    /// The scattering optical density of rayleigh particulate, or how
    /// much light it scatters per meter
    ///
    /// units: m^-1
    pub rayleigh_scattering: Vec3,
    /// The rate of falloff of mie particulate with respect to altitude:
    /// optical density = exp(-mie_density_exp_scale * altitude in meters)
    ///
    /// THIS VALUE MUST BE POSITIVE
    ///
    /// units: N/A
    pub mie_density_exp_scale: f32,
    /// The scattering optical density of mie particulate, or how much light
    /// it scatters per meter.
    ///
    /// units: m^-1
    pub mie_scattering: f32,
    /// The absorbing optical density of mie particulate, or how much light
    /// it absorbs per meter.
    ///
    /// units: m^-1
    pub mie_absorption: f32,
    /// The "asymmetry" of mie scattering, or how much light tends to scatter
    /// forwards, rather than backwards or to the side.
    ///
    /// domain: (-1, 1)
    /// units: N/A
    pub mie_asymmetry: f32, //the "asymmetry" value of the phase function, unitless. Domain: (-1, 1)
    /// The altitude at which the ozone layer is centered.
    ///
    /// units: m
    pub ozone_layer_altitude: f32,
    /// The width of the ozone layer
    ///
    /// units: m
    pub ozone_layer_width: f32,
    /// The optical density of ozone, or how much of each wavelength of
    /// light it absorbs per meter.
    ///
    /// units: m^-1
    pub ozone_absorption: Vec3,
}
impl Atmosphere {
    /// Parameters approximating Earth's atmosphere: a 6360 km planet with a
    /// 100 km atmosphere shell, standard Rayleigh/Mie falloffs, and an ozone
    /// band centered at 25 km.
    pub const EARTH: Atmosphere = Atmosphere {
        bottom_radius: 6_360_000.0,
        top_radius: 6_460_000.0,
        ground_albedo: Vec3::splat(0.3),
        rayleigh_density_exp_scale: 1.0 / 8_000.0,
        rayleigh_scattering: Vec3::new(5.802e-6, 13.558e-6, 33.100e-6),
        mie_density_exp_scale: 1.0 / 1_200.0,
        mie_scattering: 3.996e-6,
        mie_absorption: 0.444e-6,
        mie_asymmetry: 0.8,
        ozone_layer_altitude: 25_000.0,
        ozone_layer_width: 30_000.0,
        ozone_absorption: Vec3::new(0.650e-6, 1.881e-6, 0.085e-6),
    };

    /// Returns this atmosphere with every scattering and absorption density
    /// (Rayleigh, Mie, and ozone) scaled by `mult`; all other parameters are
    /// left untouched.
    pub fn with_density_multiplier(self, mult: f32) -> Self {
        Self {
            rayleigh_scattering: self.rayleigh_scattering * mult,
            mie_scattering: self.mie_scattering * mult,
            mie_absorption: self.mie_absorption * mult,
            ozone_absorption: self.ozone_absorption * mult,
            ..self
        }
    }
}
impl Default for Atmosphere {
fn default() -> Self {
Self::EARTH
}
}
impl ExtractComponent for Atmosphere {
    type QueryData = Read<Atmosphere>;
    // Only 3d cameras render an atmosphere.
    type QueryFilter = With<Camera3d>;
    type Out = Atmosphere;
    /// Copies the `Atmosphere` component from the main world into the
    /// render world unchanged.
    fn extract_component(item: QueryItem<'_, '_, Self::QueryData>) -> Option<Self::Out> {
        Some(item.clone())
    }
}
/// This component controls the resolution of the atmosphere LUTs, and
/// how many samples are used when computing them.
///
/// The transmittance LUT stores the transmittance from a point in the
/// atmosphere to the outer edge of the atmosphere in any direction,
/// parametrized by the point's radius and the cosine of the zenith angle
/// of the ray.
///
/// The multiscattering LUT stores the factor representing luminance scattered
/// towards the camera with scattering order >2, parametrized by the point's radius
/// and the cosine of the zenith angle of the sun.
///
/// The sky-view lut is essentially the actual skybox, storing the light scattered
/// towards the camera in every direction with a cubemap.
///
/// The aerial-view lut is a 3d LUT fit to the view frustum, which stores the luminance
/// scattered towards the camera at each point (RGB channels), alongside the average
/// transmittance to that point (A channel).
#[derive(Clone, Component, Reflect)]
#[reflect(Clone, Default)]
pub struct AtmosphereSettings {
    /// The size of the transmittance LUT
    pub transmittance_lut_size: UVec2,
    /// The size of the multiscattering LUT
    pub multiscattering_lut_size: UVec2,
    /// The size of the sky-view LUT.
    pub sky_view_lut_size: UVec2,
    /// The size of the aerial-view LUT.
    pub aerial_view_lut_size: UVec3,
    /// The number of points to sample along each ray when
    /// computing the transmittance LUT
    pub transmittance_lut_samples: u32,
    /// The number of rays to sample when computing each
    /// pixel of the multiscattering LUT
    pub multiscattering_lut_dirs: u32,
    /// The number of points to sample when integrating along each
    /// multiscattering ray
    pub multiscattering_lut_samples: u32,
    /// The number of points to sample along each ray when
    /// computing the sky-view LUT.
    pub sky_view_lut_samples: u32,
    /// The number of points to sample for each slice along the z-axis
    /// of the aerial-view LUT.
    pub aerial_view_lut_samples: u32,
    /// The maximum distance from the camera to evaluate the
    /// aerial view LUT. The slices along the z-axis of the
    /// texture will be distributed linearly from the camera
    /// to this value.
    ///
    /// units: m
    pub aerial_view_lut_max_distance: f32,
    /// A conversion factor between scene units and meters, used to
    /// ensure correctness at different length scales.
    pub scene_units_to_m: f32,
    /// The number of points to sample for each fragment when using
    /// ray marching to render the sky
    pub sky_max_samples: u32,
    /// The rendering method to use for the atmosphere.
    pub rendering_method: AtmosphereMode,
}
impl Default for AtmosphereSettings {
    /// Default LUT sizes and sample counts. Fields are listed in the same
    /// order as the struct declaration.
    fn default() -> Self {
        Self {
            transmittance_lut_size: UVec2::new(256, 128),
            multiscattering_lut_size: UVec2::new(32, 32),
            sky_view_lut_size: UVec2::new(400, 200),
            aerial_view_lut_size: UVec3::new(32, 32, 32),
            transmittance_lut_samples: 40,
            multiscattering_lut_dirs: 64,
            multiscattering_lut_samples: 20,
            sky_view_lut_samples: 16,
            aerial_view_lut_samples: 10,
            aerial_view_lut_max_distance: 3.2e4,
            scene_units_to_m: 1.0,
            sky_max_samples: 16,
            rendering_method: AtmosphereMode::LookupTexture,
        }
    }
}
/// GPU-side mirror of [`AtmosphereSettings`], with `rendering_method`
/// lowered to a `u32` so shaders can read it.
#[derive(Clone, Component, Reflect, ShaderType)]
#[reflect(Default)]
pub struct GpuAtmosphereSettings {
    pub transmittance_lut_size: UVec2,
    pub multiscattering_lut_size: UVec2,
    pub sky_view_lut_size: UVec2,
    pub aerial_view_lut_size: UVec3,
    pub transmittance_lut_samples: u32,
    pub multiscattering_lut_dirs: u32,
    pub multiscattering_lut_samples: u32,
    pub sky_view_lut_samples: u32,
    pub aerial_view_lut_samples: u32,
    pub aerial_view_lut_max_distance: f32,
    pub scene_units_to_m: f32,
    pub sky_max_samples: u32,
    // `AtmosphereMode` discriminant widened to u32.
    pub rendering_method: u32,
}
impl Default for GpuAtmosphereSettings {
    /// Mirrors [`AtmosphereSettings::default`] via the `From` conversion.
    fn default() -> Self {
        AtmosphereSettings::default().into()
    }
}
impl From<AtmosphereSettings> for GpuAtmosphereSettings {
    /// Field-for-field conversion; the only non-trivial mapping is
    /// `rendering_method`, whose enum discriminant is cast to `u32`.
    fn from(s: AtmosphereSettings) -> Self {
        Self {
            transmittance_lut_size: s.transmittance_lut_size,
            multiscattering_lut_size: s.multiscattering_lut_size,
            sky_view_lut_size: s.sky_view_lut_size,
            aerial_view_lut_size: s.aerial_view_lut_size,
            transmittance_lut_samples: s.transmittance_lut_samples,
            multiscattering_lut_dirs: s.multiscattering_lut_dirs,
            multiscattering_lut_samples: s.multiscattering_lut_samples,
            sky_view_lut_samples: s.sky_view_lut_samples,
            aerial_view_lut_samples: s.aerial_view_lut_samples,
            aerial_view_lut_max_distance: s.aerial_view_lut_max_distance,
            scene_units_to_m: s.scene_units_to_m,
            sky_max_samples: s.sky_max_samples,
            rendering_method: s.rendering_method as u32,
        }
    }
}
impl ExtractComponent for GpuAtmosphereSettings {
    type QueryData = Read<AtmosphereSettings>;
    // Only extract settings for views that actually render an atmosphere.
    type QueryFilter = (With<Camera3d>, With<Atmosphere>);
    type Out = GpuAtmosphereSettings;
    /// Converts the main-world `AtmosphereSettings` into its GPU layout
    /// while extracting into the render world.
    fn extract_component(item: QueryItem<'_, '_, Self::QueryData>) -> Option<Self::Out> {
        Some(item.clone().into())
    }
}
/// Ensures the depth texture of any camera with an [`Atmosphere`] carries
/// `TEXTURE_BINDING` usage, since the render-sky pass samples scene depth
/// (binding 13 of the render-sky bind group).
fn configure_camera_depth_usages(
    mut cameras: Query<&mut Camera3d, (Changed<Camera3d>, With<Atmosphere>)>,
) {
    for mut camera in &mut cameras {
        camera.depth_texture_usages.0 |= TextureUsages::TEXTURE_BINDING.bits();
    }
}
/// Selects how the atmosphere is rendered. Choose based on scene scale and
/// volumetric shadow quality, and based on performance needs.
// repr(u32) keeps the discriminants stable so `as u32` matches the
// shader-side comparison against `settings.rendering_method`.
#[repr(u32)]
#[derive(Clone, Default, Reflect, Copy)]
pub enum AtmosphereMode {
    /// High-performance solution tailored to scenes that are mostly inside of the atmosphere.
    /// Uses a set of lookup textures to approximate scattering integration.
    /// Slightly less accurate for very long-distance/space views (lighting precision
    /// tapers as the camera moves far from the scene origin) and for sharp volumetric
    /// (cloud/fog) shadows.
    #[default]
    LookupTexture = 0,
    /// Slower, more accurate rendering method for any type of scene.
    /// Integrates the scattering numerically with raymarching and produces sharp volumetric
    /// (cloud/fog) shadows.
    /// Best for cinematic shots, planets seen from orbit, and scenes requiring
    /// accurate long-distance lighting.
    Raymarched = 1,
}

View File

@@ -0,0 +1,139 @@
#import bevy_pbr::{
mesh_view_types::{Lights, DirectionalLight},
atmosphere::{
types::{Atmosphere, AtmosphereSettings},
bindings::{atmosphere, settings},
functions::{
multiscattering_lut_uv_to_r_mu, sample_transmittance_lut,
get_local_r, get_local_up, sample_atmosphere, FRAC_4_PI,
max_atmosphere_distance, rayleigh, henyey_greenstein,
zenith_azimuth_to_ray_dir,
},
bruneton_functions::{
distance_to_top_atmosphere_boundary, distance_to_bottom_atmosphere_boundary, ray_intersects_ground
}
}
}
#import bevy_render::maths::{PI,PI_2}
// Constants for the quasi-random (low-discrepancy) direction sequence —
// presumably powers of the plastic number (R2 sequence); verify against the
// reference implementation.
const PHI_2: vec2<f32> = vec2(1.3247179572447460259609088, 1.7548776662466927600495087);
// Output LUT written by this pass.
@group(0) @binding(13) var multiscattering_lut_out: texture_storage_2d<rgba16float, write>;
// n-th point of a 2D low-discrepancy sequence in [0,1)^2.
fn s2_sequence(n: u32) -> vec2<f32> {
    return fract(0.5 + f32(n) * PHI_2);
}
// Lambert equal-area projection.
// Maps uv in [0,1]^2 to a direction on the unit sphere.
fn uv_to_sphere(uv: vec2<f32>) -> vec3<f32> {
    let phi = PI_2 * uv.y;
    let sin_lambda = 2 * uv.x - 1;
    let cos_lambda = sqrt(1 - sin_lambda * sin_lambda);
    return vec3(cos_lambda * cos(phi), cos_lambda * sin(phi), sin_lambda);
}
// Shared memory arrays for workgroup communication:
// one slot per ray direction, reduced to a single value in `main`.
var<workgroup> multi_scat_shared_mem: array<vec3<f32>, 64>;
var<workgroup> l_shared_mem: array<vec3<f32>, 64>;
@compute
@workgroup_size(1, 1, 64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    // One workgroup per LUT texel: xy addresses the texel, while each of
    // the 64 z-invocations integrates one quasi-random ray direction.
    var uv = (vec2<f32>(global_id.xy) + 0.5) / vec2<f32>(settings.multiscattering_lut_size);
    let r_mu = multiscattering_lut_uv_to_r_mu(uv);
    let light_dir = normalize(vec3(0.0, r_mu.y, -1.0));
    let ray_dir = uv_to_sphere(s2_sequence(global_id.z));
    let ms_sample = sample_multiscattering_dir(r_mu.x, ray_dir, light_dir);
    // Calculate the contribution for this sample (uniform weight over the
    // full sphere, split across the 64 directions).
    let sphere_solid_angle = 4.0 * PI;
    let sample_weight = sphere_solid_angle / 64.0;
    multi_scat_shared_mem[global_id.z] = ms_sample.f_ms * sample_weight;
    l_shared_mem[global_id.z] = ms_sample.l_2 * sample_weight;
    workgroupBarrier();
    // Parallel reduction bitshift to the right to divide by 2 each step
    for (var step = 32u; step > 0u; step >>= 1u) {
        if global_id.z < step {
            multi_scat_shared_mem[global_id.z] += multi_scat_shared_mem[global_id.z + step];
            l_shared_mem[global_id.z] += l_shared_mem[global_id.z + step];
        }
        workgroupBarrier();
    }
    // Only the first invocation writes the reduced result.
    if global_id.z > 0u {
        return;
    }
    // Apply isotropic phase function
    let f_ms = multi_scat_shared_mem[0] * FRAC_4_PI;
    let l_2 = l_shared_mem[0] * FRAC_4_PI;
    // Equation 10 from the paper: Geometric series for infinite scattering
    let psi_ms = l_2 / (1.0 - f_ms);
    textureStore(multiscattering_lut_out, global_id.xy, vec4<f32>(psi_ms, 1.0));
}
// Result of integrating one ray: `l_2` is the second-order scattered
// luminance, `f_ms` the multiple-scattering transfer factor.
struct MultiscatteringSample {
    l_2: vec3<f32>,
    f_ms: vec3<f32>,
};
// Integrates second-order scattering and the multiple-scattering transfer
// factor along a single ray starting at radius `r` in direction `ray_dir`,
// lit from `light_dir`.
fn sample_multiscattering_dir(r: f32, ray_dir: vec3<f32>, light_dir: vec3<f32>) -> MultiscatteringSample {
    // get the cosine of the zenith angle of the view direction with respect to the light direction
    let mu_view = ray_dir.y;
    let t_max = max_atmosphere_distance(r, mu_view);
    let dt = t_max / f32(settings.multiscattering_lut_samples);
    var optical_depth = vec3<f32>(0.0);
    var l_2 = vec3(0.0);
    var f_ms = vec3(0.0);
    var throughput = vec3(1.0);
    for (var i: u32 = 0u; i < settings.multiscattering_lut_samples; i++) {
        // Sample at the midpoint of each segment.
        let t_i = dt * (f32(i) + 0.5);
        let local_r = get_local_r(r, mu_view, t_i);
        let local_up = get_local_up(r, t_i, ray_dir);
        let local_atmosphere = sample_atmosphere(local_r);
        let sample_optical_depth = local_atmosphere.extinction * dt;
        let sample_transmittance = exp(-sample_optical_depth);
        optical_depth += sample_optical_depth;
        let mu_light = dot(light_dir, local_up);
        let scattering_no_phase = local_atmosphere.rayleigh_scattering + local_atmosphere.mie_scattering;
        let ms = scattering_no_phase;
        // Analytic integral of scattering over the segment, assuming
        // constant properties within it.
        let ms_int = (ms - ms * sample_transmittance) / local_atmosphere.extinction;
        f_ms += throughput * ms_int;
        let transmittance_to_light = sample_transmittance_lut(local_r, mu_light);
        // Zero the in-scattered light where the planet occludes the sun.
        let shadow_factor = transmittance_to_light * f32(!ray_intersects_ground(local_r, mu_light));
        let s = scattering_no_phase * shadow_factor * FRAC_4_PI;
        let s_int = (s - s * sample_transmittance) / local_atmosphere.extinction;
        l_2 += throughput * s_int;
        throughput *= sample_transmittance;
        // Early-out once the ray is effectively opaque.
        if all(throughput < vec3(0.001)) {
            break;
        }
    }
    //include reflected luminance from planet ground
    if ray_intersects_ground(r, mu_view) {
        let transmittance_to_ground = exp(-optical_depth);
        let local_up = get_local_up(r, t_max, ray_dir);
        let mu_light = dot(light_dir, local_up);
        let transmittance_to_light = sample_transmittance_lut(0.0, mu_light);
        // Lambertian ground bounce, attenuated both down to the ground and
        // back along the ray.
        let ground_luminance = transmittance_to_light * transmittance_to_ground * max(mu_light, 0.0) * atmosphere.ground_albedo;
        l_2 += ground_luminance;
    }
    return MultiscatteringSample(l_2, f_ms);
}

View File

@@ -0,0 +1,233 @@
use bevy_ecs::{query::QueryItem, system::lifetimeless::Read, world::World};
use bevy_math::{UVec2, Vec3Swizzles};
use crate::render::{
diagnostic::RecordDiagnostics,
extract_component::DynamicUniformIndex,
render_graph::{NodeRunError, RenderGraphContext, RenderLabel, ViewNode},
render_resource::{ComputePass, ComputePassDescriptor, PipelineCache, RenderPassDescriptor},
renderer::RenderContext,
view::{ViewTarget, ViewUniformOffset},
};
use crate::render::pbr::ViewLightsUniformOffset;
use super::{
resources::{
AtmosphereBindGroups, AtmosphereLutPipelines, AtmosphereTransformsOffset,
RenderSkyPipelineId,
},
Atmosphere, GpuAtmosphereSettings,
};
/// Labels for the atmosphere render graph nodes.
#[derive(PartialEq, Eq, Debug, Copy, Clone, Hash, RenderLabel)]
pub enum AtmosphereNode {
    RenderLuts,
    RenderSky,
    Environment,
}
/// Render-graph node that fills the four atmosphere LUTs for a view.
#[derive(Default)]
pub(super) struct AtmosphereLutsNode {}
impl ViewNode for AtmosphereLutsNode {
    // Per-view data needed to record the LUT passes: dispatch sizes, the
    // prepared bind groups, and every dynamic uniform offset they use.
    type ViewQuery = (
        Read<GpuAtmosphereSettings>,
        Read<AtmosphereBindGroups>,
        Read<DynamicUniformIndex<Atmosphere>>,
        Read<DynamicUniformIndex<GpuAtmosphereSettings>>,
        Read<AtmosphereTransformsOffset>,
        Read<ViewUniformOffset>,
        Read<ViewLightsUniformOffset>,
    );
    /// Records one compute pass filling the transmittance, multiscattering,
    /// sky-view, and aerial-view LUTs, in that order. Returns early
    /// (without error) while any pipeline is still compiling.
    fn run(
        &self,
        _graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        (
            settings,
            bind_groups,
            atmosphere_uniforms_offset,
            settings_uniforms_offset,
            atmosphere_transforms_offset,
            view_uniforms_offset,
            lights_uniforms_offset,
        ): QueryItem<Self::ViewQuery>,
        world: &World,
    ) -> Result<(), NodeRunError> {
        let pipelines = world.resource::<AtmosphereLutPipelines>();
        let pipeline_cache = world.resource::<PipelineCache>();
        // All four pipelines must be ready before anything is recorded.
        let (
            Some(transmittance_lut_pipeline),
            Some(multiscattering_lut_pipeline),
            Some(sky_view_lut_pipeline),
            Some(aerial_view_lut_pipeline),
        ) = (
            pipeline_cache.get_compute_pipeline(pipelines.transmittance_lut),
            pipeline_cache.get_compute_pipeline(pipelines.multiscattering_lut),
            pipeline_cache.get_compute_pipeline(pipelines.sky_view_lut),
            pipeline_cache.get_compute_pipeline(pipelines.aerial_view_lut),
        )
        else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        let command_encoder = render_context.command_encoder();
        let mut luts_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
            label: Some("atmosphere_luts"),
            timestamp_writes: None,
        });
        let pass_span = diagnostics.pass_span(&mut luts_pass, "atmosphere_luts");
        // Helper for shaders using a 16x16 workgroup: one thread per texel,
        // rounding workgroup counts up to cover the whole texture.
        fn dispatch_2d(compute_pass: &mut ComputePass, size: UVec2) {
            const WORKGROUP_SIZE: u32 = 16;
            let workgroups_x = size.x.div_ceil(WORKGROUP_SIZE);
            let workgroups_y = size.y.div_ceil(WORKGROUP_SIZE);
            compute_pass.dispatch_workgroups(workgroups_x, workgroups_y, 1);
        }
        // Transmittance LUT
        luts_pass.set_pipeline(transmittance_lut_pipeline);
        luts_pass.set_bind_group(
            0,
            &bind_groups.transmittance_lut,
            &[
                atmosphere_uniforms_offset.index(),
                settings_uniforms_offset.index(),
            ],
        );
        dispatch_2d(&mut luts_pass, settings.transmittance_lut_size);
        // Multiscattering LUT — one workgroup per texel; the shader's
        // workgroup size is (1, 1, 64), its z-threads covering ray
        // directions.
        luts_pass.set_pipeline(multiscattering_lut_pipeline);
        luts_pass.set_bind_group(
            0,
            &bind_groups.multiscattering_lut,
            &[
                atmosphere_uniforms_offset.index(),
                settings_uniforms_offset.index(),
            ],
        );
        luts_pass.dispatch_workgroups(
            settings.multiscattering_lut_size.x,
            settings.multiscattering_lut_size.y,
            1,
        );
        // Sky View LUT
        luts_pass.set_pipeline(sky_view_lut_pipeline);
        luts_pass.set_bind_group(
            0,
            &bind_groups.sky_view_lut,
            &[
                atmosphere_uniforms_offset.index(),
                settings_uniforms_offset.index(),
                atmosphere_transforms_offset.index(),
                view_uniforms_offset.offset,
                lights_uniforms_offset.offset,
            ],
        );
        dispatch_2d(&mut luts_pass, settings.sky_view_lut_size);
        // Aerial View LUT — dispatched over the xy footprint; the shader
        // iterates the z slices itself.
        luts_pass.set_pipeline(aerial_view_lut_pipeline);
        luts_pass.set_bind_group(
            0,
            &bind_groups.aerial_view_lut,
            &[
                atmosphere_uniforms_offset.index(),
                settings_uniforms_offset.index(),
                view_uniforms_offset.offset,
                lights_uniforms_offset.offset,
            ],
        );
        dispatch_2d(&mut luts_pass, settings.aerial_view_lut_size.xy());
        pass_span.end(&mut luts_pass);
        Ok(())
    }
}
/// Render-graph node that draws the sky as a fullscreen triangle,
/// compositing atmosphere inscattering over the view target.
#[derive(Default)]
pub(super) struct RenderSkyNode;
impl ViewNode for RenderSkyNode {
    type ViewQuery = (
        Read<AtmosphereBindGroups>,
        Read<ViewTarget>,
        Read<DynamicUniformIndex<Atmosphere>>,
        Read<DynamicUniformIndex<GpuAtmosphereSettings>>,
        Read<AtmosphereTransformsOffset>,
        Read<ViewUniformOffset>,
        Read<ViewLightsUniformOffset>,
        Read<RenderSkyPipelineId>,
    );
    /// Records the fullscreen render-sky pass. Returns early (without
    /// error) while the specialized pipeline is still compiling.
    fn run<'w>(
        &self,
        _graph: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        (
            atmosphere_bind_groups,
            view_target,
            atmosphere_uniforms_offset,
            settings_uniforms_offset,
            atmosphere_transforms_offset,
            view_uniforms_offset,
            lights_uniforms_offset,
            render_sky_pipeline_id,
        ): QueryItem<'w, '_, Self::ViewQuery>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        let pipeline_cache = world.resource::<PipelineCache>();
        let Some(render_sky_pipeline) =
            pipeline_cache.get_render_pipeline(render_sky_pipeline_id.0)
        else {
            return Ok(());
        }; //TODO: warning
        let diagnostics = render_context.diagnostic_recorder();
        // Color-only pass over the view target; depth is read as a texture
        // inside the shader rather than attached.
        let mut render_sky_pass =
            render_context
                .command_encoder()
                .begin_render_pass(&RenderPassDescriptor {
                    label: Some("render_sky"),
                    color_attachments: &[Some(view_target.get_color_attachment())],
                    depth_stencil_attachment: None,
                    timestamp_writes: None,
                    occlusion_query_set: None,
                });
        let pass_span = diagnostics.pass_span(&mut render_sky_pass, "render_sky");
        render_sky_pass.set_pipeline(render_sky_pipeline);
        render_sky_pass.set_bind_group(
            0,
            &atmosphere_bind_groups.render_sky,
            &[
                atmosphere_uniforms_offset.index(),
                settings_uniforms_offset.index(),
                atmosphere_transforms_offset.index(),
                view_uniforms_offset.offset,
                lights_uniforms_offset.offset,
            ],
        );
        // Fullscreen triangle: 3 vertices, no vertex buffer.
        render_sky_pass.draw(0..3, 0..1);
        pass_span.end(&mut render_sky_pass);
        Ok(())
    }
}

View File

@@ -0,0 +1,82 @@
// `dual_source_blending` is a WGSL extension backed by an optional wgpu
// device feature. The pipeline only defines DUAL_SOURCE_BLENDING when the
// device reports support (see `queue_render_sky_pipelines`), so the
// `enable` must be guarded too — an unconditional `enable` fails shader
// validation on devices without the feature.
#ifdef DUAL_SOURCE_BLENDING
enable dual_source_blending;
#endif
#import bevy_pbr::atmosphere::{
types::{Atmosphere, AtmosphereSettings},
bindings::{atmosphere, view, atmosphere_transforms, settings},
functions::{
sample_transmittance_lut, sample_transmittance_lut_segment,
sample_sky_view_lut, direction_world_to_atmosphere,
uv_to_ray_direction, uv_to_ndc, sample_aerial_view_lut,
sample_sun_radiance, ndc_to_camera_dist, raymarch_atmosphere,
get_view_position, max_atmosphere_distance
},
};
#import bevy_render::view::View;
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
// Scene depth, multisampled or not depending on the view's MSAA setting;
// must match the bind group layout variant chosen on the CPU side.
#ifdef MULTISAMPLED
@group(0) @binding(13) var depth_texture: texture_depth_multisampled_2d;
#else
@group(0) @binding(13) var depth_texture: texture_depth_2d;
#endif
struct RenderSkyOutput {
#ifdef DUAL_SOURCE_BLENDING
    // Dual-source path: inscattered light and per-channel transmittance go
    // to separate blend sources of the same color target.
    @location(0) @blend_src(0) inscattering: vec4<f32>,
    @location(0) @blend_src(1) transmittance: vec4<f32>,
#else
    // Single-source fallback: alpha carries the mean transmittance.
    @location(0) inscattering: vec4<f32>,
#endif
}
@fragment
fn main(in: FullscreenVertexOutput) -> RenderSkyOutput {
    let depth = textureLoad(depth_texture, vec2<i32>(in.position.xy), 0);
    let ray_dir_ws = uv_to_ray_direction(in.uv);
    let world_pos = get_view_position();
    // Radius (distance from planet center) and local up at the camera.
    let r = length(world_pos);
    let up = normalize(world_pos);
    let mu = dot(ray_dir_ws, up);
    let max_samples = settings.sky_max_samples;
    // Matches `AtmosphereMode::Raymarched as u32` on the CPU side.
    let should_raymarch = settings.rendering_method == 1u;
    var transmittance: vec3<f32>;
    var inscattering: vec3<f32>;
    let sun_radiance = sample_sun_radiance(ray_dir_ws);
    // depth == 0.0 is treated as "no geometry here" — presumably the
    // cleared reverse-Z far plane; verify against the depth setup.
    if depth == 0.0 {
        let ray_dir_as = direction_world_to_atmosphere(ray_dir_ws, up);
        transmittance = sample_transmittance_lut(r, mu);
        inscattering = sample_sky_view_lut(r, ray_dir_as);
        // NOTE(review): when raymarching, the two LUT samples above are
        // computed and then overwritten; they could be skipped.
        if should_raymarch {
            let t_max = max_atmosphere_distance(r, mu);
            let result = raymarch_atmosphere(world_pos, ray_dir_ws, t_max, max_samples, in.uv, true);
            inscattering = result.inscattering;
            transmittance = result.transmittance;
        }
        inscattering += sun_radiance * transmittance;
    } else {
        // Geometry pixel: apply aerial perspective up to the scene depth.
        let t = ndc_to_camera_dist(vec3(uv_to_ndc(in.uv), depth));
        inscattering = sample_aerial_view_lut(in.uv, t);
        transmittance = sample_transmittance_lut_segment(r, mu, t);
        if should_raymarch {
            let result = raymarch_atmosphere(world_pos, ray_dir_ws, t, max_samples, in.uv, false);
            inscattering = result.inscattering;
            transmittance = result.transmittance;
        }
    }
    // exposure compensation
    inscattering *= view.exposure;
#ifdef DUAL_SOURCE_BLENDING
    return RenderSkyOutput(vec4(inscattering, 0.0), vec4(transmittance, 1.0));
#else
    // Without dual-source blending, collapse per-channel transmittance to
    // its mean and carry it in alpha.
    let mean_transmittance = (transmittance.r + transmittance.g + transmittance.b) / 3.0;
    return RenderSkyOutput(vec4(inscattering, mean_transmittance));
#endif
}

View File

@@ -0,0 +1,700 @@
use crate::render::pbr::{GpuLights, LightMeta};
use bevy_asset::{load_embedded_asset, Handle};
use bevy_camera::{Camera, Camera3d};
use crate::render::FullscreenShader;
use bevy_ecs::{
component::Component,
entity::Entity,
query::With,
resource::Resource,
system::{Commands, Query, Res, ResMut},
world::{FromWorld, World},
};
use bevy_image::ToExtents;
use bevy_math::{Affine3A, Mat4, Vec3A};
use crate::render::{
extract_component::ComponentUniforms,
render_resource::{binding_types::*, *},
renderer::{RenderDevice, RenderQueue},
texture::{CachedTexture, TextureCache},
view::{ExtractedView, Msaa, ViewDepthTexture, ViewUniform, ViewUniforms},
};
use bevy_shader::Shader;
use bevy_utils::default;
use super::{Atmosphere, GpuAtmosphereSettings};
/// Bind group layouts for the four LUT compute passes.
#[derive(Resource)]
pub(crate) struct AtmosphereBindGroupLayouts {
    pub transmittance_lut: BindGroupLayout,
    pub multiscattering_lut: BindGroupLayout,
    pub sky_view_lut: BindGroupLayout,
    pub aerial_view_lut: BindGroupLayout,
}
/// Layouts and shaders for the fullscreen render-sky pass. Two layout
/// variants exist because the depth-texture binding type differs between
/// MSAA and non-MSAA views.
#[derive(Resource)]
pub(crate) struct RenderSkyBindGroupLayouts {
    pub render_sky: BindGroupLayout,
    pub render_sky_msaa: BindGroupLayout,
    pub fullscreen_shader: FullscreenShader,
    pub fragment_shader: Handle<Shader>,
}
impl FromWorld for AtmosphereBindGroupLayouts {
    /// Creates the layouts for the four LUT compute shaders. Binding
    /// indices are shared across the atmosphere shaders: 0/1 are the
    /// atmosphere and settings uniforms, 2 the atmosphere transform,
    /// 3/4 the view and lights uniforms, 5-8 input LUT textures with their
    /// samplers, and 13 the pass's output storage texture.
    fn from_world(world: &mut World) -> Self {
        let render_device = world.resource::<RenderDevice>();
        let transmittance_lut = render_device.create_bind_group_layout(
            "transmittance_lut_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::COMPUTE,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (
                        // transmittance lut storage texture
                        13,
                        texture_storage_2d(
                            TextureFormat::Rgba16Float,
                            StorageTextureAccess::WriteOnly,
                        ),
                    ),
                ),
            ),
        );
        let multiscattering_lut = render_device.create_bind_group_layout(
            "multiscattering_lut_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::COMPUTE,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (5, texture_2d(TextureSampleType::Float { filterable: true })), //transmittance lut and sampler
                    (6, sampler(SamplerBindingType::Filtering)),
                    (
                        //multiscattering lut storage texture
                        13,
                        texture_storage_2d(
                            TextureFormat::Rgba16Float,
                            StorageTextureAccess::WriteOnly,
                        ),
                    ),
                ),
            ),
        );
        let sky_view_lut = render_device.create_bind_group_layout(
            "sky_view_lut_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::COMPUTE,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (2, uniform_buffer::<AtmosphereTransform>(true)),
                    (3, uniform_buffer::<ViewUniform>(true)),
                    (4, uniform_buffer::<GpuLights>(true)),
                    (5, texture_2d(TextureSampleType::Float { filterable: true })), //transmittance lut and sampler
                    (6, sampler(SamplerBindingType::Filtering)),
                    (7, texture_2d(TextureSampleType::Float { filterable: true })), //multiscattering lut and sampler
                    (8, sampler(SamplerBindingType::Filtering)),
                    (
                        // sky-view lut storage texture (output)
                        13,
                        texture_storage_2d(
                            TextureFormat::Rgba16Float,
                            StorageTextureAccess::WriteOnly,
                        ),
                    ),
                ),
            ),
        );
        let aerial_view_lut = render_device.create_bind_group_layout(
            "aerial_view_lut_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::COMPUTE,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (3, uniform_buffer::<ViewUniform>(true)),
                    (4, uniform_buffer::<GpuLights>(true)),
                    (5, texture_2d(TextureSampleType::Float { filterable: true })), //transmittance lut and sampler
                    (6, sampler(SamplerBindingType::Filtering)),
                    (7, texture_2d(TextureSampleType::Float { filterable: true })), //multiscattering lut and sampler
                    (8, sampler(SamplerBindingType::Filtering)),
                    (
                        //Aerial view lut storage texture
                        13,
                        texture_storage_3d(
                            TextureFormat::Rgba16Float,
                            StorageTextureAccess::WriteOnly,
                        ),
                    ),
                ),
            ),
        );
        Self {
            transmittance_lut,
            multiscattering_lut,
            sky_view_lut,
            aerial_view_lut,
        }
    }
}
impl FromWorld for RenderSkyBindGroupLayouts {
    /// Creates the two render-sky fragment layouts. They are identical
    /// except for binding 13, the view depth texture, which is
    /// multisampled in the MSAA variant to match the view's depth buffer.
    fn from_world(world: &mut World) -> Self {
        let render_device = world.resource::<RenderDevice>();
        let render_sky = render_device.create_bind_group_layout(
            "render_sky_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::FRAGMENT,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (2, uniform_buffer::<AtmosphereTransform>(true)),
                    (3, uniform_buffer::<ViewUniform>(true)),
                    (4, uniform_buffer::<GpuLights>(true)),
                    (5, texture_2d(TextureSampleType::Float { filterable: true })), //transmittance lut and sampler
                    (6, sampler(SamplerBindingType::Filtering)),
                    (7, texture_2d(TextureSampleType::Float { filterable: true })), //multiscattering lut and sampler
                    (8, sampler(SamplerBindingType::Filtering)),
                    (9, texture_2d(TextureSampleType::Float { filterable: true })), //sky view lut and sampler
                    (10, sampler(SamplerBindingType::Filtering)),
                    (
                        // aerial view lut and sampler
                        11,
                        texture_3d(TextureSampleType::Float { filterable: true }),
                    ),
                    (12, sampler(SamplerBindingType::Filtering)),
                    (
                        //view depth texture
                        13,
                        texture_2d(TextureSampleType::Depth),
                    ),
                ),
            ),
        );
        let render_sky_msaa = render_device.create_bind_group_layout(
            "render_sky_msaa_bind_group_layout",
            &BindGroupLayoutEntries::with_indices(
                ShaderStages::FRAGMENT,
                (
                    (0, uniform_buffer::<Atmosphere>(true)),
                    (1, uniform_buffer::<GpuAtmosphereSettings>(true)),
                    (2, uniform_buffer::<AtmosphereTransform>(true)),
                    (3, uniform_buffer::<ViewUniform>(true)),
                    (4, uniform_buffer::<GpuLights>(true)),
                    (5, texture_2d(TextureSampleType::Float { filterable: true })), //transmittance lut and sampler
                    (6, sampler(SamplerBindingType::Filtering)),
                    (7, texture_2d(TextureSampleType::Float { filterable: true })), //multiscattering lut and sampler
                    (8, sampler(SamplerBindingType::Filtering)),
                    (9, texture_2d(TextureSampleType::Float { filterable: true })), //sky view lut and sampler
                    (10, sampler(SamplerBindingType::Filtering)),
                    (
                        // aerial view lut and sampler
                        11,
                        texture_3d(TextureSampleType::Float { filterable: true }),
                    ),
                    (12, sampler(SamplerBindingType::Filtering)),
                    (
                        //view depth texture (multisampled variant)
                        13,
                        texture_2d_multisampled(TextureSampleType::Depth),
                    ),
                ),
            ),
        );
        Self {
            render_sky,
            render_sky_msaa,
            fullscreen_shader: world.resource::<FullscreenShader>().clone(),
            fragment_shader: load_embedded_asset!(world, "render_sky.wgsl"),
        }
    }
}
/// Samplers used to read each atmosphere LUT.
#[derive(Resource)]
pub struct AtmosphereSamplers {
    pub transmittance_lut: Sampler,
    pub multiscattering_lut: Sampler,
    pub sky_view_lut: Sampler,
    pub aerial_view_lut: Sampler,
}
impl FromWorld for AtmosphereSamplers {
fn from_world(world: &mut World) -> Self {
let render_device = world.resource::<RenderDevice>();
let base_sampler = SamplerDescriptor {
mag_filter: FilterMode::Linear,
min_filter: FilterMode::Linear,
mipmap_filter: FilterMode::Nearest,
..Default::default()
};
let transmittance_lut = render_device.create_sampler(&SamplerDescriptor {
label: Some("transmittance_lut_sampler"),
..base_sampler
});
let multiscattering_lut = render_device.create_sampler(&SamplerDescriptor {
label: Some("multiscattering_lut_sampler"),
..base_sampler
});
let sky_view_lut = render_device.create_sampler(&SamplerDescriptor {
label: Some("sky_view_lut_sampler"),
address_mode_u: AddressMode::Repeat,
..base_sampler
});
let aerial_view_lut = render_device.create_sampler(&SamplerDescriptor {
label: Some("aerial_view_lut_sampler"),
..base_sampler
});
Self {
transmittance_lut,
multiscattering_lut,
sky_view_lut,
aerial_view_lut,
}
}
}
/// Cached compute pipeline ids for the four LUT passes.
#[derive(Resource)]
pub(crate) struct AtmosphereLutPipelines {
    pub transmittance_lut: CachedComputePipelineId,
    pub multiscattering_lut: CachedComputePipelineId,
    pub sky_view_lut: CachedComputePipelineId,
    pub aerial_view_lut: CachedComputePipelineId,
}
impl FromWorld for AtmosphereLutPipelines {
fn from_world(world: &mut World) -> Self {
let pipeline_cache = world.resource::<PipelineCache>();
let layouts = world.resource::<AtmosphereBindGroupLayouts>();
let transmittance_lut = pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("transmittance_lut_pipeline".into()),
layout: vec![layouts.transmittance_lut.clone()],
shader: load_embedded_asset!(world, "transmittance_lut.wgsl"),
..default()
});
let multiscattering_lut =
pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("multi_scattering_lut_pipeline".into()),
layout: vec![layouts.multiscattering_lut.clone()],
shader: load_embedded_asset!(world, "multiscattering_lut.wgsl"),
..default()
});
let sky_view_lut = pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("sky_view_lut_pipeline".into()),
layout: vec![layouts.sky_view_lut.clone()],
shader: load_embedded_asset!(world, "sky_view_lut.wgsl"),
..default()
});
let aerial_view_lut = pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("aerial_view_lut_pipeline".into()),
layout: vec![layouts.aerial_view_lut.clone()],
shader: load_embedded_asset!(world, "aerial_view_lut.wgsl"),
..default()
});
Self {
transmittance_lut,
multiscattering_lut,
sky_view_lut,
aerial_view_lut,
}
}
}
/// Per-view cached id of the specialized render-sky pipeline.
#[derive(Component)]
pub(crate) struct RenderSkyPipelineId(pub CachedRenderPipelineId);
/// Specialization key for the render-sky pipeline.
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
pub(crate) struct RenderSkyPipelineKey {
    pub msaa_samples: u32,
    pub dual_source_blending: bool,
}
impl SpecializedRenderPipeline for RenderSkyBindGroupLayouts {
    type Key = RenderSkyPipelineKey;
    /// Builds the fullscreen render-sky pipeline for a given MSAA sample
    /// count, using dual-source blending when the device supports it.
    fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor {
        let mut shader_defs = Vec::new();
        if key.msaa_samples > 1 {
            shader_defs.push("MULTISAMPLED".into());
        }
        if key.dual_source_blending {
            shader_defs.push("DUAL_SOURCE_BLENDING".into());
        }
        // With dual-source blending the shader's second output
        // (transmittance) scales the destination; otherwise the alpha
        // channel (mean transmittance) does.
        let dst_factor = if key.dual_source_blending {
            BlendFactor::Src1
        } else {
            BlendFactor::SrcAlpha
        };
        RenderPipelineDescriptor {
            label: Some(format!("render_sky_pipeline_{}", key.msaa_samples).into()),
            // Layout must match the depth-texture binding type for this
            // sample count (see `RenderSkyBindGroupLayouts`).
            layout: vec![if key.msaa_samples == 1 {
                self.render_sky.clone()
            } else {
                self.render_sky_msaa.clone()
            }],
            vertex: self.fullscreen_shader.to_vertex_state(),
            fragment: Some(FragmentState {
                shader: self.fragment_shader.clone(),
                shader_defs,
                targets: vec![Some(ColorTargetState {
                    format: TextureFormat::Rgba16Float,
                    blend: Some(BlendState {
                        color: BlendComponent {
                            src_factor: BlendFactor::One,
                            dst_factor,
                            operation: BlendOperation::Add,
                        },
                        // Preserve destination alpha.
                        alpha: BlendComponent {
                            src_factor: BlendFactor::Zero,
                            dst_factor: BlendFactor::One,
                            operation: BlendOperation::Add,
                        },
                    }),
                    write_mask: ColorWrites::ALL,
                })],
                ..default()
            }),
            multisample: MultisampleState {
                count: key.msaa_samples,
                ..default()
            },
            ..default()
        }
    }
}
/// Specializes and attaches a [`RenderSkyPipelineId`] to every camera view
/// with an [`Atmosphere`], keyed on the view's MSAA sample count and on
/// whether the device supports dual-source blending.
pub(super) fn queue_render_sky_pipelines(
    views: Query<(Entity, &Msaa), (With<Camera>, With<Atmosphere>)>,
    pipeline_cache: Res<PipelineCache>,
    layouts: Res<RenderSkyBindGroupLayouts>,
    mut specializer: ResMut<SpecializedRenderPipelines<RenderSkyBindGroupLayouts>>,
    render_device: Res<RenderDevice>,
    mut commands: Commands,
) {
    for (entity, msaa) in &views {
        let id = specializer.specialize(
            &pipeline_cache,
            &layouts,
            RenderSkyPipelineKey {
                msaa_samples: msaa.samples(),
                dual_source_blending: render_device
                    .features()
                    .contains(WgpuFeatures::DUAL_SOURCE_BLENDING),
            },
        );
        commands.entity(entity).insert(RenderSkyPipelineId(id));
    }
}
/// The cached LUT textures for one atmosphere view.
#[derive(Component)]
pub struct AtmosphereTextures {
    pub transmittance_lut: CachedTexture,
    pub multiscattering_lut: CachedTexture,
    pub sky_view_lut: CachedTexture,
    pub aerial_view_lut: CachedTexture,
}
pub(super) fn prepare_atmosphere_textures(
views: Query<(Entity, &GpuAtmosphereSettings), With<Atmosphere>>,
render_device: Res<RenderDevice>,
mut texture_cache: ResMut<TextureCache>,
mut commands: Commands,
) {
for (entity, lut_settings) in &views {
let transmittance_lut = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("transmittance_lut"),
size: lut_settings.transmittance_lut_size.to_extents(),
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::Rgba16Float,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
view_formats: &[],
},
);
let multiscattering_lut = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("multiscattering_lut"),
size: lut_settings.multiscattering_lut_size.to_extents(),
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::Rgba16Float,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
view_formats: &[],
},
);
let sky_view_lut = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("sky_view_lut"),
size: lut_settings.sky_view_lut_size.to_extents(),
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::Rgba16Float,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
view_formats: &[],
},
);
let aerial_view_lut = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("aerial_view_lut"),
size: lut_settings.aerial_view_lut_size.to_extents(),
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D3,
format: TextureFormat::Rgba16Float,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
view_formats: &[],
},
);
commands.entity(entity).insert({
AtmosphereTextures {
transmittance_lut,
multiscattering_lut,
sky_view_lut,
aerial_view_lut,
}
});
}
}
/// Render-world resource owning the dynamic uniform buffer of per-view
/// [`AtmosphereTransform`]s, refilled each frame by
/// `prepare_atmosphere_transforms`.
#[derive(Resource, Default)]
pub struct AtmosphereTransforms {
    // One entry per atmosphere-enabled 3D camera view.
    uniforms: DynamicUniformBuffer<AtmosphereTransform>,
}
impl AtmosphereTransforms {
    /// Read-only access to the underlying uniform buffer (used when creating
    /// the atmosphere bind groups).
    #[inline]
    pub fn uniforms(&self) -> &DynamicUniformBuffer<AtmosphereTransform> {
        &self.uniforms
    }
}
/// GPU uniform carrying the atmosphere-space → world-space transform for one
/// view. Field layout mirrors the `AtmosphereTransforms` struct in the WGSL
/// `bevy_pbr::atmosphere::types` module.
#[derive(ShaderType)]
pub struct AtmosphereTransform {
    world_from_atmosphere: Mat4,
}
/// Per-view dynamic offset into the [`AtmosphereTransforms`] uniform buffer,
/// as returned by the buffer's writer in `prepare_atmosphere_transforms`.
#[derive(Component)]
pub struct AtmosphereTransformsOffset {
    index: u32,
}
impl AtmosphereTransformsOffset {
    /// The dynamic uniform-buffer offset for this view's atmosphere transform.
    #[inline]
    pub fn index(&self) -> u32 {
        self.index
    }
}
/// Writes one [`AtmosphereTransform`] per atmosphere-enabled 3D camera into
/// the [`AtmosphereTransforms`] uniform buffer and tags each view with its
/// dynamic offset ([`AtmosphereTransformsOffset`]).
///
/// "Atmosphere space" shares the view's position but is re-oriented so that
/// +Y is world up and +Z is the camera's horizontal forward direction, which
/// keeps the horizon a horizontal line in the LUTs.
pub(super) fn prepare_atmosphere_transforms(
    views: Query<(Entity, &ExtractedView), (With<Atmosphere>, With<Camera3d>)>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut atmo_uniforms: ResMut<AtmosphereTransforms>,
    mut commands: Commands,
) {
    let view_count = views.iter().len();
    // Bail out if the buffer can't be mapped for writing (e.g. zero views).
    let Some(mut writer) = atmo_uniforms
        .uniforms
        .get_writer(view_count, &render_device, &render_queue)
    else {
        return;
    };
    for (entity, view) in &views {
        let view_affine = view.world_from_view.affine();
        // Flatten the camera's forward axis onto the horizontal plane; if the
        // camera looks straight up/down this degenerates, so fall back to the
        // flattened camera-up axis instead.
        let forward = view_affine.matrix3.z_axis;
        let fallback = view_affine.matrix3.y_axis;
        let basis_z = forward
            .with_y(0.0)
            .try_normalize()
            .unwrap_or_else(|| fallback.with_y(0.0).normalize());
        let basis_y = Vec3A::Y;
        let basis_x = basis_y.cross(basis_z).normalize();
        let world_from_atmosphere = Mat4::from(Affine3A::from_cols(
            basis_x,
            basis_y,
            basis_z,
            view_affine.translation,
        ));
        commands.entity(entity).insert(AtmosphereTransformsOffset {
            index: writer.write(&AtmosphereTransform {
                world_from_atmosphere,
            }),
        });
    }
}
/// Per-view bind groups for the atmosphere passes: one per LUT compute pass
/// plus one for the final sky rendering pass. Built each frame by
/// `prepare_atmosphere_bind_groups`.
#[derive(Component)]
pub(crate) struct AtmosphereBindGroups {
    pub transmittance_lut: BindGroup,
    pub multiscattering_lut: BindGroup,
    pub sky_view_lut: BindGroup,
    pub aerial_view_lut: BindGroup,
    pub render_sky: BindGroup,
}
/// Builds the five atmosphere bind groups for every atmosphere-enabled 3D
/// camera and inserts them as an [`AtmosphereBindGroups`] component.
///
/// Binding indices follow the shared numbering used by the atmosphere WGSL
/// shaders: 0/1/2 are the atmosphere/settings/transform uniforms, 3/4 the
/// view/lights uniforms, 5-12 the LUT textures and samplers, and 13 is each
/// pass's output (a storage texture for the LUT passes, the depth texture for
/// the sky pass).
///
/// # Panics
///
/// Panics if any of the required uniform buffers has not been written yet;
/// each `expect` message names the missing buffer.
pub(super) fn prepare_atmosphere_bind_groups(
    views: Query<
        (Entity, &AtmosphereTextures, &ViewDepthTexture, &Msaa),
        (With<Camera3d>, With<Atmosphere>),
    >,
    render_device: Res<RenderDevice>,
    layouts: Res<AtmosphereBindGroupLayouts>,
    render_sky_layouts: Res<RenderSkyBindGroupLayouts>,
    samplers: Res<AtmosphereSamplers>,
    view_uniforms: Res<ViewUniforms>,
    lights_uniforms: Res<LightMeta>,
    atmosphere_transforms: Res<AtmosphereTransforms>,
    atmosphere_uniforms: Res<ComponentUniforms<Atmosphere>>,
    settings_uniforms: Res<ComponentUniforms<GpuAtmosphereSettings>>,
    mut commands: Commands,
) {
    // Skip the uniform-buffer `expect`s entirely when no view needs them.
    if views.is_empty() {
        return;
    }
    let atmosphere_binding = atmosphere_uniforms
        .binding()
        .expect("Failed to prepare atmosphere bind groups. Atmosphere uniform buffer missing");
    let transforms_binding = atmosphere_transforms
        .uniforms()
        .binding()
        .expect("Failed to prepare atmosphere bind groups. Atmosphere transforms buffer missing");
    let settings_binding = settings_uniforms.binding().expect(
        "Failed to prepare atmosphere bind groups. AtmosphereSettings uniform buffer missing",
    );
    let view_binding = view_uniforms
        .uniforms
        .binding()
        .expect("Failed to prepare atmosphere bind groups. View uniform buffer missing");
    let lights_binding = lights_uniforms
        .view_gpu_lights
        .binding()
        .expect("Failed to prepare atmosphere bind groups. Lights uniform buffer missing");
    for (entity, textures, view_depth_texture, msaa) in &views {
        let transmittance_lut = render_device.create_bind_group(
            "transmittance_lut_bind_group",
            &layouts.transmittance_lut,
            &BindGroupEntries::with_indices((
                (0, atmosphere_binding.clone()),
                (1, settings_binding.clone()),
                (13, &textures.transmittance_lut.default_view),
            )),
        );
        let multiscattering_lut = render_device.create_bind_group(
            "multiscattering_lut_bind_group",
            &layouts.multiscattering_lut,
            &BindGroupEntries::with_indices((
                (0, atmosphere_binding.clone()),
                (1, settings_binding.clone()),
                (5, &textures.transmittance_lut.default_view),
                (6, &samplers.transmittance_lut),
                (13, &textures.multiscattering_lut.default_view),
            )),
        );
        let sky_view_lut = render_device.create_bind_group(
            "sky_view_lut_bind_group",
            &layouts.sky_view_lut,
            &BindGroupEntries::with_indices((
                (0, atmosphere_binding.clone()),
                (1, settings_binding.clone()),
                (2, transforms_binding.clone()),
                (3, view_binding.clone()),
                (4, lights_binding.clone()),
                (5, &textures.transmittance_lut.default_view),
                (6, &samplers.transmittance_lut),
                (7, &textures.multiscattering_lut.default_view),
                (8, &samplers.multiscattering_lut),
                (13, &textures.sky_view_lut.default_view),
            )),
        );
        let aerial_view_lut = render_device.create_bind_group(
            // Fixed copy-paste: this debug label previously read
            // "sky_view_lut_bind_group".
            "aerial_view_lut_bind_group",
            &layouts.aerial_view_lut,
            &BindGroupEntries::with_indices((
                (0, atmosphere_binding.clone()),
                (1, settings_binding.clone()),
                (3, view_binding.clone()),
                (4, lights_binding.clone()),
                (5, &textures.transmittance_lut.default_view),
                (6, &samplers.transmittance_lut),
                (7, &textures.multiscattering_lut.default_view),
                (8, &samplers.multiscattering_lut),
                (13, &textures.aerial_view_lut.default_view),
            )),
        );
        let render_sky = render_device.create_bind_group(
            "render_sky_bind_group",
            // The MSAA layout differs because the depth texture at binding 13
            // is multisampled when MSAA is enabled.
            if *msaa == Msaa::Off {
                &render_sky_layouts.render_sky
            } else {
                &render_sky_layouts.render_sky_msaa
            },
            &BindGroupEntries::with_indices((
                (0, atmosphere_binding.clone()),
                (1, settings_binding.clone()),
                (2, transforms_binding.clone()),
                (3, view_binding.clone()),
                (4, lights_binding.clone()),
                (5, &textures.transmittance_lut.default_view),
                (6, &samplers.transmittance_lut),
                (7, &textures.multiscattering_lut.default_view),
                (8, &samplers.multiscattering_lut),
                (9, &textures.sky_view_lut.default_view),
                (10, &samplers.sky_view_lut),
                (11, &textures.aerial_view_lut.default_view),
                (12, &samplers.aerial_view_lut),
                (13, view_depth_texture.view()),
            )),
        );
        commands.entity(entity).insert(AtmosphereBindGroups {
            transmittance_lut,
            multiscattering_lut,
            sky_view_lut,
            aerial_view_lut,
            render_sky,
        });
    }
}

View File

@@ -0,0 +1,44 @@
// Compute pass that fills the sky-view LUT: for each texel, a ray is marched
// through the atmosphere from the camera's position and the in-scattered
// radiance is stored. Texel UV maps to (zenith, azimuth), which is converted
// to a world-space ray direction via the atmosphere-space transform.
#import bevy_pbr::{
    mesh_view_types::Lights,
    atmosphere::{
        types::{Atmosphere, AtmosphereSettings},
        bindings::{atmosphere, view, settings},
        functions::{
            sample_atmosphere, AtmosphereSample,
            sample_local_inscattering, get_view_position,
            max_atmosphere_distance, direction_atmosphere_to_world,
            sky_view_lut_uv_to_zenith_azimuth, zenith_azimuth_to_ray_dir,
            MIDPOINT_RATIO, raymarch_atmosphere, EPSILON
        },
    }
}
#import bevy_render::{
    view::View,
    maths::HALF_PI,
}
// NOTE(review): FullscreenVertexOutput appears unused in this compute shader —
// candidate for removal; confirm against the rest of the shader pipeline.
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
// Output storage texture; binding 13 matches the Rust-side bind group layout.
@group(0) @binding(13) var sky_view_lut_out: texture_storage_2d<rgba16float, write>;
@compute
@workgroup_size(16, 16, 1)
fn main(@builtin(global_invocation_id) idx: vec3<u32>) {
    // NOTE(review): unlike transmittance_lut.wgsl, no +0.5 texel-center
    // offset is applied here — presumably intentional to match the LUT's
    // sampling scheme; confirm against upstream.
    let uv = vec2<f32>(idx.xy) / vec2<f32>(settings.sky_view_lut_size);
    let cam_pos = get_view_position();
    // r: distance from planet center to the camera.
    let r = length(cam_pos);
    var zenith_azimuth = sky_view_lut_uv_to_zenith_azimuth(r, uv);
    // Atmosphere-space ray direction, then rotated into world space.
    let ray_dir_as = zenith_azimuth_to_ray_dir(zenith_azimuth.x, zenith_azimuth.y);
    let ray_dir_ws = direction_atmosphere_to_world(ray_dir_as);
    // March from a point directly above the planet center at height r.
    let world_pos = vec3(0.0, r, 0.0);
    let up = normalize(world_pos);
    // mu: cosine of the angle between the ray and the local up vector.
    let mu = dot(ray_dir_ws, up);
    let t_max = max_atmosphere_distance(r, mu);
    let result = raymarch_atmosphere(world_pos, ray_dir_ws, t_max, settings.sky_view_lut_samples, uv, true);
    textureStore(sky_view_lut_out, idx.xy, vec4(result.inscattering, 1.0));
}

View File

@@ -0,0 +1,48 @@
// Compute pass that fills the transmittance LUT: each texel stores the
// fraction of light surviving (per RGB wavelength) along a ray from a given
// view height to the top of the atmosphere.
#import bevy_pbr::atmosphere::{
    types::{Atmosphere, AtmosphereSettings},
    bindings::{settings, atmosphere},
    functions::{AtmosphereSample, sample_atmosphere, get_local_r, max_atmosphere_distance, MIDPOINT_RATIO},
    bruneton_functions::{transmittance_lut_uv_to_r_mu, distance_to_bottom_atmosphere_boundary, distance_to_top_atmosphere_boundary},
}
// NOTE(review): FullscreenVertexOutput appears unused in this compute shader —
// candidate for removal; confirm against the rest of the shader pipeline.
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
// Output storage texture; binding 13 matches the Rust-side bind group layout.
@group(0) @binding(13) var transmittance_lut_out: texture_storage_2d<rgba16float, write>;
@compute
@workgroup_size(16, 16, 1)
fn main(@builtin(global_invocation_id) idx: vec3<u32>) {
    // +0.5 samples at texel centers.
    let uv: vec2<f32> = (vec2<f32>(idx.xy) + 0.5) / vec2<f32>(settings.transmittance_lut_size);
    // map UV coordinates to view height (r) and zenith cos angle (mu)
    let r_mu = transmittance_lut_uv_to_r_mu(uv);
    // compute the optical depth from view height r to the top atmosphere boundary
    let optical_depth = ray_optical_depth(r_mu.x, r_mu.y, settings.transmittance_lut_samples);
    // Beer-Lambert: transmittance = exp(-optical_depth), per color channel.
    let transmittance = exp(-optical_depth);
    textureStore(transmittance_lut_out, idx.xy, vec4(transmittance, 1.0));
}
/// Compute the optical depth of the atmosphere from the ground to the top atmosphere boundary
/// at a given view height (r) and zenith cos angle (mu)
///
/// Numerically integrates extinction along the ray using `sample_count`
/// segments, sampling each segment at MIDPOINT_RATIO of its length.
fn ray_optical_depth(r: f32, mu: f32, sample_count: u32) -> vec3<f32> {
    // Total ray length to the atmosphere boundary.
    let t_max = max_atmosphere_distance(r, mu);
    var optical_depth = vec3<f32>(0.0f);
    var prev_t = 0.0f;
    for (var i = 0u; i < sample_count; i++) {
        // Sample position along the ray; dt is this segment's length.
        let t_i = t_max * (f32(i) + MIDPOINT_RATIO) / f32(sample_count);
        let dt = t_i - prev_t;
        prev_t = t_i;
        // Altitude (distance from planet center) at the sample point.
        let r_i = get_local_r(r, mu, t_i);
        let atmosphere_sample = sample_atmosphere(r_i);
        // Accumulate extinction * segment length (per RGB channel).
        let sample_optical_depth = atmosphere_sample.extinction * dt;
        optical_depth += sample_optical_depth;
    }
    return optical_depth;
}

View File

@@ -0,0 +1,46 @@
#define_import_path bevy_pbr::atmosphere::types
// Physical description of the atmosphere. Field layout presumably mirrors the
// Rust-side `Atmosphere` uniform — keep the two in sync.
struct Atmosphere {
    // Radius of the planet
    bottom_radius: f32, // units: m
    // Radius at which we consider the atmosphere to 'end' for out calculations (from center of planet)
    top_radius: f32, // units: m
    ground_albedo: vec3<f32>,
    rayleigh_density_exp_scale: f32,
    rayleigh_scattering: vec3<f32>,
    mie_density_exp_scale: f32,
    mie_scattering: f32, // units: m^-1
    mie_absorption: f32, // units: m^-1
    mie_asymmetry: f32, // the "asymmetry" value of the phase function, unitless. Domain: (-1, 1)
    ozone_layer_altitude: f32, // units: m
    ozone_layer_width: f32, // units: m
    ozone_absorption: vec3<f32>, // ozone absorption. units: m^-1
}
// LUT resolutions and sampling parameters. Presumably mirrors the Rust-side
// `GpuAtmosphereSettings` uniform — keep the two in sync.
struct AtmosphereSettings {
    transmittance_lut_size: vec2<u32>,
    multiscattering_lut_size: vec2<u32>,
    sky_view_lut_size: vec2<u32>,
    aerial_view_lut_size: vec3<u32>,
    transmittance_lut_samples: u32,
    multiscattering_lut_dirs: u32,
    multiscattering_lut_samples: u32,
    sky_view_lut_samples: u32,
    aerial_view_lut_samples: u32,
    aerial_view_lut_max_distance: f32,
    scene_units_to_m: f32,
    sky_max_samples: u32,
    rendering_method: u32,
}
// "Atmosphere space" is just the view position with y=0 and oriented horizontally,
// so the horizon stays a horizontal line in our luts
// Mirrors the Rust-side `AtmosphereTransform` uniform.
struct AtmosphereTransforms {
    world_from_atmosphere: mat4x4<f32>,
}

Binary file not shown.

View File

@@ -0,0 +1,580 @@
use core::num::NonZero;
use bevy_camera::Camera;
use bevy_ecs::{entity::EntityHashMap, prelude::*};
use bevy_light::cluster::{ClusterableObjectCounts, Clusters, GlobalClusterSettings};
use bevy_math::{uvec4, UVec3, UVec4, Vec4};
use crate::render::{
render_resource::{
BindingResource, BufferBindingType, ShaderSize, ShaderType, StorageBuffer, UniformBuffer,
},
renderer::{RenderAdapter, RenderDevice, RenderQueue},
sync_world::RenderEntity,
Extract,
};
use tracing::warn;
use crate::render::pbr::MeshPipeline;
// NOTE: this must be kept in sync with the same constants in
// `mesh_view_types.wgsl`.
pub const MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS: usize = 204;
// Make sure that the clusterable object buffer doesn't overflow the maximum
// size of a UBO on WebGL 2.
const _: () =
    assert!(size_of::<GpuClusterableObject>() * MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS <= 16384);
// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that
// at least that many are supported using this constant and SupportedBindingType::from_device()
pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3;
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
// and must be large enough to contain MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS
const CLUSTER_COUNT_SIZE: u32 = 9;
// The packed word layout is [offset:14 | point count:9 | spot count:9]
// (see `pack_offset_and_counts`); the offset gets the remaining 32 - 18 = 14 bits.
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - (CLUSTER_COUNT_SIZE * 2))) - 1;
// Mask for one 9-bit count field.
const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;
/// Derives the [`GlobalClusterSettings`] from the render world's device and
/// adapter: whether storage buffers and clustered decals are usable, plus the
/// UBO-path capacity limits.
pub(crate) fn make_global_cluster_settings(world: &World) -> GlobalClusterSettings {
    let device = world.resource::<RenderDevice>();
    let adapter = world.resource::<RenderAdapter>();
    // Clustered forward needs CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT read-only
    // storage bindings; otherwise we fall back to the uniform-buffer path.
    let supports_storage_buffers = matches!(
        device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT),
        BufferBindingType::Storage { .. }
    );
    let clustered_decals_are_usable =
        crate::render::pbr::decal::clustered::clustered_decals_are_usable(device, adapter);
    GlobalClusterSettings {
        supports_storage_buffers,
        clustered_decals_are_usable,
        max_uniform_buffer_clusterable_objects: MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS,
        view_cluster_bindings_max_indices: ViewClusterBindings::MAX_INDICES,
    }
}
/// GPU-side data for a single clusterable object (e.g. point or spot light),
/// uploaded via [`GpuClusterableObjects`].
#[derive(Copy, Clone, ShaderType, Default, Debug)]
pub struct GpuClusterableObject {
    // For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3]
    // For spot lights: 2 components of the direction (x,z), spot_scale and spot_offset
    pub(crate) light_custom_data: Vec4,
    pub(crate) color_inverse_square_range: Vec4,
    pub(crate) position_radius: Vec4,
    pub(crate) flags: u32,
    pub(crate) shadow_depth_bias: f32,
    pub(crate) shadow_normal_bias: f32,
    pub(crate) spot_light_tan_angle: f32,
    pub(crate) soft_shadow_size: f32,
    pub(crate) shadow_map_near_z: f32,
    pub(crate) decal_index: u32,
    // Padding to keep the struct's GPU layout; not read by the shader.
    pub(crate) pad: f32,
}
/// Render-world resource pairing the GPU buffer of clusterable objects with a
/// map from render entity to that object's index in the buffer.
#[derive(Resource)]
pub struct GlobalClusterableObjectMeta {
    pub gpu_clusterable_objects: GpuClusterableObjects,
    pub entity_to_index: EntityHashMap<usize>,
}
/// The clusterable-object list, stored either in a uniform buffer (fixed
/// capacity, e.g. WebGL 2) or a storage buffer (unbounded), depending on
/// device support.
pub enum GpuClusterableObjects {
    Uniform(UniformBuffer<GpuClusterableObjectsUniform>),
    Storage(StorageBuffer<GpuClusterableObjectsStorage>),
}
/// Fixed-size uniform-buffer layout: exactly
/// [`MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS`] entries (unused slots stay
/// default-initialized).
#[derive(ShaderType)]
pub struct GpuClusterableObjectsUniform {
    data: Box<[GpuClusterableObject; MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS]>,
}
/// Runtime-sized storage-buffer layout.
#[derive(ShaderType, Default)]
pub struct GpuClusterableObjectsStorage {
    #[size(runtime)]
    data: Vec<GpuClusterableObject>,
}
/// Per-view cluster grid parameters extracted from the main world.
#[derive(Component)]
pub struct ExtractedClusterConfig {
    /// Special near value for cluster calculations
    pub(crate) near: f32,
    pub(crate) far: f32,
    /// Number of clusters in `X` / `Y` / `Z` in the view frustum
    pub(crate) dimensions: UVec3,
}
/// One record in the flattened cluster stream produced by `extract_clusters`:
/// a per-cluster header (object counts) followed by the entities it contains.
enum ExtractedClusterableObjectElement {
    ClusterHeader(ClusterableObjectCounts),
    ClusterableObjectEntity(Entity),
}
/// Per-view flattened cluster contents, consumed by `prepare_clusters`.
#[derive(Component)]
pub struct ExtractedClusterableObjects {
    data: Vec<ExtractedClusterableObjectElement>,
}
/// Uniform-buffer layout for packed per-cluster offset/count words
/// (see `pack_offset_and_counts`).
#[derive(ShaderType)]
struct GpuClusterOffsetsAndCountsUniform {
    data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
/// Storage-buffer layout for the flat list of clusterable-object indices.
#[derive(ShaderType, Default)]
struct GpuClusterableObjectIndexListsStorage {
    #[size(runtime)]
    data: Vec<u32>,
}
#[derive(ShaderType, Default)]
struct GpuClusterOffsetsAndCountsStorage {
    /// The starting offset, followed by the number of point lights, spot
    /// lights, reflection probes, and irradiance volumes in each cluster, in
    /// that order. The remaining fields are filled with zeroes.
    #[size(runtime)]
    data: Vec<[UVec4; 2]>,
}
/// Backing buffers for one view's cluster data, chosen to match the device's
/// supported binding type (uniform fallback vs. storage).
enum ViewClusterBuffers {
    Uniform {
        // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
        clusterable_object_index_lists: UniformBuffer<GpuClusterableObjectIndexListsUniform>,
        // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
        cluster_offsets_and_counts: UniformBuffer<GpuClusterOffsetsAndCountsUniform>,
    },
    Storage {
        clusterable_object_index_lists: StorageBuffer<GpuClusterableObjectIndexListsStorage>,
        cluster_offsets_and_counts: StorageBuffer<GpuClusterOffsetsAndCountsStorage>,
    },
}
/// Per-view component holding the cluster index/offset buffers together with
/// running element counts used while filling them.
#[derive(Component)]
pub struct ViewClusterBindings {
    // Number of indices pushed so far (u8-packed in the uniform path).
    n_indices: usize,
    // Number of offset/count records pushed so far.
    n_offsets: usize,
    buffers: ViewClusterBuffers,
}
/// Startup system that inserts the [`GlobalClusterableObjectMeta`] resource,
/// picking uniform- or storage-buffer backing based on device support.
pub fn init_global_clusterable_object_meta(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
) {
    let binding_type = render_device
        .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT);
    commands.insert_resource(GlobalClusterableObjectMeta::new(binding_type));
}
impl GlobalClusterableObjectMeta {
    /// Creates an empty meta whose GPU buffer matches the given binding type.
    pub fn new(buffer_binding_type: BufferBindingType) -> Self {
        Self {
            gpu_clusterable_objects: GpuClusterableObjects::new(buffer_binding_type),
            entity_to_index: EntityHashMap::default(),
        }
    }
}
impl GpuClusterableObjects {
    /// Selects uniform- or storage-buffer backing from the binding type.
    fn new(buffer_binding_type: BufferBindingType) -> Self {
        match buffer_binding_type {
            BufferBindingType::Storage { .. } => Self::storage(),
            BufferBindingType::Uniform => Self::uniform(),
        }
    }
    fn uniform() -> Self {
        Self::Uniform(UniformBuffer::default())
    }
    fn storage() -> Self {
        Self::Storage(StorageBuffer::default())
    }
    /// Replaces the CPU-side contents with `clusterable_objects`.
    ///
    /// In the uniform path the list is silently truncated to
    /// `MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS`; the storage path takes all
    /// entries (draining the input vector).
    pub(crate) fn set(&mut self, mut clusterable_objects: Vec<GpuClusterableObject>) {
        match self {
            GpuClusterableObjects::Uniform(buffer) => {
                let len = clusterable_objects
                    .len()
                    .min(MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS);
                let src = &clusterable_objects[..len];
                let dst = &mut buffer.get_mut().data[..len];
                dst.copy_from_slice(src);
            }
            GpuClusterableObjects::Storage(buffer) => {
                buffer.get_mut().data.clear();
                buffer.get_mut().data.append(&mut clusterable_objects);
            }
        }
    }
    /// Uploads the CPU-side contents to the GPU.
    pub(crate) fn write_buffer(
        &mut self,
        render_device: &RenderDevice,
        render_queue: &RenderQueue,
    ) {
        match self {
            GpuClusterableObjects::Uniform(buffer) => {
                buffer.write_buffer(render_device, render_queue);
            }
            GpuClusterableObjects::Storage(buffer) => {
                buffer.write_buffer(render_device, render_queue);
            }
        }
    }
    /// Binding resource for bind-group creation, if the buffer exists yet.
    pub fn binding(&self) -> Option<BindingResource<'_>> {
        match self {
            GpuClusterableObjects::Uniform(buffer) => buffer.binding(),
            GpuClusterableObjects::Storage(buffer) => buffer.binding(),
        }
    }
    /// Minimum binding size for the layout matching `buffer_binding_type`.
    pub fn min_size(buffer_binding_type: BufferBindingType) -> NonZero<u64> {
        match buffer_binding_type {
            BufferBindingType::Storage { .. } => GpuClusterableObjectsStorage::min_size(),
            BufferBindingType::Uniform => GpuClusterableObjectsUniform::min_size(),
        }
    }
}
impl Default for GpuClusterableObjectsUniform {
    /// A full-capacity array of default (zero-valued) clusterable objects.
    fn default() -> Self {
        let empty = [GpuClusterableObject::default(); MAX_UNIFORM_BUFFER_CLUSTERABLE_OBJECTS];
        Self {
            data: Box::new(empty),
        }
    }
}
/// Extracts clusters from the main world into the render world.
///
/// For inactive cameras, previously extracted cluster components are removed.
/// For active cameras, the per-cluster contents are flattened into a single
/// `Vec` of header/entity records, with each main-world entity remapped to its
/// render-world counterpart.
pub fn extract_clusters(
    mut commands: Commands,
    views: Extract<Query<(RenderEntity, &Clusters, &Camera)>>,
    mapper: Extract<Query<RenderEntity>>,
) {
    for (entity, clusters, camera) in &views {
        let mut entity_commands = commands
            .get_entity(entity)
            .expect("Clusters entity wasn't synced.");
        if !camera.is_active {
            entity_commands.remove::<(ExtractedClusterableObjects, ExtractedClusterConfig)>();
            continue;
        }
        // Pre-size the flat stream: one header per cluster plus one record
        // per contained entity.
        let entity_count: usize = clusters
            .clusterable_objects
            .iter()
            .map(|l| l.entities.len())
            .sum();
        let mut data = Vec::with_capacity(clusters.clusterable_objects.len() + entity_count);
        for cluster_objects in &clusters.clusterable_objects {
            data.push(ExtractedClusterableObjectElement::ClusterHeader(
                cluster_objects.counts,
            ));
            for clusterable_entity in &cluster_objects.entities {
                // Entities without a render-world counterpart are skipped.
                if let Ok(entity) = mapper.get(*clusterable_entity) {
                    data.push(ExtractedClusterableObjectElement::ClusterableObjectEntity(
                        entity,
                    ));
                }
            }
        }
        entity_commands.insert((
            ExtractedClusterableObjects { data },
            ExtractedClusterConfig {
                near: clusters.near,
                far: clusters.far,
                dimensions: clusters.dimensions,
            },
        ));
    }
}
/// Converts each view's extracted cluster stream into GPU-ready
/// [`ViewClusterBindings`], writes the buffers, and inserts the component.
///
/// In the uniform-buffer fallback (no storage buffers), index capacity is
/// capped at [`ViewClusterBindings::MAX_INDICES`]; further indices are
/// dropped with a warning.
pub fn prepare_clusters(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mesh_pipeline: Res<MeshPipeline>,
    global_clusterable_object_meta: Res<GlobalClusterableObjectMeta>,
    views: Query<(Entity, &ExtractedClusterableObjects)>,
) {
    let render_device = render_device.into_inner();
    let supports_storage_buffers = matches!(
        mesh_pipeline.clustered_forward_buffer_binding_type,
        BufferBindingType::Storage { .. }
    );
    for (entity, extracted_clusters) in &views {
        let mut view_clusters_bindings =
            ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type);
        view_clusters_bindings.clear();
        for record in &extracted_clusters.data {
            match record {
                // A header starts a new cluster: record where its index list
                // begins and how many objects of each kind it holds.
                ExtractedClusterableObjectElement::ClusterHeader(counts) => {
                    let offset = view_clusters_bindings.n_indices();
                    view_clusters_bindings.push_offset_and_counts(offset, counts);
                }
                ExtractedClusterableObjectElement::ClusterableObjectEntity(entity) => {
                    if let Some(clusterable_object_index) =
                        global_clusterable_object_meta.entity_to_index.get(entity)
                    {
                        if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES
                            && !supports_storage_buffers
                        {
                            warn!(
                                "Clusterable object index lists are full! The clusterable \
                                objects in the view are present in too many clusters."
                            );
                            break;
                        }
                        view_clusters_bindings.push_index(*clusterable_object_index);
                    }
                }
            }
        }
        view_clusters_bindings.write_buffers(render_device, &render_queue);
        commands.entity(entity).insert(view_clusters_bindings);
    }
}
impl ViewClusterBindings {
    /// Max packed offset/count words in the uniform path: one 16384-byte UBO
    /// worth of u32s.
    pub const MAX_OFFSETS: usize = 16384 / 4;
    /// Number of UVec4 elements needed to hold MAX_OFFSETS u32 components.
    const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
    /// Max clusterable-object indices in the uniform path (u8-packed, four
    /// per u32 component).
    pub const MAX_INDICES: usize = 16384;
    /// New, empty bindings with backing buffers matching `buffer_binding_type`.
    pub fn new(buffer_binding_type: BufferBindingType) -> Self {
        Self {
            n_indices: 0,
            n_offsets: 0,
            buffers: ViewClusterBuffers::new(buffer_binding_type),
        }
    }
    /// Resets the CPU-side buffer contents (zeroing the uniform arrays or
    /// clearing the storage vectors). Note: does not reset n_indices/n_offsets;
    /// callers use this on freshly constructed bindings.
    pub fn clear(&mut self) {
        match &mut self.buffers {
            ViewClusterBuffers::Uniform {
                clusterable_object_index_lists,
                cluster_offsets_and_counts,
            } => {
                *clusterable_object_index_lists.get_mut().data =
                    [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
                *cluster_offsets_and_counts.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
            }
            ViewClusterBuffers::Storage {
                clusterable_object_index_lists,
                cluster_offsets_and_counts,
                ..
            } => {
                clusterable_object_index_lists.get_mut().data.clear();
                cluster_offsets_and_counts.get_mut().data.clear();
            }
        }
    }
    /// Appends one cluster's record: where its index list starts (`offset`)
    /// and how many objects of each kind it contains. The uniform path packs
    /// offset + point/spot counts into a single u32 (other kinds are not
    /// clustered on that path); the storage path stores all counts unpacked.
    fn push_offset_and_counts(&mut self, offset: usize, counts: &ClusterableObjectCounts) {
        match &mut self.buffers {
            ViewClusterBuffers::Uniform {
                cluster_offsets_and_counts,
                ..
            } => {
                let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4
                if array_index >= Self::MAX_UNIFORM_ITEMS {
                    warn!("cluster offset and count out of bounds!");
                    return;
                }
                // Which of the UVec4's four u32 lanes this record occupies.
                let component = self.n_offsets & ((1 << 2) - 1);
                let packed =
                    pack_offset_and_counts(offset, counts.point_lights, counts.spot_lights);
                cluster_offsets_and_counts.get_mut().data[array_index][component] = packed;
            }
            ViewClusterBuffers::Storage {
                cluster_offsets_and_counts,
                ..
            } => {
                cluster_offsets_and_counts.get_mut().data.push([
                    uvec4(
                        offset as u32,
                        counts.point_lights,
                        counts.spot_lights,
                        counts.reflection_probes,
                    ),
                    uvec4(counts.irradiance_volumes, counts.decals, 0, 0),
                ]);
            }
        }
        self.n_offsets += 1;
    }
    /// Total number of indices pushed so far.
    pub fn n_indices(&self) -> usize {
        self.n_indices
    }
    /// Appends one clusterable-object index. The uniform path packs four
    /// 8-bit indices per u32 lane (16 per UVec4); the storage path stores a
    /// plain u32 per index.
    pub fn push_index(&mut self, index: usize) {
        match &mut self.buffers {
            ViewClusterBuffers::Uniform {
                clusterable_object_index_lists,
                ..
            } => {
                let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16
                // Lane within the UVec4, then byte within that u32 lane.
                let component = (self.n_indices >> 2) & ((1 << 2) - 1);
                let sub_index = self.n_indices & ((1 << 2) - 1);
                let index = index as u32;
                // OR the 8-bit index into its byte slot (slots were zeroed by
                // `clear`).
                clusterable_object_index_lists.get_mut().data[array_index][component] |=
                    index << (8 * sub_index);
            }
            ViewClusterBuffers::Storage {
                clusterable_object_index_lists,
                ..
            } => {
                clusterable_object_index_lists
                    .get_mut()
                    .data
                    .push(index as u32);
            }
        }
        self.n_indices += 1;
    }
    /// Uploads both CPU-side buffers to the GPU.
    pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
        match &mut self.buffers {
            ViewClusterBuffers::Uniform {
                clusterable_object_index_lists,
                cluster_offsets_and_counts,
            } => {
                clusterable_object_index_lists.write_buffer(render_device, render_queue);
                cluster_offsets_and_counts.write_buffer(render_device, render_queue);
            }
            ViewClusterBuffers::Storage {
                clusterable_object_index_lists,
                cluster_offsets_and_counts,
            } => {
                clusterable_object_index_lists.write_buffer(render_device, render_queue);
                cluster_offsets_and_counts.write_buffer(render_device, render_queue);
            }
        }
    }
    /// Binding resource for the index-list buffer, if created.
    pub fn clusterable_object_index_lists_binding(&self) -> Option<BindingResource<'_>> {
        match &self.buffers {
            ViewClusterBuffers::Uniform {
                clusterable_object_index_lists,
                ..
            } => clusterable_object_index_lists.binding(),
            ViewClusterBuffers::Storage {
                clusterable_object_index_lists,
                ..
            } => clusterable_object_index_lists.binding(),
        }
    }
    /// Binding resource for the offsets-and-counts buffer, if created.
    pub fn offsets_and_counts_binding(&self) -> Option<BindingResource<'_>> {
        match &self.buffers {
            ViewClusterBuffers::Uniform {
                cluster_offsets_and_counts,
                ..
            } => cluster_offsets_and_counts.binding(),
            ViewClusterBuffers::Storage {
                cluster_offsets_and_counts,
                ..
            } => cluster_offsets_and_counts.binding(),
        }
    }
    /// Minimum binding size of the index-list layout for `buffer_binding_type`.
    pub fn min_size_clusterable_object_index_lists(
        buffer_binding_type: BufferBindingType,
    ) -> NonZero<u64> {
        match buffer_binding_type {
            BufferBindingType::Storage { .. } => GpuClusterableObjectIndexListsStorage::min_size(),
            BufferBindingType::Uniform => GpuClusterableObjectIndexListsUniform::min_size(),
        }
    }
    /// Minimum binding size of the offsets-and-counts layout for
    /// `buffer_binding_type`.
    pub fn min_size_cluster_offsets_and_counts(
        buffer_binding_type: BufferBindingType,
    ) -> NonZero<u64> {
        match buffer_binding_type {
            BufferBindingType::Storage { .. } => GpuClusterOffsetsAndCountsStorage::min_size(),
            BufferBindingType::Uniform => GpuClusterOffsetsAndCountsUniform::min_size(),
        }
    }
}
impl ViewClusterBuffers {
    /// Picks storage-buffer backing when available, otherwise the
    /// uniform-buffer fallback.
    fn new(buffer_binding_type: BufferBindingType) -> Self {
        if matches!(buffer_binding_type, BufferBindingType::Storage { .. }) {
            Self::storage()
        } else {
            Self::uniform()
        }
    }
    fn uniform() -> Self {
        Self::Uniform {
            clusterable_object_index_lists: UniformBuffer::default(),
            cluster_offsets_and_counts: UniformBuffer::default(),
        }
    }
    fn storage() -> Self {
        Self::Storage {
            clusterable_object_index_lists: StorageBuffer::default(),
            cluster_offsets_and_counts: StorageBuffer::default(),
        }
    }
}
// Compresses the offset and counts of point and spot lights so that they fit
// in a single u32 of a UBO.
//
// This function is only used when storage buffers are unavailable on this
// platform: typically, on WebGL 2.
//
// With a uniform buffer max binding size of 16384 bytes we can fit at most
// 204 clusterable objects in one UBO, so each count needs 9 bits
// (CLUSTER_COUNT_SIZE). The index array holds up to 16384 u8 values, so the
// offset into it needs log2(16384) = 14 bits. The packed layout is:
//
// [ 31 .. 18 | 17 .. 9           | 8 .. 0           ]
// [ offset   | point light count | spot light count ]
//
// NOTE: This assumes CPU and GPU endianness are the same, which is true
// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs.
//
// NOTE: On platforms that use this function, we don't cluster light probes,
// so the number of light probes is irrelevant.
fn pack_offset_and_counts(offset: usize, point_count: u32, spot_count: u32) -> u32 {
    let offset_bits = (offset as u32 & CLUSTER_OFFSET_MASK) << (CLUSTER_COUNT_SIZE * 2);
    let point_bits = (point_count & CLUSTER_COUNT_MASK) << CLUSTER_COUNT_SIZE;
    let spot_bits = spot_count & CLUSTER_COUNT_MASK;
    offset_bits | point_bits | spot_bits
}
/// Uniform-buffer layout for the u8-packed clusterable-object index lists
/// (four indices per u32 lane; see `ViewClusterBindings::push_index`).
#[derive(ShaderType)]
struct GpuClusterableObjectIndexListsUniform {
    data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
// NOTE: Assert at compile time that GpuClusterableObjectIndexListsUniform
// fits within the maximum uniform buffer binding size
const _: () = assert!(GpuClusterableObjectIndexListsUniform::SHADER_SIZE.get() <= 16384);
impl Default for GpuClusterableObjectIndexListsUniform {
    /// Zero-initialized full-capacity index list.
    fn default() -> Self {
        Self {
            data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
        }
    }
}
impl Default for GpuClusterOffsetsAndCountsUniform {
    /// Zero-initialized full-capacity offsets-and-counts array.
    fn default() -> Self {
        Self {
            data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
        }
    }
}

View File

@@ -0,0 +1,46 @@
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::component::Component;
use bevy_ecs::entity::{Entity, EntityHashMap};
use bevy_ecs::reflect::ReflectComponent;
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use crate::render::sync_world::MainEntity;
/// Render-world list of visible mesh entities, kept as pairs of the
/// render-world [`Entity`] and its originating [`MainEntity`].
#[derive(Component, Clone, Debug, Default, Reflect, Deref, DerefMut)]
#[reflect(Component, Debug, Default, Clone)]
pub struct RenderVisibleMeshEntities {
    #[reflect(ignore, clone)]
    pub entities: Vec<(Entity, MainEntity)>,
}
/// Visible mesh entities for each of the six faces of a cubemap
/// (e.g. a point-light shadow map), indexed 0..6.
#[derive(Component, Clone, Debug, Default, Reflect)]
#[reflect(Component, Debug, Default, Clone)]
pub struct RenderCubemapVisibleEntities {
    #[reflect(ignore, clone)]
    pub(crate) data: [RenderVisibleMeshEntities; 6],
}
impl RenderCubemapVisibleEntities {
    /// The visible entities for face `i`. Panics if `i >= 6`.
    pub fn get(&self, i: usize) -> &RenderVisibleMeshEntities {
        &self.data[i]
    }
    /// Mutable access to the visible entities for face `i`. Panics if `i >= 6`.
    pub fn get_mut(&mut self, i: usize) -> &mut RenderVisibleMeshEntities {
        &mut self.data[i]
    }
    /// Iterates over all six faces in order.
    pub fn iter(&self) -> impl DoubleEndedIterator<Item = &RenderVisibleMeshEntities> {
        self.data.iter()
    }
    /// Mutably iterates over all six faces in order.
    pub fn iter_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut RenderVisibleMeshEntities> {
        self.data.iter_mut()
    }
}
/// Visible mesh entities for cascaded shadow maps (e.g. directional lights).
#[derive(Component, Clone, Debug, Default, Reflect)]
#[reflect(Component, Default, Clone)]
pub struct RenderCascadesVisibleEntities {
    /// Map of view entity to the visible entities for each cascade frustum.
    #[reflect(ignore, clone)]
    pub entities: EntityHashMap<Vec<RenderVisibleMeshEntities>>,
}

View File

@@ -0,0 +1,441 @@
//! Clustered decals, bounding regions that project textures onto surfaces.
//!
//! A *clustered decal* is a bounding box that projects a texture onto any
//! surface within its bounds along the positive Z axis. In Bevy, clustered
//! decals use the *clustered forward* rendering technique.
//!
//! Clustered decals are the highest-quality types of decals that Bevy supports,
//! but they require bindless textures. This means that they presently can't be
//! used on WebGL 2 or WebGPU. Bevy's clustered decals can be used
//! with forward or deferred rendering and don't require a prepass.
//!
//! On their own, clustered decals only project the base color of a texture. You
//! can, however, use the built-in *tag* field to customize the appearance of a
//! clustered decal arbitrarily. See the documentation in `clustered.wgsl` for
//! more information and the `clustered_decals` example for an example of use.
use core::{num::NonZero, ops::Deref};
use bevy_app::{App, Plugin};
use bevy_asset::AssetId;
use bevy_camera::visibility::ViewVisibility;
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
entity::{Entity, EntityHashMap},
query::With,
resource::Resource,
schedule::IntoScheduleConfigs as _,
system::{Commands, Local, Query, Res, ResMut},
};
use bevy_image::Image;
use bevy_light::{ClusteredDecal, DirectionalLightTexture, PointLightTexture, SpotLightTexture};
use bevy_math::Mat4;
use bevy_platform::collections::HashMap;
use crate::render::{
render_asset::RenderAssets,
render_resource::{
binding_types, BindGroupLayoutEntryBuilder, Buffer, BufferUsages, RawBufferVec, Sampler,
SamplerBindingType, ShaderType, TextureSampleType, TextureView,
},
renderer::{RenderAdapter, RenderDevice, RenderQueue},
sync_component::SyncComponentPlugin,
sync_world::RenderEntity,
texture::{FallbackImage, GpuImage},
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use bevy_shader::load_shader_library;
use bevy_transform::components::GlobalTransform;
use bytemuck::{Pod, Zeroable};
use crate::render::pbr::{binding_arrays_are_usable, prepare_lights, GlobalClusterableObjectMeta};
/// The maximum number of decals that can be present in a view.
///
/// This number is currently relatively low in order to work around the lack of
/// first-class binding arrays in `wgpu`. When that feature is implemented, this
/// limit can be increased.
pub(crate) const MAX_VIEW_DECALS: usize = 8;
/// A plugin that adds support for clustered decals.
///
/// In environments where bindless textures aren't available, clustered decals
/// can still be added to a scene, but they won't project any decals.
pub struct ClusteredDecalPlugin;
/// Stores information about all the clustered decals in the scene.
///
/// Rebuilt every frame; cleared at the start of each extraction.
#[derive(Resource, Default)]
pub struct RenderClusteredDecals {
    /// Maps an index in the shader binding array to the associated decal image.
    ///
    /// [`Self::texture_to_binding_index`] holds the inverse mapping.
    binding_index_to_textures: Vec<AssetId<Image>>,
    /// Maps a decal image to the shader binding array.
    ///
    /// [`Self::binding_index_to_textures`] holds the inverse mapping.
    texture_to_binding_index: HashMap<AssetId<Image>, u32>,
    /// The information concerning each decal that we provide to the shader.
    decals: Vec<RenderClusteredDecal>,
    /// Maps the [`crate::render::sync_world::RenderEntity`] of each decal to
    /// the index of that decal in the [`Self::decals`] list.
    entity_to_decal_index: EntityHashMap<usize>,
}
impl RenderClusteredDecals {
    /// Resets all per-frame decal state in preparation for a new frame.
    fn clear(&mut self) {
        self.entity_to_decal_index.clear();
        self.decals.clear();
        self.texture_to_binding_index.clear();
        self.binding_index_to_textures.clear();
    }

    /// Records a single decal for rendering this frame.
    ///
    /// `local_from_world` is the inverse model matrix, which the shader uses
    /// to back-transform world positions into the decal's local space.
    pub fn insert_decal(
        &mut self,
        entity: Entity,
        image: &AssetId<Image>,
        local_from_world: Mat4,
        tag: u32,
    ) {
        let image_index = self.get_or_insert_image(image);
        // The decal's index is its position in `decals` once pushed.
        self.entity_to_decal_index.insert(entity, self.decals.len());
        self.decals.push(RenderClusteredDecal {
            local_from_world,
            image_index,
            tag,
            pad_a: 0,
            pad_b: 0,
        });
    }

    /// Looks up the index of the decal extracted for `entity`, if any.
    pub fn get(&self, entity: Entity) -> Option<usize> {
        self.entity_to_decal_index.get(&entity).copied()
    }
}
/// The per-view bind group entries pertaining to decals.
pub(crate) struct RenderViewClusteredDecalBindGroupEntries<'a> {
    /// The list of decals, corresponding to `mesh_view_bindings::decals` in the
    /// shader.
    pub(crate) decals: &'a Buffer,
    /// The list of textures, corresponding to
    /// `mesh_view_bindings::decal_textures` in the shader.
    // The element type is the raw view type that Bevy's `TextureView` derefs to.
    pub(crate) texture_views: Vec<&'a <TextureView as Deref>::Target>,
    /// The sampler that the shader uses to sample decals, corresponding to
    /// `mesh_view_bindings::decal_sampler` in the shader.
    pub(crate) sampler: &'a Sampler,
}
/// A render-world resource that holds the buffer of [`ClusteredDecal`]s ready
/// to upload to the GPU.
#[derive(Resource, Deref, DerefMut)]
pub struct DecalsBuffer(RawBufferVec<RenderClusteredDecal>);
impl Default for DecalsBuffer {
    fn default() -> Self {
        // The decal list is bound to the shader as a read-only storage buffer.
        DecalsBuffer(RawBufferVec::new(BufferUsages::STORAGE))
    }
}
impl Plugin for ClusteredDecalPlugin {
    fn build(&self, app: &mut App) {
        // Make the clustered-decal WGSL importable by other shaders.
        load_shader_library!(app, "clustered.wgsl");
        // Keep render-world entities in sync with main-world decal entities.
        app.add_plugins(SyncComponentPlugin::<ClusteredDecal>::default());
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            .init_resource::<DecalsBuffer>()
            .init_resource::<RenderClusteredDecals>()
            .add_systems(ExtractSchedule, (extract_decals, extract_clustered_decal))
            .add_systems(
                Render,
                // Ordered after `prepare_lights` so decal indices land in
                // `GlobalClusterableObjectMeta` after the light entries
                // (see `prepare_decals`).
                prepare_decals
                    .in_set(RenderSystems::ManageViews)
                    .after(prepare_lights),
            )
            .add_systems(
                Render,
                upload_decals.in_set(RenderSystems::PrepareResources),
            );
    }
}
// This is needed because of the orphan rule not allowing implementing
// foreign trait ExtractComponent on foreign type ClusteredDecal
fn extract_clustered_decal(
    mut commands: Commands,
    mut previous_len: Local<usize>,
    query: Extract<Query<(RenderEntity, &ClusteredDecal)>>,
) {
    // Size the scratch vector from last frame's count to avoid regrowth.
    let mut batch = Vec::with_capacity(*previous_len);
    batch.extend(query.iter().map(|(entity, decal)| (entity, decal.clone())));
    *previous_len = batch.len();
    commands.try_insert_batch(batch);
}
/// The GPU data structure that stores information about each decal.
#[derive(Clone, Copy, Default, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct RenderClusteredDecal {
    /// The inverse of the model matrix.
    ///
    /// The shader uses this in order to back-transform world positions into
    /// model space.
    local_from_world: Mat4,
    /// The index of the decal texture in the binding array.
    image_index: u32,
    /// A custom tag available for application-defined purposes.
    tag: u32,
    /// Padding.
    // Pads the trailing scalars out to a full 16-byte GPU vector — presumably
    // required by the shader-side struct layout; confirm before removing.
    pad_a: u32,
    /// Padding.
    pad_b: u32,
}
/// Extracts decals from the main world into the render world.
///
/// Besides [`ClusteredDecal`]s proper, the light-texture components are also
/// funneled through the same decal table, each encoding its extra data in the
/// `tag` field.
pub fn extract_decals(
    decals: Extract<
        Query<(
            RenderEntity,
            &ClusteredDecal,
            &GlobalTransform,
            &ViewVisibility,
        )>,
    >,
    spot_light_textures: Extract<
        Query<(
            RenderEntity,
            &SpotLightTexture,
            &GlobalTransform,
            &ViewVisibility,
        )>,
    >,
    point_light_textures: Extract<
        Query<(
            RenderEntity,
            &PointLightTexture,
            &GlobalTransform,
            &ViewVisibility,
        )>,
    >,
    directional_light_textures: Extract<
        Query<(
            RenderEntity,
            &DirectionalLightTexture,
            &GlobalTransform,
            &ViewVisibility,
        )>,
    >,
    mut render_decals: ResMut<RenderClusteredDecals>,
) {
    // Rebuild the decal tables from scratch every frame.
    render_decals.clear();

    // Ordinary clustered decals carry the user-supplied tag; invisible ones
    // are skipped.
    for (entity, decal, transform, visibility) in &decals {
        if !visibility.get() {
            continue;
        }
        render_decals.insert_decal(
            entity,
            &decal.image.id(),
            transform.affine().inverse().into(),
            decal.tag,
        );
    }

    // Spot light textures don't use the tag; it's always 0.
    for (entity, texture, transform, visibility) in &spot_light_textures {
        if !visibility.get() {
            continue;
        }
        render_decals.insert_decal(
            entity,
            &texture.image.id(),
            transform.affine().inverse().into(),
            0,
        );
    }

    // Point light textures reuse the tag to carry the cubemap layout.
    for (entity, texture, transform, visibility) in &point_light_textures {
        if !visibility.get() {
            continue;
        }
        render_decals.insert_decal(
            entity,
            &texture.image.id(),
            transform.affine().inverse().into(),
            texture.cubemap_layout as u32,
        );
    }

    // Directional light textures reuse the tag as a "tiled" flag.
    for (entity, texture, transform, visibility) in &directional_light_textures {
        if !visibility.get() {
            continue;
        }
        render_decals.insert_decal(
            entity,
            &texture.image.id(),
            transform.affine().inverse().into(),
            u32::from(texture.tiled),
        );
    }
}
/// Adds all decals in the scene to the [`GlobalClusterableObjectMeta`] table.
fn prepare_decals(
    decals: Query<Entity, With<ClusteredDecal>>,
    mut global_clusterable_object_meta: ResMut<GlobalClusterableObjectMeta>,
    render_decals: Res<RenderClusteredDecals>,
) {
    for decal_entity in &decals {
        // Only decals that survived extraction have an index.
        let Some(&index) = render_decals.entity_to_decal_index.get(&decal_entity) else {
            continue;
        };
        global_clusterable_object_meta
            .entity_to_index
            .insert(decal_entity, index);
    }
}
/// Returns the layout for the clustered-decal-related bind group entries for a
/// single view.
///
/// Returns [`None`] when binding arrays aren't supported on the current
/// platform, in which case decals have no bind group entries at all.
pub(crate) fn get_bind_group_layout_entries(
    render_device: &RenderDevice,
    render_adapter: &RenderAdapter,
) -> Option<[BindGroupLayoutEntryBuilder; 3]> {
    clustered_decals_are_usable(render_device, render_adapter).then(|| {
        [
            // `decals`
            binding_types::storage_buffer_read_only::<RenderClusteredDecal>(false),
            // `decal_textures`
            binding_types::texture_2d(TextureSampleType::Float { filterable: true })
                .count(NonZero::<u32>::new(MAX_VIEW_DECALS as u32).unwrap()),
            // `decal_sampler`
            binding_types::sampler(SamplerBindingType::Filtering),
        ]
    })
}
impl<'a> RenderViewClusteredDecalBindGroupEntries<'a> {
    /// Creates and returns the bind group entries for clustered decals for a
    /// single view.
    ///
    /// Returns [`None`] when decals are unsupported on the current platform or
    /// when the decals buffer hasn't been created yet.
    pub(crate) fn get(
        render_decals: &RenderClusteredDecals,
        decals_buffer: &'a DecalsBuffer,
        images: &'a RenderAssets<GpuImage>,
        fallback_image: &'a FallbackImage,
        render_device: &RenderDevice,
        render_adapter: &RenderAdapter,
    ) -> Option<RenderViewClusteredDecalBindGroupEntries<'a>> {
        if !clustered_decals_are_usable(render_device, render_adapter) {
            return None;
        }

        // All decal images are documented to share a single sampler, so the
        // first loaded image's sampler will do. When no decal image is loaded
        // yet, borrow the fallback image's sampler instead.
        let sampler = render_decals
            .binding_index_to_textures
            .iter()
            .find_map(|image_id| images.get(*image_id))
            .map_or(&fallback_image.d2.sampler, |gpu_image| &gpu_image.sampler);

        // Resolve each bound image to its texture view, substituting the
        // fallback view for images that haven't finished loading.
        let mut texture_views: Vec<_> = render_decals
            .binding_index_to_textures
            .iter()
            .map(|image_id| match images.get(*image_id) {
                Some(gpu_image) => &*gpu_image.texture_view,
                None => &*fallback_image.d2.texture_view,
            })
            .collect();

        // Some platforms require the binding array to be filled out to its
        // maximum declared length, so pad with the fallback view.
        while texture_views.len() < MAX_VIEW_DECALS {
            texture_views.push(&*fallback_image.d2.texture_view);
        }

        Some(RenderViewClusteredDecalBindGroupEntries {
            decals: decals_buffer.buffer()?,
            texture_views,
            sampler,
        })
    }
}
impl RenderClusteredDecals {
    /// Returns the index of the given image in the decal texture binding array,
    /// adding it to the list if necessary.
    fn get_or_insert_image(&mut self, image_id: &AssetId<Image>) -> u32 {
        if let Some(&index) = self.texture_to_binding_index.get(image_id) {
            return index;
        }
        // First sighting of this image: append it and record its slot in both
        // directions of the mapping.
        let index = self.binding_index_to_textures.len() as u32;
        self.binding_index_to_textures.push(*image_id);
        self.texture_to_binding_index.insert(*image_id, index);
        index
    }
}
/// Uploads the list of decals from [`RenderClusteredDecals::decals`] to the
/// GPU.
fn upload_decals(
    render_decals: Res<RenderClusteredDecals>,
    mut decals_buffer: ResMut<DecalsBuffer>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
) {
    decals_buffer.clear();
    render_decals
        .decals
        .iter()
        .for_each(|&decal| decals_buffer.push(decal));

    // The shader binds this buffer unconditionally, so guarantee it exists by
    // uploading a single dummy entry when there are no decals.
    if decals_buffer.is_empty() {
        decals_buffer.push(RenderClusteredDecal::default());
    }

    decals_buffer.write_buffer(&render_device, &render_queue);
}
/// Returns true if clustered decals are usable on the current platform or false
/// otherwise.
///
/// Clustered decals are currently disabled on macOS and iOS due to insufficient
/// texture bindings and limited bindless support in `wgpu`.
pub fn clustered_decals_are_usable(
    render_device: &RenderDevice,
    render_adapter: &RenderAdapter,
) -> bool {
    // Binding arrays stay disabled on Metal because there aren't enough
    // texture bindings available there; see issue #17553. Revisit once `wgpu`
    // gains first-class bindless support.
    let bindless_supported = binding_arrays_are_usable(render_device, render_adapter);
    bindless_supported && cfg!(feature = "pbr_clustered_decals")
}

View File

@@ -0,0 +1,183 @@
// Support code for clustered decals.
//
// This module provides an iterator API, which you may wish to use in your own
// shaders if you want clustered decals to provide textures other than the base
// color. The iterator API allows you to iterate over all decals affecting the
// current fragment. Use `clustered_decal_iterator_new()` and
// `clustered_decal_iterator_next()` as follows:
//
// let view_z = get_view_z(vec4(world_position, 1.0));
// let is_orthographic = view_is_orthographic();
//
// let cluster_index =
// clustered_forward::fragment_cluster_index(frag_coord, view_z, is_orthographic);
// var clusterable_object_index_ranges =
// clustered_forward::unpack_clusterable_object_index_ranges(cluster_index);
//
// var iterator = clustered_decal_iterator_new(world_position, &clusterable_object_index_ranges);
// while (clustered_decal_iterator_next(&iterator)) {
// ... sample from the texture at iterator.texture_index at iterator.uv ...
// }
//
// In this way, in conjunction with a custom material, you can provide your own
// texture arrays that mirror `mesh_view_bindings::clustered_decal_textures` in
// order to support decals with normal maps, etc.
//
// Note that the order in which decals are returned is currently unpredictable,
// though generally stable from frame to frame.
#define_import_path bevy_pbr::decal::clustered
#import bevy_pbr::clustered_forward
#import bevy_pbr::clustered_forward::ClusterableObjectIndexRanges
#import bevy_pbr::mesh_view_bindings
#import bevy_render::maths
// An object that allows stepping through all clustered decals that affect a
// single fragment.
//
// Create one with `clustered_decal_iterator_new()` and advance it with
// `clustered_decal_iterator_next()`; the public fields are only valid after
// `..._next()` has returned true.
struct ClusteredDecalIterator {
    // Public fields follow:
    // The index of the decal texture in the binding array.
    texture_index: i32,
    // The UV coordinates at which to sample that decal texture.
    uv: vec2<f32>,
    // A custom tag you can use for your own purposes.
    tag: u32,
    // Private fields follow:
    // The current offset of the index in the `ClusterableObjectIndexRanges` list.
    decal_index_offset: i32,
    // The end offset of the index in the `ClusterableObjectIndexRanges` list.
    end_offset: i32,
    // The world-space position of the fragment.
    world_position: vec3<f32>,
}
#ifdef CLUSTERED_DECALS_ARE_USABLE

// Creates a new iterator over the decals at the current fragment.
//
// You can retrieve `clusterable_object_index_ranges` as follows:
//
//     let view_z = get_view_z(world_position);
//     let is_orthographic = view_is_orthographic();
//
//     let cluster_index =
//         clustered_forward::fragment_cluster_index(frag_coord, view_z, is_orthographic);
//     var clusterable_object_index_ranges =
//         clustered_forward::unpack_clusterable_object_index_ranges(cluster_index);
fn clustered_decal_iterator_new(
    world_position: vec3<f32>,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>
) -> ClusteredDecalIterator {
    return ClusteredDecalIterator(
        // `texture_index`, `uv`, and `tag` are placeholders until the first
        // successful call to `clustered_decal_iterator_next`.
        -1,
        vec2(0.0),
        0u,
        // We subtract 1 because the first thing `decal_iterator_next` does is
        // add 1.
        i32((*clusterable_object_index_ranges).first_decal_offset) - 1,
        i32((*clusterable_object_index_ranges).last_clusterable_object_index_offset),
        world_position,
    );
}

// Populates the `iterator.texture_index` and `iterator.uv` fields for the next
// decal overlapping the current world position.
//
// Returns true if another decal was found or false if no more decals were found
// for this position.
fn clustered_decal_iterator_next(iterator: ptr<function, ClusteredDecalIterator>) -> bool {
    // Once exhausted, stay exhausted: further calls keep returning false.
    if ((*iterator).decal_index_offset == (*iterator).end_offset) {
        return false;
    }

    (*iterator).decal_index_offset += 1;

    while ((*iterator).decal_index_offset < (*iterator).end_offset) {
        let decal_index = i32(clustered_forward::get_clusterable_object_id(
            u32((*iterator).decal_index_offset)
        ));
        // Back-transform the fragment into the decal's local space, where the
        // decal occupies the unit cube centered on the origin.
        let decal_space_vector =
            (mesh_view_bindings::clustered_decals.decals[decal_index].local_from_world *
            vec4((*iterator).world_position, 1.0)).xyz;
        if (all(decal_space_vector >= vec3(-0.5)) && all(decal_space_vector <= vec3(0.5))) {
            (*iterator).texture_index =
                i32(mesh_view_bindings::clustered_decals.decals[decal_index].image_index);
            // Map local XY from [-0.5, 0.5] to [0, 1] UVs, flipping Y.
            (*iterator).uv = decal_space_vector.xy * vec2(1.0, -1.0) + vec2(0.5);
            (*iterator).tag =
                mesh_view_bindings::clustered_decals.decals[decal_index].tag;
            return true;
        }
        (*iterator).decal_index_offset += 1;
    }

    return false;
}

#endif  // CLUSTERED_DECALS_ARE_USABLE
// Returns the view-space Z coordinate for the given world position.
//
// This is the dot product of the third row of `view_from_world` (gathered
// column-by-column) with the homogeneous world position.
fn get_view_z(world_position: vec3<f32>) -> f32 {
    return dot(vec4<f32>(
        mesh_view_bindings::view.view_from_world[0].z,
        mesh_view_bindings::view.view_from_world[1].z,
        mesh_view_bindings::view.view_from_world[2].z,
        mesh_view_bindings::view.view_from_world[3].z
    ), vec4(world_position, 1.0));
}
// Returns true if the current view describes an orthographic projection or
// false otherwise.
//
// An orthographic projection matrix has 1.0 in the bottom-right cell; a
// perspective projection doesn't.
fn view_is_orthographic() -> bool {
    return mesh_view_bindings::view.clip_from_view[3].w == 1.0;
}
// Modifies the base color at the given position to account for decals.
//
// Returns the new base color with decals taken into account. If no decals
// overlap the current world position, returns the supplied base color
// unmodified.
fn apply_decal_base_color(
    world_position: vec3<f32>,
    frag_coord: vec2<f32>,
    initial_base_color: vec4<f32>,
) -> vec4<f32> {
    var base_color = initial_base_color;

#ifdef CLUSTERED_DECALS_ARE_USABLE
    // Fetch the clusterable object index ranges for this world position.
    let view_z = get_view_z(world_position);
    let is_orthographic = view_is_orthographic();
    let cluster_index =
        clustered_forward::fragment_cluster_index(frag_coord, view_z, is_orthographic);
    var clusterable_object_index_ranges =
        clustered_forward::unpack_clusterable_object_index_ranges(cluster_index);

    // Iterate over decals.
    var iterator = clustered_decal_iterator_new(world_position, &clusterable_object_index_ranges);
    while (clustered_decal_iterator_next(&iterator)) {
        // Sample the current decal.
        let decal_base_color = textureSampleLevel(
            mesh_view_bindings::clustered_decal_textures[iterator.texture_index],
            mesh_view_bindings::clustered_decal_sampler,
            iterator.uv,
            0.0
        );

        // Blend with the accumulated fragment: alpha-composite the RGB and
        // accumulate coverage in the alpha channel.
        base_color = vec4(
            mix(base_color.rgb, decal_base_color.rgb, decal_base_color.a),
            base_color.a + decal_base_color.a
        );
    }
#endif  // CLUSTERED_DECALS_ARE_USABLE

    return base_color;
}

View File

@@ -0,0 +1,165 @@
use crate::render::pbr::{
ExtendedMaterial, Material, MaterialExtension, MaterialExtensionKey, MaterialExtensionPipeline,
MaterialPlugin, StandardMaterial,
};
use bevy_app::{App, Plugin};
use bevy_asset::{Asset, Assets, Handle};
use bevy_ecs::{
component::Component, lifecycle::HookContext, resource::Resource, world::DeferredWorld,
};
use bevy_math::{prelude::Rectangle, Quat, Vec2, Vec3};
use bevy_mesh::{Mesh, Mesh3d, MeshBuilder, MeshVertexBufferLayoutRef, Meshable};
use bevy_reflect::{Reflect, TypePath};
use crate::render::{
alpha::AlphaMode,
render_asset::RenderAssets,
render_resource::{
AsBindGroup, AsBindGroupShaderType, CompareFunction, RenderPipelineDescriptor, ShaderType,
SpecializedMeshPipelineError,
},
texture::GpuImage,
RenderDebugFlags,
};
use bevy_shader::load_shader_library;
/// Plugin to render [`ForwardDecal`]s.
pub struct ForwardDecalPlugin;
impl Plugin for ForwardDecalPlugin {
    fn build(&self, app: &mut App) {
        load_shader_library!(app, "forward_decal.wgsl");
        // Build the single shared decal mesh: a unit quad, rotated from facing
        // +Z to facing +Y, with tangents generated for the decal shader's
        // tangent-space math.
        let mesh = app.world_mut().resource_mut::<Assets<Mesh>>().add(
            Rectangle::from_size(Vec2::ONE)
                .mesh()
                .build()
                .rotated_by(Quat::from_rotation_arc(Vec3::Z, Vec3::Y))
                .with_generated_tangents()
                .unwrap(),
        );
        app.insert_resource(ForwardDecalMesh(mesh));
        // Register the extended-material pipeline; decals do their own
        // depth-based blending, so the prepass and shadows are disabled.
        app.add_plugins(MaterialPlugin::<ForwardDecalMaterial<StandardMaterial>> {
            prepass_enabled: false,
            shadows_enabled: false,
            debug_flags: RenderDebugFlags::default(),
            ..Default::default()
        });
    }
}
/// A decal that renders via a 1x1 transparent quad mesh, smoothly alpha-blending with the underlying
/// geometry towards the edges.
///
/// Because forward decals are meshes, you can use arbitrary materials to control their appearance.
///
/// # Usage Notes
///
/// * Spawn this component on an entity with a [`crate::MeshMaterial3d`] component holding a [`ForwardDecalMaterial`].
/// * Any camera rendering a forward decal must have the [`bevy_core_pipeline::prepass::DepthPrepass`] component.
/// * Looking at forward decals at a steep angle can cause distortion. This can be mitigated by padding your decal's
///   texture with extra transparent pixels on the edges.
/// * On Wasm, requires using WebGPU and disabling `Msaa` on your camera.
#[derive(Component, Reflect)]
#[require(Mesh3d)]
// The hook swaps in the shared decal quad whenever `Mesh3d` is left defaulted.
#[component(on_add=forward_decal_set_mesh)]
pub struct ForwardDecal;
/// Type alias for an extended material with a [`ForwardDecalMaterialExt`] extension.
///
/// Make sure to register the [`MaterialPlugin`] for this material in your app setup.
///
/// [`StandardMaterial`] comes with out of the box support for forward decals.
// Bounded alias so misuse is caught at the alias rather than at expansion.
#[expect(type_alias_bounds, reason = "Type alias generics not yet stable")]
pub type ForwardDecalMaterial<B: Material> = ExtendedMaterial<B, ForwardDecalMaterialExt>;
/// Material extension for a [`ForwardDecal`].
///
/// In addition to wrapping your material type with this extension, your shader must use
/// the `bevy_pbr::decal::forward::get_forward_decal_info` function.
///
/// The `FORWARD_DECAL` shader define will be made available to your shader so that you can gate
/// the forward decal code behind an ifdef.
#[derive(Asset, AsBindGroup, TypePath, Clone, Debug)]
#[uniform(200, ForwardDecalMaterialExtUniform)]
pub struct ForwardDecalMaterialExt {
    /// Controls the distance threshold for decal blending with surfaces.
    ///
    /// This parameter determines how far away a surface can be before the decal no longer blends
    /// with it and instead renders with full opacity.
    ///
    /// Lower values cause the decal to only blend with close surfaces, while higher values allow
    /// blending with more distant surfaces.
    ///
    /// Units are in meters.
    ///
    /// Uploaded to the GPU as its reciprocal; see [`ForwardDecalMaterialExtUniform`].
    pub depth_fade_factor: f32,
}
/// The GPU-side uniform for [`ForwardDecalMaterialExt`].
#[derive(Clone, Default, ShaderType)]
pub struct ForwardDecalMaterialExtUniform {
    /// Reciprocal of `depth_fade_factor`, precomputed on the CPU so the shader
    /// can multiply instead of divide.
    pub inv_depth_fade_factor: f32,
}
impl AsBindGroupShaderType<ForwardDecalMaterialExtUniform> for ForwardDecalMaterialExt {
    /// Converts the CPU-side settings into the GPU uniform, precomputing the
    /// reciprocal of the fade factor.
    fn as_bind_group_shader_type(
        &self,
        _images: &RenderAssets<GpuImage>,
    ) -> ForwardDecalMaterialExtUniform {
        // Clamp away from zero so the reciprocal stays finite.
        let fade = self.depth_fade_factor.max(0.001);
        ForwardDecalMaterialExtUniform {
            inv_depth_fade_factor: fade.recip(),
        }
    }
}
impl MaterialExtension for ForwardDecalMaterialExt {
    fn alpha_mode() -> Option<AlphaMode> {
        // Forward decals must alpha-blend with the geometry beneath them.
        Some(AlphaMode::Blend)
    }

    fn specialize(
        _pipeline: &MaterialExtensionPipeline,
        descriptor: &mut RenderPipelineDescriptor,
        _layout: &MeshVertexBufferLayoutRef,
        _key: MaterialExtensionKey<Self>,
    ) -> Result<(), SpecializedMeshPipelineError> {
        // The decal quad must always draw regardless of scene depth; the
        // depth-aware fading happens in the shader against the depth prepass.
        descriptor
            .depth_stencil
            .as_mut()
            .expect("forward decal pipelines must have a depth-stencil state")
            .depth_compare = CompareFunction::Always;

        // Gate the decal-specific shader code behind the `FORWARD_DECAL`
        // define in both the vertex and (if present) fragment stages.
        descriptor.vertex.shader_defs.push("FORWARD_DECAL".into());
        if let Some(fragment) = &mut descriptor.fragment {
            fragment.shader_defs.push("FORWARD_DECAL".into());
        }

        if let Some(label) = &mut descriptor.label {
            *label = format!("forward_decal_{label}").into();
        }

        Ok(())
    }
}
impl Default for ForwardDecalMaterialExt {
    fn default() -> Self {
        Self {
            // 8 meters of depth fade by default (see `depth_fade_factor` docs).
            depth_fade_factor: 8.0,
        }
    }
}
/// Handle to the shared unit-quad mesh assigned to every [`ForwardDecal`].
#[derive(Resource)]
struct ForwardDecalMesh(Handle<Mesh>);
// Note: We need to use a hook here instead of required components since we cannot access resources
// with required components, and we can't otherwise get a handle to the asset from a required
// component constructor, since the constructor must be a function pointer, and we intentionally do
// not want to use `uuid_handle!`.
fn forward_decal_set_mesh(mut world: DeferredWorld, HookContext { entity, .. }: HookContext) {
    let shared_mesh = world.resource::<ForwardDecalMesh>().0.clone();
    let mut decal_entity = world.entity_mut(entity);
    let mut mesh_component = decal_entity.get_mut::<Mesh3d>().unwrap();
    // Leave any mesh the user assigned explicitly alone; only a defaulted
    // handle is swapped for the shared decal quad.
    if **mesh_component == Handle::default() {
        mesh_component.0 = shared_mesh;
    }
}

View File

@@ -0,0 +1,52 @@
#define_import_path bevy_pbr::decal::forward
#import bevy_pbr::{
forward_io::VertexOutput,
mesh_functions::get_world_from_local,
mesh_view_bindings::view,
pbr_functions::calculate_tbn_mikktspace,
prepass_utils::prepass_depth,
view_transformations::depth_ndc_to_view_z,
}
#import bevy_render::maths::project_onto
@group(#{MATERIAL_BIND_GROUP}) @binding(200)
var<uniform> inv_depth_fade_factor: f32;
// The result of `get_forward_decal_info`: the reprojected world position, the
// parallax-corrected UV, and the depth-based fade alpha for the decal.
struct ForwardDecalInformation {
    world_position: vec4<f32>,
    uv: vec2<f32>,
    alpha: f32,
}
// Computes the decal's parallax-corrected UV, reprojected world position, and
// depth-fade alpha for the current fragment, using the depth prepass to find
// the underlying geometry.
fn get_forward_decal_info(in: VertexOutput) -> ForwardDecalInformation {
    let world_from_local = get_world_from_local(in.instance_index);
    // Per-axis scale of the decal quad, extracted from the model matrix.
    let scale = (world_from_local * vec4(1.0, 1.0, 1.0, 0.0)).xyz;
    let scaled_tangent = vec4(in.world_tangent.xyz / scale, in.world_tangent.w);

    let V = normalize(view.world_position - in.world_position.xyz);

    // Transform V from fragment to camera in world space to tangent space.
    let TBN = calculate_tbn_mikktspace(in.world_normal, scaled_tangent);
    let T = TBN[0];
    let B = TBN[1];
    let N = TBN[2];
    let Vt = vec3(dot(V, T), dot(V, B), dot(V, N));

    // Difference between this fragment's depth and the prepass depth of the
    // geometry behind the decal quad.
    let frag_depth = depth_ndc_to_view_z(in.position.z);
    let depth_pass_depth = depth_ndc_to_view_z(prepass_depth(in.position, 0u));
    let diff_depth = frag_depth - depth_pass_depth;
    let diff_depth_abs = abs(diff_depth);

    // Apply UV parallax
    let contact_on_decal = project_onto(V * diff_depth, in.world_normal);
    let normal_depth = length(contact_on_decal);
    let view_steepness = abs(Vt.z);
    let delta_uv = normal_depth * Vt.xy * vec2(1.0, -1.0) / view_steepness;
    let uv = in.uv + delta_uv;

    // Project the fragment along the view ray onto the underlying surface.
    let world_position = vec4(in.world_position.xyz + V * diff_depth_abs, in.world_position.w);
    // Fade out as the surface moves away from the decal plane.
    let alpha = saturate(1.0 - (normal_depth * inv_depth_fade_factor));

    return ForwardDecalInformation(world_position, uv, alpha);
}

View File

@@ -0,0 +1,11 @@
//! Decal rendering.
//!
//! Decals are a material that render on top of the surface that they're placed above.
//! They can be used to render signs, paint, snow, impact craters, and other effects on top of surfaces.
// TODO: Once other decal types are added, write a paragraph comparing the different types in the module docs.
pub mod clustered;
mod forward;
pub use forward::*;

View File

@@ -0,0 +1,88 @@
#import bevy_pbr::{
prepass_utils,
pbr_types::STANDARD_MATERIAL_FLAGS_UNLIT_BIT,
pbr_functions,
pbr_deferred_functions::pbr_input_from_deferred_gbuffer,
pbr_deferred_types::unpack_unorm3x4_plus_unorm_20_,
lighting,
mesh_view_bindings::deferred_prepass_texture,
}
#ifdef SCREEN_SPACE_AMBIENT_OCCLUSION
#import bevy_pbr::mesh_view_bindings::screen_space_ambient_occlusion_texture
#import bevy_pbr::ssao_utils::ssao_multibounce
#endif
// The output of the fullscreen vertex stage: clip-space position plus UV.
struct FullscreenVertexOutput {
    @builtin(position)
    position: vec4<f32>,
    @location(0)
    uv: vec2<f32>,
};
// Uniform carrying the depth id that selects which g-buffer fragments this
// lighting pass applies to (mirrors the Rust `PbrDeferredLightingDepthId`).
struct PbrDeferredLightingDepthId {
    depth_id: u32, // limited to u8
#ifdef SIXTEEN_BYTE_ALIGNMENT
    // WebGL2 structs must be 16 byte aligned.
    _webgl2_padding_0: f32,
    _webgl2_padding_1: f32,
    _webgl2_padding_2: f32,
#endif
}
@group(2) @binding(0)
var<uniform> depth_id: PbrDeferredLightingDepthId;
@vertex
fn vertex(@builtin(vertex_index) vertex_index: u32) -> FullscreenVertexOutput {
    // See the full screen vertex shader for explanation above for how this works.
    let uv = vec2<f32>(f32(vertex_index >> 1u), f32(vertex_index & 1u)) * 2.0;
    // Depth is stored as unorm, so we are dividing the u8 depth_id by 255.0 here.
    // Writing the depth id as the triangle's Z lets the depth test reject
    // fragments belonging to a different lighting pass.
    let clip_position = vec4<f32>(uv * vec2<f32>(2.0, -2.0) + vec2<f32>(-1.0, 1.0), f32(depth_id.depth_id) / 255.0, 1.0);
    return FullscreenVertexOutput(clip_position, uv);
}
@fragment
fn fragment(in: FullscreenVertexOutput) -> @location(0) vec4<f32> {
    var frag_coord = vec4(in.position.xy, 0.0, 0.0);

    let deferred_data = textureLoad(deferred_prepass_texture, vec2<i32>(frag_coord.xy), 0);

    // Recover the fragment's depth: on WebGL2 it's packed into the g-buffer's
    // blue channel; elsewhere it comes from the depth prepass when available.
#ifdef WEBGL2
    frag_coord.z = unpack_unorm3x4_plus_unorm_20_(deferred_data.b).w;
#else
#ifdef DEPTH_PREPASS
    frag_coord.z = prepass_utils::prepass_depth(in.position, 0u);
#endif
#endif

    var pbr_input = pbr_input_from_deferred_gbuffer(frag_coord, deferred_data);
    var output_color = vec4(0.0);

    // NOTE: Unlit bit not set means == 0 is true, so the true case is if lit
    if ((pbr_input.material.flags & STANDARD_MATERIAL_FLAGS_UNLIT_BIT) == 0u) {
#ifdef SCREEN_SPACE_AMBIENT_OCCLUSION
        let ssao = textureLoad(screen_space_ambient_occlusion_texture, vec2<i32>(in.position.xy), 0i).r;
        let ssao_multibounce = ssao_multibounce(ssao, pbr_input.material.base_color.rgb);
        pbr_input.diffuse_occlusion = min(pbr_input.diffuse_occlusion, ssao_multibounce);

        // Neubelt and Pettineo 2013, "Crafting a Next-gen Material Pipeline for The Order: 1886"
        let NdotV = max(dot(pbr_input.N, pbr_input.V), 0.0001);
        var perceptual_roughness: f32 = pbr_input.material.perceptual_roughness;
        let roughness = lighting::perceptualRoughnessToRoughness(perceptual_roughness);

        // Use SSAO to estimate the specular occlusion.
        // Lagarde and Rousiers 2014, "Moving Frostbite to Physically Based Rendering"
        pbr_input.specular_occlusion = saturate(pow(NdotV + ssao, exp2(-16.0 * roughness - 1.0)) - 1.0 + ssao);
#endif // SCREEN_SPACE_AMBIENT_OCCLUSION
        output_color = pbr_functions::apply_pbr_lighting(pbr_input);
    } else {
        output_color = pbr_input.material.base_color;
    }

    output_color = pbr_functions::main_pass_post_lighting_processing(pbr_input, output_color);

    return output_color;
}

View File

@@ -0,0 +1,570 @@
use crate::render::pbr::{
graph::NodePbr, MeshPipeline, MeshViewBindGroup, RenderViewLightProbes,
ScreenSpaceAmbientOcclusion, ScreenSpaceReflectionsUniform, ViewEnvironmentMapUniformOffset,
ViewLightProbesUniformOffset, ViewScreenSpaceReflectionsUniformOffset,
TONEMAPPING_LUT_SAMPLER_BINDING_INDEX, TONEMAPPING_LUT_TEXTURE_BINDING_INDEX,
};
use crate::render::pbr::{DistanceFog, MeshPipelineKey, ViewFogUniformOffset, ViewLightsUniformOffset};
use bevy_app::prelude::*;
use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle};
use crate::render::{
core_3d::graph::{Core3d, Node3d},
deferred::{
copy_lighting_id::DeferredLightingIdDepthTexture, DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT,
},
prepass::{DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass},
tonemapping::{DebandDither, Tonemapping},
};
use bevy_ecs::{prelude::*, query::QueryItem};
use bevy_image::BevyDefault as _;
use bevy_light::{EnvironmentMapLight, IrradianceVolume, ShadowFilteringMethod};
use crate::render::RenderStartup;
use crate::render::{
diagnostic::RecordDiagnostics,
extract_component::{
ComponentUniforms, ExtractComponent, ExtractComponentPlugin, UniformComponentPlugin,
},
render_graph::{NodeRunError, RenderGraphContext, RenderGraphExt, ViewNode, ViewNodeRunner},
render_resource::{binding_types::uniform_buffer, *},
renderer::{RenderContext, RenderDevice},
view::{ExtractedView, ViewTarget, ViewUniformOffset},
Render, RenderApp, RenderSystems,
};
use bevy_shader::{Shader, ShaderDefVal};
use bevy_utils::default;
/// A plugin that adds support for the deferred PBR lighting pass.
pub struct DeferredPbrLightingPlugin;
/// The `depth_id` used when no [`PbrDeferredLightingDepthId`] is specified explicitly.
pub const DEFAULT_PBR_DEFERRED_LIGHTING_PASS_ID: u8 = 1;
/// Component with a `depth_id` for specifying which corresponding materials should be rendered by this specific PBR deferred lighting pass.
///
/// Will be automatically added to entities with the [`DeferredPrepass`] component that don't already have a [`PbrDeferredLightingDepthId`].
#[derive(Component, Clone, Copy, ExtractComponent, ShaderType)]
pub struct PbrDeferredLightingDepthId {
    // Stored as a u32 for the GPU, but every value originates from a u8.
    depth_id: u32,
    // WebGL2 requires uniform structs to be 16-byte aligned, hence the padding
    // fields (mirrored in deferred_lighting.wgsl).
    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
    _webgl2_padding_0: f32,
    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
    _webgl2_padding_1: f32,
    #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
    _webgl2_padding_2: f32,
}
impl PbrDeferredLightingDepthId {
    /// Creates a depth id from a `u8` value.
    pub fn new(value: u8) -> PbrDeferredLightingDepthId {
        PbrDeferredLightingDepthId {
            depth_id: u32::from(value),
            #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
            _webgl2_padding_0: 0.0,
            #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
            _webgl2_padding_1: 0.0,
            #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
            _webgl2_padding_2: 0.0,
        }
    }

    /// Overwrites the stored depth id.
    pub fn set(&mut self, value: u8) {
        self.depth_id = u32::from(value);
    }

    /// Returns the stored depth id.
    pub fn get(&self) -> u8 {
        // Values only ever enter through a `u8`, so this narrowing is lossless.
        self.depth_id as u8
    }
}
impl Default for PbrDeferredLightingDepthId {
fn default() -> Self {
PbrDeferredLightingDepthId {
depth_id: DEFAULT_PBR_DEFERRED_LIGHTING_PASS_ID as u32,
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
_webgl2_padding_0: 0.0,
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
_webgl2_padding_1: 0.0,
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
_webgl2_padding_2: 0.0,
}
}
}
impl Plugin for DeferredPbrLightingPlugin {
    fn build(&self, app: &mut App) {
        app.add_plugins((
            // Copy each view's depth id into the render world and upload it as
            // a uniform for the lighting pass.
            ExtractComponentPlugin::<PbrDeferredLightingDepthId>::default(),
            UniformComponentPlugin::<PbrDeferredLightingDepthId>::default(),
        ))
        .add_systems(PostUpdate, insert_deferred_lighting_pass_id_component);

        embedded_asset!(app, "deferred_lighting.wgsl");

        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .init_resource::<SpecializedRenderPipelines<DeferredLightingLayout>>()
            .add_systems(RenderStartup, init_deferred_lighting_layout)
            .add_systems(
                Render,
                (prepare_deferred_lighting_pipelines.in_set(RenderSystems::Prepare),),
            )
            .add_render_graph_node::<ViewNodeRunner<DeferredOpaquePass3dPbrLightingNode>>(
                Core3d,
                NodePbr::DeferredLightingPass,
            )
            // Run the lighting pass after the main pass starts but before the
            // opaque geometry is drawn.
            .add_render_graph_edges(
                Core3d,
                (
                    Node3d::StartMainPass,
                    NodePbr::DeferredLightingPass,
                    Node3d::MainOpaquePass,
                ),
            );
    }
}
/// The render-graph node that draws the deferred PBR lighting pass for a view.
#[derive(Default)]
pub struct DeferredOpaquePass3dPbrLightingNode;
impl ViewNode for DeferredOpaquePass3dPbrLightingNode {
    // Per-view data: all dynamic uniform offsets for bind group 0, the
    // prebuilt mesh view bind group, the color target, the pass-id depth
    // texture, and the specialized pipeline id.
    type ViewQuery = (
        &'static ViewUniformOffset,
        &'static ViewLightsUniformOffset,
        &'static ViewFogUniformOffset,
        &'static ViewLightProbesUniformOffset,
        &'static ViewScreenSpaceReflectionsUniformOffset,
        &'static ViewEnvironmentMapUniformOffset,
        &'static MeshViewBindGroup,
        &'static ViewTarget,
        &'static DeferredLightingIdDepthTexture,
        &'static DeferredLightingPipeline,
    );
    /// Records the fullscreen deferred lighting draw for this view.
    fn run(
        &self,
        _graph_context: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        (
            view_uniform_offset,
            view_lights_offset,
            view_fog_offset,
            view_light_probes_offset,
            view_ssr_offset,
            view_environment_map_offset,
            mesh_view_bind_group,
            target,
            deferred_lighting_id_depth_texture,
            deferred_lighting_pipeline,
        ): QueryItem<Self::ViewQuery>,
        world: &World,
    ) -> Result<(), NodeRunError> {
        let pipeline_cache = world.resource::<PipelineCache>();
        let deferred_lighting_layout = world.resource::<DeferredLightingLayout>();
        // The pipeline may still be compiling; skip the pass until it's ready.
        let Some(pipeline) =
            pipeline_cache.get_render_pipeline(deferred_lighting_pipeline.pipeline_id)
        else {
            return Ok(());
        };
        // The pass-id uniform buffer may not have been written yet either.
        let deferred_lighting_pass_id =
            world.resource::<ComponentUniforms<PbrDeferredLightingDepthId>>();
        let Some(deferred_lighting_pass_id_binding) =
            deferred_lighting_pass_id.uniforms().binding()
        else {
            return Ok(());
        };
        let diagnostics = render_context.diagnostic_recorder();
        // Bind group 2 holds only the pass-id uniform.
        let bind_group_2 = render_context.render_device().create_bind_group(
            "deferred_lighting_layout_group_2",
            &deferred_lighting_layout.bind_group_layout_2,
            &BindGroupEntries::single(deferred_lighting_pass_id_binding),
        );
        // Depth attachment is the pass-id texture: loaded (it was written by
        // an earlier pass), never stored back.
        let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
            label: Some("deferred_lighting"),
            color_attachments: &[Some(target.get_color_attachment())],
            depth_stencil_attachment: Some(RenderPassDepthStencilAttachment {
                view: &deferred_lighting_id_depth_texture.texture.default_view,
                depth_ops: Some(Operations {
                    load: LoadOp::Load,
                    store: StoreOp::Discard,
                }),
                stencil_ops: None,
            }),
            timestamp_writes: None,
            occlusion_query_set: None,
        });
        let pass_span = diagnostics.pass_span(&mut render_pass, "deferred_lighting");
        render_pass.set_render_pipeline(pipeline);
        // Bind group 0: mesh view bindings with all per-view dynamic offsets.
        render_pass.set_bind_group(
            0,
            &mesh_view_bind_group.main,
            &[
                view_uniform_offset.offset,
                view_lights_offset.offset,
                view_fog_offset.offset,
                **view_light_probes_offset,
                **view_ssr_offset,
                **view_environment_map_offset,
            ],
        );
        render_pass.set_bind_group(1, &mesh_view_bind_group.binding_array, &[]);
        render_pass.set_bind_group(2, &bind_group_2, &[]);
        // Fullscreen draw: 3 vertices, 1 instance.
        render_pass.draw(0..3, 0..1);
        pass_span.end(&mut render_pass);
        Ok(())
    }
}
/// Resource holding everything needed to specialize the deferred lighting
/// pipeline.
#[derive(Resource)]
pub struct DeferredLightingLayout {
    // Cloned from the `MeshPipeline` resource; supplies the view bind group
    // layouts and the `binding_arrays_are_usable` capability flag.
    mesh_pipeline: MeshPipeline,
    // Layout for bind group index 2: a single `PbrDeferredLightingDepthId`
    // uniform (created in `init_deferred_lighting_layout`).
    bind_group_layout_2: BindGroupLayout,
    // Handle to the embedded `deferred_lighting.wgsl` shader.
    deferred_lighting_shader: Handle<Shader>,
}
/// Per-view component storing the cached id of the specialized deferred
/// lighting render pipeline for that view.
#[derive(Component)]
pub struct DeferredLightingPipeline {
    pub pipeline_id: CachedRenderPipelineId,
}
impl SpecializedRenderPipeline for DeferredLightingLayout {
    type Key = MeshPipelineKey;
    /// Builds the render pipeline descriptor for the fullscreen deferred
    /// lighting pass, translating each relevant key bit into a shader def so
    /// the shader is compiled with exactly the features the view needs.
    fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor {
        let mut shader_defs = Vec::new();
        // Let the shader code know that it's running in a deferred pipeline.
        shader_defs.push("DEFERRED_LIGHTING_PIPELINE".into());
        #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
        shader_defs.push("WEBGL2".into());
        if key.contains(MeshPipelineKey::TONEMAP_IN_SHADER) {
            shader_defs.push("TONEMAP_IN_SHADER".into());
            // Tell the shader which bindings hold the tonemapping LUT.
            shader_defs.push(ShaderDefVal::UInt(
                "TONEMAPPING_LUT_TEXTURE_BINDING_INDEX".into(),
                TONEMAPPING_LUT_TEXTURE_BINDING_INDEX,
            ));
            shader_defs.push(ShaderDefVal::UInt(
                "TONEMAPPING_LUT_SAMPLER_BINDING_INDEX".into(),
                TONEMAPPING_LUT_SAMPLER_BINDING_INDEX,
            ));
            // The tonemapping method lives in a reserved bit range of the key;
            // at most one of the following defs is emitted.
            let method = key.intersection(MeshPipelineKey::TONEMAP_METHOD_RESERVED_BITS);
            if method == MeshPipelineKey::TONEMAP_METHOD_NONE {
                shader_defs.push("TONEMAP_METHOD_NONE".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_REINHARD {
                shader_defs.push("TONEMAP_METHOD_REINHARD".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_REINHARD_LUMINANCE {
                shader_defs.push("TONEMAP_METHOD_REINHARD_LUMINANCE".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_ACES_FITTED {
                shader_defs.push("TONEMAP_METHOD_ACES_FITTED".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_AGX {
                shader_defs.push("TONEMAP_METHOD_AGX".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM {
                shader_defs.push("TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_BLENDER_FILMIC {
                shader_defs.push("TONEMAP_METHOD_BLENDER_FILMIC".into());
            } else if method == MeshPipelineKey::TONEMAP_METHOD_TONY_MC_MAPFACE {
                shader_defs.push("TONEMAP_METHOD_TONY_MC_MAPFACE".into());
            }
            // Debanding is tied to tonemapping in the shader, cannot run without it.
            if key.contains(MeshPipelineKey::DEBAND_DITHER) {
                shader_defs.push("DEBAND_DITHER".into());
            }
        }
        // Boolean feature bits map 1:1 onto shader defs.
        if key.contains(MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION) {
            shader_defs.push("SCREEN_SPACE_AMBIENT_OCCLUSION".into());
        }
        if key.contains(MeshPipelineKey::ENVIRONMENT_MAP) {
            shader_defs.push("ENVIRONMENT_MAP".into());
        }
        if key.contains(MeshPipelineKey::IRRADIANCE_VOLUME) {
            shader_defs.push("IRRADIANCE_VOLUME".into());
        }
        if key.contains(MeshPipelineKey::NORMAL_PREPASS) {
            shader_defs.push("NORMAL_PREPASS".into());
        }
        if key.contains(MeshPipelineKey::DEPTH_PREPASS) {
            shader_defs.push("DEPTH_PREPASS".into());
        }
        if key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) {
            shader_defs.push("MOTION_VECTOR_PREPASS".into());
        }
        if key.contains(MeshPipelineKey::SCREEN_SPACE_REFLECTIONS) {
            shader_defs.push("SCREEN_SPACE_REFLECTIONS".into());
        }
        if key.contains(MeshPipelineKey::HAS_PREVIOUS_SKIN) {
            shader_defs.push("HAS_PREVIOUS_SKIN".into());
        }
        if key.contains(MeshPipelineKey::HAS_PREVIOUS_MORPH) {
            shader_defs.push("HAS_PREVIOUS_MORPH".into());
        }
        if key.contains(MeshPipelineKey::DISTANCE_FOG) {
            shader_defs.push("DISTANCE_FOG".into());
        }
        // Always true, since we're in the deferred lighting pipeline
        shader_defs.push("DEFERRED_PREPASS".into());
        // Exactly one shadow filtering method, stored in reserved key bits.
        let shadow_filter_method =
            key.intersection(MeshPipelineKey::SHADOW_FILTER_METHOD_RESERVED_BITS);
        if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2 {
            shader_defs.push("SHADOW_FILTER_METHOD_HARDWARE_2X2".into());
        } else if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_GAUSSIAN {
            shader_defs.push("SHADOW_FILTER_METHOD_GAUSSIAN".into());
        } else if shadow_filter_method == MeshPipelineKey::SHADOW_FILTER_METHOD_TEMPORAL {
            shader_defs.push("SHADOW_FILTER_METHOD_TEMPORAL".into());
        }
        if self.mesh_pipeline.binding_arrays_are_usable {
            shader_defs.push("MULTIPLE_LIGHT_PROBES_IN_ARRAY".into());
            shader_defs.push("MULTIPLE_LIGHTMAPS_IN_ARRAY".into());
        }
        #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
        shader_defs.push("SIXTEEN_BYTE_ALIGNMENT".into());
        let layout = self.mesh_pipeline.get_view_layout(key.into());
        RenderPipelineDescriptor {
            label: Some("deferred_lighting_pipeline".into()),
            // Groups 0/1 come from the mesh view layout; group 2 is the
            // deferred pass-id uniform.
            layout: vec![
                layout.main_layout.clone(),
                layout.binding_array_layout.clone(),
                self.bind_group_layout_2.clone(),
            ],
            vertex: VertexState {
                shader: self.deferred_lighting_shader.clone(),
                shader_defs: shader_defs.clone(),
                ..default()
            },
            fragment: Some(FragmentState {
                shader: self.deferred_lighting_shader.clone(),
                shader_defs,
                targets: vec![Some(ColorTargetState {
                    format: if key.contains(MeshPipelineKey::HDR) {
                        ViewTarget::TEXTURE_FORMAT_HDR
                    } else {
                        TextureFormat::bevy_default()
                    },
                    blend: None,
                    write_mask: ColorWrites::ALL,
                })],
                ..default()
            }),
            // Depth test (no write) against the pass-id texture with an
            // `Equal` compare — presumably so fragments only run this lighting
            // shader when their stored lighting pass id matches; confirm
            // against the deferred prepass output.
            depth_stencil: Some(DepthStencilState {
                format: DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT,
                depth_write_enabled: false,
                depth_compare: CompareFunction::Equal,
                stencil: StencilState {
                    front: StencilFaceState::IGNORE,
                    back: StencilFaceState::IGNORE,
                    read_mask: 0,
                    write_mask: 0,
                },
                bias: DepthBiasState {
                    constant: 0,
                    slope_scale: 0.0,
                    clamp: 0.0,
                },
            }),
            ..default()
        }
    }
}
/// `RenderStartup` system that creates the bind group layout for the
/// deferred-lighting pass-id uniform and inserts the
/// [`DeferredLightingLayout`] resource.
pub fn init_deferred_lighting_layout(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    mesh_pipeline: Res<MeshPipeline>,
    asset_server: Res<AssetServer>,
) {
    // Bind group 2: a single non-dynamic uniform buffer visible to both the
    // vertex and fragment stages.
    let layout = render_device.create_bind_group_layout(
        "deferred_lighting_layout",
        &BindGroupLayoutEntries::single(
            ShaderStages::VERTEX_FRAGMENT,
            uniform_buffer::<PbrDeferredLightingDepthId>(false),
        ),
    );
    commands.insert_resource(DeferredLightingLayout {
        mesh_pipeline: mesh_pipeline.clone(),
        bind_group_layout_2: layout,
        // The shader was registered via `embedded_asset!` in the plugin.
        deferred_lighting_shader: load_embedded_asset!(
            asset_server.as_ref(),
            "deferred_lighting.wgsl"
        ),
    });
}
/// Attaches a default [`PbrDeferredLightingDepthId`] to every view entity that
/// has a [`DeferredPrepass`] but no pass-id component yet.
pub fn insert_deferred_lighting_pass_id_component(
    mut commands: Commands,
    views: Query<Entity, (With<DeferredPrepass>, Without<PbrDeferredLightingDepthId>)>,
) {
    for view_entity in &views {
        let pass_id = PbrDeferredLightingDepthId::default();
        commands.entity(view_entity).insert(pass_id);
    }
}
/// `Render`-schedule system (in `RenderSystems::Prepare`) that specializes the
/// deferred lighting pipeline for every view with a deferred prepass, building
/// a `MeshPipelineKey` from the view's tonemapping, dither, prepass, SSAO/SSR,
/// fog, light-probe, and shadow-filtering configuration.
pub fn prepare_deferred_lighting_pipelines(
    mut commands: Commands,
    pipeline_cache: Res<PipelineCache>,
    mut pipelines: ResMut<SpecializedRenderPipelines<DeferredLightingLayout>>,
    deferred_lighting_layout: Res<DeferredLightingLayout>,
    views: Query<(
        Entity,
        &ExtractedView,
        Option<&Tonemapping>,
        Option<&DebandDither>,
        Option<&ShadowFilteringMethod>,
        // Screen-space effect toggles.
        (
            Has<ScreenSpaceAmbientOcclusion>,
            Has<ScreenSpaceReflectionsUniform>,
            Has<DistanceFog>,
        ),
        // Which prepasses the view runs.
        (
            Has<NormalPrepass>,
            Has<DepthPrepass>,
            Has<MotionVectorPrepass>,
            Has<DeferredPrepass>,
        ),
        Has<RenderViewLightProbes<EnvironmentMapLight>>,
        Has<RenderViewLightProbes<IrradianceVolume>>,
        Has<SkipDeferredLighting>,
    )>,
) {
    for (
        entity,
        view,
        tonemapping,
        dither,
        shadow_filter_method,
        (ssao, ssr, distance_fog),
        (normal_prepass, depth_prepass, motion_vector_prepass, deferred_prepass),
        has_environment_maps,
        has_irradiance_volumes,
        skip_deferred_lighting,
    ) in &views
    {
        // If there is no deferred prepass or we want to skip the deferred lighting pass,
        // remove the old pipeline if there was one. This handles the case in which a
        // view using deferred stops using it.
        if !deferred_prepass || skip_deferred_lighting {
            commands.entity(entity).remove::<DeferredLightingPipeline>();
            continue;
        }
        let mut view_key = MeshPipelineKey::from_hdr(view.hdr);
        if normal_prepass {
            view_key |= MeshPipelineKey::NORMAL_PREPASS;
        }
        if depth_prepass {
            view_key |= MeshPipelineKey::DEPTH_PREPASS;
        }
        if motion_vector_prepass {
            view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS;
        }
        // Always true, since we're in the deferred lighting pipeline
        view_key |= MeshPipelineKey::DEFERRED_PREPASS;
        // Tonemapping/dither bits are only set for non-HDR views.
        if !view.hdr {
            if let Some(tonemapping) = tonemapping {
                view_key |= MeshPipelineKey::TONEMAP_IN_SHADER;
                view_key |= match tonemapping {
                    Tonemapping::None => MeshPipelineKey::TONEMAP_METHOD_NONE,
                    Tonemapping::Reinhard => MeshPipelineKey::TONEMAP_METHOD_REINHARD,
                    Tonemapping::ReinhardLuminance => {
                        MeshPipelineKey::TONEMAP_METHOD_REINHARD_LUMINANCE
                    }
                    Tonemapping::AcesFitted => MeshPipelineKey::TONEMAP_METHOD_ACES_FITTED,
                    Tonemapping::AgX => MeshPipelineKey::TONEMAP_METHOD_AGX,
                    Tonemapping::SomewhatBoringDisplayTransform => {
                        MeshPipelineKey::TONEMAP_METHOD_SOMEWHAT_BORING_DISPLAY_TRANSFORM
                    }
                    Tonemapping::TonyMcMapface => MeshPipelineKey::TONEMAP_METHOD_TONY_MC_MAPFACE,
                    Tonemapping::BlenderFilmic => MeshPipelineKey::TONEMAP_METHOD_BLENDER_FILMIC,
                };
            }
            if let Some(DebandDither::Enabled) = dither {
                view_key |= MeshPipelineKey::DEBAND_DITHER;
            }
        }
        if ssao {
            view_key |= MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION;
        }
        if ssr {
            view_key |= MeshPipelineKey::SCREEN_SPACE_REFLECTIONS;
        }
        if distance_fog {
            view_key |= MeshPipelineKey::DISTANCE_FOG;
        }
        // We don't need to check to see whether the environment map is loaded
        // because [`gather_light_probes`] already checked that for us before
        // adding the [`RenderViewEnvironmentMaps`] component.
        if has_environment_maps {
            view_key |= MeshPipelineKey::ENVIRONMENT_MAP;
        }
        if has_irradiance_volumes {
            view_key |= MeshPipelineKey::IRRADIANCE_VOLUME;
        }
        match shadow_filter_method.unwrap_or(&ShadowFilteringMethod::default()) {
            ShadowFilteringMethod::Hardware2x2 => {
                view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2;
            }
            ShadowFilteringMethod::Gaussian => {
                view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_GAUSSIAN;
            }
            ShadowFilteringMethod::Temporal => {
                view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_TEMPORAL;
            }
        }
        // Specialization is cached: identical keys reuse the same pipeline id.
        let pipeline_id =
            pipelines.specialize(&pipeline_cache, &deferred_lighting_layout, view_key);
        commands
            .entity(entity)
            .insert(DeferredLightingPipeline { pipeline_id });
    }
}
/// Component to skip running the deferred lighting pass in [`DeferredOpaquePass3dPbrLightingNode`] for a specific view.
///
/// This works like [`crate::PbrPlugin::add_default_deferred_lighting_plugin`], but is per-view instead of global.
///
/// Useful for cases where you want to generate a gbuffer, but skip the built-in deferred lighting pass
/// to run your own custom lighting pass instead.
///
/// Insert this component in the render world only.
// NOTE(review): the doc link above references the upstream `PbrPlugin` path —
// confirm it still resolves after vendoring into this crate.
#[derive(Component, Clone, Copy, Default)]
pub struct SkipDeferredLighting;

View File

@@ -0,0 +1,153 @@
#define_import_path bevy_pbr::pbr_deferred_functions
#import bevy_pbr::{
pbr_types::{PbrInput, pbr_input_new, STANDARD_MATERIAL_FLAGS_UNLIT_BIT},
pbr_deferred_types as deferred_types,
pbr_functions,
rgb9e5,
mesh_view_bindings::view,
utils::{octahedral_encode, octahedral_decode},
prepass_io::FragmentOutput,
view_transformations::{position_ndc_to_world, frag_coord_to_ndc},
}
#ifdef MESHLET_MESH_MATERIAL_PASS
#import bevy_pbr::meshlet_visibility_buffer_resolve::VertexOutput
#else
#import bevy_pbr::prepass_io::VertexOutput
#endif
#ifdef MOTION_VECTOR_PREPASS
#import bevy_pbr::pbr_prepass_functions::calculate_motion_vector
#endif
// Creates the deferred gbuffer from a PbrInput.
// Layout (r, g, b, a):
//   r: base color (sRGB-encoded) + perceptual roughness
//   g: emissive, RGB9E5-packed
//   b: reflectance / metallic / occlusion (+ depth on WebGL2)
//   a: oct24 normal + 8 flag bits
fn deferred_gbuffer_from_pbr_input(in: PbrInput) -> vec4<u32> {
    // Only monochrome occlusion supported. May not be worth including at all.
    // Some models have baked occlusion, GLTF only supports monochrome.
    // Real time occlusion is applied in the deferred lighting pass.
    // Deriving luminance via Rec. 709. coefficients
    // https://en.wikipedia.org/wiki/Rec._709
    let rec_709_coeffs = vec3<f32>(0.2126, 0.7152, 0.0722);
    let diffuse_occlusion = dot(in.diffuse_occlusion, rec_709_coeffs);
    // Only monochrome specular supported.
    let reflectance = dot(in.material.reflectance, rec_709_coeffs);
#ifdef WEBGL2 // More crunched for webgl so we can also fit depth.
    var props = deferred_types::pack_unorm3x4_plus_unorm_20_(vec4(
        reflectance,
        in.material.metallic,
        diffuse_occlusion,
        in.frag_coord.z));
#else
    var props = deferred_types::pack_unorm4x8_(vec4(
        reflectance, // could be fewer bits
        in.material.metallic, // could be fewer bits
        diffuse_occlusion, // is this worth including?
        0.0)); // spare
#endif // WEBGL2
    let flags = deferred_types::deferred_flags_from_mesh_material_flags(in.flags, in.material.flags);
    let octahedral_normal = octahedral_encode(normalize(in.N));
    var base_color_srgb = vec3(0.0);
    var emissive = in.material.emissive.rgb;
    if ((in.material.flags & STANDARD_MATERIAL_FLAGS_UNLIT_BIT) != 0u) {
        // Material is unlit, use emissive component of gbuffer for color data.
        // Unlit materials are effectively emissive.
        emissive = in.material.base_color.rgb;
    } else {
        // Simple 2.2 gamma encode so the 8 bits per channel are spent more
        // perceptually; decoded by `pbr_input_from_deferred_gbuffer`.
        base_color_srgb = pow(in.material.base_color.rgb, vec3(1.0 / 2.2));
    }
    // Utilize the emissive channel to transmit the lightmap data. To ensure
    // it matches the output in forward shading, pre-multiply it with the
    // calculated diffuse color.
    let base_color = in.material.base_color.rgb;
    let metallic = in.material.metallic;
    let specular_transmission = in.material.specular_transmission;
    let diffuse_transmission = in.material.diffuse_transmission;
    let diffuse_color = pbr_functions::calculate_diffuse_color(
        base_color,
        metallic,
        specular_transmission,
        diffuse_transmission
    );
    emissive += in.lightmap_light * diffuse_color * view.exposure;
    let deferred = vec4(
        deferred_types::pack_unorm4x8_(vec4(base_color_srgb, in.material.perceptual_roughness)),
        rgb9e5::vec3_to_rgb9e5_(emissive),
        props,
        deferred_types::pack_24bit_normal_and_flags(octahedral_normal, flags),
    );
    return deferred;
}
// Creates a PbrInput from the deferred gbuffer.
// Inverse of `deferred_gbuffer_from_pbr_input`; world position and view vector
// are reconstructed from the fragment coordinate rather than stored.
fn pbr_input_from_deferred_gbuffer(frag_coord: vec4<f32>, gbuffer: vec4<u32>) -> PbrInput {
    var pbr = pbr_input_new();
    let flags = deferred_types::unpack_flags(gbuffer.a);
    let deferred_flags = deferred_types::mesh_material_flags_from_deferred_flags(flags);
    pbr.flags = deferred_flags.x;
    pbr.material.flags = deferred_flags.y;
    // r channel: gamma-encoded base color (rgb) + perceptual roughness (a).
    let base_rough = deferred_types::unpack_unorm4x8_(gbuffer.r);
    pbr.material.perceptual_roughness = base_rough.a;
    // g channel: emissive (or unlit color), RGB9E5-packed.
    let emissive = rgb9e5::rgb9e5_to_vec3_(gbuffer.g);
    if ((pbr.material.flags & STANDARD_MATERIAL_FLAGS_UNLIT_BIT) != 0u) {
        // Unlit materials stored their color in the emissive slot.
        pbr.material.base_color = vec4(emissive, 1.0);
        pbr.material.emissive = vec4(vec3(0.0), 0.0);
    } else {
        // Undo the 2.2 gamma encode applied when packing.
        pbr.material.base_color = vec4(pow(base_rough.rgb, vec3(2.2)), 1.0);
        pbr.material.emissive = vec4(emissive, 0.0);
    }
#ifdef WEBGL2 // More crunched for webgl so we can also fit depth.
    let props = deferred_types::unpack_unorm3x4_plus_unorm_20_(gbuffer.b);
    // Bias to 0.5 since that's the value for almost all materials.
    pbr.material.reflectance = vec3(saturate(props.r - 0.03333333333));
#else
    let props = deferred_types::unpack_unorm4x8_(gbuffer.b);
    pbr.material.reflectance = vec3(props.r);
#endif // WEBGL2
    pbr.material.metallic = props.g;
    pbr.diffuse_occlusion = vec3(props.b);
    // a channel: oct24-encoded normal + flag bits (already consumed above).
    let octahedral_normal = deferred_types::unpack_24bit_normal(gbuffer.a);
    let N = octahedral_decode(octahedral_normal);
    // Reconstruct world position from the fragment coordinate and depth.
    let world_position = vec4(position_ndc_to_world(frag_coord_to_ndc(frag_coord)), 1.0);
    let is_orthographic = view.clip_from_view[3].w == 1.0;
    let V = pbr_functions::calculate_view(world_position, is_orthographic);
    pbr.frag_coord = frag_coord;
    pbr.world_normal = N;
    pbr.world_position = world_position;
    pbr.N = N;
    pbr.V = V;
    pbr.is_orthographic = is_orthographic;
    return pbr;
}
#ifdef PREPASS_PIPELINE
// Assembles the prepass FragmentOutput for the deferred path: packs the
// gbuffer, forwards the lighting pass id, and fills the optional normal /
// motion-vector targets when those prepasses are enabled.
fn deferred_output(in: VertexOutput, pbr_input: PbrInput) -> FragmentOutput {
    var out: FragmentOutput;
    // gbuffer
    out.deferred = deferred_gbuffer_from_pbr_input(pbr_input);
    // lighting pass id (used to determine which lighting shader to run for the fragment)
    out.deferred_lighting_pass_id = pbr_input.material.deferred_lighting_pass_id;
    // normal if required
#ifdef NORMAL_PREPASS
    // Remap the world normal from [-1, 1] to [0, 1] for storage.
    out.normal = vec4(in.world_normal * 0.5 + vec3(0.5), 1.0);
#endif
    // motion vectors if required
#ifdef MOTION_VECTOR_PREPASS
#ifdef MESHLET_MESH_MATERIAL_PASS
    out.motion_vector = in.motion_vector;
#else
    out.motion_vector = calculate_motion_vector(in.world_position, in.previous_world_position);
#endif
#endif
    return out;
}
#endif

View File

@@ -0,0 +1,89 @@
#define_import_path bevy_pbr::pbr_deferred_types
#import bevy_pbr::{
mesh_types::MESH_FLAGS_SHADOW_RECEIVER_BIT,
pbr_types::{STANDARD_MATERIAL_FLAGS_FOG_ENABLED_BIT, STANDARD_MATERIAL_FLAGS_UNLIT_BIT},
}
// Deferred flag bits. Maximum of 8 bits available — they are packed into the
// top byte of the gbuffer normal word by `pack_24bit_normal_and_flags`.
const DEFERRED_FLAGS_UNLIT_BIT: u32 = 1u << 0u;
const DEFERRED_FLAGS_FOG_ENABLED_BIT: u32 = 1u << 1u;
const DEFERRED_MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 1u << 2u;
// Packs the relevant mesh flags and material flags into the small deferred
// flags bitfield. Branchless: each comparison yields 0u or 1u, which is
// multiplied by the target bit.
fn deferred_flags_from_mesh_material_flags(mesh_flags: u32, mat_flags: u32) -> u32 {
    var flags = 0u;
    flags |= u32((mesh_flags & MESH_FLAGS_SHADOW_RECEIVER_BIT) != 0u) * DEFERRED_MESH_FLAGS_SHADOW_RECEIVER_BIT;
    flags |= u32((mat_flags & STANDARD_MATERIAL_FLAGS_FOG_ENABLED_BIT) != 0u) * DEFERRED_FLAGS_FOG_ENABLED_BIT;
    flags |= u32((mat_flags & STANDARD_MATERIAL_FLAGS_UNLIT_BIT) != 0u) * DEFERRED_FLAGS_UNLIT_BIT;
    return flags;
}
// Inverse of `deferred_flags_from_mesh_material_flags`: expands the packed
// deferred flags back into (mesh_flags, material_flags).
fn mesh_material_flags_from_deferred_flags(deferred_flags: u32) -> vec2<u32> {
    var mat_flags = 0u;
    var mesh_flags = 0u;
    mesh_flags |= u32((deferred_flags & DEFERRED_MESH_FLAGS_SHADOW_RECEIVER_BIT) != 0u) * MESH_FLAGS_SHADOW_RECEIVER_BIT;
    mat_flags |= u32((deferred_flags & DEFERRED_FLAGS_FOG_ENABLED_BIT) != 0u) * STANDARD_MATERIAL_FLAGS_FOG_ENABLED_BIT;
    mat_flags |= u32((deferred_flags & DEFERRED_FLAGS_UNLIT_BIT) != 0u) * STANDARD_MATERIAL_FLAGS_UNLIT_BIT;
    return vec2(mesh_flags, mat_flags);
}
// Maximum representable values for 12/16/20-bit unsigned normalized packing.
const U12MAXF = 4095.0;
// NOTE(review): U16MAXF is unused in the visible portion of this file.
const U16MAXF = 65535.0;
const U20MAXF = 1048575.0;
// Storing normals as oct24.
// Flags are stored in the remaining 8 bits.
// https://jcgt.org/published/0003/02/01/paper.pdf
// Could possibly go down to oct20 if the space is needed.
fn pack_24bit_normal_and_flags(octahedral_normal: vec2<f32>, flags: u32) -> u32 {
    // Round-to-nearest 12-bit unorm quantization of each component.
    let unorm1 = u32(saturate(octahedral_normal.x) * U12MAXF + 0.5);
    let unorm2 = u32(saturate(octahedral_normal.y) * U12MAXF + 0.5);
    // Layout from MSB to LSB: [flags:8 | y:12 | x:12].
    return (unorm1 & 0xFFFu) | ((unorm2 & 0xFFFu) << 12u) | ((flags & 0xFFu) << 24u);
}
// Recovers the oct24 octahedral normal packed by `pack_24bit_normal_and_flags`
// (flag bits in the top byte are ignored here; see `unpack_flags`).
fn unpack_24bit_normal(packed: u32) -> vec2<f32> {
    // Extract the two 12-bit unorm components from the low 24 bits.
    let x_bits = packed & 0xFFFu;
    let y_bits = (packed >> 12u) & 0xFFFu;
    // Normalize both components back to [0, 1] with a single vector divide.
    return vec2(f32(x_bits), f32(y_bits)) / U12MAXF;
}
// Extracts the 8 flag bits stored in the top byte by
// `pack_24bit_normal_and_flags`.
fn unpack_flags(packed: u32) -> u32 {
    return (packed >> 24u) & 0xFFu;
}
// Manual replacement for the builtin, which didn't work in webgl:
// "'unpackUnorm4x8' : no matching overloaded function found"
// https://github.com/gfx-rs/naga/issues/2006
fn unpack_unorm4x8_(v: u32) -> vec4<f32> {
    // Split the word into its four bytes (x in the low byte, w in the high
    // byte), then normalize all lanes to [0, 1] at once.
    let bytes = vec4<u32>(v, v >> 8u, v >> 16u, v >> 24u) & vec4<u32>(0xFFu);
    return vec4<f32>(bytes) / 255.0;
}
// 'packUnorm4x8' : no matching overloaded function found
// https://github.com/gfx-rs/naga/issues/2006
fn pack_unorm4x8_(values: vec4<f32>) -> u32 {
    // Round-to-nearest quantization to one byte per component.
    let v = vec4<u32>(saturate(values) * 255.0 + 0.5);
    // x goes in the low byte, w in the high byte.
    return (v.w << 24u) | (v.z << 16u) | (v.y << 8u) | v.x;
}
// Pack 3x 4bit unorm + 1x 20bit
// Layout from MSB to LSB: [w:20 | z:4 | y:4 | x:4].
fn pack_unorm3x4_plus_unorm_20_(v: vec4<f32>) -> u32 {
    // Three 4-bit unorm components for the low 12 bits...
    let sm = vec3<u32>(saturate(v.xyz) * 15.0 + 0.5);
    // ...and one 20-bit unorm component for the high 20 bits.
    let bg = u32(saturate(v.w) * U20MAXF + 0.5);
    return (bg << 12u) | (sm.z << 8u) | (sm.y << 4u) | sm.x;
}
// Unpack 3x 4bit unorm + 1x 20bit
// Inverse of `pack_unorm3x4_plus_unorm_20_`.
fn unpack_unorm3x4_plus_unorm_20_(v: u32) -> vec4<f32> {
    return vec4(
        f32(v & 0xfu) / 15.0,
        f32((v >> 4u) & 0xFu) / 15.0,
        f32((v >> 8u) & 0xFu) / 15.0,
        // NOTE(review): the mask is 24 bits wide but only 20 bits remain after
        // the shift, so the extra mask bits are harmless.
        f32((v >> 12u) & 0xFFFFFFu) / U20MAXF,
    );
}

View File

@@ -0,0 +1,406 @@
use std::borrow::Cow;
use bevy_asset::Asset;
use bevy_ecs::system::SystemParamItem;
use bevy_mesh::MeshVertexBufferLayoutRef;
use bevy_platform::{collections::HashSet, hash::FixedHasher};
use bevy_reflect::{impl_type_path, Reflect};
use crate::render::{
alpha::AlphaMode,
render_resource::{
AsBindGroup, AsBindGroupError, BindGroupLayout, BindGroupLayoutEntry, BindlessDescriptor,
BindlessResourceType, BindlessSlabResourceLimit, RenderPipelineDescriptor,
SpecializedMeshPipelineError, UnpreparedBindGroup,
},
renderer::RenderDevice,
};
use bevy_shader::ShaderRef;
use crate::render::pbr::{Material, MaterialPipeline, MaterialPipelineKey, MeshPipeline, MeshPipelineKey};
/// Pipeline data passed to [`MaterialExtension::specialize`]; currently just
/// the base mesh pipeline.
pub struct MaterialExtensionPipeline {
    pub mesh_pipeline: MeshPipeline,
}
/// Specialization key passed to [`MaterialExtension::specialize`].
pub struct MaterialExtensionKey<E: MaterialExtension> {
    /// Key bits describing the mesh/view configuration.
    pub mesh_key: MeshPipelineKey,
    /// The extension's own bind group data.
    pub bind_group_data: E::Data,
}
/// A subset of the `Material` trait for defining extensions to a base `Material`, such as the builtin `StandardMaterial`.
///
/// A user type implementing the trait should be used as the `E` generic param in an `ExtendedMaterial` struct.
pub trait MaterialExtension: Asset + AsBindGroup + Clone + Sized {
    /// Returns this material's vertex shader. If [`ShaderRef::Default`] is returned, the base material mesh vertex shader
    /// will be used.
    fn vertex_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's fragment shader. If [`ShaderRef::Default`] is returned, the base material mesh fragment shader
    /// will be used.
    fn fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's [`AlphaMode`]. If `None` is returned, the base material alpha mode will be used.
    fn alpha_mode() -> Option<AlphaMode> {
        None
    }
    /// Returns this material's prepass vertex shader. If [`ShaderRef::Default`] is returned, the base material prepass vertex shader
    /// will be used.
    fn prepass_vertex_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's prepass fragment shader. If [`ShaderRef::Default`] is returned, the base material prepass fragment shader
    /// will be used.
    fn prepass_fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's deferred vertex shader. If [`ShaderRef::Default`] is returned, the base material deferred vertex shader
    /// will be used.
    fn deferred_vertex_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's deferred fragment shader. If [`ShaderRef::Default`] is returned, the base material deferred fragment shader
    /// will be used.
    fn deferred_fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's [`crate::meshlet::MeshletMesh`] fragment shader. If [`ShaderRef::Default`] is returned,
    /// the default meshlet mesh fragment shader will be used.
    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's [`crate::meshlet::MeshletMesh`] prepass fragment shader. If [`ShaderRef::Default`] is returned,
    /// the default meshlet mesh prepass fragment shader will be used.
    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Returns this material's [`crate::meshlet::MeshletMesh`] deferred fragment shader. If [`ShaderRef::Default`] is returned,
    /// the default meshlet mesh deferred fragment shader will be used.
    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef {
        ShaderRef::Default
    }
    /// Customizes the default [`RenderPipelineDescriptor`] for a specific entity using the entity's
    /// [`MaterialPipelineKey`] and [`MeshVertexBufferLayoutRef`] as input.
    /// Specialization for the base material is applied before this function is called.
    #[expect(
        unused_variables,
        reason = "The parameters here are intentionally unused by the default implementation; however, putting underscores here will result in the underscores being copied by rust-analyzer's tab completion."
    )]
    #[inline]
    fn specialize(
        pipeline: &MaterialExtensionPipeline,
        descriptor: &mut RenderPipelineDescriptor,
        layout: &MeshVertexBufferLayoutRef,
        key: MaterialExtensionKey<Self>,
    ) -> Result<(), SpecializedMeshPipelineError> {
        Ok(())
    }
}
/// A material that extends a base [`Material`] with additional shaders and data.
///
/// The data from both materials will be combined and made available to the shader
/// so that shader functions built for the base material (and referencing the base material
/// bindings) will work as expected, and custom alterations based on custom data can also be used.
///
/// If the extension `E` returns a non-default result from `vertex_shader()` it will be used in place of the base
/// material's vertex shader.
///
/// If the extension `E` returns a non-default result from `fragment_shader()` it will be used in place of the base
/// fragment shader.
///
/// When used with `StandardMaterial` as the base, all the standard material fields are
/// present, so the `pbr_fragment` shader functions can be called from the extension shader (see
/// the `extended_material` example).
#[derive(Asset, Clone, Debug, Reflect)]
#[reflect(type_path = false)]
#[reflect(Clone)]
pub struct ExtendedMaterial<B: Material, E: MaterialExtension> {
    /// The base material being extended.
    pub base: B,
    /// The extension supplying overriding shaders and extra bindings.
    pub extension: E,
}
impl<B, E> Default for ExtendedMaterial<B, E>
where
    B: Material + Default,
    E: MaterialExtension + Default,
{
    /// Builds an `ExtendedMaterial` from the defaults of both parts.
    fn default() -> Self {
        let base = B::default();
        let extension = E::default();
        Self { base, extension }
    }
}
/// Combined bind-group key data for an [`ExtendedMaterial`]: the base
/// material's data followed by the extension's.
// NOTE(review): `repr(C, packed)` removes padding — presumably so the derived
// `PartialEq`/`Hash` see only field bytes; the derives require the (Copy)
// fields to be moved out rather than referenced. Confirm if layout changes.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
#[repr(C, packed)]
pub struct MaterialExtensionBindGroupData<B, E> {
    pub base: B,
    pub extension: E,
}
// We don't use the `TypePath` derive here due to a bug where `#[reflect(type_path = false)]`
// causes the `TypePath` derive to not generate an implementation.
// NOTE(review): the reflected type path still uses the upstream
// `bevy_pbr::extended_material` module path — confirm this is intentional
// after vendoring (changing it would break anything keyed on the old path).
impl_type_path!((in bevy_pbr::extended_material) ExtendedMaterial<B: Material, E: MaterialExtension>);
impl<B: Material, E: MaterialExtension> AsBindGroup for ExtendedMaterial<B, E> {
    type Data = MaterialExtensionBindGroupData<B::Data, E::Data>;
    type Param = (<B as AsBindGroup>::Param, <E as AsBindGroup>::Param);
    fn bindless_slot_count() -> Option<BindlessSlabResourceLimit> {
        // We only enable bindless if both the base material and its extension
        // are bindless. If we do enable bindless, we choose the smaller of the
        // two slab size limits.
        match (B::bindless_slot_count()?, E::bindless_slot_count()?) {
            (BindlessSlabResourceLimit::Auto, BindlessSlabResourceLimit::Auto) => {
                Some(BindlessSlabResourceLimit::Auto)
            }
            (BindlessSlabResourceLimit::Auto, BindlessSlabResourceLimit::Custom(limit))
            | (BindlessSlabResourceLimit::Custom(limit), BindlessSlabResourceLimit::Auto) => {
                Some(BindlessSlabResourceLimit::Custom(limit))
            }
            (
                BindlessSlabResourceLimit::Custom(base_limit),
                BindlessSlabResourceLimit::Custom(extended_limit),
            ) => Some(BindlessSlabResourceLimit::Custom(
                base_limit.min(extended_limit),
            )),
        }
    }
    /// Combines the bind group data of the base material and the extension.
    fn bind_group_data(&self) -> Self::Data {
        MaterialExtensionBindGroupData {
            base: self.base.bind_group_data(),
            extension: self.extension.bind_group_data(),
        }
    }
    fn unprepared_bind_group(
        &self,
        layout: &BindGroupLayout,
        render_device: &RenderDevice,
        (base_param, extended_param): &mut SystemParamItem<'_, '_, Self::Param>,
        mut force_non_bindless: bool,
    ) -> Result<UnpreparedBindGroup, AsBindGroupError> {
        // Bindless only applies when the combined material supports it.
        force_non_bindless = force_non_bindless || Self::bindless_slot_count().is_none();
        // add together the bindings of the base material and the user material
        let UnpreparedBindGroup { mut bindings } = B::unprepared_bind_group(
            &self.base,
            layout,
            render_device,
            base_param,
            force_non_bindless,
        )?;
        let extended_bindgroup = E::unprepared_bind_group(
            &self.extension,
            layout,
            render_device,
            extended_param,
            force_non_bindless,
        )?;
        bindings.extend(extended_bindgroup.bindings.0);
        Ok(UnpreparedBindGroup { bindings })
    }
    fn bind_group_layout_entries(
        render_device: &RenderDevice,
        mut force_non_bindless: bool,
    ) -> Vec<BindGroupLayoutEntry>
    where
        Self: Sized,
    {
        force_non_bindless = force_non_bindless || Self::bindless_slot_count().is_none();
        // Add together the bindings of the standard material and the user
        // material, skipping duplicate bindings. Duplicate bindings will occur
        // when bindless mode is on, because of the common bindless resource
        // arrays, and we need to eliminate the duplicates or `wgpu` will
        // complain.
        let mut entries = vec![];
        let mut seen_bindings = HashSet::<_>::with_hasher(FixedHasher);
        for entry in B::bind_group_layout_entries(render_device, force_non_bindless)
            .into_iter()
            .chain(E::bind_group_layout_entries(render_device, force_non_bindless).into_iter())
        {
            // Base entries come first in the chain, so the base material's
            // entry wins on a binding-index conflict.
            if seen_bindings.insert(entry.binding) {
                entries.push(entry);
            }
        }
        entries
    }
    fn bindless_descriptor() -> Option<BindlessDescriptor> {
        // We're going to combine the two bindless descriptors.
        let base_bindless_descriptor = B::bindless_descriptor()?;
        let extended_bindless_descriptor = E::bindless_descriptor()?;
        // Combining the buffers and index tables is straightforward.
        let mut buffers = base_bindless_descriptor.buffers.to_vec();
        let mut index_tables = base_bindless_descriptor.index_tables.to_vec();
        buffers.extend(extended_bindless_descriptor.buffers.iter().cloned());
        index_tables.extend(extended_bindless_descriptor.index_tables.iter().cloned());
        // Combining the resources is a little trickier because the resource
        // array is indexed by bindless index, so we have to merge the two
        // arrays, not just concatenate them.
        let max_bindless_index = base_bindless_descriptor
            .resources
            .len()
            .max(extended_bindless_descriptor.resources.len());
        let mut resources = Vec::with_capacity(max_bindless_index);
        for bindless_index in 0..max_bindless_index {
            // In the event of a conflicting bindless index, we choose the
            // base's binding.
            match base_bindless_descriptor.resources.get(bindless_index) {
                None | Some(&BindlessResourceType::None) => resources.push(
                    extended_bindless_descriptor
                        .resources
                        .get(bindless_index)
                        .copied()
                        .unwrap_or(BindlessResourceType::None),
                ),
                Some(&resource_type) => resources.push(resource_type),
            }
        }
        Some(BindlessDescriptor {
            resources: Cow::Owned(resources),
            buffers: Cow::Owned(buffers),
            index_tables: Cow::Owned(index_tables),
        })
    }
}
impl<B: Material, E: MaterialExtension> Material for ExtendedMaterial<B, E> {
    // Shader resolution policy: the extension's choice wins unless it is
    // `ShaderRef::Default`, in which case we fall back to the base material.
    fn vertex_shader() -> ShaderRef {
        let from_extension = E::vertex_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::vertex_shader()
        } else {
            from_extension
        }
    }

    fn fragment_shader() -> ShaderRef {
        let from_extension = E::fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::fragment_shader()
        } else {
            from_extension
        }
    }

    /// The extension may force an alpha mode; otherwise the base material's
    /// per-instance alpha mode applies.
    fn alpha_mode(&self) -> AlphaMode {
        E::alpha_mode().unwrap_or_else(|| B::alpha_mode(&self.base))
    }

    // The remaining per-instance properties are delegated to the base material.
    fn opaque_render_method(&self) -> crate::render::pbr::material::OpaqueRendererMethod {
        B::opaque_render_method(&self.base)
    }

    fn depth_bias(&self) -> f32 {
        B::depth_bias(&self.base)
    }

    fn reads_view_transmission_texture(&self) -> bool {
        B::reads_view_transmission_texture(&self.base)
    }

    fn prepass_vertex_shader() -> ShaderRef {
        let from_extension = E::prepass_vertex_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::prepass_vertex_shader()
        } else {
            from_extension
        }
    }

    fn prepass_fragment_shader() -> ShaderRef {
        let from_extension = E::prepass_fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::prepass_fragment_shader()
        } else {
            from_extension
        }
    }

    fn deferred_vertex_shader() -> ShaderRef {
        let from_extension = E::deferred_vertex_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::deferred_vertex_shader()
        } else {
            from_extension
        }
    }

    fn deferred_fragment_shader() -> ShaderRef {
        let from_extension = E::deferred_fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::deferred_fragment_shader()
        } else {
            from_extension
        }
    }

    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_fragment_shader() -> ShaderRef {
        let from_extension = E::meshlet_mesh_fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::meshlet_mesh_fragment_shader()
        } else {
            from_extension
        }
    }

    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef {
        let from_extension = E::meshlet_mesh_prepass_fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::meshlet_mesh_prepass_fragment_shader()
        } else {
            from_extension
        }
    }

    #[cfg(feature = "meshlet")]
    fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef {
        let from_extension = E::meshlet_mesh_deferred_fragment_shader();
        if matches!(from_extension, ShaderRef::Default) {
            B::meshlet_mesh_deferred_fragment_shader()
        } else {
            from_extension
        }
    }

    /// Specializes the render pipeline for this combined material.
    ///
    /// The base material's `specialize` runs first; the extension's runs
    /// afterwards, so it can adjust whatever the base set on `descriptor`.
    fn specialize(
        pipeline: &MaterialPipeline,
        descriptor: &mut RenderPipelineDescriptor,
        layout: &MeshVertexBufferLayoutRef,
        key: MaterialPipelineKey<Self>,
    ) -> Result<(), SpecializedMeshPipelineError> {
        B::specialize(
            pipeline,
            descriptor,
            layout,
            MaterialPipelineKey::<B> {
                mesh_key: key.mesh_key,
                bind_group_data: key.bind_group_data.base,
            },
        )?;

        let extension_pipeline = MaterialExtensionPipeline {
            mesh_pipeline: pipeline.mesh_pipeline.clone(),
        };
        E::specialize(
            &extension_pipeline,
            descriptor,
            layout,
            MaterialExtensionKey {
                mesh_key: key.mesh_key,
                bind_group_data: key.bind_group_data.extension,
            },
        )
    }
}

View File

@@ -0,0 +1,477 @@
use bevy_camera::Camera;
use bevy_color::{Color, ColorToComponents, LinearRgba};
use bevy_ecs::prelude::*;
use bevy_math::{ops, Vec3};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use crate::render::extract_component::ExtractComponent;
/// Configures the classic [distance fog](https://en.wikipedia.org/wiki/Distance_fog)
/// effect for a camera: geometry is blended progressively towards a haze color
/// the further it is from the viewer.
///
/// Applies to meshes rendered via the PBR [`StandardMaterial`](crate::StandardMaterial).
/// Insert this component on the camera entity; once enabled for a camera, fog
/// can still be disabled per material through `StandardMaterial`'s `fog_enabled`
/// flag.
///
/// The shape of the intensity-over-distance curve is chosen via [`FogFalloff`]:
/// [`FogFalloff::Linear`], [`FogFalloff::Exponential`],
/// [`FogFalloff::ExponentialSquared`], or [`FogFalloff::Atmospheric`].
///
/// # Example
///
/// ```
/// # use bevy_ecs::prelude::*;
/// # use crate::render::prelude::*;
/// # use bevy_camera::prelude::*;
/// # use bevy_pbr::prelude::*;
/// # use bevy_color::Color;
/// # fn system(mut commands: Commands) {
/// commands.spawn((
///     // Setup your camera as usual
///     Camera3d::default(),
///     // Add fog to the same entity
///     DistanceFog {
///         color: Color::WHITE,
///         falloff: FogFalloff::Exponential { density: 1e-3 },
///         ..Default::default()
///     },
/// ));
/// # }
/// # bevy_ecs::system::assert_is_system(system);
/// ```
#[derive(Debug, Clone, Component, Reflect, ExtractComponent)]
#[extract_component_filter(With<Camera>)]
#[reflect(Component, Default, Debug, Clone)]
pub struct DistanceFog {
    /// Base color of the haze.
    ///
    /// The alpha channel modulates the overall strength of the effect, which
    /// makes it a convenient "master intensity" control that is independent of
    /// the falloff mode and its parameters.
    pub color: Color,
    /// Tint applied where the view direction aligns with a directional light's
    /// direction, producing a light-dispersion "glow" (e.g. a halo around the
    /// sun).
    ///
    /// Set to [`Color::NONE`] to disable the glow entirely.
    pub directional_light_color: Color,
    /// Exponent of the view/light alignment term; higher values concentrate
    /// the glow into a tighter spot.
    pub directional_light_exponent: f32,
    /// The falloff mode (and its parameters) that maps distance to fog
    /// intensity.
    pub falloff: FogFalloff,
}
/// Selects the curve used to map camera distance to fog intensity, along with
/// that curve's parameters.
///
/// # Convenience Methods
///
/// Hand-picking parameters for the non-linear modes can be difficult, so each
/// of them has visibility-based convenience constructors:
///
/// - For `FogFalloff::Exponential`:
///   - [`FogFalloff::from_visibility()`]
///   - [`FogFalloff::from_visibility_contrast()`]
///
/// - For `FogFalloff::ExponentialSquared`:
///   - [`FogFalloff::from_visibility_squared()`]
///   - [`FogFalloff::from_visibility_contrast_squared()`]
///
/// - For `FogFalloff::Atmospheric`:
///   - [`FogFalloff::from_visibility_color()`]
///   - [`FogFalloff::from_visibility_colors()`]
///   - [`FogFalloff::from_visibility_contrast_color()`]
///   - [`FogFalloff::from_visibility_contrast_colors()`]
#[derive(Debug, Clone, Reflect)]
#[reflect(Clone)]
pub enum FogFalloff {
    /// Fog intensity grows linearly from fully transparent at `start` to fully
    /// opaque at `end`:
    ///
    /// ```text
    /// let fog_intensity = 1.0 - ((end - distance) / (end - start)).clamp(0.0, 1.0);
    /// ```
    ///
    /// The simplest mode to control, although the hard start/end cutoffs can
    /// read as "artificial" in some scenes.
    Linear {
        /// Distance from the camera where fog is completely transparent, in world units.
        start: f32,
        /// Distance from the camera where fog is completely opaque, in world units.
        end: f32,
    },
    /// Exponential falloff with a given `density`:
    ///
    /// ```text
    /// let fog_intensity = 1.0 - 1.0 / (distance * density).exp();
    /// ```
    ///
    /// Gains intensity quickly at first, then levels off — usually more
    /// natural-looking than [`FogFalloff::Linear`], but harder to tune.
    /// Lower densities push the fog "further away"; higher densities pull it
    /// "closer".
    ///
    /// ## Tips
    ///
    /// - Use [`FogFalloff::from_visibility()`] to derive a density from a
    ///   target visibility distance in world units;
    /// - Sensible densities depend heavily on scene scale: very large scenes
    ///   (objects in the thousands of units) want densities around `0.001`,
    ///   while tiny scenes want much larger values;
    /// - Combine `density` with the [`DistanceFog`] `color`'s alpha channel
    ///   for easier artistic control.
    Exponential {
        /// Multiplier applied to the world distance (within the exponential fog falloff calculation).
        density: f32,
    },
    /// Squared-exponential falloff with a given `density`:
    ///
    /// ```text
    /// let fog_intensity = 1.0 - 1.0 / (distance * density).squared().exp();
    /// ```
    ///
    /// Similar to [`FogFalloff::Exponential`], but ramps up more gently at
    /// close range before "catching up". Lower densities push the fog further
    /// away; higher densities pull it closer.
    ///
    /// ## Tips
    ///
    /// - Use [`FogFalloff::from_visibility_squared()`] to derive a density
    ///   from a target visibility distance in world units;
    /// - Combine `density` with the [`DistanceFog`] `color`'s alpha channel
    ///   for easier artistic control.
    ExponentialSquared {
        /// Multiplier applied to the world distance (within the exponential squared fog falloff calculation).
        density: f32,
    },
    /// A somewhat simplified atmospheric scattering model: a generalization of
    /// [`FogFalloff::Exponential`] split into per-channel `extinction` (light
    /// removed) and `inscattering` (light added) terms, for six parameters in
    /// total.
    ///
    /// ## Formula
    ///
    /// Unlike the other modes, this is not a single intensity blend; extinction
    /// and inscattering factors are computed per channel and combined:
    ///
    /// ```text
    /// let extinction_factor = 1.0 - 1.0 / (distance * extinction).exp();
    /// let inscattering_factor = 1.0 - 1.0 / (distance * inscattering).exp();
    /// let result = input_color * (1.0 - extinction_factor) + fog_color * inscattering_factor;
    /// ```
    ///
    /// ## Equivalence to [`FogFalloff::Exponential`]
    ///
    /// Setting both `extinction` and `inscattering` to `Vec3::new(D, D, D)`
    /// produces visuals identical to `FogFalloff::Exponential { density: D }`,
    /// at a higher computational cost.
    ///
    /// ## Tips
    ///
    /// - Use [`FogFalloff::from_visibility_colors()`] or
    ///   [`FogFalloff::from_visibility_color()`] to derive the densities from
    ///   a visibility distance plus extinction/inscattering colors;
    /// - Combine these parameters with the [`DistanceFog`] `color`'s alpha
    ///   channel for easier artistic control.
    Atmospheric {
        /// How much light is removed due to atmospheric "extinction", i.e.
        /// photons absorbed by atmospheric particles.
        ///
        /// Each component is an independent per-`R`/`G`/`B`-channel `density`
        /// factor from [`FogFalloff::Exponential`]: a multiplier applied to
        /// the world distance for that channel.
        ///
        /// **Note:** this is not a `Color` — it affects the channels
        /// exponentially in a non-intuitive way. For artistic control, use
        /// the [`FogFalloff::from_visibility_colors()`] convenience method.
        extinction: Vec3,
        /// How much light is added due to sunlight scattering through the
        /// atmosphere towards the viewer.
        ///
        /// Each component is an independent per-`R`/`G`/`B`-channel `density`
        /// factor from [`FogFalloff::Exponential`]: a multiplier applied to
        /// the world distance for that channel.
        ///
        /// **Note:** this is not a `Color` — it affects the channels
        /// exponentially in a non-intuitive way. For artistic control, use
        /// the [`FogFalloff::from_visibility_colors()`] convenience method.
        inscattering: Vec3,
    },
}
impl FogFalloff {
/// Creates a [`FogFalloff::Exponential`] value from the given visibility distance in world units,
/// using the revised Koschmieder contrast threshold, [`FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD`].
pub fn from_visibility(visibility: f32) -> FogFalloff {
FogFalloff::from_visibility_contrast(
visibility,
FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD,
)
}
/// Creates a [`FogFalloff::Exponential`] value from the given visibility distance in world units,
/// and a given contrast threshold in the range of `0.0` to `1.0`.
pub fn from_visibility_contrast(visibility: f32, contrast_threshold: f32) -> FogFalloff {
FogFalloff::Exponential {
density: FogFalloff::koschmieder(visibility, contrast_threshold),
}
}
/// Creates a [`FogFalloff::ExponentialSquared`] value from the given visibility distance in world units,
/// using the revised Koschmieder contrast threshold, [`FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD`].
pub fn from_visibility_squared(visibility: f32) -> FogFalloff {
FogFalloff::from_visibility_contrast_squared(
visibility,
FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD,
)
}
/// Creates a [`FogFalloff::ExponentialSquared`] value from the given visibility distance in world units,
/// and a given contrast threshold in the range of `0.0` to `1.0`.
pub fn from_visibility_contrast_squared(
visibility: f32,
contrast_threshold: f32,
) -> FogFalloff {
FogFalloff::ExponentialSquared {
density: (FogFalloff::koschmieder(visibility, contrast_threshold) / visibility).sqrt(),
}
}
/// Creates a [`FogFalloff::Atmospheric`] value from the given visibility distance in world units,
/// and a shared color for both extinction and inscattering, using the revised Koschmieder contrast threshold,
/// [`FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD`].
pub fn from_visibility_color(
visibility: f32,
extinction_inscattering_color: Color,
) -> FogFalloff {
FogFalloff::from_visibility_contrast_colors(
visibility,
FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD,
extinction_inscattering_color,
extinction_inscattering_color,
)
}
/// Creates a [`FogFalloff::Atmospheric`] value from the given visibility distance in world units,
/// extinction and inscattering colors, using the revised Koschmieder contrast threshold,
/// [`FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD`].
///
/// ## Tips
/// - Alpha values of the provided colors can modulate the `extinction` and `inscattering` effects;
/// - Using an `extinction_color` of [`Color::WHITE`] or [`Color::NONE`] disables the extinction effect;
/// - Using an `inscattering_color` of [`Color::BLACK`] or [`Color::NONE`] disables the inscattering effect.
pub fn from_visibility_colors(
visibility: f32,
extinction_color: Color,
inscattering_color: Color,
) -> FogFalloff {
FogFalloff::from_visibility_contrast_colors(
visibility,
FogFalloff::REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD,
extinction_color,
inscattering_color,
)
}
/// Creates a [`FogFalloff::Atmospheric`] value from the given visibility distance in world units,
/// a contrast threshold in the range of `0.0` to `1.0`, and a shared color for both extinction and inscattering.
pub fn from_visibility_contrast_color(
visibility: f32,
contrast_threshold: f32,
extinction_inscattering_color: Color,
) -> FogFalloff {
FogFalloff::from_visibility_contrast_colors(
visibility,
contrast_threshold,
extinction_inscattering_color,
extinction_inscattering_color,
)
}
/// Creates a [`FogFalloff::Atmospheric`] value from the given visibility distance in world units,
/// a contrast threshold in the range of `0.0` to `1.0`, extinction and inscattering colors.
///
/// ## Tips
/// - Alpha values of the provided colors can modulate the `extinction` and `inscattering` effects;
/// - Using an `extinction_color` of [`Color::WHITE`] or [`Color::NONE`] disables the extinction effect;
/// - Using an `inscattering_color` of [`Color::BLACK`] or [`Color::NONE`] disables the inscattering effect.
pub fn from_visibility_contrast_colors(
visibility: f32,
contrast_threshold: f32,
extinction_color: Color,
inscattering_color: Color,
) -> FogFalloff {
use core::f32::consts::E;
let [r_e, g_e, b_e, a_e] = LinearRgba::from(extinction_color).to_f32_array();
let [r_i, g_i, b_i, a_i] = LinearRgba::from(inscattering_color).to_f32_array();
FogFalloff::Atmospheric {
extinction: Vec3::new(
// Values are subtracted from 1.0 here to preserve the intuitive/artistic meaning of
// colors, since they're later subtracted. (e.g. by giving a blue extinction color, you
// get blue and _not_ yellow results)
ops::powf(1.0 - r_e, E),
ops::powf(1.0 - g_e, E),
ops::powf(1.0 - b_e, E),
) * FogFalloff::koschmieder(visibility, contrast_threshold)
* ops::powf(a_e, E),
inscattering: Vec3::new(ops::powf(r_i, E), ops::powf(g_i, E), ops::powf(b_i, E))
* FogFalloff::koschmieder(visibility, contrast_threshold)
* ops::powf(a_i, E),
}
}
/// A 2% contrast threshold was originally proposed by Koschmieder, being the
/// minimum visual contrast at which a human observer could detect an object.
/// We use a revised 5% contrast threshold, deemed more realistic for typical human observers.
pub const REVISED_KOSCHMIEDER_CONTRAST_THRESHOLD: f32 = 0.05;
/// Calculates the extinction coefficient β, from V and Cₜ, where:
///
/// - Cₜ is the contrast threshold, in the range of `0.0` to `1.0`
/// - V is the visibility distance in which a perfectly black object is still identifiable
/// against the horizon sky within the contrast threshold
///
/// We start with Koschmieder's equation:
///
/// ```text
/// -ln(Cₜ)
/// V = ─────────
/// β
/// ```
///
/// Multiplying both sides by β/V, that gives us:
///
/// ```text
/// -ln(Cₜ)
/// β = ─────────
/// V
/// ```
///
/// See:
/// - <https://en.wikipedia.org/wiki/Visibility>
/// - <https://www.biral.com/wp-content/uploads/2015/02/Introduction_to_visibility-v2-2.pdf>
pub fn koschmieder(v: f32, c_t: f32) -> f32 {
-ops::ln(c_t) / v
}
}
impl Default for DistanceFog {
    /// White linear fog ramping from 0 to 100 world units, with the
    /// directional-light "glow" effect disabled.
    fn default() -> Self {
        Self {
            color: Color::WHITE,
            directional_light_color: Color::NONE,
            directional_light_exponent: 8.0,
            falloff: FogFalloff::Linear {
                start: 0.0,
                end: 100.0,
            },
        }
    }
}

View File

@@ -0,0 +1,21 @@
// Copy the base mip (level 0) from a source cubemap to a destination cubemap,
// performing format conversion if needed (the destination is always rgba16float).
// The alpha channel is filled with 1.0.
//
// One invocation per texel, in 8x8 workgroup tiles; `global_id.z` selects the
// cubemap face (array layer).
@group(0) @binding(0) var src_cubemap: texture_2d_array<f32>;
@group(0) @binding(1) var dst_cubemap: texture_storage_2d_array<rgba16float, write>;
@compute
@workgroup_size(8, 8, 1)
fn copy(@builtin(global_invocation_id) global_id: vec3u) {
    let size = textureDimensions(src_cubemap).xy;
    // Bounds check: the dispatch may be rounded up past the texture edge.
    if (any(global_id.xy >= size)) {
        return;
    }
    // Read mip 0 of the selected layer; discard the source alpha and store
    // the texel with alpha forced to 1.0.
    let color = textureLoad(src_cubemap, vec2u(global_id.xy), global_id.z, 0);
    textureStore(dst_cubemap, vec2u(global_id.xy), global_id.z, vec4f(color.rgb, 1.0));
}

View File

@@ -0,0 +1,439 @@
// Single pass downsampling shader for creating the mip chain for an array texture
// Ported from https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/c16b1d286b5b438b75da159ab51ff426bacea3d1/sdk/include/FidelityFX/gpu/spd/ffx_spd.h
//
// Bindings 2.. are laid out per shader-def variant:
// - COMBINE_BIND_GROUP: the whole mip chain bound at once; mip_6 is
//   `read_write` because `downsample_second` reads it back as its source.
// - FIRST_PASS: mip 0 as the sampled input, mips 1-6 as storage outputs.
// - SECOND_PASS: mip 6 as the sampled input, mips 7-12 as storage outputs.
@group(0) @binding(0) var sampler_linear_clamp: sampler;
@group(0) @binding(1) var<uniform> constants: Constants;
#ifdef COMBINE_BIND_GROUP
@group(0) @binding(2) var mip_0: texture_2d_array<f32>;
@group(0) @binding(3) var mip_1: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(4) var mip_2: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(5) var mip_3: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(6) var mip_4: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(7) var mip_5: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(8) var mip_6: texture_storage_2d_array<rgba16float, read_write>;
@group(0) @binding(9) var mip_7: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(10) var mip_8: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(11) var mip_9: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(12) var mip_10: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(13) var mip_11: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(14) var mip_12: texture_storage_2d_array<rgba16float, write>;
#endif
#ifdef FIRST_PASS
@group(0) @binding(2) var mip_0: texture_2d_array<f32>;
@group(0) @binding(3) var mip_1: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(4) var mip_2: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(5) var mip_3: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(6) var mip_4: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(7) var mip_5: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(8) var mip_6: texture_storage_2d_array<rgba16float, write>;
#endif
#ifdef SECOND_PASS
@group(0) @binding(2) var mip_6: texture_2d_array<f32>;
@group(0) @binding(3) var mip_7: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(4) var mip_8: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(5) var mip_9: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(6) var mip_10: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(7) var mip_11: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(8) var mip_12: texture_storage_2d_array<rgba16float, write>;
#endif
// mips: number of mip levels to generate (used as the early-out bound below).
// inverse_input_size: presumably 1.0 / source dimensions, for sampling in
// spd_reduce_load_source_image (defined elsewhere in this file) — TODO confirm.
struct Constants { mips: u32, inverse_input_size: vec2f }
// 16x16 per-workgroup scratch used to hand reduced values between mip levels,
// stored as one array per channel. NOTE(review): the per-channel split matches
// the FidelityFX SPD layout — presumably to reduce shared-memory pressure;
// confirm against the upstream port before relying on it.
var<workgroup> spd_intermediate_r: array<array<f32, 16>, 16>;
var<workgroup> spd_intermediate_g: array<array<f32, 16>, 16>;
var<workgroup> spd_intermediate_b: array<array<f32, 16>, 16>;
var<workgroup> spd_intermediate_a: array<array<f32, 16>, 16>;
// First dispatch: each 256-invocation workgroup reduces a 64x64 tile of the
// source, writing the first destination mips (clamped by `constants.mips`)
// for array layer `workgroup_id.z`.
@compute
@workgroup_size(256, 1, 1)
fn downsample_first(
    @builtin(workgroup_id) workgroup_id: vec3u,
    @builtin(local_invocation_index) local_invocation_index: u32
) {
    // Swizzle the flat invocation index into an 8x8 layout (see
    // remap_for_wave_reduction, defined elsewhere in this file), then tile
    // four such 8x8 groups into a 16x16 grid:
    // bit 6 selects the horizontal half, bit 7 the vertical half.
    let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
    let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
    let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
    // Mips for the first two levels, then up to four more starting at mip 2.
    spd_downsample_mips_0_1(x, y, workgroup_id.xy, local_invocation_index, constants.mips, workgroup_id.z);
    spd_downsample_next_four(x, y, workgroup_id.xy, local_invocation_index, 2u, constants.mips, workgroup_id.z);
}
// TODO: Once wgpu supports globallycoherent buffers, make it actually a single pass
// Second dispatch: continues the chain from mip 6 (bound as the sampled input
// in the SECOND_PASS layout) up through mip 12, clamped by `constants.mips`.
// NOTE(review): presumably dispatched with a single workgroup per array layer
// (workgroup_id.xy is unused here and a zero offset is passed below) — confirm
// at the dispatch site.
@compute
@workgroup_size(256, 1, 1)
fn downsample_second(
    @builtin(workgroup_id) workgroup_id: vec3u,
    @builtin(local_invocation_index) local_invocation_index: u32,
) {
    // Same 16x16 index swizzle as downsample_first.
    let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
    let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
    let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
    spd_downsample_mips_6_7(x, y, constants.mips, workgroup_id.z);
    // base_mip of 8: continue the chain after mips 6/7, with no tile offset.
    spd_downsample_next_four(x, y, vec2(0u), local_invocation_index, 8u, constants.mips, workgroup_id.z);
}
// Reduces this workgroup's 64x64 source tile into the first two destination
// mip levels. Each invocation handles one position in each of the tile's four
// 32x32 quadrants (offsets of +32 source texels / +16 output pixels).
fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, mips: u32, slice: u32) {
    var v: array<vec4f, 4>;
    // Quadrant 0: top-left.
    var tex = (workgroup_id * 64u) + vec2(x * 2u, y * 2u);
    var pix = (workgroup_id * 32u) + vec2(x, y);
    v[0] = spd_reduce_load_source_image(tex, slice);
    spd_store(pix, v[0], 0u, slice);
    // Quadrant 1: top-right.
    tex = (workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u);
    pix = (workgroup_id * 32u) + vec2(x + 16u, y);
    v[1] = spd_reduce_load_source_image(tex, slice);
    spd_store(pix, v[1], 0u, slice);
    // Quadrant 2: bottom-left.
    tex = (workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u);
    pix = (workgroup_id * 32u) + vec2(x, y + 16u);
    v[2] = spd_reduce_load_source_image(tex, slice);
    spd_store(pix, v[2], 0u, slice);
    // Quadrant 3: bottom-right.
    tex = (workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u);
    pix = (workgroup_id * 32u) + vec2(x + 16u, y + 16u);
    v[3] = spd_reduce_load_source_image(tex, slice);
    spd_store(pix, v[3], 0u, slice);
    // Only one level requested: the second reduction below is unnecessary.
    if mips <= 1u { return; }
#ifdef SUBGROUP_SUPPORT
    // Fast path: reduce each 2x2 quad via subgroup operations; one lane in
    // four then writes the next mip level and the shared intermediate.
    v[0] = spd_reduce_quad(v[0]);
    v[1] = spd_reduce_quad(v[1]);
    v[2] = spd_reduce_quad(v[2]);
    v[3] = spd_reduce_quad(v[3]);
    if local_invocation_index % 4u == 0u {
        spd_store((workgroup_id * 16u) + vec2(x / 2u, y / 2u), v[0], 1u, slice);
        spd_store_intermediate(x / 2u, y / 2u, v[0]);
        spd_store((workgroup_id * 16u) + vec2(x / 2u + 8u, y / 2u), v[1], 1u, slice);
        spd_store_intermediate(x / 2u + 8u, y / 2u, v[1]);
        spd_store((workgroup_id * 16u) + vec2(x / 2u, y / 2u + 8u), v[2], 1u, slice);
        spd_store_intermediate(x / 2u, y / 2u + 8u, v[2]);
        spd_store((workgroup_id * 16u) + vec2(x / 2u + 8u, y / 2u + 8u), v[3], 1u, slice);
        spd_store_intermediate(x / 2u + 8u, y / 2u + 8u, v[3]);
    }
#else
    // Fallback: funnel each quadrant's values through workgroup shared memory,
    // with barriers separating the scatter and gather phases. Only the first
    // 64 invocations perform the gather/reduce/store.
    for (var i = 0u; i < 4u; i++) {
        spd_store_intermediate(x, y, v[i]);
        workgroupBarrier();
        if local_invocation_index < 64u {
            v[i] = spd_reduce_intermediate(
                vec2(x * 2u + 0u, y * 2u + 0u),
                vec2(x * 2u + 1u, y * 2u + 0u),
                vec2(x * 2u + 0u, y * 2u + 1u),
                vec2(x * 2u + 1u, y * 2u + 1u),
            );
            spd_store(vec2(workgroup_id * 16) + vec2(x + (i % 2u) * 8u, y + (i / 2u) * 8u), v[i], 1u, slice);
        }
        workgroupBarrier();
    }
    // Repopulate the intermediate with the reduced values for the next level.
    if local_invocation_index < 64u {
        spd_store_intermediate(x + 0u, y + 0u, v[0]);
        spd_store_intermediate(x + 8u, y + 0u, v[1]);
        spd_store_intermediate(x + 0u, y + 8u, v[2]);
        spd_store_intermediate(x + 8u, y + 8u, v[3]);
    }
#endif
}
// Produces the next four mip levels (base_mip .. base_mip + 3), bailing out as
// soon as `mips` says no further level is required. Every stage reads the
// workgroup-shared intermediate values written by the previous stage, so a
// workgroupBarrier() must precede each call.
fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) {
    if mips <= base_mip { return; }
    workgroupBarrier();
    spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice);
    if mips <= base_mip + 1u { return; }
    workgroupBarrier();
    spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1u, slice);
    if mips <= base_mip + 2u { return; }
    workgroupBarrier();
    spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2u, slice);
    if mips <= base_mip + 3u { return; }
    workgroupBarrier();
    spd_downsample_mip_5(x, y, workgroup_id, local_invocation_index, base_mip + 3u, slice);
}
// First of the four chained reduction stages: averages 2x2 blocks of the
// workgroup-shared intermediate values into `base_mip`, and re-stashes the
// result in the staggered intermediate layout that spd_downsample_mip_3 reads.
fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, base_mip: u32, slice: u32) {
#ifdef SUBGROUP_SUPPORT
    // Subgroup path: every lane loads one intermediate texel, the quad swap
    // reduction averages across the 2x2 quad, and one lane per quad stores.
    var v = spd_load_intermediate(x, y);
    v = spd_reduce_quad(v);
    if local_invocation_index % 4u == 0u {
        spd_store((workgroup_id * 16u) + vec2(x / 2u, y / 2u), v, 1u, slice);
        spd_store_intermediate(x / 2u, y / 2u, v);
    }
#else
    // Fallback path: the first 64 invocations each reduce one 2x2 block of
    // intermediate texels explicitly.
    if local_invocation_index < 64u {
        let v = spd_reduce_intermediate(
            vec2(x * 2u + 0u, y * 2u + 0u),
            vec2(x * 2u + 1u, y * 2u + 0u),
            vec2(x * 2u + 0u, y * 2u + 1u),
            vec2(x * 2u + 1u, y * 2u + 1u),
        );
        spd_store((workgroup_id * 8u) + vec2(x, y), v, base_mip, slice);
        // Staggered (checkerboard-like) write position expected by mip 3.
        spd_store_intermediate(x * 2u + y % 2u, y * 2u, v);
    }
#endif
}
// Second chained reduction stage: consumes the staggered intermediate values
// written by spd_downsample_mip_2 and writes `base_mip` plus a more sparsely
// packed intermediate layout for spd_downsample_mip_4.
fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, base_mip: u32, slice: u32) {
#ifdef SUBGROUP_SUPPORT
    // Only the first 64 invocations still hold live data at this stage.
    if local_invocation_index < 64u {
        var v = spd_load_intermediate(x * 2u + y % 2u, y * 2u);
        v = spd_reduce_quad(v);
        if local_invocation_index % 4u == 0u {
            spd_store((workgroup_id * 4u) + vec2(x / 2u, y / 2u), v, base_mip, slice);
            spd_store_intermediate(x * 2u + y / 2u, y * 2u, v);
        }
    }
#else
    // Fallback path: 16 invocations each gather four staggered intermediate
    // texels (the offsets mirror mip 2's staggered store pattern).
    if local_invocation_index < 16u {
        let v = spd_reduce_intermediate(
            vec2(x * 4u + 0u + 0u, y * 4u + 0u),
            vec2(x * 4u + 2u + 0u, y * 4u + 0u),
            vec2(x * 4u + 0u + 1u, y * 4u + 2u),
            vec2(x * 4u + 2u + 1u, y * 4u + 2u),
        );
        spd_store((workgroup_id * 4u) + vec2(x, y), v, base_mip, slice);
        spd_store_intermediate(x * 4u + y, y * 4u, v);
    }
#endif
}
// Third chained reduction stage: consumes the layout written by
// spd_downsample_mip_3, writes `base_mip`, and packs the four surviving
// values into row 0 of the intermediate storage for spd_downsample_mip_5.
fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, base_mip: u32, slice: u32) {
#ifdef SUBGROUP_SUPPORT
    // Only 16 invocations still carry live data at this stage.
    if local_invocation_index < 16u {
        var v = spd_load_intermediate(x * 4u + y, y * 4u);
        v = spd_reduce_quad(v);
        if local_invocation_index % 4u == 0u {
            spd_store((workgroup_id * 2u) + vec2(x / 2u, y / 2u), v, base_mip, slice);
            spd_store_intermediate(x / 2u + y, 0u, v);
        }
    }
#else
    // Fallback path: 4 invocations each gather four texels from the layout
    // written by mip 3 (offsets mirror that stage's store pattern).
    if local_invocation_index < 4u {
        let v = spd_reduce_intermediate(
            vec2(x * 8u + 0u + 0u + y * 2u, y * 8u + 0u),
            vec2(x * 8u + 4u + 0u + y * 2u, y * 8u + 0u),
            vec2(x * 8u + 0u + 1u + y * 2u, y * 8u + 4u),
            vec2(x * 8u + 4u + 1u + y * 2u, y * 8u + 4u),
        );
        spd_store((workgroup_id * 2u) + vec2(x, y), v, base_mip, slice);
        spd_store_intermediate(x + y * 2u, 0u, v);
    }
#endif
}
// Final chained reduction stage: collapses the four values left in row 0 of
// the intermediate storage into a single texel at `workgroup_id`.
fn spd_downsample_mip_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32, base_mip: u32, slice: u32) {
#ifdef SUBGROUP_SUPPORT
    // Four lanes load the four packed values; the quad reduction averages
    // them and lane 0 stores the result.
    if local_invocation_index < 4u {
        var v = spd_load_intermediate(local_invocation_index, 0u);
        v = spd_reduce_quad(v);
        if local_invocation_index % 4u == 0u {
            spd_store(workgroup_id, v, base_mip, slice);
        }
    }
#else
    // Fallback path: a single invocation averages the four packed values.
    if local_invocation_index < 1u {
        let v = spd_reduce_intermediate(vec2(0u, 0u), vec2(1u, 0u), vec2(2u, 0u), vec2(3u, 0u));
        spd_store(workgroup_id, v, base_mip, slice);
    }
#endif
}
// Computes mip 6 and, when `mips` requires it, mip 7 (start of the second
// downsampling pass). Each invocation reduces four separate 2x2 blocks of the
// previous level into four mip-6 texels, then averages those four results
// into a single mip-7 texel which is also stashed in workgroup-shared
// intermediate storage for the remaining mips.
//
// Note: the vendored original also maintained a `tex` variable alongside
// `pix`; it was written four times but never read (the source coordinates are
// passed to spd_reduce_load_4 directly), so the dead stores are removed here.
fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) {
    // Top-left 2x2 block.
    var pix = vec2(x * 2u + 0u, y * 2u + 0u);
    let v0 = spd_reduce_load_4(
        vec2(x * 4u + 0u, y * 4u + 0u),
        vec2(x * 4u + 1u, y * 4u + 0u),
        vec2(x * 4u + 0u, y * 4u + 1u),
        vec2(x * 4u + 1u, y * 4u + 1u),
        slice
    );
    spd_store(pix, v0, 6u, slice);
    // Top-right 2x2 block.
    pix = vec2(x * 2u + 1u, y * 2u + 0u);
    let v1 = spd_reduce_load_4(
        vec2(x * 4u + 2u, y * 4u + 0u),
        vec2(x * 4u + 3u, y * 4u + 0u),
        vec2(x * 4u + 2u, y * 4u + 1u),
        vec2(x * 4u + 3u, y * 4u + 1u),
        slice
    );
    spd_store(pix, v1, 6u, slice);
    // Bottom-left 2x2 block.
    pix = vec2(x * 2u + 0u, y * 2u + 1u);
    let v2 = spd_reduce_load_4(
        vec2(x * 4u + 0u, y * 4u + 2u),
        vec2(x * 4u + 1u, y * 4u + 2u),
        vec2(x * 4u + 0u, y * 4u + 3u),
        vec2(x * 4u + 1u, y * 4u + 3u),
        slice
    );
    spd_store(pix, v2, 6u, slice);
    // Bottom-right 2x2 block.
    pix = vec2(x * 2u + 1u, y * 2u + 1u);
    let v3 = spd_reduce_load_4(
        vec2(x * 4u + 2u, y * 4u + 2u),
        vec2(x * 4u + 3u, y * 4u + 2u),
        vec2(x * 4u + 2u, y * 4u + 3u),
        vec2(x * 4u + 3u, y * 4u + 3u),
        slice
    );
    spd_store(pix, v3, 6u, slice);
    if mips < 7u { return; }
    // Mip 7 texel: the average of the four mip-6 texels computed above.
    let v = spd_reduce_4(v0, v1, v2, v3);
    spd_store(vec2(x, y), v, 7u, slice);
    spd_store_intermediate(x, y, v);
}
// Remaps a linear thread index `a` (0..63) to 2D coordinates such that
// consecutive indices form 2x2 quads, so neighboring threads process
// neighboring pixels (e.g. indices 0,1,2,3 map to one 2x2 square).
//
// Concretely: x is built from bits {0, 3, 4} of `a` and y from bits
// {1, 2, 5}, using extractBits/insertBits to splice the bit fields together.
fn remap_for_wave_reduction(a: u32) -> vec2u {
    // This function maps linear thread IDs to 2D coordinates in a special pattern
    // to ensure that neighboring threads process neighboring pixels
    // For example, this transforms linear thread IDs 0,1,2,3 into a 2×2 square
    // Extract bits to form the X and Y coordinates
    // x = (a bits 3..4 shifted up) | (a bit 0)
    let x = insertBits(extractBits(a, 2u, 3u), a, 0u, 1u);
    // y = (a bit 5 shifted up) | (a bits 1..2)
    let y = insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u);
    return vec2u(x, y);
}
// Loads one value from the pass's source texture: mip 0 for the combined bind
// group and the first pass, mip 6 for the second pass. `uv` is an integer
// texel coordinate; `constants.inverse_input_size` converts it (plus a
// half-texel offset) to normalized UVs for the linear-clamp sampler. When
// SRGB_CONVERSION is enabled, the RGB channels are converted from linear to
// sRGB before being returned; alpha is passed through unchanged.
fn spd_reduce_load_source_image(uv: vec2u, slice: u32) -> vec4f {
    let texture_coord = (vec2f(uv) + 0.5) * constants.inverse_input_size;
#ifdef COMBINE_BIND_GROUP
    let result = textureSampleLevel(mip_0, sampler_linear_clamp, texture_coord, slice, 0.0);
#endif
#ifdef FIRST_PASS
    let result = textureSampleLevel(mip_0, sampler_linear_clamp, texture_coord, slice, 0.0);
#endif
#ifdef SECOND_PASS
    let result = textureSampleLevel(mip_6, sampler_linear_clamp, texture_coord, slice, 0.0);
#endif
#ifdef SRGB_CONVERSION
    return vec4(
        srgb_from_linear(result.r),
        srgb_from_linear(result.g),
        srgb_from_linear(result.b),
        result.a
    );
#else
    return result;
#endif
}
// Writes `value` to array layer `slice` of the output storage texture for mip
// index `mip` (0-based: mip index 0 writes the texture named `mip_1`, etc.).
// Writes beyond the number of mips actually requested are silently dropped.
// Which mip indices are reachable depends on the compiled pass: the combined
// bind group covers all twelve outputs, the first pass covers indices 0-5,
// and the second pass indices 6-11; anything else falls through the default.
fn spd_store(pix: vec2u, value: vec4f, mip: u32, slice: u32) {
    if mip >= constants.mips { return; }
    switch mip {
#ifdef COMBINE_BIND_GROUP
        case 0u: { textureStore(mip_1, pix, slice, value); }
        case 1u: { textureStore(mip_2, pix, slice, value); }
        case 2u: { textureStore(mip_3, pix, slice, value); }
        case 3u: { textureStore(mip_4, pix, slice, value); }
        case 4u: { textureStore(mip_5, pix, slice, value); }
        case 5u: { textureStore(mip_6, pix, slice, value); }
        case 6u: { textureStore(mip_7, pix, slice, value); }
        case 7u: { textureStore(mip_8, pix, slice, value); }
        case 8u: { textureStore(mip_9, pix, slice, value); }
        case 9u: { textureStore(mip_10, pix, slice, value); }
        case 10u: { textureStore(mip_11, pix, slice, value); }
        case 11u: { textureStore(mip_12, pix, slice, value); }
#endif
#ifdef FIRST_PASS
        case 0u: { textureStore(mip_1, pix, slice, value); }
        case 1u: { textureStore(mip_2, pix, slice, value); }
        case 2u: { textureStore(mip_3, pix, slice, value); }
        case 3u: { textureStore(mip_4, pix, slice, value); }
        case 4u: { textureStore(mip_5, pix, slice, value); }
        case 5u: { textureStore(mip_6, pix, slice, value); }
#endif
#ifdef SECOND_PASS
        case 6u: { textureStore(mip_7, pix, slice, value); }
        case 7u: { textureStore(mip_8, pix, slice, value); }
        case 8u: { textureStore(mip_9, pix, slice, value); }
        case 9u: { textureStore(mip_10, pix, slice, value); }
        case 10u: { textureStore(mip_11, pix, slice, value); }
        case 11u: { textureStore(mip_12, pix, slice, value); }
#endif
        default: {}
    }
}
// Stores one vec4 into the workgroup-shared intermediate storage, which is
// kept as four separate per-channel arrays (structure-of-arrays layout).
fn spd_store_intermediate(x: u32, y: u32, value: vec4f) {
    spd_intermediate_r[x][y] = value.x;
    spd_intermediate_g[x][y] = value.y;
    spd_intermediate_b[x][y] = value.z;
    spd_intermediate_a[x][y] = value.w;
}
// Reassembles a vec4 from the four per-channel workgroup-shared arrays
// written by spd_store_intermediate.
fn spd_load_intermediate(x: u32, y: u32) -> vec4f {
    return vec4(spd_intermediate_r[x][y], spd_intermediate_g[x][y], spd_intermediate_b[x][y], spd_intermediate_a[x][y]);
}
// Averages the four workgroup-shared intermediate texels at the given
// coordinates (loads are issued in argument order, then fed to the shared
// reduction operator).
fn spd_reduce_intermediate(i0: vec2u, i1: vec2u, i2: vec2u, i3: vec2u) -> vec4f {
    return spd_reduce_4(
        spd_load_intermediate(i0.x, i0.y),
        spd_load_intermediate(i1.x, i1.y),
        spd_load_intermediate(i2.x, i2.y),
        spd_load_intermediate(i3.x, i3.y),
    );
}
// Loads four texels of mip 6 and averages them. In the combined bind group
// mip_6 is a storage texture (3-argument textureLoad); in the second pass it
// is a sampled texture (explicit mip level 0 as the 4th argument). The
// first-pass variant is a stub returning zero — this helper is only reached
// from the mips-6/7 stage, which the first pass does not run.
fn spd_reduce_load_4(i0: vec2u, i1: vec2u, i2: vec2u, i3: vec2u, slice: u32) -> vec4f {
#ifdef COMBINE_BIND_GROUP
    let v0 = textureLoad(mip_6, i0, slice);
    let v1 = textureLoad(mip_6, i1, slice);
    let v2 = textureLoad(mip_6, i2, slice);
    let v3 = textureLoad(mip_6, i3, slice);
    return spd_reduce_4(v0, v1, v2, v3);
#endif
#ifdef FIRST_PASS
    return vec4(0.0, 0.0, 0.0, 0.0);
#endif
#ifdef SECOND_PASS
    let v0 = textureLoad(mip_6, i0, slice, 0);
    let v1 = textureLoad(mip_6, i1, slice, 0);
    let v2 = textureLoad(mip_6, i2, slice, 0);
    let v3 = textureLoad(mip_6, i3, slice, 0);
    return spd_reduce_4(v0, v1, v2, v3);
#endif
}
// The reduction operator used throughout the downsampling chain: the
// arithmetic mean of four texels.
fn spd_reduce_4(v0: vec4f, v1: vec4f, v2: vec4f, v3: vec4f) -> vec4f {
    let sum = v0 + v1 + v2 + v3;
    return sum * 0.25;
}
#ifdef SUBGROUP_SUPPORT
// Averages a value across the current 2x2 subgroup quad: after the three quad
// swaps every lane holds all four lane values, so each lane computes the mean.
fn spd_reduce_quad(v: vec4f) -> vec4f {
    let v0 = v;
    // Value from the horizontally adjacent lane in the quad.
    let v1 = quadSwapX(v);
    // Value from the vertically adjacent lane in the quad.
    let v2 = quadSwapY(v);
    // Value from the diagonally opposite lane in the quad.
    let v3 = quadSwapDiagonal(v);
    return spd_reduce_4(v0, v1, v2, v3);
}
#endif
// Linear -> sRGB transfer function, written in the branchless packed-constant
// style of AMD's FidelityFX helpers: j = (linear-segment cutoff, linear
// slope, gamma exponent), k = (pow scale, pow offset). The clamp selects
// between the linear and power segments of the curve.
// NOTE(review): this clamp-argument ordering is what the vendored upstream
// ships; it is an approximation near the segment boundary — compare against
// upstream before changing it.
fn srgb_from_linear(value: f32) -> f32 {
    let j = vec3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
    let k = vec2(1.055, -0.055);
    return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y);
}

View File

@@ -0,0 +1,185 @@
#import bevy_render::maths::PI
#import bevy_pbr::{
lighting,
utils::{sample_cosine_hemisphere, dir_to_cube_uv, sample_cube_dir, hammersley_2d, rand_vec2f}
}
// Uniform parameters for the environment-map filtering kernels below.
struct FilteringConstants {
    // Mip level parameter. NOTE(review): not referenced by the kernels
    // visible in this file section — confirm its consumer before relying on it.
    mip_level: f32,
    // Number of samples taken per output texel (importance samples for the
    // radiance map, hemisphere samples for the irradiance map).
    sample_count: u32,
    // Perceptual roughness of the radiance mip currently being generated.
    roughness: f32,
    // log2 of the blue-noise texture dimensions; used to build a
    // power-of-two wrap mask in sample_noise.
    noise_size_bits: vec2u,
}
@group(0) @binding(0) var input_texture: texture_2d_array<f32>;
@group(0) @binding(1) var input_sampler: sampler;
@group(0) @binding(2) var output_texture: texture_storage_2d_array<rgba16float, write>;
@group(0) @binding(3) var<uniform> constants: FilteringConstants;
@group(0) @binding(4) var blue_noise_texture: texture_2d_array<f32>;
// Sample an environment map with a specific LOD.
// The direction is mapped to a cube face index plus face-local UV via
// dir_to_cube_uv, then the matching layer of the 2D-array input is sampled.
fn sample_environment(dir: vec3f, level: f32) -> vec4f {
    let cube_uv = dir_to_cube_uv(dir);
    return textureSampleLevel(input_texture, input_sampler, cube_uv.uv, cube_uv.face, level);
}
// Blue noise randomization
#ifdef HAS_BLUE_NOISE
// Returns a per-pixel noise value by tiling the blue-noise texture across the
// output: pixel coordinates are wrapped with a power-of-two mask derived from
// constants.noise_size_bits, then sampled from layer 0 at mip 0.
fn sample_noise(pixel_coords: vec2u) -> vec4f {
    let noise_size = vec2u(1) << constants.noise_size_bits;
    let noise_size_mask = noise_size - vec2u(1u);
    let noise_coords = pixel_coords & noise_size_mask;
    let uv = vec2f(noise_coords) / vec2f(noise_size);
    return textureSampleLevel(blue_noise_texture, input_sampler, uv, 0u, 0.0);
}
#else
// Fallback when no blue-noise texture is bound: a hash-seeded pseudo-random
// pair in .rg (the .ba channels are unused and set to zero).
fn sample_noise(pixel_coords: vec2u) -> vec4f {
    var rng_state: u32 = (pixel_coords.x * 3966231743u) ^ (pixel_coords.y * 3928936651u);
    let rnd = rand_vec2f(&rng_state);
    return vec4f(rnd, 0.0, 0.0);
}
#endif
// Calculate LOD for environment map lookup using filtered importance sampling:
// the LOD is chosen so one sample's solid angle roughly covers one source
// texel's solid angle, which suppresses fireflies from undersampling.
fn calculate_environment_map_lod(pdf: f32, width: f32, samples: f32) -> f32 {
    // Solid angle of current sample
    let omega_s = 1.0 / (samples * pdf);
    // Solid angle of a texel in the environment map (6 faces of width^2 texels
    // covering the full 4*pi sphere)
    let omega_p = 4.0 * PI / (6.0 * width * width);
    // Filtered importance sampling: compute the correct LOD
    return 0.5 * log2(omega_s / omega_p);
}
// Entry point: prefilters one texel of one face of the specular (radiance)
// cubemap for the roughness level in `constants`, using GGX visible-normal
// importance sampling with filtered importance sampling for LOD selection.
@compute
@workgroup_size(8, 8, 1)
fn generate_radiance_map(@builtin(global_invocation_id) global_id: vec3u) {
    let size = textureDimensions(output_texture).xy;
    let invSize = 1.0 / vec2f(size);
    let coords = vec2u(global_id.xy);
    // The z dispatch dimension selects the cube face (array layer).
    let face = global_id.z;
    if (any(coords >= size)) {
        return;
    }
    // Convert texture coordinates to direction vector
    let uv = (vec2f(coords) + 0.5) * invSize;
    let normal = sample_cube_dir(uv, face);
    // For radiance map, view direction = normal for perfect reflection
    let view = normal;
    // Convert perceptual roughness to physical microfacet roughness
    let perceptual_roughness = constants.roughness;
    let roughness = lighting::perceptualRoughnessToRoughness(perceptual_roughness);
    // Get blue noise offset for stratification
    let vector_noise = sample_noise(coords);
    var radiance = vec3f(0.0);
    var total_weight = 0.0;
    // Skip sampling for mirror reflection (roughness = 0): just copy mip 0.
    if (roughness < 0.01) {
        radiance = sample_environment(normal, 0.0).rgb;
        textureStore(output_texture, coords, face, vec4f(radiance, 1.0));
        return;
    }
    // For higher roughness values, use importance sampling
    let sample_count = constants.sample_count;
    for (var i = 0u; i < sample_count; i++) {
        // Get sample coordinates from Hammersley sequence with blue noise offset
        var xi = hammersley_2d(i, sample_count);
        xi = fract(xi + vector_noise.rg); // Apply Cranley-Patterson rotation
        // Sample the GGX distribution with the spherical-cap VNDF method
        let light_dir = lighting::sample_visible_ggx(xi, roughness, normal, view);
        // Calculate weight (N·L)
        let NdotL = dot(normal, light_dir);
        // Only samples in the upper hemisphere contribute.
        if (NdotL > 0.0) {
            // Reconstruct the microfacet half-vector from view and light and compute PDF terms
            let half_vector = normalize(view + light_dir);
            let NdotH = dot(normal, half_vector);
            let NdotV = dot(normal, view);
            // Get the geometric shadowing term
            let G = lighting::G_Smith(NdotV, NdotL, roughness);
            // PDF that matches the bounded-VNDF sampling
            let pdf = lighting::ggx_vndf_pdf(view, NdotH, roughness);
            // Calculate LOD using filtered importance sampling
            // This is crucial to avoid fireflies and improve quality
            let width = f32(size.x);
            let lod = calculate_environment_map_lod(pdf, width, f32(sample_count));
            // Get source mip level - ensure we don't go negative
            let source_mip = max(0.0, lod);
            // Sample environment map with the light direction
            var sample_color = sample_environment(light_dir, source_mip).rgb;
            // Accumulate weighted sample, including geometric term
            radiance += sample_color * NdotL * G;
            total_weight += NdotL * G;
        }
    }
    // Normalize by total weight (guard against all samples being rejected)
    if (total_weight > 0.0) {
        radiance = radiance / total_weight;
    }
    // Write result to output texture
    textureStore(output_texture, coords, face, vec4f(radiance, 1.0));
}
// Entry point: computes one texel of one face of the diffuse (irradiance)
// cubemap by cosine-weighted hemisphere sampling around the texel's normal.
@compute
@workgroup_size(8, 8, 1)
fn generate_irradiance_map(@builtin(global_invocation_id) global_id: vec3u) {
    let size = textureDimensions(output_texture).xy;
    let invSize = 1.0 / vec2f(size);
    let coords = vec2u(global_id.xy);
    // The z dispatch dimension selects the cube face (array layer).
    let face = global_id.z;
    if (any(coords >= size)) {
        return;
    }
    // Convert texture coordinates to direction vector
    let uv = (vec2f(coords) + 0.5) * invSize;
    let normal = sample_cube_dir(uv, face);
    var irradiance = vec3f(0.0);
    // Use uniform sampling on a hemisphere
    for (var i = 0u; i < constants.sample_count; i++) {
        // Build a deterministic RNG seed for this pixel / sample
        // 4 randomly chosen 32-bit primes
        var rng: u32 = (coords.x * 2131358057u) ^ (coords.y * 3416869721u) ^ (face * 1199786941u) ^ (i * 566200673u);
        // Sample a direction from the upper hemisphere around the normal
        var sample_dir = sample_cosine_hemisphere(normal, &rng);
        // Sample environment with level 0 (no mip)
        var sample_color = sample_environment(sample_dir, 0.0).rgb;
        // Accumulate the contribution
        irradiance += sample_color;
    }
    // Normalize by number of samples (cosine-weighted sampling already accounts for PDF)
    irradiance = irradiance / f32(constants.sample_count);
    // Write result to output texture
    textureStore(output_texture, coords, face, vec4f(irradiance, 1.0));
}

View File

@@ -0,0 +1,310 @@
//! Environment maps and reflection probes.
//!
//! An *environment map* consists of a pair of diffuse and specular cubemaps
//! that together reflect the static surrounding area of a region in space. When
//! available, the PBR shader uses these to apply diffuse light and calculate
//! specular reflections.
//!
//! Environment maps come in two flavors, depending on what other components the
//! entities they're attached to have:
//!
//! 1. If attached to a view, they represent the objects located a very far
//! distance from the view, in a similar manner to a skybox. Essentially, these
//! *view environment maps* represent a higher-quality replacement for
//! [`AmbientLight`](bevy_light::AmbientLight) for outdoor scenes. The indirect light from such
//! environment maps is added to every point of the scene, including
//! interior enclosed areas.
//!
//! 2. If attached to a [`bevy_light::LightProbe`], environment maps represent the immediate
//! surroundings of a specific location in the scene. These types of
//! environment maps are known as *reflection probes*.
//!
//! Typically, environment maps are static (i.e. "baked", calculated ahead of
//! time) and so only reflect fixed static geometry. The environment maps must
//! be pre-filtered into a pair of cubemaps, one for the diffuse component and
//! one for the specular component, according to the [split-sum approximation].
//! To pre-filter your environment map, you can use the [glTF IBL Sampler] or
//! its [artist-friendly UI]. The diffuse map uses the Lambertian distribution,
//! while the specular map uses the GGX distribution.
//!
//! The Khronos Group has [several pre-filtered environment maps] available for
//! you to use.
//!
//! Currently, reflection probes (i.e. environment maps attached to light
//! probes) use binding arrays (also known as bindless textures) and
//! consequently aren't supported on WebGL2 or WebGPU. Reflection probes are
//! also unsupported if GLSL is in use, due to `naga` limitations. Environment
//! maps attached to views are, however, supported on all platforms.
//!
//! [split-sum approximation]: https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
//!
//! [glTF IBL Sampler]: https://github.com/KhronosGroup/glTF-IBL-Sampler
//!
//! [artist-friendly UI]: https://github.com/pcwalton/gltf-ibl-sampler-egui
//!
//! [several pre-filtered environment maps]: https://github.com/KhronosGroup/glTF-Sample-Environments
use bevy_asset::AssetId;
use bevy_ecs::{query::QueryItem, system::lifetimeless::Read};
use bevy_image::Image;
use bevy_light::EnvironmentMapLight;
use crate::render::{
extract_instances::ExtractInstance,
render_asset::RenderAssets,
render_resource::{
binding_types::{self, uniform_buffer},
BindGroupLayoutEntryBuilder, Sampler, SamplerBindingType, ShaderStages, TextureSampleType,
TextureView,
},
renderer::{RenderAdapter, RenderDevice},
texture::{FallbackImage, GpuImage},
};
use core::{num::NonZero, ops::Deref};
use crate::render::pbr::{
add_cubemap_texture_view, binding_arrays_are_usable, EnvironmentMapUniform,
MAX_VIEW_LIGHT_PROBES,
};
use super::{LightProbeComponent, RenderViewLightProbes};
/// Like [`EnvironmentMapLight`], but contains asset IDs instead of handles.
///
/// This is for use in the render app. Deriving `Copy`/`Hash`/`Eq` lets the
/// render world key collections by the cubemap pair cheaply.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct EnvironmentMapIds {
    /// The blurry image that represents diffuse radiance surrounding a region.
    pub(crate) diffuse: AssetId<Image>,
    /// The typically-sharper, mipmapped image that represents specular radiance
    /// surrounding a region.
    pub(crate) specular: AssetId<Image>,
}
/// All the bind group entries necessary for PBR shaders to access the
/// environment maps exposed to a view.
pub(crate) enum RenderViewEnvironmentMapBindGroupEntries<'a> {
    /// The version used when binding arrays aren't available on the current
    /// platform.
    Single {
        /// The texture view of the view's diffuse cubemap.
        diffuse_texture_view: &'a TextureView,
        /// The texture view of the view's specular cubemap.
        specular_texture_view: &'a TextureView,
        /// The sampler used to sample both `diffuse_texture_view` and
        /// `specular_texture_view`.
        sampler: &'a Sampler,
    },
    /// The version used when binding arrays are available on the current
    /// platform.
    Multiple {
        /// A texture view of each diffuse cubemap, in the same order that they are
        /// supplied to the view (i.e. in the same order as
        /// `binding_index_to_cubemap` in [`RenderViewLightProbes`]).
        ///
        /// This is a vector of `wgpu::TextureView`s. But we don't want to import
        /// `wgpu` in this crate, so we refer to it indirectly like this.
        diffuse_texture_views: Vec<&'a <TextureView as Deref>::Target>,
        /// As above, but for specular cubemaps.
        specular_texture_views: Vec<&'a <TextureView as Deref>::Target>,
        /// The sampler used to sample elements of both `diffuse_texture_views` and
        /// `specular_texture_views`.
        sampler: &'a Sampler,
    },
}
/// Information about the environment map attached to the view, if any. This is
/// a global environment map that lights everything visible in the view, as
/// opposed to a light probe which affects only a specific area.
pub struct EnvironmentMapViewLightProbeInfo {
    /// The index of the diffuse and specular cubemaps in the binding arrays
    /// (negative when the view has no environment map; see `Default`).
    pub(crate) cubemap_index: i32,
    /// The smallest mip level of the specular cubemap.
    pub(crate) smallest_specular_mip_level: u32,
    /// The scale factor applied to the diffuse and specular light in the
    /// cubemap. This is in units of cd/m² (candela per square meter).
    pub(crate) intensity: f32,
    /// Whether this environment map contributes diffuse light to lightmapped
    /// meshes.
    pub(crate) affects_lightmapped_mesh_diffuse: bool,
}
impl ExtractInstance for EnvironmentMapIds {
    type QueryData = Read<EnvironmentMapLight>;
    type QueryFilter = ();

    /// Captures the asset IDs of the entity's diffuse and specular cubemap
    /// handles so the render world can refer to them without the handles.
    fn extract(item: QueryItem<'_, '_, Self::QueryData>) -> Option<Self> {
        let diffuse = item.diffuse_map.id();
        let specular = item.specular_map.id();
        Some(Self { diffuse, specular })
    }
}
/// Returns the bind group layout entries for the environment map diffuse and
/// specular binding arrays respectively, followed by the shared sampler and
/// the environment map uniform buffer (fragment-stage visible).
pub(crate) fn get_bind_group_layout_entries(
    render_device: &RenderDevice,
    render_adapter: &RenderAdapter,
) -> [BindGroupLayoutEntryBuilder; 4] {
    let mut texture_cube_binding =
        binding_types::texture_cube(TextureSampleType::Float { filterable: true });
    // When bindless is available, each cubemap binding becomes a fixed-size
    // binding array instead of a single texture.
    if binding_arrays_are_usable(render_device, render_adapter) {
        texture_cube_binding =
            texture_cube_binding.count(NonZero::<u32>::new(MAX_VIEW_LIGHT_PROBES as _).unwrap());
    }
    [
        texture_cube_binding,
        texture_cube_binding,
        binding_types::sampler(SamplerBindingType::Filtering),
        uniform_buffer::<EnvironmentMapUniform>(true).visibility(ShaderStages::FRAGMENT),
    ]
}
impl<'a> RenderViewEnvironmentMapBindGroupEntries<'a> {
    /// Looks up and returns the bindings for the environment map diffuse and
    /// specular binding arrays respectively, as well as the sampler.
    ///
    /// On bindless-capable platforms this returns [`Self::Multiple`] with the
    /// binding arrays padded to `MAX_VIEW_LIGHT_PROBES` using fallback
    /// cubemaps; otherwise it returns [`Self::Single`] with the first
    /// available cubemap pair, or the fallback cubemap if none is loaded.
    pub(crate) fn get(
        render_view_environment_maps: Option<&RenderViewLightProbes<EnvironmentMapLight>>,
        images: &'a RenderAssets<GpuImage>,
        fallback_image: &'a FallbackImage,
        render_device: &RenderDevice,
        render_adapter: &RenderAdapter,
    ) -> RenderViewEnvironmentMapBindGroupEntries<'a> {
        if binding_arrays_are_usable(render_device, render_adapter) {
            let mut diffuse_texture_views = vec![];
            let mut specular_texture_views = vec![];
            // The sampler is taken from whichever cubemap is added first (see
            // `add_cubemap_texture_view`); falls back below if none was.
            let mut sampler = None;
            if let Some(environment_maps) = render_view_environment_maps {
                for &cubemap_id in &environment_maps.binding_index_to_textures {
                    add_cubemap_texture_view(
                        &mut diffuse_texture_views,
                        &mut sampler,
                        cubemap_id.diffuse,
                        images,
                        fallback_image,
                    );
                    add_cubemap_texture_view(
                        &mut specular_texture_views,
                        &mut sampler,
                        cubemap_id.specular,
                        images,
                        fallback_image,
                    );
                }
            }
            // Pad out the bindings to the size of the binding array using fallback
            // textures. This is necessary on D3D12 and Metal.
            diffuse_texture_views.resize(MAX_VIEW_LIGHT_PROBES, &*fallback_image.cube.texture_view);
            specular_texture_views
                .resize(MAX_VIEW_LIGHT_PROBES, &*fallback_image.cube.texture_view);
            return RenderViewEnvironmentMapBindGroupEntries::Multiple {
                diffuse_texture_views,
                specular_texture_views,
                sampler: sampler.unwrap_or(&fallback_image.cube.sampler),
            };
        }
        // Non-bindless path: bind the first cubemap pair whose images have
        // both finished loading into the render world.
        if let Some(environment_maps) = render_view_environment_maps
            && let Some(cubemap) = environment_maps.binding_index_to_textures.first()
            && let (Some(diffuse_image), Some(specular_image)) =
                (images.get(cubemap.diffuse), images.get(cubemap.specular))
        {
            return RenderViewEnvironmentMapBindGroupEntries::Single {
                diffuse_texture_view: &diffuse_image.texture_view,
                specular_texture_view: &specular_image.texture_view,
                sampler: &diffuse_image.sampler,
            };
        }
        // Nothing usable: bind the fallback cubemap so the layout still validates.
        RenderViewEnvironmentMapBindGroupEntries::Single {
            diffuse_texture_view: &fallback_image.cube.texture_view,
            specular_texture_view: &fallback_image.cube.texture_view,
            sampler: &fallback_image.cube.sampler,
        }
    }
}
impl LightProbeComponent for EnvironmentMapLight {
    type AssetId = EnvironmentMapIds;
    /// Information needed to render with the environment map attached to the
    /// view.
    type ViewLightProbeInfo = EnvironmentMapViewLightProbeInfo;

    /// Reports the cubemap pair's asset IDs, but only once *both* images have
    /// been uploaded to the render world; otherwise returns `None`.
    fn id(&self, image_assets: &RenderAssets<GpuImage>) -> Option<Self::AssetId> {
        image_assets.get(&self.diffuse_map)?;
        image_assets.get(&self.specular_map)?;
        Some(EnvironmentMapIds {
            diffuse: self.diffuse_map.id(),
            specular: self.specular_map.id(),
        })
    }

    fn intensity(&self) -> f32 {
        self.intensity
    }

    fn affects_lightmapped_mesh_diffuse(&self) -> bool {
        self.affects_lightmapped_mesh_diffuse
    }

    /// Builds the per-view light probe record. When the view itself carries an
    /// [`EnvironmentMapLight`] whose cubemaps are both loaded, registers that
    /// cubemap pair and records its binding index plus the specular map's
    /// smallest mip level.
    fn create_render_view_light_probes(
        view_component: Option<&EnvironmentMapLight>,
        image_assets: &RenderAssets<GpuImage>,
    ) -> RenderViewLightProbes<Self> {
        let mut render_view_light_probes = RenderViewLightProbes::new();
        if let Some(environment_map) = view_component {
            let diffuse = image_assets.get(&environment_map.diffuse_map);
            let specular = image_assets.get(&environment_map.specular_map);
            if let (Some(_), Some(specular_map)) = (diffuse, specular) {
                let ids = EnvironmentMapIds {
                    diffuse: environment_map.diffuse_map.id(),
                    specular: environment_map.specular_map.id(),
                };
                render_view_light_probes.view_light_probe_info =
                    EnvironmentMapViewLightProbeInfo {
                        cubemap_index: render_view_light_probes.get_or_insert_cubemap(&ids) as i32,
                        smallest_specular_mip_level: specular_map.mip_level_count - 1,
                        intensity: environment_map.intensity,
                        affects_lightmapped_mesh_diffuse: environment_map
                            .affects_lightmapped_mesh_diffuse,
                    };
            }
        }
        render_view_light_probes
    }
}
impl Default for EnvironmentMapViewLightProbeInfo {
    /// The "no environment map on this view" state: `cubemap_index` of -1 is
    /// the sentinel the shader tests (`view_cubemap_index < 0`), with a
    /// neutral intensity of 1.0.
    fn default() -> Self {
        Self {
            cubemap_index: -1,
            smallest_specular_mip_level: 0,
            intensity: 1.0,
            affects_lightmapped_mesh_diffuse: true,
        }
    }
}

View File

@@ -0,0 +1,279 @@
#define_import_path bevy_pbr::environment_map
#import bevy_pbr::light_probe::query_light_probe
#import bevy_pbr::mesh_view_bindings as bindings
#import bevy_pbr::mesh_view_bindings::light_probes
#import bevy_pbr::mesh_view_bindings::environment_map_uniform
#import bevy_pbr::lighting::{F_Schlick_vec, LightingInput, LayerLightingInput, LAYER_BASE, LAYER_CLEARCOAT}
#import bevy_pbr::clustered_forward::ClusterableObjectIndexRanges
// Final environment-map contribution split into its two lighting components.
struct EnvironmentMapLight {
    // Diffuse (irradiance-driven) contribution.
    diffuse: vec3<f32>,
    // Specular (radiance-driven) contribution.
    specular: vec3<f32>,
};
// Raw cubemap lookups produced by compute_radiances, before the BRDF terms
// are applied.
struct EnvironmentMapRadiances {
    // Sample of the diffuse (irradiance) cubemap along the surface normal.
    irradiance: vec3<f32>,
    // Sample of the specular (radiance) cubemap along the reflection direction,
    // at the mip chosen from perceptual roughness.
    radiance: vec3<f32>,
}
// Define two versions of this function, one for the case in which there are
// multiple light probes and one for the case in which only the view light probe
// is present.
#ifdef MULTIPLE_LIGHT_PROBES_IN_ARRAY
// Binding-array variant: first searches the clustered light probes for a
// reflection probe containing the fragment, falling back to the view's
// environment map, then samples the selected diffuse/specular cubemaps from
// the binding arrays.
fn compute_radiances(
    input: LayerLightingInput,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
    world_position: vec3<f32>,
    found_diffuse_indirect: bool,
) -> EnvironmentMapRadiances {
    // Unpack.
    let N = input.N;
    let R = input.R;
    let perceptual_roughness = input.perceptual_roughness;
    let roughness = input.roughness;
    var radiances: EnvironmentMapRadiances;
    // Search for a reflection probe that contains the fragment.
    var query_result = query_light_probe(
        world_position,
        /*is_irradiance_volume=*/ false,
        clusterable_object_index_ranges,
    );
    // If we didn't find a reflection probe, use the view environment map if applicable.
    if (query_result.texture_index < 0) {
        query_result.texture_index = light_probes.view_cubemap_index;
        query_result.intensity = light_probes.intensity_for_view;
        query_result.affects_lightmapped_mesh_diffuse =
            light_probes.view_environment_map_affects_lightmapped_mesh_diffuse != 0u;
    }
    // If there's no cubemap, bail out.
    if (query_result.texture_index < 0) {
        radiances.irradiance = vec3(0.0);
        radiances.radiance = vec3(0.0);
        return radiances;
    }
    // Split-sum approximation for image based lighting: https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
    // Roughness selects the (pre-filtered) specular mip level.
    let radiance_level = perceptual_roughness * f32(textureNumLevels(
        bindings::specular_environment_maps[query_result.texture_index]) - 1u);
    // If we're lightmapped, and we shouldn't accumulate diffuse light from the
    // environment map, note that.
    var enable_diffuse = !found_diffuse_indirect;
#ifdef LIGHTMAP
    enable_diffuse = enable_diffuse && query_result.affects_lightmapped_mesh_diffuse;
#endif  // LIGHTMAP
    if (enable_diffuse) {
        var irradiance_sample_dir = N;
        // Rotating the world space ray direction by the environment light map transform matrix, it is
        // equivalent to rotating the diffuse environment cubemap itself.
        irradiance_sample_dir = (environment_map_uniform.transform * vec4(irradiance_sample_dir, 1.0)).xyz;
        // Cube maps are left-handed so we negate the z coordinate.
        irradiance_sample_dir.z = -irradiance_sample_dir.z;
        radiances.irradiance = textureSampleLevel(
            bindings::diffuse_environment_maps[query_result.texture_index],
            bindings::environment_map_sampler,
            irradiance_sample_dir,
            0.0).rgb * query_result.intensity;
    }
    var radiance_sample_dir = radiance_sample_direction(N, R, roughness);
    // Rotating the world space ray direction by the environment light map transform matrix, it is
    // equivalent to rotating the specular environment cubemap itself.
    radiance_sample_dir = (environment_map_uniform.transform * vec4(radiance_sample_dir, 1.0)).xyz;
    // Cube maps are left-handed so we negate the z coordinate.
    radiance_sample_dir.z = -radiance_sample_dir.z;
    radiances.radiance = textureSampleLevel(
        bindings::specular_environment_maps[query_result.texture_index],
        bindings::environment_map_sampler,
        radiance_sample_dir,
        radiance_level).rgb * query_result.intensity;
    return radiances;
}
#else // MULTIPLE_LIGHT_PROBES_IN_ARRAY
// Single-probe variant: only the view's environment map is available (no
// binding arrays), so the cubemaps are bound as single textures and the
// specular mip count comes from a uniform.
fn compute_radiances(
    input: LayerLightingInput,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
    world_position: vec3<f32>,
    found_diffuse_indirect: bool,
) -> EnvironmentMapRadiances {
    // Unpack.
    let N = input.N;
    let R = input.R;
    let perceptual_roughness = input.perceptual_roughness;
    let roughness = input.roughness;
    var radiances: EnvironmentMapRadiances;
    // Bail out early if the view has no environment map bound.
    if (light_probes.view_cubemap_index < 0) {
        radiances.irradiance = vec3(0.0);
        radiances.radiance = vec3(0.0);
        return radiances;
    }
    // Split-sum approximation for image based lighting: https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
    // Technically we could use textureNumLevels(specular_environment_map) - 1 here, but we use a uniform
    // because textureNumLevels() does not work on WebGL2
    let radiance_level = perceptual_roughness * f32(light_probes.smallest_specular_mip_level_for_view);
    let intensity = light_probes.intensity_for_view;
    // If we're lightmapped, and we shouldn't accumulate diffuse light from the
    // environment map, note that.
    var enable_diffuse = !found_diffuse_indirect;
#ifdef LIGHTMAP
    enable_diffuse = enable_diffuse &&
        light_probes.view_environment_map_affects_lightmapped_mesh_diffuse;
#endif  // LIGHTMAP
    if (enable_diffuse) {
        var irradiance_sample_dir = N;
        // Rotating the world space ray direction by the environment light map transform matrix, it is
        // equivalent to rotating the diffuse environment cubemap itself.
        irradiance_sample_dir = (environment_map_uniform.transform * vec4(irradiance_sample_dir, 1.0)).xyz;
        // Cube maps are left-handed so we negate the z coordinate.
        irradiance_sample_dir.z = -irradiance_sample_dir.z;
        radiances.irradiance = textureSampleLevel(
            bindings::diffuse_environment_map,
            bindings::environment_map_sampler,
            irradiance_sample_dir,
            0.0).rgb * intensity;
    }
    var radiance_sample_dir = radiance_sample_direction(N, R, roughness);
    // Rotating the world space ray direction by the environment light map transform matrix, it is
    // equivalent to rotating the specular environment cubemap itself.
    radiance_sample_dir = (environment_map_uniform.transform * vec4(radiance_sample_dir, 1.0)).xyz;
    // Cube maps are left-handed so we negate the z coordinate.
    radiance_sample_dir.z = -radiance_sample_dir.z;
    radiances.radiance = textureSampleLevel(
        bindings::specular_environment_map,
        bindings::environment_map_sampler,
        radiance_sample_dir,
        radiance_level).rgb * intensity;
    return radiances;
}
#endif // MULTIPLE_LIGHT_PROBES_IN_ARRAY
#ifdef STANDARD_MATERIAL_CLEARCOAT
// Adds the environment map light from the clearcoat layer to that of the base
// layer.
//
// `out` holds the already-computed base-layer environment light and is
// modified in place. `found_diffuse_indirect` is forwarded to
// `compute_radiances` so diffuse irradiance is suppressed when another source
// has already supplied it.
fn environment_map_light_clearcoat(
    out: ptr<function, EnvironmentMapLight>,
    input: ptr<function, LightingInput>,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
    found_diffuse_indirect: bool,
) {
    // Unpack.
    let world_position = (*input).P;
    let clearcoat_NdotV = (*input).layers[LAYER_CLEARCOAT].NdotV;
    let clearcoat_strength = (*input).clearcoat_strength;
    // Calculate the Fresnel term `Fc` for the clearcoat layer.
    // 0.04 is a hardcoded value for F0 from the Filament spec.
    let clearcoat_F0 = vec3<f32>(0.04);
    let Fc = F_Schlick_vec(clearcoat_F0, 1.0, clearcoat_NdotV) * clearcoat_strength;
    // `inv_Fc` is the fraction of light not reflected by the clearcoat, i.e.
    // what reaches (and returns from) the base layer.
    let inv_Fc = 1.0 - Fc;
    // Sample the environment as seen by the clearcoat layer, which has its own
    // normal and roughness.
    let clearcoat_radiances = compute_radiances(
        (*input).layers[LAYER_CLEARCOAT],
        clusterable_object_index_ranges,
        world_position,
        found_diffuse_indirect,
    );
    // Composite the clearcoat layer on top of the existing one.
    // These formulas are from Filament:
    // <https://google.github.io/filament/Filament.md.html#lighting/imagebasedlights/clearcoat>
    (*out).diffuse *= inv_Fc;
    (*out).specular = (*out).specular * inv_Fc * inv_Fc + clearcoat_radiances.radiance * Fc;
}
#endif // STANDARD_MATERIAL_CLEARCOAT
// Computes the diffuse and specular contribution of the environment map (or
// applicable reflection probe) for the base material layer.
//
// When `found_diffuse_indirect` is true, some other source of diffuse indirect
// light (e.g. an irradiance volume) was already applied, so the diffuse term
// is zeroed here to avoid double-counting.
fn environment_map_light(
    input: ptr<function, LightingInput>,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
    found_diffuse_indirect: bool,
) -> EnvironmentMapLight {
    // Unpack.
    let roughness = (*input).layers[LAYER_BASE].roughness;
    let diffuse_color = (*input).diffuse_color;
    let NdotV = (*input).layers[LAYER_BASE].NdotV;
    let F_ab = (*input).F_ab;
    let F0 = (*input).F0_;
    let world_position = (*input).P;
    var out: EnvironmentMapLight;
    let radiances = compute_radiances(
        (*input).layers[LAYER_BASE],
        clusterable_object_index_ranges,
        world_position,
        found_diffuse_indirect,
    );
    // Early out when the probe contributed nothing. Note that this also skips
    // the clearcoat pass below.
    if (all(radiances.irradiance == vec3(0.0)) && all(radiances.radiance == vec3(0.0))) {
        out.diffuse = vec3(0.0);
        out.specular = vec3(0.0);
        return out;
    }
    // No real world material has specular values under 0.02, so we use this range as a
    // "pre-baked specular occlusion" that extinguishes the fresnel term, for artistic control.
    // See: https://google.github.io/filament/Filament.html#specularocclusion
    let specular_occlusion = saturate(dot(F0, vec3(50.0 * 0.33)));
    // Multiscattering approximation: https://www.jcgt.org/published/0008/01/03/paper.pdf
    // Useful reference: https://bruop.github.io/ibl
    //
    // `FssEss` is the single-scattering specular response; `FmsEms` recovers
    // the energy from multiple scattering events; `kD` is the remaining energy
    // that is attributed to diffuse reflection.
    let Fr = max(vec3(1.0 - roughness), F0) - F0;
    let kS = F0 + Fr * pow(1.0 - NdotV, 5.0);
    let Ess = F_ab.x + F_ab.y;
    let FssEss = kS * Ess * specular_occlusion;
    let Ems = 1.0 - Ess;
    let Favg = F0 + (1.0 - F0) / 21.0;
    let Fms = FssEss * Favg / (1.0 - Ems * Favg);
    let FmsEms = Fms * Ems;
    let Edss = 1.0 - (FssEss + FmsEms);
    let kD = diffuse_color * Edss;
    if (!found_diffuse_indirect) {
        out.diffuse = (FmsEms + kD) * radiances.irradiance;
    } else {
        out.diffuse = vec3(0.0);
    }
    out.specular = FssEss * radiances.radiance;
#ifdef STANDARD_MATERIAL_CLEARCOAT
    environment_map_light_clearcoat(
        &out,
        input,
        clusterable_object_index_ranges,
        found_diffuse_indirect,
    );
#endif  // STANDARD_MATERIAL_CLEARCOAT
    return out;
}
// "Moving Frostbite to Physically Based Rendering 3.0", listing 22
// https://seblagarde.wordpress.com/wp-content/uploads/2015/07/course_notes_moving_frostbite_to_pbr_v32.pdf#page=70
// Biases the specular sample direction from the mirror reflection `R` toward
// the surface normal `N` as roughness grows (the "dominant specular
// direction" correction from the Frostbite course notes referenced above).
fn radiance_sample_direction(N: vec3<f32>, R: vec3<f32>, roughness: f32) -> vec3<f32> {
    let smoothness = saturate(1.0 - roughness);
    return mix(N, R, smoothness * (sqrt(smoothness) + roughness));
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,325 @@
//! Irradiance volumes, also known as voxel global illumination.
//!
//! An *irradiance volume* is a cuboid voxel region consisting of
//! regularly-spaced precomputed samples of diffuse indirect light. They're
//! ideal if you have a dynamic object such as a character that can move about
//! static non-moving geometry such as a level in a game, and you want that
//! dynamic object to be affected by the light bouncing off that static
//! geometry.
//!
//! To use irradiance volumes, you need to precompute, or *bake*, the indirect
//! light in your scene. Bevy doesn't currently come with a way to do this.
//! Fortunately, [Blender] provides a [baking tool] as part of the Eevee
//! renderer, and its irradiance volumes are compatible with those used by Bevy.
//! The [`bevy-baked-gi`] project provides a tool, `export-blender-gi`, that can
//! extract the baked irradiance volumes from the Blender `.blend` file and
//! package them up into a `.ktx2` texture for use by the engine. See the
//! documentation in the `bevy-baked-gi` project for more details on this
//! workflow.
//!
//! Like all light probes in Bevy, irradiance volumes are 1×1×1 cubes, centered
//! on the origin, that can be arbitrarily scaled, rotated, and positioned in a
//! scene with the [`bevy_transform::components::Transform`] component. The 3D
//! voxel grid will be stretched to fill the interior of the cube, with linear
//! interpolation, and the illumination from the irradiance volume will apply to
//! all fragments within that bounding region.
//!
//! Bevy's irradiance volumes are based on Valve's [*ambient cubes*] as used in
//! *Half-Life 2* ([Mitchell 2006, slide 27]). These encode a single color of
//! light from the six 3D cardinal directions and blend the sides together
//! according to the surface normal. For an explanation of why ambient cubes
//! were chosen over spherical harmonics, see [Why ambient cubes?] below.
//!
//! If you wish to use a tool other than `export-blender-gi` to produce the
//! irradiance volumes, you'll need to pack the irradiance volumes in the
//! following format. The irradiance volume of resolution *(Rx, Ry, Rz)* is
//! expected to be a 3D texture of dimensions *(Rx, 2Ry, 3Rz)*. The unnormalized
//! texture coordinate *(s, t, p)* of the voxel at coordinate *(x, y, z)* with
//! side *S* ∈ *{-X, +X, -Y, +Y, -Z, +Z}* is as follows:
//!
//! ```text
//! s = x
//!
//! t = y + ⎰ 0 if S ∈ {-X, -Y, -Z}
//! ⎱ Ry if S ∈ {+X, +Y, +Z}
//!
//! ⎧ 0 if S ∈ {-X, +X}
//! p = z + ⎨ Rz if S ∈ {-Y, +Y}
//! ⎩ 2Rz if S ∈ {-Z, +Z}
//! ```
//!
//! Visually, in a left-handed coordinate system with Y up, viewed from the
//! right, the 3D texture looks like a stacked series of voxel grids, one for
//! each cube side, in this order:
//!
//! | **+X** | **+Y** | **+Z** |
//! | ------ | ------ | ------ |
//! | **-X** | **-Y** | **-Z** |
//!
//! A terminology note: Other engines may refer to irradiance volumes as *voxel
//! global illumination*, *VXGI*, or simply as *light probes*. Sometimes *light
//! probe* refers to what Bevy calls a reflection probe. In Bevy, *light probe*
//! is a generic term that encompasses all cuboid bounding regions that capture
//! indirect illumination, whether based on voxels or not.
//!
//! Note that, if binding arrays aren't supported (e.g. on WebGPU or WebGL 2),
//! then only the closest irradiance volume to the view will be taken into
//! account during rendering. The required `wgpu` features are
//! [`bevy_render::settings::WgpuFeatures::TEXTURE_BINDING_ARRAY`] and
//! [`bevy_render::settings::WgpuFeatures::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING`].
//!
//! ## Why ambient cubes?
//!
//! This section describes the motivation behind the decision to use ambient
//! cubes in Bevy. It's not needed to use the feature; feel free to skip it
//! unless you're interested in its internal design.
//!
//! Bevy uses *Half-Life 2*-style ambient cubes (usually abbreviated as *HL2*)
//! as the representation of irradiance for light probes instead of the
//! more-popular spherical harmonics (*SH*). This might seem to be a surprising
//! choice, but it turns out to work well for the specific case of voxel
//! sampling on the GPU. Spherical harmonics have two problems that make them
//! less ideal for this use case:
//!
//! 1. The level 1 spherical harmonic coefficients can be negative. That
//! prevents the use of the efficient [RGB9E5 texture format], which only
//! encodes unsigned floating point numbers, and forces the use of the
//! less-efficient [RGBA16F format] if hardware interpolation is desired.
//!
//! 2. As an alternative to RGBA16F, level 1 spherical harmonics can be
//! normalized and scaled to the SH0 base color, as [Frostbite] does. This
//! allows them to be packed in standard LDR RGBA8 textures. However, this
//! prevents the use of hardware trilinear filtering, as the nonuniform scale
//! factor means that hardware interpolation no longer produces correct results.
//! The 8 texture fetches needed to interpolate between voxels can be upwards of
//! twice as slow as the hardware interpolation.
//!
//! The following chart summarizes the costs and benefits of ambient cubes,
//! level 1 spherical harmonics, and level 2 spherical harmonics:
//!
//! | Technique | HW-interpolated samples | Texel fetches | Bytes per voxel | Quality |
//! | ------------------------ | ----------------------- | ------------- | --------------- | ------- |
//! | Ambient cubes | 3 | 0 | 24 | Medium |
//! | Level 1 SH, compressed | 0 | 36 | 16 | Low |
//! | Level 1 SH, uncompressed | 4 | 0 | 24 | Low |
//! | Level 2 SH, compressed | 0 | 72 | 28 | High |
//! | Level 2 SH, uncompressed | 9 | 0 | 54 | High |
//!
//! (Note that the number of bytes per voxel can be reduced using various
//! texture compression methods, but the overall ratios remain similar.)
//!
//! From these data, we can see that ambient cubes balance fast lookups (from
//! leveraging hardware interpolation) with relatively-small storage
//! requirements and acceptable quality. Hence, they were chosen for irradiance
//! volumes in Bevy.
//!
//! [*ambient cubes*]: https://advances.realtimerendering.com/s2006/Mitchell-ShadingInValvesSourceEngine.pdf
//!
//! [spherical harmonics]: https://en.wikipedia.org/wiki/Spherical_harmonic_lighting
//!
//! [RGB9E5 texture format]: https://www.khronos.org/opengl/wiki/Small_Float_Formats#RGB9_E5
//!
//! [RGBA16F format]: https://www.khronos.org/opengl/wiki/Small_Float_Formats#Low-bitdepth_floats
//!
//! [Frostbite]: https://media.contentapi.ea.com/content/dam/eacom/frostbite/files/gdc2018-precomputedgiobalilluminationinfrostbite.pdf#page=53
//!
//! [Mitchell 2006, slide 27]: https://advances.realtimerendering.com/s2006/Mitchell-ShadingInValvesSourceEngine.pdf#page=27
//!
//! [Blender]: http://blender.org/
//!
//! [baking tool]: https://docs.blender.org/manual/en/latest/render/eevee/light_probes/volume.html
//!
//! [`bevy-baked-gi`]: https://github.com/pcwalton/bevy-baked-gi
//!
//! [Why ambient cubes?]: #why-ambient-cubes
use bevy_image::Image;
use bevy_light::IrradianceVolume;
use crate::render::{
render_asset::RenderAssets,
render_resource::{
binding_types, BindGroupLayoutEntryBuilder, Sampler, SamplerBindingType, TextureSampleType,
TextureView,
},
renderer::{RenderAdapter, RenderDevice},
texture::{FallbackImage, GpuImage},
};
use core::{num::NonZero, ops::Deref};
use bevy_asset::AssetId;
use crate::render::pbr::{
add_cubemap_texture_view, binding_arrays_are_usable, RenderViewLightProbes,
MAX_VIEW_LIGHT_PROBES,
};
use super::LightProbeComponent;
/// On WebGL and WebGPU, we must disable irradiance volumes, as otherwise we can
/// overflow the number of texture bindings when deferred rendering is in use
/// (see issue #11885).
///
/// This is a compile-time switch: irradiance volumes are enabled on every
/// non-wasm target and disabled on all wasm targets.
pub(crate) const IRRADIANCE_VOLUMES_ARE_USABLE: bool = cfg!(not(target_arch = "wasm32"));
/// All the bind group entries necessary for PBR shaders to access the
/// irradiance volumes exposed to a view.
pub(crate) enum RenderViewIrradianceVolumeBindGroupEntries<'a> {
    /// The version used when binding arrays aren't available on the current platform.
    Single {
        /// The texture view of the closest light probe.
        texture_view: &'a TextureView,
        /// A sampler used to sample voxels of the irradiance volume.
        sampler: &'a Sampler,
    },
    /// The version used when binding arrays are available on the current
    /// platform.
    Multiple {
        /// A texture view of the voxels of each irradiance volume, in the same
        /// order that they are supplied to the view (i.e. in the same order as
        /// `binding_index_to_textures` in [`RenderViewLightProbes`]).
        ///
        /// This is a vector of `wgpu::TextureView`s. But we don't want to import
        /// `wgpu` in this crate, so we refer to it indirectly like this.
        texture_views: Vec<&'a <TextureView as Deref>::Target>,
        /// A sampler used to sample voxels of the irradiance volumes.
        sampler: &'a Sampler,
    },
}
impl<'a> RenderViewIrradianceVolumeBindGroupEntries<'a> {
/// Looks up and returns the bindings for any irradiance volumes visible in
/// the view, as well as the sampler.
pub(crate) fn get(
render_view_irradiance_volumes: Option<&RenderViewLightProbes<IrradianceVolume>>,
images: &'a RenderAssets<GpuImage>,
fallback_image: &'a FallbackImage,
render_device: &RenderDevice,
render_adapter: &RenderAdapter,
) -> RenderViewIrradianceVolumeBindGroupEntries<'a> {
if binding_arrays_are_usable(render_device, render_adapter) {
RenderViewIrradianceVolumeBindGroupEntries::get_multiple(
render_view_irradiance_volumes,
images,
fallback_image,
)
} else {
RenderViewIrradianceVolumeBindGroupEntries::single(
render_view_irradiance_volumes,
images,
fallback_image,
)
}
}
/// Looks up and returns the bindings for any irradiance volumes visible in
/// the view, as well as the sampler. This is the version used when binding
/// arrays are available on the current platform.
fn get_multiple(
render_view_irradiance_volumes: Option<&RenderViewLightProbes<IrradianceVolume>>,
images: &'a RenderAssets<GpuImage>,
fallback_image: &'a FallbackImage,
) -> RenderViewIrradianceVolumeBindGroupEntries<'a> {
let mut texture_views = vec![];
let mut sampler = None;
if let Some(irradiance_volumes) = render_view_irradiance_volumes {
for &cubemap_id in &irradiance_volumes.binding_index_to_textures {
add_cubemap_texture_view(
&mut texture_views,
&mut sampler,
cubemap_id,
images,
fallback_image,
);
}
}
// Pad out the bindings to the size of the binding array using fallback
// textures. This is necessary on D3D12 and Metal.
texture_views.resize(MAX_VIEW_LIGHT_PROBES, &*fallback_image.d3.texture_view);
RenderViewIrradianceVolumeBindGroupEntries::Multiple {
texture_views,
sampler: sampler.unwrap_or(&fallback_image.d3.sampler),
}
}
/// Looks up and returns the bindings for any irradiance volumes visible in
/// the view, as well as the sampler. This is the version used when binding
/// arrays aren't available on the current platform.
fn single(
render_view_irradiance_volumes: Option<&RenderViewLightProbes<IrradianceVolume>>,
images: &'a RenderAssets<GpuImage>,
fallback_image: &'a FallbackImage,
) -> RenderViewIrradianceVolumeBindGroupEntries<'a> {
if let Some(irradiance_volumes) = render_view_irradiance_volumes
&& let Some(irradiance_volume) = irradiance_volumes.render_light_probes.first()
&& irradiance_volume.texture_index >= 0
&& let Some(image_id) = irradiance_volumes
.binding_index_to_textures
.get(irradiance_volume.texture_index as usize)
&& let Some(image) = images.get(*image_id)
{
return RenderViewIrradianceVolumeBindGroupEntries::Single {
texture_view: &image.texture_view,
sampler: &image.sampler,
};
}
RenderViewIrradianceVolumeBindGroupEntries::Single {
texture_view: &fallback_image.d3.texture_view,
sampler: &fallback_image.d3.sampler,
}
}
}
/// Returns the bind group layout entries for the voxel texture and sampler
/// respectively.
///
/// When binding arrays are usable on this platform, the texture entry is
/// declared as an array of `MAX_VIEW_LIGHT_PROBES` 3D textures; otherwise it's
/// a single 3D texture.
pub(crate) fn get_bind_group_layout_entries(
    render_device: &RenderDevice,
    render_adapter: &RenderAdapter,
) -> [BindGroupLayoutEntryBuilder; 2] {
    let base_texture_binding =
        binding_types::texture_3d(TextureSampleType::Float { filterable: true });
    let texture_binding = if binding_arrays_are_usable(render_device, render_adapter) {
        base_texture_binding.count(NonZero::<u32>::new(MAX_VIEW_LIGHT_PROBES as u32).unwrap())
    } else {
        base_texture_binding
    };
    [
        texture_binding,
        binding_types::sampler(SamplerBindingType::Filtering),
    ]
}
impl LightProbeComponent for IrradianceVolume {
    type AssetId = AssetId<Image>;

    // Irradiance volumes can't be attached directly to the view, so there's
    // nothing per-view to store.
    type ViewLightProbeInfo = ();

    /// Reports the voxel texture's asset ID, but only once that texture has
    /// actually been prepared on the GPU.
    fn id(&self, image_assets: &RenderAssets<GpuImage>) -> Option<Self::AssetId> {
        image_assets.get(&self.voxels).map(|_| self.voxels.id())
    }

    fn intensity(&self) -> f32 {
        self.intensity
    }

    fn affects_lightmapped_mesh_diffuse(&self) -> bool {
        self.affects_lightmapped_meshes
    }

    fn create_render_view_light_probes(
        _: Option<&Self>,
        _: &RenderAssets<GpuImage>,
    ) -> RenderViewLightProbes<Self> {
        RenderViewLightProbes::new()
    }
}

View File

@@ -0,0 +1,73 @@
#define_import_path bevy_pbr::irradiance_volume
#import bevy_pbr::light_probe::query_light_probe
#import bevy_pbr::mesh_view_bindings::{
irradiance_volumes,
irradiance_volume,
irradiance_volume_sampler,
light_probes,
};
#import bevy_pbr::clustered_forward::ClusterableObjectIndexRanges
#ifdef IRRADIANCE_VOLUMES_ARE_USABLE
// See:
// https://advances.realtimerendering.com/s2006/Mitchell-ShadingInValvesSourceEngine.pdf
// Slide 28, "Ambient Cube Basis"
//
// Samples the ambient-cube irradiance at `world_position` for surface normal
// `N`, scaled by the probe's intensity. Returns vec3(0.0) when no irradiance
// volume contains the point.
fn irradiance_volume_light(
    world_position: vec3<f32>,
    N: vec3<f32>,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
) -> vec3<f32> {
    // Search for an irradiance volume that contains the fragment.
    let query_result = query_light_probe(
        world_position,
        /*is_irradiance_volume=*/ true,
        clusterable_object_index_ranges,
    );
    // If there was no irradiance volume found, bail out.
    if (query_result.texture_index < 0) {
        return vec3(0.0f);
    }
    // If we're lightmapped, and the irradiance volume contributes no diffuse
    // light, then bail out.
#ifdef LIGHTMAP
    if (!query_result.affects_lightmapped_mesh_diffuse) {
        return vec3(0.0f);
    }
#endif  // LIGHTMAP
#ifdef MULTIPLE_LIGHT_PROBES_IN_ARRAY
    let irradiance_volume_texture = irradiance_volumes[query_result.texture_index];
#else
    let irradiance_volume_texture = irradiance_volume;
#endif
    // The atlas packs a volume of resolution (Rx, Ry, Rz) into a 3D texture of
    // dimensions (Rx, 2Ry, 3Rz): the Y axis is halved into +/- sides and the Z
    // axis is split into thirds, one per cardinal axis.
    let atlas_resolution = vec3<f32>(textureDimensions(irradiance_volume_texture));
    let resolution = vec3<f32>(textureDimensions(irradiance_volume_texture) / vec3(1u, 2u, 3u));
    // Make sure to clamp to the edges to avoid texture bleed.
    var unit_pos = (query_result.light_from_world * vec4(world_position, 1.0f)).xyz;
    let stp = clamp((unit_pos + 0.5) * resolution, vec3(0.5f), resolution - vec3(0.5f));
    let uvw = stp / atlas_resolution;
    // The bottom half of each cube slice is the negative part, so choose it if applicable on each
    // slice.
    let neg_offset = select(vec3(0.0f), vec3(0.5f), N < vec3(0.0f));
    // One fetch per axis: the X, Y, and Z slices sit at thirds along the
    // atlas's third dimension.
    let uvw_x = uvw + vec3(0.0f, neg_offset.x, 0.0f);
    let uvw_y = uvw + vec3(0.0f, neg_offset.y, 1.0f / 3.0f);
    let uvw_z = uvw + vec3(0.0f, neg_offset.z, 2.0f / 3.0f);
    let rgb_x = textureSampleLevel(irradiance_volume_texture, irradiance_volume_sampler, uvw_x, 0.0).rgb;
    let rgb_y = textureSampleLevel(irradiance_volume_texture, irradiance_volume_sampler, uvw_y, 0.0).rgb;
    let rgb_z = textureSampleLevel(irradiance_volume_texture, irradiance_volume_sampler, uvw_z, 0.0).rgb;
    // Use Valve's formula to sample: blend the three axis samples by the
    // squared components of the normal.
    let NN = N * N;
    return (rgb_x * NN.x + rgb_y * NN.y + rgb_z * NN.z) * query_result.intensity;
}
#endif  // IRRADIANCE_VOLUMES_ARE_USABLE

View File

@@ -0,0 +1,154 @@
#define_import_path bevy_pbr::light_probe
#import bevy_pbr::clustered_forward
#import bevy_pbr::clustered_forward::ClusterableObjectIndexRanges
#import bevy_pbr::mesh_view_bindings::light_probes
#import bevy_pbr::mesh_view_types::LightProbe
// The result of searching for a light probe.
struct LightProbeQueryResult {
    // The index of the light probe texture or textures in the binding array or
    // arrays, or -1 if no light probe contained the query point (in which case
    // the other fields are unset and must not be read).
    texture_index: i32,
    // A scale factor that's applied to the diffuse and specular light from the
    // light probe. This is in units of cd/m² (candela per square meter).
    intensity: f32,
    // Transform from world space to the light probe model space. In light probe
    // model space, the light probe is a 1×1×1 cube centered on the origin.
    light_from_world: mat4x4<f32>,
    // Whether this light probe contributes diffuse light to lightmapped meshes.
    affects_lightmapped_mesh_diffuse: bool,
};
// Expands a packed affine transform (three rows of a transposed 4x4 matrix)
// back into a full mat4x4: append the implicit (0, 0, 0, 1) row, then
// transpose to recover the original matrix.
fn transpose_affine_matrix(matrix: mat3x4<f32>) -> mat4x4<f32> {
    let padded = mat4x4<f32>(
        matrix[0],
        matrix[1],
        matrix[2],
        vec4<f32>(0.0, 0.0, 0.0, 1.0));
    return transpose(padded);
}
#if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3
// Searches for a light probe that contains the fragment.
//
// This is the version that's used when storage buffers are available and
// light probes are clustered.
//
// Returns a result whose `texture_index` is -1 when no probe of the requested
// kind contains `world_position`; otherwise the first matching probe wins.
//
// TODO: Interpolate between multiple light probes.
fn query_light_probe(
    world_position: vec3<f32>,
    is_irradiance_volume: bool,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
) -> LightProbeQueryResult {
    var result: LightProbeQueryResult;
    result.texture_index = -1;

    // Reflection probe indices are followed by irradiance volume indices in the
    // cluster index list. Use this fact to create our bracketing range of
    // indices.
    var start_offset: u32;
    var end_offset: u32;
    if is_irradiance_volume {
        start_offset = (*clusterable_object_index_ranges).first_irradiance_volume_index_offset;
        end_offset = (*clusterable_object_index_ranges).first_decal_offset;
    } else {
        start_offset = (*clusterable_object_index_ranges).first_reflection_probe_index_offset;
        end_offset = (*clusterable_object_index_ranges).first_irradiance_volume_index_offset;
    }

    // The `result.texture_index < 0` clause makes the loop stop after a hit.
    for (var light_probe_index_offset: u32 = start_offset;
            light_probe_index_offset < end_offset && result.texture_index < 0;
            light_probe_index_offset += 1u) {
        let light_probe_index = i32(clustered_forward::get_clusterable_object_id(
            light_probe_index_offset));

        var light_probe: LightProbe;
        if is_irradiance_volume {
            light_probe = light_probes.irradiance_volumes[light_probe_index];
        } else {
            light_probe = light_probes.reflection_probes[light_probe_index];
        }

        // Unpack the inverse transform.
        let light_from_world =
            transpose_affine_matrix(light_probe.light_from_world_transposed);

        // Check to see if the transformed point is inside the unit cube
        // centered at the origin.
        let probe_space_pos = (light_from_world * vec4<f32>(world_position, 1.0f)).xyz;
        if (all(abs(probe_space_pos) <= vec3(0.5f))) {
            result.texture_index = light_probe.cubemap_index;
            result.intensity = light_probe.intensity;
            result.light_from_world = light_from_world;
            result.affects_lightmapped_mesh_diffuse =
                light_probe.affects_lightmapped_mesh_diffuse != 0u;
            break;
        }
    }

    return result;
}
#else   // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3
// Searches for a light probe that contains the fragment.
//
// This is the version that's used when storage buffers aren't available and
// light probes aren't clustered. It simply does a brute force search of all
// light probes. Because platforms without sufficient SSBO bindings typically
// lack bindless shaders, there will usually only be one of each type of light
// probe present anyway.
//
// Returns a result whose `texture_index` is -1 when no probe of the requested
// kind contains `world_position`; otherwise the first matching probe wins.
fn query_light_probe(
    world_position: vec3<f32>,
    is_irradiance_volume: bool,
    clusterable_object_index_ranges: ptr<function, ClusterableObjectIndexRanges>,
) -> LightProbeQueryResult {
    var result: LightProbeQueryResult;
    result.texture_index = -1;

    var light_probe_count: i32;
    if is_irradiance_volume {
        light_probe_count = light_probes.irradiance_volume_count;
    } else {
        light_probe_count = light_probes.reflection_probe_count;
    }

    // Note: the `result.texture_index < 0` clause is what terminates the loop
    // after a hit, standing in for the `break` that's commented out below.
    for (var light_probe_index: i32 = 0;
            light_probe_index < light_probe_count && result.texture_index < 0;
            light_probe_index += 1) {
        var light_probe: LightProbe;
        if is_irradiance_volume {
            light_probe = light_probes.irradiance_volumes[light_probe_index];
        } else {
            light_probe = light_probes.reflection_probes[light_probe_index];
        }

        // Unpack the inverse transform.
        let light_from_world =
            transpose_affine_matrix(light_probe.light_from_world_transposed);

        // Check to see if the transformed point is inside the unit cube
        // centered at the origin.
        let probe_space_pos = (light_from_world * vec4<f32>(world_position, 1.0f)).xyz;
        if (all(abs(probe_space_pos) <= vec3(0.5f))) {
            result.texture_index = light_probe.cubemap_index;
            result.intensity = light_probe.intensity;
            result.light_from_world = light_from_world;
            result.affects_lightmapped_mesh_diffuse =
                light_probe.affects_lightmapped_mesh_diffuse != 0u;

            // TODO: Workaround for ICE in DXC https://github.com/microsoft/DirectXShaderCompiler/issues/6183
            // We can't use `break` here because of the ICE.
            // So instead we rely on the fact that we set `result.texture_index`
            // above and check its value in the `for` loop header before
            // looping.
            // break;
        }
    }

    return result;
}
#endif  // AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3

View File

@@ -0,0 +1,731 @@
//! Light probes for baked global illumination.
use bevy_app::{App, Plugin};
use bevy_asset::AssetId;
use bevy_camera::{
primitives::{Aabb, Frustum},
Camera3d,
};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
component::Component,
entity::Entity,
query::With,
resource::Resource,
schedule::IntoScheduleConfigs,
system::{Commands, Local, Query, Res, ResMut},
};
use bevy_image::Image;
use bevy_light::{EnvironmentMapLight, IrradianceVolume, LightProbe};
use bevy_math::{Affine3A, FloatOrd, Mat4, Vec3A, Vec4};
use bevy_platform::collections::HashMap;
use crate::render::{
extract_instances::ExtractInstancesPlugin,
render_asset::RenderAssets,
render_resource::{DynamicUniformBuffer, Sampler, ShaderType, TextureView},
renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
settings::WgpuFeatures,
sync_world::RenderEntity,
texture::{FallbackImage, GpuImage},
view::ExtractedView,
Extract, ExtractSchedule, Render, RenderApp, RenderSystems,
};
use bevy_shader::load_shader_library;
use bevy_transform::{components::Transform, prelude::GlobalTransform};
use tracing::error;
use core::{hash::Hash, ops::Deref};
use crate::render::pbr::{
generate::EnvironmentMapGenerationPlugin, light_probe::environment_map::EnvironmentMapIds,
};
pub mod environment_map;
pub mod generate;
pub mod irradiance_volume;
/// The maximum number of each type of light probe that each view will consider.
///
/// Because the fragment shader does a linear search through the list for each
/// fragment, this number needs to be relatively small.
///
/// This is also the fixed length of the probe arrays in [`LightProbesUniform`]
/// and the size the texture binding arrays are padded to.
pub const MAX_VIEW_LIGHT_PROBES: usize = 8;

/// How many texture bindings are used in the fragment shader, *not* counting
/// environment maps or irradiance volumes.
const STANDARD_MATERIAL_FRAGMENT_SHADER_MIN_TEXTURE_BINDINGS: usize = 16;
/// Adds support for light probes: cuboid bounding regions that apply global
/// illumination to objects within them.
///
/// This also adds support for view environment maps: diffuse and specular
/// cubemaps applied to all objects that a view renders.
///
/// See [`LightProbeComponent`] for the abstraction shared by the light probe
/// types this plugin supports.
pub struct LightProbePlugin;
/// A GPU type that stores information about a light probe.
#[derive(Clone, Copy, ShaderType, Default)]
struct RenderLightProbe {
    /// The transform from the world space to the model space. This is used to
    /// efficiently check for bounding box intersection.
    ///
    /// Packed as the transpose of the affine matrix (three `Vec4` rows instead
    /// of four); the shader re-expands it with `transpose_affine_matrix`.
    light_from_world_transposed: [Vec4; 3],
    /// The index of the texture or textures in the appropriate binding array or
    /// arrays.
    ///
    /// For example, for reflection probes this is the index of the cubemap in
    /// the diffuse and specular texture arrays.
    texture_index: i32,
    /// Scale factor applied to the light generated by this light probe.
    ///
    /// See the comment in [`EnvironmentMapLight`] for details.
    intensity: f32,
    /// Whether this light probe adds to the diffuse contribution of the
    /// irradiance for meshes with lightmaps.
    ///
    /// Encoded as a `u32` (0 or nonzero) because the shader compares it
    /// against `0u`.
    affects_lightmapped_mesh_diffuse: u32,
}
/// A per-view shader uniform that specifies all the light probes that the view
/// takes into account.
///
/// The probe arrays are fixed-size; only the first `reflection_probe_count`
/// and `irradiance_volume_count` entries respectively are valid.
#[derive(ShaderType)]
pub struct LightProbesUniform {
    /// The list of applicable reflection probes, sorted from nearest to the
    /// camera to the farthest away from the camera.
    reflection_probes: [RenderLightProbe; MAX_VIEW_LIGHT_PROBES],

    /// The list of applicable irradiance volumes, sorted from nearest to the
    /// camera to the farthest away from the camera.
    irradiance_volumes: [RenderLightProbe; MAX_VIEW_LIGHT_PROBES],

    /// The number of reflection probes in the list.
    reflection_probe_count: i32,

    /// The number of irradiance volumes in the list.
    irradiance_volume_count: i32,

    /// The index of the diffuse and specular environment maps associated with
    /// the view itself. This is used as a fallback if no reflection probe in
    /// the list contains the fragment. Negative when the view has no
    /// environment map (the shader early-outs on `view_cubemap_index < 0`).
    view_cubemap_index: i32,

    /// The smallest valid mipmap level for the specular environment cubemap
    /// associated with the view.
    smallest_specular_mip_level_for_view: u32,

    /// The intensity of the environment cubemap associated with the view.
    ///
    /// See the comment in [`EnvironmentMapLight`] for details.
    intensity_for_view: f32,

    /// Whether the environment map attached to the view affects the diffuse
    /// lighting for lightmapped meshes.
    ///
    /// This will be 1 if the map does affect lightmapped meshes or 0 otherwise.
    view_environment_map_affects_lightmapped_mesh_diffuse: u32,
}
/// A GPU buffer that stores information about all light probes.
///
/// A dynamic uniform buffer holding one [`LightProbesUniform`] per view.
#[derive(Resource, Default, Deref, DerefMut)]
pub struct LightProbesBuffer(DynamicUniformBuffer<LightProbesUniform>);

/// A component attached to each camera in the render world that stores the
/// index of the [`LightProbesUniform`] in the [`LightProbesBuffer`].
#[derive(Component, Default, Deref, DerefMut)]
pub struct ViewLightProbesUniformOffset(u32);
/// Information that [`gather_light_probes`] keeps about each light probe.
///
/// This information is parameterized by the [`LightProbeComponent`] type. This
/// will either be [`EnvironmentMapLight`] for reflection probes or
/// [`IrradianceVolume`] for irradiance volumes.
struct LightProbeInfo<C>
where
    C: LightProbeComponent,
{
    // The transform from world space to light probe space.
    // Stored as the transpose of the inverse transform to compress the structure
    // on the GPU (from 4 `Vec4`s to 3 `Vec4`s). The shader will transpose it
    // to recover the original inverse transform.
    light_from_world: [Vec4; 3],

    // The transform from light probe space to world space.
    world_from_light: Affine3A,

    // Scale factor applied to the diffuse and specular light generated by this
    // light probe (either kind, despite the name below).
    //
    // See the comment in [`EnvironmentMapLight`] for details.
    intensity: f32,

    // Whether this light probe adds to the diffuse contribution of the
    // irradiance for meshes with lightmaps.
    affects_lightmapped_mesh_diffuse: bool,

    // The IDs of all assets associated with this light probe.
    //
    // Because each type of light probe component may reference different types
    // of assets (e.g. a reflection probe references two cubemap assets while an
    // irradiance volume references a single 3D texture asset), this is generic.
    asset_id: C::AssetId,
}
/// A component, part of the render world, that stores the mapping from asset ID
/// or IDs to the texture index in the appropriate binding arrays.
///
/// Cubemap textures belonging to environment maps are collected into binding
/// arrays, and the index of each texture is presented to the shader for runtime
/// lookup. 3D textures belonging to irradiance volumes are likewise collected
/// into binding arrays, and the shader accesses the 3D texture by index.
///
/// This component is attached to each view in the render world, because each
/// view may have a different set of light probes that it considers and therefore
/// the texture indices are per-view.
#[derive(Component, Default)]
pub struct RenderViewLightProbes<C>
where
    C: LightProbeComponent,
{
    /// The list of environment maps presented to the shader, in order.
    binding_index_to_textures: Vec<C::AssetId>,

    /// The reverse of `binding_index_to_textures`: a map from the texture ID to
    /// the index in `binding_index_to_textures`.
    ///
    /// NOTE(review): the name says "cubemap" but this maps whatever texture
    /// type `C` uses (3D textures for irradiance volumes).
    cubemap_to_binding_index: HashMap<C::AssetId, u32>,

    /// Information about each light probe, ready for upload to the GPU, sorted
    /// in order from closest to the camera to farthest.
    ///
    /// Note that this is not necessarily ordered by binding index. So don't
    /// write code like
    /// `render_light_probes[cubemap_to_binding_index[asset_id]]`; instead
    /// search for the light probe with the appropriate binding index in this
    /// array.
    render_light_probes: Vec<RenderLightProbe>,

    /// Information needed to render the light probe attached directly to the
    /// view, if applicable.
    ///
    /// A light probe attached directly to a view represents a "global" light
    /// probe that affects all objects not in the bounding region of any light
    /// probe. Currently, the only light probe type that supports this is the
    /// [`EnvironmentMapLight`].
    view_light_probe_info: C::ViewLightProbeInfo,
}
/// A trait implemented by all components that represent light probes.
///
/// Currently, the two light probe types are [`EnvironmentMapLight`] and
/// [`IrradianceVolume`], for reflection probes and irradiance volumes
/// respectively.
///
/// Most light probe systems are written to be generic over the type of light
/// probe. This allows much of the code to be shared and enables easy addition
/// of more light probe types (e.g. real-time reflection planes) in the future.
pub trait LightProbeComponent: Send + Sync + Component + Sized {
    /// Holds [`AssetId`]s of the texture or textures that this light probe
    /// references.
    ///
    /// This can just be [`AssetId`] if the light probe only references one
    /// texture. If it references multiple textures, it will be a structure
    /// containing those asset IDs.
    type AssetId: Send + Sync + Clone + Eq + Hash;
    /// If the light probe can be attached to the view itself (as opposed to a
    /// cuboid region within the scene), this contains the information that will
    /// be passed to the GPU in order to render it. Otherwise, this will be
    /// `()`.
    ///
    /// Currently, only reflection probes (i.e. [`EnvironmentMapLight`]) can be
    /// attached directly to views.
    type ViewLightProbeInfo: Send + Sync + Default;
    /// Returns the asset ID or asset IDs of the texture or textures referenced
    /// by this light probe.
    ///
    /// If this returns `None`, the probe is skipped for the frame (see
    /// `gather_light_probes`, which filters such probes out).
    fn id(&self, image_assets: &RenderAssets<GpuImage>) -> Option<Self::AssetId>;
    /// Returns the intensity of this light probe.
    ///
    /// This is a scaling factor that will be multiplied by the value or values
    /// sampled from the texture.
    fn intensity(&self) -> f32;
    /// Returns true if this light probe contributes diffuse lighting to meshes
    /// with lightmaps or false otherwise.
    fn affects_lightmapped_mesh_diffuse(&self) -> bool;
    /// Creates an instance of [`RenderViewLightProbes`] containing all the
    /// information needed to render this light probe.
    ///
    /// This is called for every light probe in view every frame.
    fn create_render_view_light_probes(
        view_component: Option<&Self>,
        image_assets: &RenderAssets<GpuImage>,
    ) -> RenderViewLightProbes<Self>;
}
/// The uniform struct extracted from [`EnvironmentMapLight`].
/// Will be available for use in the Environment Map shader.
#[derive(Component, ShaderType, Clone)]
pub struct EnvironmentMapUniform {
    /// The world space transformation matrix of the sample ray for environment cubemaps.
    ///
    /// Set to the inverse of the environment map light's rotation, or identity
    /// when the view has no [`EnvironmentMapLight`] (see
    /// `gather_environment_map_uniform`).
    transform: Mat4,
}
impl Default for EnvironmentMapUniform {
    /// By default the sample ray is untransformed (identity matrix).
    fn default() -> Self {
        Self {
            transform: Mat4::IDENTITY,
        }
    }
}
/// A GPU buffer that stores the environment map settings for each view.
///
/// Filled by [`prepare_environment_uniform_buffer`] each frame.
#[derive(Resource, Default, Deref, DerefMut)]
pub struct EnvironmentMapUniformBuffer(pub DynamicUniformBuffer<EnvironmentMapUniform>);
/// A component that stores the offset within the
/// [`EnvironmentMapUniformBuffer`] for each view.
///
/// Views without an [`EnvironmentMapUniform`] are assigned offset 0.
#[derive(Component, Default, Deref, DerefMut)]
pub struct ViewEnvironmentMapUniformOffset(u32);
impl Plugin for LightProbePlugin {
    fn build(&self, app: &mut App) {
        // Make the light probe WGSL sources available as importable shader
        // libraries.
        load_shader_library!(app, "light_probe.wgsl");
        load_shader_library!(app, "environment_map.wgsl");
        load_shader_library!(app, "irradiance_volume.wgsl");
        app.add_plugins((
            EnvironmentMapGenerationPlugin,
            ExtractInstancesPlugin::<EnvironmentMapIds>::new(),
        ));
        // Nothing further to set up if the render sub-app doesn't exist.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            .init_resource::<LightProbesBuffer>()
            .init_resource::<EnvironmentMapUniformBuffer>()
            .add_systems(ExtractSchedule, gather_environment_map_uniform)
            // One gathering pass per light probe type.
            .add_systems(ExtractSchedule, gather_light_probes::<EnvironmentMapLight>)
            .add_systems(ExtractSchedule, gather_light_probes::<IrradianceVolume>)
            .add_systems(
                Render,
                (upload_light_probes, prepare_environment_uniform_buffer)
                    .in_set(RenderSystems::PrepareResources),
            );
    }
}
/// Extracts [`EnvironmentMapLight`] from views and creates [`EnvironmentMapUniform`] for them.
///
/// Compared to the `ExtractComponentPlugin`, this implementation will create a default instance
/// if one does not already exist.
fn gather_environment_map_uniform(
    view_query: Extract<Query<(RenderEntity, Option<&EnvironmentMapLight>), With<Camera3d>>>,
    mut commands: Commands,
) {
    for (view_entity, environment_map_light) in view_query.iter() {
        // Invert the light's rotation so the shader can rotate sample rays
        // into the cubemap's frame; views without the component get identity.
        let environment_map_uniform = match environment_map_light {
            Some(light) => EnvironmentMapUniform {
                transform: Transform::from_rotation(light.rotation)
                    .to_matrix()
                    .inverse(),
            },
            None => EnvironmentMapUniform::default(),
        };
        commands
            .get_entity(view_entity)
            .expect("Environment map light entity wasn't synced.")
            .insert(environment_map_uniform);
    }
}
/// Gathers up all light probes of a single type in the scene and assigns them
/// to views, performing frustum culling and distance sorting in the process.
///
/// Runs once per light probe type `C` during extraction. The two `Local`
/// vectors are scratch buffers that persist across frames to avoid
/// reallocation.
fn gather_light_probes<C>(
    image_assets: Res<RenderAssets<GpuImage>>,
    light_probe_query: Extract<Query<(&GlobalTransform, &C), With<LightProbe>>>,
    view_query: Extract<
        Query<(RenderEntity, &GlobalTransform, &Frustum, Option<&C>), With<Camera3d>>,
    >,
    mut reflection_probes: Local<Vec<LightProbeInfo<C>>>,
    mut view_reflection_probes: Local<Vec<LightProbeInfo<C>>>,
    mut commands: Commands,
) where
    C: LightProbeComponent,
{
    // Create [`LightProbeInfo`] for every light probe in the scene.
    // Probes whose assets aren't available yet are dropped here, because
    // `LightProbeInfo::new` returns `None` for them.
    reflection_probes.clear();
    reflection_probes.extend(
        light_probe_query
            .iter()
            .filter_map(|query_row| LightProbeInfo::new(query_row, &image_assets)),
    );
    // Build up the light probes uniform and the key table.
    for (view_entity, view_transform, view_frustum, view_component) in view_query.iter() {
        // Cull light probes outside the view frustum.
        view_reflection_probes.clear();
        view_reflection_probes.extend(
            reflection_probes
                .iter()
                .filter(|light_probe_info| light_probe_info.frustum_cull(view_frustum))
                .cloned(),
        );
        // Sort by distance to camera.
        view_reflection_probes.sort_by_cached_key(|light_probe_info| {
            light_probe_info.camera_distance_sort_key(view_transform)
        });
        // Create the light probes list.
        let mut render_view_light_probes =
            C::create_render_view_light_probes(view_component, &image_assets);
        // Gather up the light probes in the list.
        render_view_light_probes.maybe_gather_light_probes(&view_reflection_probes);
        // Record the per-view light probes. An empty list removes any stale
        // component left over from a previous frame instead of inserting an
        // empty one.
        if render_view_light_probes.is_empty() {
            commands
                .get_entity(view_entity)
                .expect("View entity wasn't synced.")
                .remove::<RenderViewLightProbes<C>>();
        } else {
            commands
                .get_entity(view_entity)
                .expect("View entity wasn't synced.")
                .insert(render_view_light_probes);
        }
    }
}
/// Gathers up environment map settings for each applicable view and
/// writes them into a GPU buffer.
pub fn prepare_environment_uniform_buffer(
    mut commands: Commands,
    views: Query<(Entity, Option<&EnvironmentMapUniform>), With<ExtractedView>>,
    mut environment_uniform_buffer: ResMut<EnvironmentMapUniformBuffer>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
) {
    let view_count = views.iter().len();
    let Some(mut writer) =
        environment_uniform_buffer.get_writer(view_count, &render_device, &render_queue)
    else {
        return;
    };
    for (view, environment_uniform) in views.iter() {
        // Views without an environment map uniform read the buffer at offset 0.
        let uniform_offset = environment_uniform
            .map(|uniform| writer.write(uniform))
            .unwrap_or(0);
        commands
            .entity(view)
            .insert(ViewEnvironmentMapUniformOffset(uniform_offset));
    }
}
// A system that runs after [`gather_light_probes`] and populates the GPU
// uniforms with the results.
//
// Note that, unlike [`gather_light_probes`], this system is not generic over
// the type of light probe. It collects light probes of all types together into
// a single structure, ready to be passed to the shader.
fn upload_light_probes(
    mut commands: Commands,
    views: Query<Entity, With<ExtractedView>>,
    mut light_probes_buffer: ResMut<LightProbesBuffer>,
    mut view_light_probes_query: Query<(
        Option<&RenderViewLightProbes<EnvironmentMapLight>>,
        Option<&RenderViewLightProbes<IrradianceVolume>>,
    )>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
) {
    // If there are no views, bail.
    if views.is_empty() {
        return;
    }
    // Initialize the uniform buffer writer.
    // NOTE(review): `get_writer` is unwrapped here; presumably it only fails
    // when there are zero views, which was ruled out above — confirm.
    let mut writer = light_probes_buffer
        .get_writer(views.iter().len(), &render_device, &render_queue)
        .unwrap();
    // Process each view.
    for view_entity in views.iter() {
        let Ok((render_view_environment_maps, render_view_irradiance_volumes)) =
            view_light_probes_query.get_mut(view_entity)
        else {
            error!("Failed to find `RenderViewLightProbes` for the view!");
            continue;
        };
        // Initialize the uniform with only the view environment map, if there
        // is one. Probe counts are clamped to `MAX_VIEW_LIGHT_PROBES`, and
        // `view_cubemap_index == -1` means "no view cubemap".
        let mut light_probes_uniform = LightProbesUniform {
            reflection_probes: [RenderLightProbe::default(); MAX_VIEW_LIGHT_PROBES],
            irradiance_volumes: [RenderLightProbe::default(); MAX_VIEW_LIGHT_PROBES],
            reflection_probe_count: render_view_environment_maps
                .map(RenderViewLightProbes::len)
                .unwrap_or_default()
                .min(MAX_VIEW_LIGHT_PROBES) as i32,
            irradiance_volume_count: render_view_irradiance_volumes
                .map(RenderViewLightProbes::len)
                .unwrap_or_default()
                .min(MAX_VIEW_LIGHT_PROBES) as i32,
            view_cubemap_index: render_view_environment_maps
                .map(|maps| maps.view_light_probe_info.cubemap_index)
                .unwrap_or(-1),
            smallest_specular_mip_level_for_view: render_view_environment_maps
                .map(|maps| maps.view_light_probe_info.smallest_specular_mip_level)
                .unwrap_or(0),
            intensity_for_view: render_view_environment_maps
                .map(|maps| maps.view_light_probe_info.intensity)
                .unwrap_or(1.0),
            view_environment_map_affects_lightmapped_mesh_diffuse: render_view_environment_maps
                .map(|maps| maps.view_light_probe_info.affects_lightmapped_mesh_diffuse as u32)
                .unwrap_or(1),
        };
        // Add any environment maps that [`gather_light_probes`] found to the
        // uniform.
        if let Some(render_view_environment_maps) = render_view_environment_maps {
            render_view_environment_maps.add_to_uniform(
                &mut light_probes_uniform.reflection_probes,
                &mut light_probes_uniform.reflection_probe_count,
            );
        }
        // Add any irradiance volumes that [`gather_light_probes`] found to the
        // uniform.
        if let Some(render_view_irradiance_volumes) = render_view_irradiance_volumes {
            render_view_irradiance_volumes.add_to_uniform(
                &mut light_probes_uniform.irradiance_volumes,
                &mut light_probes_uniform.irradiance_volume_count,
            );
        }
        // Queue the view's uniforms to be written to the GPU.
        let uniform_offset = writer.write(&light_probes_uniform);
        commands
            .entity(view_entity)
            .insert(ViewLightProbesUniformOffset(uniform_offset));
    }
}
impl Default for LightProbesUniform {
fn default() -> Self {
Self {
reflection_probes: [RenderLightProbe::default(); MAX_VIEW_LIGHT_PROBES],
irradiance_volumes: [RenderLightProbe::default(); MAX_VIEW_LIGHT_PROBES],
reflection_probe_count: 0,
irradiance_volume_count: 0,
view_cubemap_index: -1,
smallest_specular_mip_level_for_view: 0,
intensity_for_view: 1.0,
view_environment_map_affects_lightmapped_mesh_diffuse: 1,
}
}
}
impl<C> LightProbeInfo<C>
where
    C: LightProbeComponent,
{
    /// Builds the per-frame record for a single light probe in the scene, or
    /// returns `None` if the probe's asset isn't available yet.
    fn new(
        (light_probe_transform, environment_map): (&GlobalTransform, &C),
        image_assets: &RenderAssets<GpuImage>,
    ) -> Option<LightProbeInfo<C>> {
        let world_from_light = light_probe_transform.affine();
        // Compress the inverse transform down to three rows by transposing it;
        // the shader transposes again to recover the original matrix.
        let rows = Mat4::from(world_from_light.inverse()).transpose();
        let id = environment_map.id(image_assets)?;
        Some(LightProbeInfo {
            world_from_light,
            light_from_world: [rows.x_axis, rows.y_axis, rows.z_axis],
            asset_id: id,
            intensity: environment_map.intensity(),
            affects_lightmapped_mesh_diffuse: environment_map.affects_lightmapped_mesh_diffuse(),
        })
    }

    /// Returns true if this light probe is in the viewing frustum of the
    /// camera or false if it isn't.
    fn frustum_cull(&self, view_frustum: &Frustum) -> bool {
        // A light probe's bounding region is the unit cube centered on its
        // origin, transformed by `world_from_light`.
        let unit_cube = Aabb {
            center: Vec3A::default(),
            half_extents: Vec3A::splat(0.5),
        };
        view_frustum.intersects_obb(&unit_cube, &self.world_from_light, true, false)
    }

    /// Returns the squared distance from this light probe to the camera,
    /// suitable for distance sorting.
    fn camera_distance_sort_key(&self, view_transform: &GlobalTransform) -> FloatOrd {
        let offset = self.world_from_light.translation - view_transform.translation_vec3a();
        FloatOrd(offset.length_squared())
    }
}
impl<C> RenderViewLightProbes<C>
where
    C: LightProbeComponent,
{
    /// Creates a new empty list of light probes.
    fn new() -> RenderViewLightProbes<C> {
        RenderViewLightProbes {
            binding_index_to_textures: Vec::new(),
            cubemap_to_binding_index: HashMap::default(),
            render_light_probes: Vec::new(),
            view_light_probe_info: C::ViewLightProbeInfo::default(),
        }
    }

    /// Returns true if there are no light probes in the list.
    pub(crate) fn is_empty(&self) -> bool {
        self.binding_index_to_textures.is_empty()
    }

    /// Returns the number of light probes in the list.
    pub(crate) fn len(&self) -> usize {
        self.binding_index_to_textures.len()
    }

    /// Adds a cubemap to the list of bindings, if it wasn't there already, and
    /// returns its index within that list.
    pub(crate) fn get_or_insert_cubemap(&mut self, cubemap_id: &C::AssetId) -> u32 {
        // Disjoint field borrow: the closure appends to the texture list while
        // the map entry is held.
        let textures = &mut self.binding_index_to_textures;
        *self
            .cubemap_to_binding_index
            .entry((*cubemap_id).clone())
            .or_insert_with(|| {
                let new_index = textures.len() as u32;
                textures.push((*cubemap_id).clone());
                new_index
            })
    }

    /// Adds all the light probes in this structure to the supplied array,
    /// which is expected to be shipped to the GPU.
    fn add_to_uniform(
        &self,
        render_light_probes: &mut [RenderLightProbe; MAX_VIEW_LIGHT_PROBES],
        render_light_probe_count: &mut i32,
    ) {
        let count = self.render_light_probes.len();
        render_light_probes[..count].copy_from_slice(&self.render_light_probes);
        *render_light_probe_count = count as i32;
    }

    /// Gathers up all light probes of the given type in the scene and records
    /// them in this structure.
    fn maybe_gather_light_probes(&mut self, light_probes: &[LightProbeInfo<C>]) {
        // Only the closest `MAX_VIEW_LIGHT_PROBES` probes fit in the uniform.
        for probe in light_probes.iter().take(MAX_VIEW_LIGHT_PROBES) {
            let texture_index = self.get_or_insert_cubemap(&probe.asset_id) as i32;
            self.render_light_probes.push(RenderLightProbe {
                light_from_world_transposed: probe.light_from_world,
                texture_index,
                intensity: probe.intensity,
                affects_lightmapped_mesh_diffuse: probe.affects_lightmapped_mesh_diffuse as u32,
            });
        }
    }
}
impl<C> Clone for LightProbeInfo<C>
where
C: LightProbeComponent,
{
fn clone(&self) -> Self {
Self {
light_from_world: self.light_from_world,
world_from_light: self.world_from_light,
intensity: self.intensity,
affects_lightmapped_mesh_diffuse: self.affects_lightmapped_mesh_diffuse,
asset_id: self.asset_id.clone(),
}
}
}
/// Adds a diffuse or specular texture view to the `texture_views` list, and
/// populates `sampler` if this is the first such view.
pub(crate) fn add_cubemap_texture_view<'a>(
    texture_views: &mut Vec<&'a <TextureView as Deref>::Target>,
    sampler: &mut Option<&'a Sampler>,
    image_id: AssetId<Image>,
    images: &'a RenderAssets<GpuImage>,
    fallback_image: &'a FallbackImage,
) {
    if let Some(image) = images.get(image_id) {
        // The first successfully-resolved image supplies the shared sampler.
        sampler.get_or_insert(&image.sampler);
        texture_views.push(&*image.texture_view);
    } else {
        // The cubemap isn't loaded yet, so substitute the fallback cube.
        texture_views.push(&*fallback_image.cube.texture_view);
    }
}
/// Many things can go wrong when attempting to use texture binding arrays
/// (a.k.a. bindless textures). This function checks for these pitfalls:
///
/// 1. If GLSL support is enabled at the feature level, then in debug mode
/// `naga_oil` will attempt to compile all shader modules under GLSL to check
/// validity of names, even if GLSL isn't actually used. This will cause a crash
/// if binding arrays are enabled, because binding arrays are currently
/// unimplemented in the GLSL backend of Naga. Therefore, we disable binding
/// arrays if the `shader_format_glsl` feature is present.
///
/// 2. If there aren't enough texture bindings available to accommodate all the
/// binding arrays, the driver will panic. So we also bail out if there aren't
/// enough texture bindings available in the fragment shader.
///
/// 3. If binding arrays aren't supported on the hardware, then we obviously
/// can't use them. Adreno <= 610 claims to support bindless, but seems to be
/// too buggy to be usable.
///
/// 4. If binding arrays are supported on the hardware, but they can only be
/// accessed by uniform indices, that's not good enough, and we bail out.
///
/// If binding arrays aren't usable, we disable reflection probes and limit the
/// number of irradiance volumes in the scene to 1.
pub(crate) fn binding_arrays_are_usable(
    render_device: &RenderDevice,
    render_adapter: &RenderAdapter,
) -> bool {
    let adapter_info = RenderAdapterInfo(WgpuWrapper::new(render_adapter.get_info()));
    // All four conditions above must hold for bindless to be enabled.
    // NOTE(review): the limit consulted is `max_storage_textures_per_shader_stage`
    // even though the binding arrays hold *sampled* textures — confirm this is
    // intentional rather than `max_sampled_textures_per_shader_stage`.
    !cfg!(feature = "shader_format_glsl")
        && crate::render::get_adreno_model(&adapter_info).is_none_or(|model| model > 610)
        && render_device.limits().max_storage_textures_per_shader_stage
            >= (STANDARD_MATERIAL_FRAGMENT_SHADER_MIN_TEXTURE_BINDINGS + MAX_VIEW_LIGHT_PROBES)
                as u32
        && render_device.features().contains(
            WgpuFeatures::TEXTURE_BINDING_ARRAY
                | WgpuFeatures::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
        )
}

View File

@@ -0,0 +1,99 @@
#define_import_path bevy_pbr::lightmap
#import bevy_pbr::mesh_bindings::mesh
// With bindless lightmaps, four texture/sampler pairs are bound as binding
// arrays and indexed at runtime by the mesh's lightmap slot; otherwise a
// single texture/sampler pair is bound.
#ifdef MULTIPLE_LIGHTMAPS_IN_ARRAY
@group(2) @binding(4) var lightmaps_textures: binding_array<texture_2d<f32>, 4>;
@group(2) @binding(5) var lightmaps_samplers: binding_array<sampler, 4>;
#else   // MULTIPLE_LIGHTMAPS_IN_ARRAY
@group(2) @binding(4) var lightmaps_texture: texture_2d<f32>;
@group(2) @binding(5) var lightmaps_sampler: sampler;
#endif  // MULTIPLE_LIGHTMAPS_IN_ARRAY
// Samples the lightmap, if any, and returns indirect illumination from it.
fn lightmap(uv: vec2<f32>, exposure: f32, instance_index: u32) -> vec3<f32> {
    let packed_uv_rect = mesh[instance_index].lightmap_uv_rect;
    // Each packed word holds two 16-bit unorm coordinates; together they give
    // the (min, max) corners of this mesh's region in the lightmap atlas.
    let uv_rect = vec4<f32>(
        unpack2x16unorm(packed_uv_rect.x),
        unpack2x16unorm(packed_uv_rect.y),
    );
    // Remap the mesh's lightmap UVs into that atlas region.
    let lightmap_uv = mix(uv_rect.xy, uv_rect.zw, uv);
    // The lightmap slot lives in the high 16 bits of the packed word.
    let lightmap_slot = mesh[instance_index].material_and_lightmap_bind_group_slot >> 16u;
    // Bicubic 4-tap
    // https://developer.nvidia.com/gpugems/gpugems2/part-iii-high-quality-rendering/chapter-20-fast-third-order-texture-filtering
    // https://advances.realtimerendering.com/s2021/jpatry_advances2021/index.html#/111/0/2
#ifdef LIGHTMAP_BICUBIC_SAMPLING
    let texture_size = vec2<f32>(lightmap_size(lightmap_slot));
    let texel_size = 1.0 / texture_size;
    let puv = lightmap_uv * texture_size + 0.5;
    let iuv = floor(puv);
    let fuv = fract(puv);
    let g0x = g0(fuv.x);
    let g1x = g1(fuv.x);
    let h0x = h0_approx(fuv.x);
    let h1x = h1_approx(fuv.x);
    let h0y = h0_approx(fuv.y);
    let h1y = h1_approx(fuv.y);
    // Four bilinear taps placed so the hardware filter completes the bicubic.
    let p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - 0.5) * texel_size;
    let p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - 0.5) * texel_size;
    let p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - 0.5) * texel_size;
    let p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - 0.5) * texel_size;
    let color = g0(fuv.y) * (g0x * sample(p0, lightmap_slot) + g1x * sample(p1, lightmap_slot)) + g1(fuv.y) * (g0x * sample(p2, lightmap_slot) + g1x * sample(p3, lightmap_slot));
#else
    let color = sample(lightmap_uv, lightmap_slot);
#endif
    return color * exposure;
}
// Returns the dimensions, in texels, of the lightmap texture for the given
// slot (the slot is unused when bindless lightmaps are disabled).
fn lightmap_size(lightmap_slot: u32) -> vec2<u32> {
#ifdef MULTIPLE_LIGHTMAPS_IN_ARRAY
    return textureDimensions(lightmaps_textures[lightmap_slot]);
#else
    return textureDimensions(lightmaps_texture);
#endif
}
// Fetches a single filtered texel from the bound lightmap (or the slot's
// lightmap, when bindless arrays are in use).
fn sample(uv: vec2<f32>, lightmap_slot: u32) -> vec3<f32> {
    // Mipmapping lightmaps is usually a bad idea due to leaking across UV
    // islands, so there's no harm in using mip level 0 and it lets us avoid
    // control flow uniformity problems.
#ifdef MULTIPLE_LIGHTMAPS_IN_ARRAY
    return textureSampleLevel(lightmaps_textures[lightmap_slot], lightmaps_samplers[lightmap_slot], uv, 0.0).rgb;
#else
    return textureSampleLevel(lightmaps_texture, lightmaps_sampler, uv, 0.0).rgb;
#endif
}
// Cubic B-spline basis weights (w0..w3), the combined tap weights (g0, g1),
// and the approximate tap-offset functions (h0, h1) used by the 4-tap bicubic
// path in `lightmap`. Do not algebraically rearrange: the exact float
// evaluation order matters for matching results across platforms.
fn w0(a: f32) -> f32 {
    return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0);
}
fn w1(a: f32) -> f32 {
    return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0);
}
fn w2(a: f32) -> f32 {
    return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0);
}
fn w3(a: f32) -> f32 {
    return (1.0 / 6.0) * (a * a * a);
}
fn g0(a: f32) -> f32 {
    return w0(a) + w1(a);
}
fn g1(a: f32) -> f32 {
    return w2(a) + w3(a);
}
fn h0_approx(a: f32) -> f32 {
    return -0.2 - a * (0.24 * a - 0.44);
}
fn h1_approx(a: f32) -> f32 {
    return 1.0 + a * (0.24 * a - 0.04);
}

View File

@@ -0,0 +1,519 @@
//! Lightmaps, baked lighting textures that can be applied at runtime to provide
//! diffuse global illumination.
//!
//! Bevy doesn't currently have any way to actually bake lightmaps, but they can
//! be baked in an external tool like [Blender](http://blender.org), for example
//! with an addon like [The Lightmapper]. The tools in the [`bevy-baked-gi`]
//! project support other lightmap baking methods.
//!
//! When a [`Lightmap`] component is added to an entity with a [`Mesh3d`] and a
//! [`MeshMaterial3d<StandardMaterial>`], Bevy applies the lightmap when rendering. The brightness
//! of the lightmap may be controlled with the `lightmap_exposure` field on
//! [`StandardMaterial`].
//!
//! During the rendering extraction phase, we extract all lightmaps into the
//! [`RenderLightmaps`] table, which lives in the render world. Mesh bindgroup
//! and mesh uniform creation consults this table to determine which lightmap to
//! supply to the shader. Essentially, the lightmap is a special type of texture
//! that is part of the mesh instance rather than part of the material (because
//! multiple meshes can share the same material, whereas sharing lightmaps is
//! nonsensical).
//!
//! Note that multiple meshes can't be drawn in a single drawcall if they use
//! different lightmap textures, unless bindless textures are in use. If you
//! want to instance a lightmapped mesh, and your platform doesn't support
//! bindless textures, combine the lightmap textures into a single atlas, and
//! set the `uv_rect` field on [`Lightmap`] appropriately.
//!
//! [The Lightmapper]: https://github.com/Naxela/The_Lightmapper
//! [`Mesh3d`]: bevy_mesh::Mesh3d
//! [`MeshMaterial3d<StandardMaterial>`]: crate::StandardMaterial
//! [`StandardMaterial`]: crate::StandardMaterial
//! [`bevy-baked-gi`]: https://github.com/pcwalton/bevy-baked-gi
use bevy_app::{App, Plugin};
use bevy_asset::{AssetId, Handle};
use bevy_camera::visibility::ViewVisibility;
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
component::Component,
entity::Entity,
lifecycle::RemovedComponents,
query::{Changed, Or},
reflect::ReflectComponent,
resource::Resource,
schedule::IntoScheduleConfigs,
system::{Commands, Query, Res, ResMut},
};
use bevy_image::Image;
use bevy_math::{uvec2, vec4, Rect, UVec2};
use bevy_platform::collections::HashSet;
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use crate::render::{
render_asset::RenderAssets,
render_resource::{Sampler, TextureView, WgpuSampler, WgpuTextureView},
renderer::RenderAdapter,
sync_world::MainEntity,
texture::{FallbackImage, GpuImage},
Extract, ExtractSchedule, RenderApp, RenderStartup,
};
use crate::render::{renderer::RenderDevice, sync_world::MainEntityHashMap};
use bevy_shader::load_shader_library;
use bevy_utils::default;
use fixedbitset::FixedBitSet;
use nonmax::{NonMaxU16, NonMaxU32};
use tracing::error;
use crate::render::pbr::{binding_arrays_are_usable, MeshExtractionSystems};
/// The number of lightmaps that we store in a single slab, if bindless textures
/// are in use.
///
/// If bindless textures aren't in use, then only a single lightmap can be bound
/// at a time.
///
/// NOTE(review): this appears to correspond to the fixed `binding_array<_, 4>`
/// sizes declared in `lightmap.wgsl` — keep the two in sync.
pub const LIGHTMAPS_PER_SLAB: usize = 4;
/// A plugin that provides an implementation of lightmaps.
///
/// See the module-level documentation for an overview of how lightmaps are
/// baked, extracted, and applied at render time.
pub struct LightmapPlugin;
/// A component that applies baked indirect diffuse global illumination from a
/// lightmap.
///
/// When assigned to an entity that contains a [`Mesh3d`](bevy_mesh::Mesh3d) and a
/// [`MeshMaterial3d<StandardMaterial>`](crate::StandardMaterial), if the mesh
/// has a second UV layer ([`ATTRIBUTE_UV_1`](bevy_mesh::Mesh::ATTRIBUTE_UV_1)),
/// then the lightmap will render using those UVs.
#[derive(Component, Clone, Reflect)]
#[reflect(Component, Default, Clone)]
pub struct Lightmap {
    /// The lightmap texture.
    pub image: Handle<Image>,
    /// The rectangle within the lightmap texture that the UVs are relative to.
    ///
    /// The top left coordinate is the `min` part of the rect, and the bottom
    /// right coordinate is the `max` part of the rect. The rect ranges from (0,
    /// 0) to (1, 1).
    ///
    /// This field allows lightmaps for a variety of meshes to be packed into a
    /// single atlas.
    pub uv_rect: Rect,
    /// Whether bicubic sampling should be used for sampling this lightmap.
    ///
    /// Bicubic sampling is higher quality, but slower, and may lead to light leaks.
    ///
    /// If true, the lightmap texture's sampler must be set to [`bevy_image::ImageSampler::linear`].
    ///
    /// Defaults to `false` (see the [`Default`] impl).
    pub bicubic_sampling: bool,
}
/// Lightmap data stored in the render world.
///
/// There is one of these per visible lightmapped mesh instance.
#[derive(Debug)]
pub(crate) struct RenderLightmap {
    /// The rectangle within the lightmap texture that the UVs are relative to.
    ///
    /// The top left coordinate is the `min` part of the rect, and the bottom
    /// right coordinate is the `max` part of the rect. The rect ranges from (0,
    /// 0) to (1, 1).
    pub(crate) uv_rect: Rect,
    /// The index of the slab (i.e. binding array) in which the lightmap is
    /// located.
    pub(crate) slab_index: LightmapSlabIndex,
    /// The index of the slot (i.e. element within the binding array) in which
    /// the lightmap is located.
    ///
    /// If bindless lightmaps aren't in use, this will be 0.
    pub(crate) slot_index: LightmapSlotIndex,
    /// Whether or not bicubic sampling should be used for this lightmap.
    pub(crate) bicubic_sampling: bool,
}
/// Stores data for all lightmaps in the render world.
///
/// This is cleared and repopulated each frame during the `extract_lightmaps`
/// system.
#[derive(Resource)]
pub struct RenderLightmaps {
    /// The mapping from every lightmapped entity to its lightmap info.
    ///
    /// Entities without lightmaps, or for which the mesh or lightmap isn't
    /// loaded, won't have entries in this table.
    pub(crate) render_lightmaps: MainEntityHashMap<RenderLightmap>,
    /// The slabs (binding arrays) containing the lightmaps.
    pub(crate) slabs: Vec<LightmapSlab>,
    /// Bit set of indices into `slabs` that still have at least one free slot.
    free_slabs: FixedBitSet,
    /// (slab, slot) addresses whose lightmap images haven't finished loading;
    /// resolved by `extract_lightmaps` once the GPU image becomes available.
    pending_lightmaps: HashSet<(LightmapSlabIndex, LightmapSlotIndex)>,
    /// Whether bindless textures are supported on this platform.
    pub(crate) bindless_supported: bool,
}
/// A binding array that contains lightmaps.
///
/// This will have a single binding if bindless lightmaps aren't in use.
pub struct LightmapSlab {
    /// The GPU images in this slab.
    lightmaps: Vec<AllocatedLightmap>,
    /// Bitmask with one set bit per free slot (bit `i` ↔ `lightmaps[i]`).
    free_slots_bitmask: u32,
}
/// One slot of a [`LightmapSlab`]: the bound GPU image plus, while loading,
/// the asset it is waiting on.
struct AllocatedLightmap {
    gpu_image: GpuImage,
    // This will only be present if the lightmap is allocated but not loaded.
    asset_id: Option<AssetId<Image>>,
}
/// The index of the slab (binding array) in which a lightmap is located.
///
/// Backed by [`NonMaxU32`], so `Option<LightmapSlabIndex>` carries no extra
/// space cost.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Deref, DerefMut)]
#[repr(transparent)]
pub struct LightmapSlabIndex(pub(crate) NonMaxU32);
/// The index of the slot (element within the binding array) in the slab in
/// which a lightmap is located.
///
/// Backed by [`NonMaxU16`], so `Option<LightmapSlotIndex>` carries no extra
/// space cost.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Deref, DerefMut)]
#[repr(transparent)]
pub struct LightmapSlotIndex(pub(crate) NonMaxU16);
impl Plugin for LightmapPlugin {
    fn build(&self, app: &mut App) {
        // Make the lightmap WGSL source available as an importable library.
        load_shader_library!(app, "lightmap.wgsl");
        // Nothing further to set up if the render sub-app doesn't exist.
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };
        render_app
            // The resource is created at startup because it needs the device
            // and adapter to decide whether bindless lightmaps are usable.
            .add_systems(RenderStartup, init_render_lightmaps)
            .add_systems(
                ExtractSchedule,
                extract_lightmaps.after(MeshExtractionSystems),
            );
    }
}
/// Extracts all lightmaps from the scene and populates the [`RenderLightmaps`]
/// resource.
///
/// Processes changed and removed lightmaps incrementally, then resolves any
/// pending slots whose GPU images have finished loading.
fn extract_lightmaps(
    render_lightmaps: ResMut<RenderLightmaps>,
    changed_lightmaps_query: Extract<
        Query<
            (Entity, &ViewVisibility, &Lightmap),
            Or<(Changed<ViewVisibility>, Changed<Lightmap>)>,
        >,
    >,
    mut removed_lightmaps_query: Extract<RemovedComponents<Lightmap>>,
    images: Res<RenderAssets<GpuImage>>,
    fallback_images: Res<FallbackImage>,
) {
    let render_lightmaps = render_lightmaps.into_inner();
    // Loop over each entity.
    for (entity, view_visibility, lightmap) in changed_lightmaps_query.iter() {
        // NOTE(review): entities that already have an entry are skipped, so a
        // mutation of an existing `Lightmap` (e.g. a new `uv_rect`) keeps the
        // old record — confirm this is intended.
        if render_lightmaps
            .render_lightmaps
            .contains_key(&MainEntity::from(entity))
        {
            continue;
        }
        // Only process visible entities.
        if !view_visibility.get() {
            continue;
        }
        let (slab_index, slot_index) =
            render_lightmaps.allocate(&fallback_images, lightmap.image.id());
        render_lightmaps.render_lightmaps.insert(
            entity.into(),
            RenderLightmap::new(
                lightmap.uv_rect,
                slab_index,
                slot_index,
                lightmap.bicubic_sampling,
            ),
        );
        // The image may not be on the GPU yet; remember the slot so it can be
        // filled in below once the asset finishes loading.
        render_lightmaps
            .pending_lightmaps
            .insert((slab_index, slot_index));
    }
    for entity in removed_lightmaps_query.read() {
        // An entity in both queries had its lightmap replaced this frame;
        // don't free the slot that was just allocated for it.
        if changed_lightmaps_query.contains(entity) {
            continue;
        }
        let Some(RenderLightmap {
            slab_index,
            slot_index,
            ..
        }) = render_lightmaps
            .render_lightmaps
            .remove(&MainEntity::from(entity))
        else {
            continue;
        };
        render_lightmaps.remove(&fallback_images, slab_index, slot_index);
        render_lightmaps
            .pending_lightmaps
            .remove(&(slab_index, slot_index));
    }
    // Resolve pending slots whose GPU images have finished loading; keep the
    // rest pending for a later frame.
    render_lightmaps
        .pending_lightmaps
        .retain(|&(slab_index, slot_index)| {
            let Some(asset_id) = render_lightmaps.slabs[usize::from(slab_index)].lightmaps
                [usize::from(slot_index)]
            .asset_id
            else {
                error!(
                    "Allocated lightmap should have been removed from `pending_lightmaps` by now"
                );
                return false;
            };
            let Some(gpu_image) = images.get(asset_id) else {
                return true;
            };
            render_lightmaps.slabs[usize::from(slab_index)].insert(slot_index, gpu_image.clone());
            false
        });
}
impl RenderLightmap {
/// Creates a new lightmap from a texture, a UV rect, and a slab and slot
/// index pair.
fn new(
uv_rect: Rect,
slab_index: LightmapSlabIndex,
slot_index: LightmapSlotIndex,
bicubic_sampling: bool,
) -> Self {
Self {
uv_rect,
slab_index,
slot_index,
bicubic_sampling,
}
}
}
/// Packs the lightmap UV rect into 64 bits (4 16-bit unsigned integers).
///
/// `None` (no lightmap) packs to all zeroes.
pub(crate) fn pack_lightmap_uv_rect(maybe_rect: Option<Rect>) -> UVec2 {
    let Some(rect) = maybe_rect else {
        return UVec2::ZERO;
    };
    // Quantize each coordinate from [0, 1] to a 16-bit unorm value.
    let quantized = (vec4(rect.min.x, rect.min.y, rect.max.x, rect.max.y) * 65535.0)
        .round()
        .as_uvec4();
    // Pack (min.x, min.y) into the first word and (max.x, max.y) into the
    // second, low coordinate in the low half-word.
    uvec2(
        quantized.x | (quantized.y << 16),
        quantized.z | (quantized.w << 16),
    )
}
impl Default for Lightmap {
fn default() -> Self {
Self {
image: Default::default(),
uv_rect: Rect::new(0.0, 0.0, 1.0, 1.0),
bicubic_sampling: false,
}
}
}
/// Creates the [`RenderLightmaps`] resource at render-app startup.
pub fn init_render_lightmaps(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_adapter: Res<RenderAdapter>,
) {
    commands.insert_resource(RenderLightmaps {
        render_lightmaps: default(),
        slabs: Vec::new(),
        free_slabs: FixedBitSet::new(),
        pending_lightmaps: default(),
        // Bindless (binding-array) lightmaps are only enabled when the device
        // and adapter pass every check in `binding_arrays_are_usable`.
        bindless_supported: binding_arrays_are_usable(&render_device, &render_adapter),
    });
}
impl RenderLightmaps {
    /// Creates a new slab, appends it to the end of the list, and returns its
    /// slab index.
    fn create_slab(&mut self, fallback_images: &FallbackImage) -> LightmapSlabIndex {
        let slab_index = LightmapSlabIndex::from(self.slabs.len());
        // A brand-new slab has every slot free, so mark the slab as free.
        self.free_slabs.grow_and_insert(slab_index.into());
        self.slabs
            .push(LightmapSlab::new(fallback_images, self.bindless_supported));
        slab_index
    }

    /// Reserves a slot for `image_id`, creating a new slab if every existing
    /// slab is full, and returns the (slab, slot) pair that was allocated.
    fn allocate(
        &mut self,
        fallback_images: &FallbackImage,
        image_id: AssetId<Image>,
    ) -> (LightmapSlabIndex, LightmapSlotIndex) {
        let slab_index = match self.free_slabs.minimum() {
            None => self.create_slab(fallback_images),
            Some(slab_index) => slab_index.into(),
        };
        let slab = &mut self.slabs[usize::from(slab_index)];
        let slot_index = slab.allocate(image_id);
        // A full slab is no longer a candidate for allocation.
        if slab.is_full() {
            self.free_slabs.remove(slab_index.into());
        }
        (slab_index, slot_index)
    }

    /// Frees the given slot, resetting it to the fallback image.
    fn remove(
        &mut self,
        fallback_images: &FallbackImage,
        slab_index: LightmapSlabIndex,
        slot_index: LightmapSlotIndex,
    ) {
        let slab = &mut self.slabs[usize::from(slab_index)];
        slab.remove(fallback_images, slot_index);
        if !slab.is_full() {
            // The slab has a free slot again, so mark the *slab* free.
            // Fix: this previously inserted `slot_index`, but `free_slabs` is
            // keyed by slab index everywhere else (`create_slab`, `allocate`),
            // so the wrong bit was set and the freed slab could stay
            // unallocatable while an unrelated slab appeared free.
            self.free_slabs.grow_and_insert(slab_index.into());
        }
    }
}
impl LightmapSlab {
    /// Creates a slab whose slots are all filled with the 2D fallback image.
    ///
    /// In bindless mode a slab holds `LIGHTMAPS_PER_SLAB` lightmaps; otherwise
    /// it holds exactly one, since there is no binding array to index into.
    fn new(fallback_images: &FallbackImage, bindless_supported: bool) -> LightmapSlab {
        let count = if bindless_supported {
            LIGHTMAPS_PER_SLAB
        } else {
            1
        };
        LightmapSlab {
            lightmaps: (0..count)
                .map(|_| AllocatedLightmap {
                    gpu_image: fallback_images.d2.clone(),
                    asset_id: None,
                })
                .collect(),
            // One bit per slot, all initially set (set bit = free slot).
            free_slots_bitmask: (1 << count) - 1,
        }
    }

    /// Returns true if no free slots remain in this slab.
    fn is_full(&self) -> bool {
        self.free_slots_bitmask == 0
    }

    /// Claims the lowest-numbered free slot for `image_id` and returns its
    /// index.
    ///
    /// Assumes at least one slot is free (callers check `is_full` first via
    /// `RenderLightmaps::allocate`); on a zero bitmask `trailing_zeros` would
    /// produce an out-of-bounds index.
    fn allocate(&mut self, image_id: AssetId<Image>) -> LightmapSlotIndex {
        let index = LightmapSlotIndex::from(self.free_slots_bitmask.trailing_zeros());
        // Clear the slot's free bit and record which asset now owns it.
        self.free_slots_bitmask &= !(1 << u32::from(index));
        self.lightmaps[usize::from(index)].asset_id = Some(image_id);
        index
    }

    /// Installs the prepared GPU image into a previously-allocated slot.
    ///
    /// `asset_id` is cleared here: per the retain pass above, it is only
    /// consulted while the lightmap is still pending upload.
    fn insert(&mut self, index: LightmapSlotIndex, gpu_image: GpuImage) {
        self.lightmaps[usize::from(index)] = AllocatedLightmap {
            gpu_image,
            asset_id: None,
        }
    }

    /// Resets a slot back to the fallback image and marks it free again.
    fn remove(&mut self, fallback_images: &FallbackImage, index: LightmapSlotIndex) {
        self.lightmaps[usize::from(index)] = AllocatedLightmap {
            gpu_image: fallback_images.d2.clone(),
            asset_id: None,
        };
        self.free_slots_bitmask |= 1 << u32::from(index);
    }

    /// Returns the texture views and samplers for the lightmaps in this slab,
    /// ready to be placed into a bind group.
    ///
    /// This is used when constructing bind groups in bindless mode. Before
    /// returning, this function pads out the arrays with fallback images in
    /// order to fulfill requirements of platforms that require full binding
    /// arrays (e.g. DX12).
    pub(crate) fn build_binding_arrays(&self) -> (Vec<&WgpuTextureView>, Vec<&WgpuSampler>) {
        (
            self.lightmaps
                .iter()
                .map(|allocated_lightmap| &*allocated_lightmap.gpu_image.texture_view)
                .collect(),
            self.lightmaps
                .iter()
                .map(|allocated_lightmap| &*allocated_lightmap.gpu_image.sampler)
                .collect(),
        )
    }

    /// Returns the texture view and sampler corresponding to the first
    /// lightmap, which must exist.
    ///
    /// This is used when constructing bind groups in non-bindless mode.
    pub(crate) fn bindings_for_first_lightmap(&self) -> (&TextureView, &Sampler) {
        (
            &self.lightmaps[0].gpu_image.texture_view,
            &self.lightmaps[0].gpu_image.sampler,
        )
    }
}
impl From<u32> for LightmapSlabIndex {
    /// Panics if `value` is `u32::MAX`, which `NonMaxU32` reserves as its
    /// niche; a real slab index can never reach it.
    fn from(value: u32) -> Self {
        Self(NonMaxU32::new(value).expect("lightmap slab index must not be `u32::MAX`"))
    }
}

impl From<usize> for LightmapSlabIndex {
    fn from(value: usize) -> Self {
        // Checked narrowing: a plain `as u32` would silently wrap on 64-bit
        // targets and alias an unrelated slab.
        Self::from(u32::try_from(value).expect("lightmap slab index must fit in `u32`"))
    }
}

impl From<u32> for LightmapSlotIndex {
    /// Panics if `value` doesn't fit a `NonMaxU16` slot index.
    fn from(value: u32) -> Self {
        // Check the range *before* narrowing: the previous `value as u16` cast
        // silently truncated, so an out-of-range slot (e.g. 65536) would have
        // aliased slot 0 instead of panicking.
        let value = u16::try_from(value).expect("lightmap slot index must fit in `u16`");
        Self(NonMaxU16::new(value).expect("lightmap slot index must not be `u16::MAX`"))
    }
}

impl From<usize> for LightmapSlotIndex {
    fn from(value: usize) -> Self {
        Self::from(u32::try_from(value).expect("lightmap slot index must fit in `u32`"))
    }
}

impl From<LightmapSlabIndex> for usize {
    fn from(value: LightmapSlabIndex) -> Self {
        // u32 -> usize is lossless on all supported targets.
        value.0.get() as usize
    }
}

impl From<LightmapSlotIndex> for usize {
    fn from(value: LightmapSlotIndex) -> Self {
        usize::from(value.0.get())
    }
}

impl From<LightmapSlotIndex> for u16 {
    fn from(value: LightmapSlotIndex) -> Self {
        value.0.get()
    }
}

impl From<LightmapSlotIndex> for u32 {
    fn from(value: LightmapSlotIndex) -> Self {
        u32::from(value.0.get())
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
use crate::render::pbr::Material;
use bevy_asset::{AsAssetId, AssetId, Handle};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{component::Component, reflect::ReflectComponent};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use derive_more::derive::From;
// NOTE(review): the doctest and intra-doc links below still use the upstream
// `bevy_pbr`/`bevy_mesh` crate paths from before vendoring into
// `crate::render` — confirm they resolve (or that doctests are disabled).
/// A [material](Material) used for rendering a [`Mesh3d`].
///
/// See [`Material`] for general information about 3D materials and how to implement your own materials.
///
/// Dereferences to the wrapped [`Handle<M>`] via `Deref`/`DerefMut`.
///
/// [`Mesh3d`]: bevy_mesh::Mesh3d
///
/// # Example
///
/// ```
/// # use bevy_pbr::{Material, MeshMaterial3d, StandardMaterial};
/// # use bevy_ecs::prelude::*;
/// # use bevy_mesh::{Mesh, Mesh3d};
/// # use bevy_color::palettes::basic::RED;
/// # use bevy_asset::Assets;
/// # use bevy_math::primitives::Capsule3d;
/// #
/// // Spawn an entity with a mesh using `StandardMaterial`.
/// fn setup(
///     mut commands: Commands,
///     mut meshes: ResMut<Assets<Mesh>>,
///     mut materials: ResMut<Assets<StandardMaterial>>,
/// ) {
///     commands.spawn((
///         Mesh3d(meshes.add(Capsule3d::default())),
///         MeshMaterial3d(materials.add(StandardMaterial {
///             base_color: RED.into(),
///             ..Default::default()
///         })),
///     ));
/// }
/// ```
#[derive(Component, Clone, Debug, Deref, DerefMut, Reflect, From)]
#[reflect(Component, Default, Clone, PartialEq)]
pub struct MeshMaterial3d<M: Material>(pub Handle<M>);
impl<M: Material> Default for MeshMaterial3d<M> {
fn default() -> Self {
Self(Handle::default())
}
}
// Manual `PartialEq`/`Eq` rather than `#[derive]`: deriving would place a
// `PartialEq` bound on the type parameter `M` itself, but equality here only
// depends on the wrapped handle.
impl<M: Material> PartialEq for MeshMaterial3d<M> {
    fn eq(&self, other: &Self) -> bool {
        self.0 == other.0
    }
}

impl<M: Material> Eq for MeshMaterial3d<M> {}
impl<M: Material> From<MeshMaterial3d<M>> for AssetId<M> {
    /// Extracts the [`AssetId`] from an owned material component.
    fn from(material: MeshMaterial3d<M>) -> Self {
        material.0.id()
    }
}

impl<M: Material> From<&MeshMaterial3d<M>> for AssetId<M> {
    /// Extracts the [`AssetId`] from a borrowed material component.
    fn from(material: &MeshMaterial3d<M>) -> Self {
        material.0.id()
    }
}

impl<M: Material> AsAssetId for MeshMaterial3d<M> {
    type Asset = M;

    /// Returns the id of the wrapped material handle.
    fn as_asset_id(&self) -> AssetId<Self::Asset> {
        self.0.id()
    }
}

View File

@@ -0,0 +1,319 @@
use std::sync::Arc;
use bevy_asset::{
io::{Reader, Writer},
saver::{AssetSaver, SavedAsset},
Asset, AssetLoader, AsyncReadExt, AsyncWriteExt, LoadContext,
};
use bevy_math::{Vec2, Vec3};
use bevy_reflect::TypePath;
use crate::render::render_resource::ShaderType;
use bevy_tasks::block_on;
use bytemuck::{Pod, Zeroable};
use lz4_flex::frame::{FrameDecoder, FrameEncoder};
use std::io::{Read, Write};
use thiserror::Error;
/// Unique identifier for the [`MeshletMesh`] asset format.
///
/// Written as the first 8 bytes of every `.meshlet_mesh` file and checked by
/// [`MeshletMeshLoader`] to reject files that are not meshlet meshes.
const MESHLET_MESH_ASSET_MAGIC: u64 = 1717551717668;

/// The current version of the [`MeshletMesh`] asset format.
///
/// The loader rejects files with any other version.
pub const MESHLET_MESH_ASSET_VERSION: u64 = 2;
/// A mesh that has been pre-processed into multiple small clusters of triangles called meshlets.
///
/// A [`bevy_mesh::Mesh`] can be converted to a [`MeshletMesh`] using `MeshletMesh::from_mesh` when the `meshlet_processor` cargo feature is enabled.
/// The conversion step is very slow, and is meant to be ran once ahead of time, and not during runtime. This type of mesh is not suitable for
/// dynamically generated geometry.
///
/// All bulk data lives in `Arc` slices, so cloning a [`MeshletMesh`] is cheap
/// (reference-count bumps, no buffer copies).
///
/// There are restrictions on the [`crate::Material`] functionality that can be used with this type of mesh.
/// * Materials have no control over the vertex shader or vertex attributes.
/// * Materials must be opaque. Transparent, alpha masked, and transmissive materials are not supported.
/// * Do not use normal maps baked from higher-poly geometry. Use the high-poly geometry directly and skip the normal map.
///   * If additional detail is needed, a smaller tiling normal map not baked from a mesh is ok.
/// * Material shaders must not use builtin functions that automatically calculate derivatives <https://gpuweb.github.io/gpuweb/wgsl/#derivatives>.
///   * Performing manual arithmetic on texture coordinates (UVs) is forbidden. Use the chain-rule version of arithmetic functions instead (TODO: not yet implemented).
/// * Limited control over [`bevy_render::render_resource::RenderPipelineDescriptor`] attributes.
/// * Materials must use the [`crate::Material::meshlet_mesh_fragment_shader`] method (and similar variants for prepass/deferred shaders)
///   which requires certain shader patterns that differ from the regular material shaders.
///
/// See also [`super::MeshletMesh3d`] and [`super::MeshletPlugin`].
#[derive(Asset, TypePath, Clone)]
pub struct MeshletMesh {
    /// Quantized and bitstream-packed vertex positions for meshlet vertices.
    pub(crate) vertex_positions: Arc<[u32]>,
    /// Octahedral-encoded and 2x16snorm packed normals for meshlet vertices.
    pub(crate) vertex_normals: Arc<[u32]>,
    /// Uncompressed vertex texture coordinates for meshlet vertices.
    pub(crate) vertex_uvs: Arc<[Vec2]>,
    /// Triangle indices for meshlets.
    pub(crate) indices: Arc<[u8]>,
    /// The BVH8 used for culling and LOD selection of the meshlets. The root is at index 0.
    pub(crate) bvh: Arc<[BvhNode]>,
    /// The list of meshlets making up this mesh.
    pub(crate) meshlets: Arc<[Meshlet]>,
    /// Spherical bounding volumes.
    pub(crate) meshlet_cull_data: Arc<[MeshletCullData]>,
    /// The tight AABB of the meshlet mesh, used for frustum and occlusion culling at the instance
    /// level.
    pub(crate) aabb: MeshletAabb,
    /// The depth of the culling BVH, used to determine the number of dispatches at runtime.
    pub(crate) bvh_depth: u32,
}
/// A single BVH8 node in the BVH used for culling and LOD selection of a [`MeshletMesh`].
#[derive(Copy, Clone, Default, Pod, Zeroable)]
#[repr(C)]
pub struct BvhNode {
    /// The tight AABBs of this node's children, used for frustum and occlusion during BVH
    /// traversal.
    pub aabbs: [MeshletAabbErrorOffset; 8],
    /// The LOD bounding spheres of this node's children, used for LOD selection during BVH
    /// traversal.
    pub lod_bounds: [MeshletBoundingSphere; 8],
    /// One count per child. If a count is `u8::MAX`, that child is another BVH
    /// node; otherwise it is the number of meshlets in the child's group.
    pub child_counts: [u8; 8],
    // Trailing padding for the `#[repr(C)]`/`Pod` layout — presumably required
    // to match the GPU-side struct; confirm before removing.
    pub _padding: [u32; 2],
}

// NOTE(review): `#[repr(C)]` + `Pod` means this is uploaded as raw bytes;
// field order presumably mirrors a WGSL-side struct — confirm before reordering.
/// A single meshlet within a [`MeshletMesh`].
#[derive(Copy, Clone, Pod, Zeroable)]
#[repr(C)]
pub struct Meshlet {
    /// The bit offset within the parent mesh's [`MeshletMesh::vertex_positions`] buffer where the vertex positions for this meshlet begin.
    pub start_vertex_position_bit: u32,
    /// The offset within the parent mesh's [`MeshletMesh::vertex_normals`] and [`MeshletMesh::vertex_uvs`] buffers
    /// where non-position vertex attributes for this meshlet begin.
    pub start_vertex_attribute_id: u32,
    /// The offset within the parent mesh's [`MeshletMesh::indices`] buffer where the indices for this meshlet begin.
    pub start_index_id: u32,
    /// The amount of vertices in this meshlet.
    pub vertex_count: u8,
    /// The amount of triangles in this meshlet.
    pub triangle_count: u8,
    /// Unused.
    pub padding: u16,
    /// Number of bits used to store the X channel of vertex positions within this meshlet.
    pub bits_per_vertex_position_channel_x: u8,
    /// Number of bits used to store the Y channel of vertex positions within this meshlet.
    pub bits_per_vertex_position_channel_y: u8,
    /// Number of bits used to store the Z channel of vertex positions within this meshlet.
    pub bits_per_vertex_position_channel_z: u8,
    /// Power of 2 factor used to quantize vertex positions within this meshlet.
    pub vertex_position_quantization_factor: u8,
    /// Minimum quantized X channel value of vertex positions within this meshlet.
    pub min_vertex_position_channel_x: f32,
    /// Minimum quantized Y channel value of vertex positions within this meshlet.
    pub min_vertex_position_channel_y: f32,
    /// Minimum quantized Z channel value of vertex positions within this meshlet.
    pub min_vertex_position_channel_z: f32,
}
/// Bounding spheres used for culling and choosing level of detail for a [`Meshlet`].
#[derive(Copy, Clone, Pod, Zeroable)]
#[repr(C)]
pub struct MeshletCullData {
    /// Tight bounding box, used for frustum and occlusion culling for this meshlet.
    pub aabb: MeshletAabbErrorOffset,
    /// Bounding sphere used for determining if this meshlet's group is at the correct level of detail for a given view.
    pub lod_group_sphere: MeshletBoundingSphere,
}

/// An axis-aligned bounding box used for a [`Meshlet`].
#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct MeshletAabb {
    pub center: Vec3,
    pub half_extent: Vec3,
}

/// An axis-aligned bounding box used for a [`Meshlet`], with a LOD error value
/// and child offset carried alongside the extents.
/// (This was previously a plain `//` comment; promoted to a doc comment.)
#[derive(Copy, Clone, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct MeshletAabbErrorOffset {
    pub center: Vec3,
    // Read via `get_aabb_error` in the culling shaders; presumably the LOD
    // error of this AABB's contents — confirm against the shader bindings.
    pub error: f32,
    pub half_extent: Vec3,
    // Read via `get_aabb_child_offset` in the culling shaders.
    pub child_offset: u32,
}

/// A spherical bounding volume used for a [`Meshlet`].
#[derive(Copy, Clone, Default, Pod, Zeroable)]
#[repr(C)]
pub struct MeshletBoundingSphere {
    pub center: Vec3,
    pub radius: f32,
}
/// An [`AssetSaver`] for `.meshlet_mesh` [`MeshletMesh`] assets.
pub struct MeshletMeshSaver;

impl AssetSaver for MeshletMeshSaver {
    type Asset = MeshletMesh;
    type Settings = ();
    type OutputLoader = MeshletMeshLoader;
    type Error = MeshletMeshSaveOrLoadError;

    /// Serializes the asset as: magic number, version, AABB, BVH depth
    /// (uncompressed), then the bulk buffers inside one LZ4 frame.
    ///
    /// [`MeshletMeshLoader::load`] must read fields in exactly this order.
    async fn save(
        &self,
        writer: &mut Writer,
        asset: SavedAsset<'_, MeshletMesh>,
        _settings: &(),
    ) -> Result<(), MeshletMeshSaveOrLoadError> {
        // Write asset magic number
        writer
            .write_all(&MESHLET_MESH_ASSET_MAGIC.to_le_bytes())
            .await?;

        // Write asset version
        writer
            .write_all(&MESHLET_MESH_ASSET_VERSION.to_le_bytes())
            .await?;

        // AABB and BVH depth are written uncompressed, ahead of the LZ4 frame,
        // so the loader can read them without starting the decoder.
        writer.write_all(bytemuck::bytes_of(&asset.aabb)).await?;
        writer
            .write_all(bytemuck::bytes_of(&asset.bvh_depth))
            .await?;

        // Compress and write asset data
        let mut writer = FrameEncoder::new(AsyncWriteSyncAdapter(writer));
        write_slice(&asset.vertex_positions, &mut writer)?;
        write_slice(&asset.vertex_normals, &mut writer)?;
        write_slice(&asset.vertex_uvs, &mut writer)?;
        write_slice(&asset.indices, &mut writer)?;
        write_slice(&asset.bvh, &mut writer)?;
        write_slice(&asset.meshlets, &mut writer)?;
        write_slice(&asset.meshlet_cull_data, &mut writer)?;

        // BUG: Flushing helps with an async_fs bug, but it still fails sometimes. https://github.com/smol-rs/async-fs/issues/45
        // ERROR bevy_asset::server: Failed to load asset with asset loader MeshletMeshLoader: failed to fill whole buffer
        writer.flush()?;
        writer.finish()?;

        Ok(())
    }
}
/// An [`AssetLoader`] for `.meshlet_mesh` [`MeshletMesh`] assets.
pub struct MeshletMeshLoader;

impl AssetLoader for MeshletMeshLoader {
    type Asset = MeshletMesh;
    type Settings = ();
    type Error = MeshletMeshSaveOrLoadError;

    /// Deserializes a [`MeshletMesh`], validating the magic number and format
    /// version before reading the payload.
    ///
    /// The read order must mirror [`MeshletMeshSaver::save`] exactly.
    async fn load(
        &self,
        reader: &mut dyn Reader,
        _settings: &(),
        _load_context: &mut LoadContext<'_>,
    ) -> Result<MeshletMesh, MeshletMeshSaveOrLoadError> {
        // Load and check magic number
        let magic = async_read_u64(reader).await?;
        if magic != MESHLET_MESH_ASSET_MAGIC {
            return Err(MeshletMeshSaveOrLoadError::WrongFileType);
        }

        // Load and check asset version
        let version = async_read_u64(reader).await?;
        if version != MESHLET_MESH_ASSET_VERSION {
            return Err(MeshletMeshSaveOrLoadError::WrongVersion { found: version });
        }

        // AABB and BVH depth were written uncompressed, ahead of the LZ4 frame.
        let mut bytes = [0u8; size_of::<MeshletAabb>()];
        reader.read_exact(&mut bytes).await?;
        let aabb = bytemuck::cast(bytes);
        let mut bytes = [0u8; size_of::<u32>()];
        reader.read_exact(&mut bytes).await?;
        let bvh_depth = u32::from_le_bytes(bytes);

        // Load and decompress asset data
        let reader = &mut FrameDecoder::new(AsyncReadSyncAdapter(reader));
        let vertex_positions = read_slice(reader)?;
        let vertex_normals = read_slice(reader)?;
        let vertex_uvs = read_slice(reader)?;
        let indices = read_slice(reader)?;
        let bvh = read_slice(reader)?;
        let meshlets = read_slice(reader)?;
        let meshlet_cull_data = read_slice(reader)?;

        Ok(MeshletMesh {
            vertex_positions,
            vertex_normals,
            vertex_uvs,
            indices,
            bvh,
            meshlets,
            meshlet_cull_data,
            aabb,
            bvh_depth,
        })
    }

    fn extensions(&self) -> &[&str] {
        &["meshlet_mesh"]
    }
}
/// Errors that can occur while saving or loading a [`MeshletMesh`] asset.
#[derive(Error, Debug)]
pub enum MeshletMeshSaveOrLoadError {
    /// The file's magic number didn't match [`MESHLET_MESH_ASSET_MAGIC`].
    #[error("file was not a MeshletMesh asset")]
    WrongFileType,
    /// The file's version didn't match [`MESHLET_MESH_ASSET_VERSION`].
    #[error("expected asset version {MESHLET_MESH_ASSET_VERSION} but found version {found}")]
    WrongVersion { found: u64 },
    /// LZ4 frame encoding or decoding failed.
    #[error("failed to compress or decompress asset data")]
    CompressionOrDecompression(#[from] lz4_flex::frame::Error),
    /// Underlying I/O failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
/// Reads a little-endian `u64` from the async asset [`Reader`].
async fn async_read_u64(reader: &mut dyn Reader) -> Result<u64, std::io::Error> {
    let mut buf = [0u8; 8];
    reader.read_exact(&mut buf).await.map(|_| u64::from_le_bytes(buf))
}
/// Reads a little-endian `u64` from a blocking [`Read`] stream.
fn read_u64(reader: &mut dyn Read) -> Result<u64, std::io::Error> {
    let mut buf = [0u8; 8];
    reader.read_exact(&mut buf).map(|()| u64::from_le_bytes(buf))
}
/// Writes `field` as a little-endian `u64` length prefix followed by the raw
/// bytes of its elements (the inverse of `read_slice`).
fn write_slice<T: Pod>(
    field: &[T],
    writer: &mut dyn Write,
) -> Result<(), MeshletMeshSaveOrLoadError> {
    let len = field.len() as u64;
    writer.write_all(&len.to_le_bytes())?;
    writer.write_all(bytemuck::cast_slice(field))?;
    Ok(())
}
/// Reads a length-prefixed slice (as written by `write_slice`) into a freshly
/// allocated `Arc<[T]>`.
fn read_slice<T: Pod>(reader: &mut dyn Read) -> Result<Arc<[T]>, std::io::Error> {
    let len = read_u64(reader)? as usize;
    // Collect zeroed elements straight into the Arc's backing allocation,
    // then fill it in place from the reader.
    let mut data: Arc<[T]> = core::iter::repeat_with(T::zeroed).take(len).collect();
    // The Arc was just created and never shared, so `get_mut` cannot fail.
    let slice = Arc::get_mut(&mut data).unwrap();
    reader.read_exact(bytemuck::cast_slice_mut(slice))?;
    Ok(data)
}
// TODO: Use async for everything and get rid of this adapter
/// Adapts the async [`Writer`] to the blocking [`Write`] trait by blocking on
/// each call, so it can feed the synchronous LZ4 `FrameEncoder`.
struct AsyncWriteSyncAdapter<'a>(&'a mut Writer);

impl Write for AsyncWriteSyncAdapter<'_> {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        block_on(self.0.write(buf))
    }

    fn flush(&mut self) -> std::io::Result<()> {
        block_on(self.0.flush())
    }
}

// TODO: Use async for everything and get rid of this adapter
/// Adapts the async [`Reader`] to the blocking [`Read`] trait by blocking on
/// each call, so it can feed the synchronous LZ4 `FrameDecoder`.
struct AsyncReadSyncAdapter<'a>(&'a mut dyn Reader);

impl Read for AsyncReadSyncAdapter<'_> {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        block_on(self.0.read(buf))
    }
}

View File

@@ -0,0 +1,18 @@
// Clears the meshlet visibility buffer to zero before rasterization.
//
// The storage texel format depends on the shader def: r64uint when the raster
// pass packs its output into 64-bit texels, r32uint otherwise.
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, write>;
#else
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r32uint, write>;
#endif

// View size in pixels, used to skip out-of-bounds invocations.
var<push_constant> view_size: vec2<u32>;

@compute
@workgroup_size(16, 16, 1)
fn clear_visibility_buffer(@builtin(global_invocation_id) global_id: vec3<u32>) {
    // One invocation per texel; threads past the view edge do nothing.
    if any(global_id.xy >= view_size) { return; }
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
    textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0lu));
#else
    textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0u));
#endif
}

View File

@@ -0,0 +1,110 @@
#import bevy_pbr::meshlet_bindings::{
    InstancedOffset,
    get_aabb,
    get_aabb_error,
    get_aabb_child_offset,
    constants,
    meshlet_bvh_nodes,
    meshlet_bvh_cull_count_read,
    meshlet_bvh_cull_count_write,
    meshlet_bvh_cull_dispatch,
    meshlet_bvh_cull_queue,
    meshlet_meshlet_cull_count_early,
    meshlet_meshlet_cull_count_late,
    meshlet_meshlet_cull_dispatch_early,
    meshlet_meshlet_cull_dispatch_late,
    meshlet_meshlet_cull_queue,
    meshlet_second_pass_bvh_count,
    meshlet_second_pass_bvh_dispatch,
    meshlet_second_pass_bvh_queue,
}
#import bevy_pbr::meshlet_cull_shared::{
    lod_error_is_imperceptible,
    aabb_in_frustum,
    should_occlusion_cull_aabb,
}

@compute
@workgroup_size(128, 1, 1) // 8 threads per node, 16 nodes per workgroup
fn cull_bvh(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
    // Calculate the queue ID for this thread
    let dispatch_id = global_invocation_id.x;
    var node = dispatch_id >> 3u;
    let subnode = dispatch_id & 7u;
    if node >= meshlet_bvh_cull_count_read { return; }
    // The queue is consumed from the front or the back depending on the pass.
    node = select(node, constants.rightmost_slot - node, constants.read_from_front == 0u);

    let instanced_offset = meshlet_bvh_cull_queue[node];
    let instance_id = instanced_offset.instance_id;
    let bvh_node = &meshlet_bvh_nodes[instanced_offset.offset];

    var aabb_error_offset = (*bvh_node).aabbs[subnode];
    let aabb = get_aabb(&aabb_error_offset);
    let parent_error = get_aabb_error(&aabb_error_offset);
    let lod_sphere = (*bvh_node).lod_bounds[subnode];

    let parent_is_imperceptible = lod_error_is_imperceptible(lod_sphere, parent_error, instance_id);
    // Error and frustum cull, in both passes
    if parent_is_imperceptible || !aabb_in_frustum(aabb, instance_id) { return; }

    let child_offset = get_aabb_child_offset(&aabb_error_offset);
    // Child counts are packed four per u32; extract this subnode's byte.
    let index = subnode >> 2u;
    let bit_offset = subnode & 3u;
    let packed_child_count = (*bvh_node).child_counts[index];
    let child_count = extractBits(packed_child_count, bit_offset * 8u, 8u);
    var value = InstancedOffset(instance_id, child_offset);

    // If we pass, try occlusion culling
    // If this node was occluded, push its children to the second pass to check against this frame's HZB
    if should_occlusion_cull_aabb(aabb, instance_id) {
#ifdef MESHLET_FIRST_CULLING_PASS
        if child_count == 255u {
            let id = atomicAdd(&meshlet_second_pass_bvh_count, 1u);
            meshlet_second_pass_bvh_queue[id] = value;
            if ((id & 15u) == 0u) {
                atomicAdd(&meshlet_second_pass_bvh_dispatch.x, 1u);
            }
        } else {
            // Late-pass meshlets are written from the back of the shared
            // queue, walking downward.
            let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count);
            let start = constants.rightmost_slot - base;
            // FIX: this loop previously tested `i < start - child_count`,
            // which is false on the first iteration, so occluded groups were
            // never enqueued for the late pass. A descending loop must
            // continue while `i > start - child_count`.
            for (var i = start; i > start - child_count; i--) {
                meshlet_meshlet_cull_queue[i] = value;
                value.offset += 1u;
            }
            let req = (base + child_count + 127u) >> 7u;
            atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req);
        }
#endif
        return;
    }

    // If we pass, push the children to the next BVH cull
    if child_count == 255u {
        let id = atomicAdd(&meshlet_bvh_cull_count_write, 1u);
        let index = select(constants.rightmost_slot - id, id, constants.read_from_front == 0u);
        meshlet_bvh_cull_queue[index] = value;
        if ((id & 15u) == 0u) {
            atomicAdd(&meshlet_bvh_cull_dispatch.x, 1u);
        }
    } else {
#ifdef MESHLET_FIRST_CULLING_PASS
        let base = atomicAdd(&meshlet_meshlet_cull_count_early, child_count);
        let end = base + child_count;
        for (var i = base; i < end; i++) {
            meshlet_meshlet_cull_queue[i] = value;
            value.offset += 1u;
        }
        let req = (end + 127u) >> 7u;
        atomicMax(&meshlet_meshlet_cull_dispatch_early.x, req);
#else
        let base = atomicAdd(&meshlet_meshlet_cull_count_late, child_count);
        let start = constants.rightmost_slot - base;
        // FIX: same descending-loop condition as above (`>`, not `<`).
        for (var i = start; i > start - child_count; i--) {
            meshlet_meshlet_cull_queue[i] = value;
            value.offset += 1u;
        }
        let req = (base + child_count + 127u) >> 7u;
        atomicMax(&meshlet_meshlet_cull_dispatch_late.x, req);
#endif
    }
}

View File

@@ -0,0 +1,93 @@
#import bevy_pbr::meshlet_bindings::{
    InstancedOffset,
    get_aabb,
    get_aabb_error,
    constants,
    view,
    meshlet_instance_uniforms,
    meshlet_cull_data,
    meshlet_software_raster_indirect_args,
    meshlet_hardware_raster_indirect_args,
    meshlet_previous_raster_counts,
    meshlet_raster_clusters,
    meshlet_meshlet_cull_count_read,
    meshlet_meshlet_cull_count_write,
    meshlet_meshlet_cull_dispatch,
    meshlet_meshlet_cull_queue,
}
#import bevy_pbr::meshlet_cull_shared::{
    ScreenAabb,
    project_aabb,
    lod_error_is_imperceptible,
    aabb_in_frustum,
    should_occlusion_cull_aabb,
}
#import bevy_render::maths::affine3_to_square

@compute
@workgroup_size(128, 1, 1) // 1 cluster per thread
fn cull_clusters(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
    if global_invocation_id.x >= meshlet_meshlet_cull_count_read { return; }
    // The early pass reads from the front of the queue; the late pass reads
    // from the back.
#ifdef MESHLET_FIRST_CULLING_PASS
    let meshlet_id = global_invocation_id.x;
#else
    let meshlet_id = constants.rightmost_slot - global_invocation_id.x;
#endif
    let instanced_offset = meshlet_meshlet_cull_queue[meshlet_id];
    let instance_id = instanced_offset.instance_id;
    let cull_data = &meshlet_cull_data[instanced_offset.offset];

    var aabb_error_offset = (*cull_data).aabb;
    let aabb = get_aabb(&aabb_error_offset);
    let error = get_aabb_error(&aabb_error_offset);
    let lod_sphere = (*cull_data).lod_group_sphere;

    // A meshlet is rendered only when its own LOD error is imperceptible.
    let is_imperceptible = lod_error_is_imperceptible(lod_sphere, error, instance_id);
    // Error and frustum cull, in both passes
    if !is_imperceptible || !aabb_in_frustum(aabb, instance_id) { return; }

    // If we pass, try occlusion culling
    // If this node was occluded, push its children to the second pass to check against this frame's HZB
    if should_occlusion_cull_aabb(aabb, instance_id) {
#ifdef MESHLET_FIRST_CULLING_PASS
        // Defer the occluded cluster: write it to the back of the queue for
        // the late pass.
        let id = atomicAdd(&meshlet_meshlet_cull_count_write, 1u);
        let value = InstancedOffset(instance_id, instanced_offset.offset);
        meshlet_meshlet_cull_queue[constants.rightmost_slot - id] = value;
        if ((id & 127u) == 0) {
            atomicAdd(&meshlet_meshlet_cull_dispatch.x, 1u);
        }
#endif
        return;
    }

    // If we pass, rasterize the meshlet
    // Check how big the cluster is in screen space
    let world_from_local = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local);
    let clip_from_local = view.clip_from_world * world_from_local;
    let projection = view.clip_from_world;
    // projection[3][3] == 1.0 distinguishes an orthographic projection from a
    // perspective one; the near plane is recovered differently for each.
    var near: f32;
    if projection[3][3] == 1.0 {
        near = projection[3][2] / projection[2][2];
    } else {
        near = projection[3][2];
    }
    var screen_aabb = ScreenAabb(vec3<f32>(0.0), vec3<f32>(0.0));
    // Small on-screen clusters go to the software rasterizer; anything that
    // fails projection or exceeds 64x64 pixels goes to hardware raster.
    var sw_raster = project_aabb(clip_from_local, near, aabb, &screen_aabb);
    if sw_raster {
        let aabb_size = (screen_aabb.max.xy - screen_aabb.min.xy) * view.viewport.zw;
        sw_raster = all(aabb_size <= vec2<f32>(64.0));
    }
    var buffer_slot: u32;
    if sw_raster {
        // Append this cluster to the list for software rasterization
        buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u);
        buffer_slot += meshlet_previous_raster_counts[0];
    } else {
        // Append this cluster to the list for hardware rasterization
        // (hardware clusters fill the shared buffer from the back).
        buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u);
        buffer_slot += meshlet_previous_raster_counts[1];
        buffer_slot = constants.rightmost_slot - buffer_slot;
    }
    meshlet_raster_clusters[buffer_slot] = InstancedOffset(instance_id, instanced_offset.offset);
}

Some files were not shown because too many files have changed in this diff Show More