Vendor Bevy rendering crates (Phase 1 complete)

Closes #6, #7, #8, #9, #10 Refs #2, #122 Vendored bevy_render, bevy_core_pipeline, and bevy_pbr from Bevy v0.17.2 (commit 566358363126dd69f6e457e47f306c68f8041d2a) into libmarathon. - ~51K LOC vendored to crates/libmarathon/src/render/ - Merged bevy_render_macros into crates/macros/ - Fixed 773→0 compilation errors - Updated dependencies (encase 0.10→0.11, added 4 new deps) - Removed bevy_render/pbr/core_pipeline from app Cargo features All builds passing, macOS smoke test successful. Signed-off-by: Sienna Meridian Satterwhite <sienna@r3t.io>
2025-12-23 23:50:49 +00:00
parent 7b8fed178e
commit f3f8094530
265 changed files with 83142 additions and 643 deletions
--- a/crates/libmarathon/src/render/render_resource/batched_uniform_buffer.rs
+++ b/crates/libmarathon/src/render/render_resource/batched_uniform_buffer.rs
@@ -0,0 +1,157 @@
+use super::{GpuArrayBufferIndex, GpuArrayBufferable};
+use crate::render::{
+    render_resource::DynamicUniformBuffer,
+    renderer::{RenderDevice, RenderQueue},
+};
+use core::{marker::PhantomData, num::NonZero};
+use encase::{
+    private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
+    ShaderType,
+};
+use nonmax::NonMaxU32;
+use wgpu::{BindingResource, Limits};
+
+// Cap the binding size at 1 MiB (`1 << 20`). macOS reports a very large
+// `max_uniform_buffer_binding_size`, and without the cap that value would become
+// both the minimum size of the uniform buffer and the size of each chunk of data
+// at a dynamic offset. Used on every target except WebGL2-only wasm32 builds.
+#[cfg(any(
+    not(feature = "webgl"),
+    not(target_arch = "wasm32"),
+    feature = "webgpu"
+))]
+const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 20;
+
+// WebGL2 quirk: uniform buffers larger than 4 KiB (`1 << 12`) cause extremely
+// long shader compilation times, so the cap needs to be lower on WebGL2.
+// This is due to older shader compilers/GPUs that don't support dynamically
+// indexing uniform buffers, and instead emulate it with large switch statements
+// over buffer indices that take a long time to compile.
+#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
+const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 12;
+
+/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size)
+/// are grouped into a batch as an `array<T, N>` in WGSL.
+///
+/// This reduces the number of rebindings required due to having to pass dynamic
+/// offsets to bind group commands, and if indices into the array can be passed
+/// in via other means, it enables batching of draw commands.
+pub struct BatchedUniformBuffer<T: GpuArrayBufferable> {
+    // Each batch is written to this buffer as one fixed-size array of T, so
+    // that every batch can be bound at its own dynamic offset.
+    uniforms: DynamicUniformBuffer<MaxCapacityArray<Vec<T>>>,
+    // Staging area: T values are gathered into this `MaxCapacityArray` until it
+    // is full, then it is written into the `DynamicUniformBuffer`, cleared, and
+    // reused for the next batch.
+    temp: MaxCapacityArray<Vec<T>>,
+    current_offset: u32, // dynamic offset handed out for the batch currently being filled
+    dynamic_offset_alignment: u32, // copied from `Limits::min_uniform_buffer_offset_alignment`
+}
+
+impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
+    /// Number of `T` elements per batch under the capped binding-size limit.
+    pub fn batch_size(limits: &Limits) -> usize {
+        let limit = limits.max_uniform_buffer_binding_size;
+        let bytes = u64::from(limit.min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE));
+        (bytes / T::min_size().get()) as usize
+    }
+
+    /// Creates an empty buffer whose batch capacity and alignment come from `limits`.
+    pub fn new(limits: &Limits) -> Self {
+        let batch_capacity = Self::batch_size(limits);
+        let offset_alignment = limits.min_uniform_buffer_offset_alignment;
+        Self {
+            uniforms: DynamicUniformBuffer::new_with_alignment(u64::from(offset_alignment)),
+            temp: MaxCapacityArray(Vec::with_capacity(batch_capacity), batch_capacity),
+            current_offset: 0,
+            dynamic_offset_alignment: offset_alignment,
+        }
+    }
+
+    #[inline]
+    pub fn size(&self) -> NonZero<u64> {
+        ShaderType::size(&self.temp) // size of the staging array, as bound per batch
+    }
+
+    pub fn clear(&mut self) {
+        self.temp.0.clear();
+        self.current_offset = 0;
+        self.uniforms.clear();
+    }
+
+    pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
+        let index = self.temp.0.len() as u32;
+        let dynamic_offset = NonMaxU32::new(self.current_offset);
+        self.temp.0.push(component);
+        if self.temp.0.len() == self.temp.1 {
+            self.flush();
+        }
+        GpuArrayBufferIndex {
+            index,
+            dynamic_offset,
+            element_type: PhantomData,
+        }
+    }
+
+    pub fn flush(&mut self) {
+        self.uniforms.push(&self.temp);
+        let batch_bytes = self.temp.size().get();
+        let alignment = u64::from(self.dynamic_offset_alignment);
+        self.current_offset += align_to_next(batch_bytes, alignment) as u32;
+        self.temp.0.clear();
+    }
+
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        if !self.temp.0.is_empty() {
+            self.flush(); // stage the trailing, partially filled batch too
+        }
+        self.uniforms.write_buffer(device, queue);
+    }
+
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        let mut resource = self.uniforms.binding();
+        // MaxCapacityArray is runtime-sized so can't use T::min_size()
+        if let Some(BindingResource::Buffer(buffer)) = resource.as_mut() {
+            buffer.size = Some(self.size());
+        }
+        resource
+    }
+}
+
+#[inline] // Rounds `value` up to the nearest multiple of `alignment` (a power of two).
+fn align_to_next(value: u64, alignment: u64) -> u64 {
+    debug_assert!(alignment.is_power_of_two());
+    value.next_multiple_of(alignment) // maps 0 to 0; `(value - 1) | ..` underflowed there
+}
+
+// ----------------------------------------------------------------------------
+// MaxCapacityArray was implemented by Teodor Tanasoaia for encase. It was
+// copied here because it was not yet included in an encase release, and it is
+// unclear whether it is the correct long-term solution for encase.
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
+struct MaxCapacityArray<T>(T, usize); // (wrapped array, maximum number of elements)
+
+impl<T> ShaderType for MaxCapacityArray<T>
+where
+    T: ShaderType<ExtraMetadata = ArrayMetadata>,
+{
+    type ExtraMetadata = ArrayMetadata;
+
+    const METADATA: Metadata<Self::ExtraMetadata> = T::METADATA; // reuse the wrapped array's metadata
+
+    fn size(&self) -> NonZero<u64> {
+        Self::METADATA.stride().mul(self.1.max(1) as u64).0 // stride x max(capacity, 1); ignores len
+    }
+}
+
+impl<T> WriteInto for MaxCapacityArray<T>
+where
+    T: WriteInto + RuntimeSizedArray,
+{
+    fn write_into<B: BufferMut>(&self, writer: &mut Writer<B>) {
+        debug_assert!(self.0.len() <= self.1); // contents must not exceed the declared capacity
+        self.0.write_into(writer); // delegate to the wrapped array's own writer
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/bind_group.rs
+++ b/crates/libmarathon/src/render/render_resource/bind_group.rs
@@ -0,0 +1,725 @@
+use crate::render::{
+    define_atomic_id,
+    render_asset::RenderAssets,
+    render_resource::{BindGroupLayout, Buffer, Sampler, TextureView},
+    renderer::{RenderDevice, WgpuWrapper},
+    texture::GpuImage,
+};
+use bevy_derive::{Deref, DerefMut};
+use bevy_ecs::system::{SystemParam, SystemParamItem};
+pub use macros::AsBindGroup;
+use core::ops::Deref;
+use encase::ShaderType;
+use thiserror::Error;
+use wgpu::{
+    BindGroupEntry, BindGroupLayoutEntry, BindingResource, SamplerBindingType, TextureViewDimension,
+};
+
+use super::{BindlessDescriptor, BindlessSlabResourceLimit};
+
+define_atomic_id!(BindGroupId); // generates the `BindGroupId` type stored in `BindGroup::id`
+
+/// Bind groups are responsible for binding render resources (e.g. buffers, textures, samplers)
+/// to a [`TrackedRenderPass`](crate::render::render_phase::TrackedRenderPass).
+/// This makes them accessible in the pipeline (shaders) as uniforms.
+///
+/// This is a lightweight thread-safe wrapper around wgpu's own [`BindGroup`](wgpu::BindGroup),
+/// which can be cloned as needed to work around lifetime management issues. It may be converted
+/// from and dereferences to wgpu's [`BindGroup`](wgpu::BindGroup).
+///
+/// Can be created via [`RenderDevice::create_bind_group`](RenderDevice::create_bind_group).
+#[derive(Clone, Debug)]
+pub struct BindGroup {
+    id: BindGroupId, // set once in `From<wgpu::BindGroup>`; drives `PartialEq` and `Hash`
+    value: WgpuWrapper<wgpu::BindGroup>, // the wrapped wgpu bind group exposed through `Deref`
+}
+
+impl BindGroup {
+    /// Returns the [`BindGroupId`] identifying this bind group; it is copied out of the wrapper.
+    #[inline]
+    pub fn id(&self) -> BindGroupId {
+        self.id
+    }
+}
+
+impl PartialEq for BindGroup {
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id // compares IDs only; the wrapped wgpu object is not inspected
+    }
+}
+
+impl Eq for BindGroup {} // marker impl: equality is the ID comparison in `PartialEq`
+
+impl core::hash::Hash for BindGroup {
+    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
+        core::hash::Hash::hash(&self.id.0, state); // hash the raw ID only, matching `eq`
+    }
+}
+
+impl From<wgpu::BindGroup> for BindGroup {
+    fn from(value: wgpu::BindGroup) -> Self {
+        // Each conversion calls `BindGroupId::new()` to obtain the wrapper's ID.
+        let id = BindGroupId::new();
+        let value = WgpuWrapper::new(value);
+        Self { id, value }
+    }
+}
+
+impl<'a> From<&'a BindGroup> for Option<&'a wgpu::BindGroup> {
+    fn from(value: &'a BindGroup) -> Self {
+        Some(Deref::deref(value)) // always `Some`: borrows the wrapped wgpu bind group
+    }
+}
+
+impl<'a> From<&'a mut BindGroup> for Option<&'a wgpu::BindGroup> {
+    fn from(value: &'a mut BindGroup) -> Self {
+        Some(&**value) // reborrow as shared, then deref to the wrapped wgpu bind group
+    }
+}
+
+impl Deref for BindGroup {
+    type Target = wgpu::BindGroup;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value // deref-coerces through `WgpuWrapper` to the inner `wgpu::BindGroup`
+    }
+}
+
+/// Converts a value to a [`BindGroup`] with a given [`BindGroupLayout`], which can then be used in Bevy shaders.
+/// This trait can be derived (and generally should be). Read on for details and examples.
+///
+/// This is an opinionated trait that is intended to make it easy to generically
+/// convert a type into a [`BindGroup`]. It provides access to specific render resources,
+/// such as [`RenderAssets<GpuImage>`] and [`crate::render::texture::FallbackImage`]. If a type has a [`Handle<Image>`](bevy_asset::Handle),
+/// these can be used to retrieve the corresponding [`Texture`](crate::render::render_resource::Texture) resource.
+///
+/// [`AsBindGroup::as_bind_group`] is intended to be called once, then the result cached somewhere. It is generally
+/// ok to do "expensive" work here, such as creating a [`Buffer`] for a uniform.
+///
+/// If for some reason a [`BindGroup`] cannot be created yet (for example, the [`Texture`](crate::render::render_resource::Texture)
+/// for an [`Image`](bevy_image::Image) hasn't loaded yet), just return [`AsBindGroupError::RetryNextUpdate`], which signals that the caller
+/// should retry again later.
+///
+/// # Deriving
+///
+/// This trait can be derived. Field attributes like `uniform` and `texture` are used to define which fields should be bindings,
+/// what their binding type is, and what index they should be bound at:
+///
+/// ```
+/// # use crate::render::render_resource::*;
+/// # use bevy_image::Image;
+/// # use bevy_color::LinearRgba;
+/// # use bevy_asset::Handle;
+/// # use crate::render::storage::ShaderStorageBuffer;
+///
+/// #[derive(AsBindGroup)]
+/// struct CoolMaterial {
+///     #[uniform(0)]
+///     color: LinearRgba,
+///     #[texture(1)]
+///     #[sampler(2)]
+///     color_texture: Handle<Image>,
+///     #[storage(3, read_only)]
+///     storage_buffer: Handle<ShaderStorageBuffer>,
+///     #[storage(4, read_only, buffer)]
+///     raw_buffer: Buffer,
+///     #[storage_texture(5)]
+///     storage_texture: Handle<Image>,
+/// }
+/// ```
+///
+/// In WGSL shaders, the binding would look like this:
+///
+/// ```wgsl
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var<uniform> color: vec4<f32>;
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(1) var color_texture: texture_2d<f32>;
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(2) var color_sampler: sampler;
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(3) var<storage> storage_buffer: array<f32>;
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(4) var<storage> raw_buffer: array<f32>;
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(5) var storage_texture: texture_storage_2d<rgba8unorm, read_write>;
+/// ```
+/// Note that the "group" index is determined by the usage context. It is not defined in [`AsBindGroup`]. For example, in Bevy material bind groups
+/// are generally bound to group 2.
+///
+/// The following field-level attributes are supported:
+///
+/// ## `uniform(BINDING_INDEX)`
+///
+///  * The field will be converted to a shader-compatible type using the [`ShaderType`] trait, written to a [`Buffer`], and bound as a uniform.
+///    [`ShaderType`] is implemented for most math types already, such as [`f32`], [`Vec4`](bevy_math::Vec4), and
+///    [`LinearRgba`](bevy_color::LinearRgba). It can also be derived for custom structs.
+///
+/// ## `texture(BINDING_INDEX, arguments)`
+///
+///  * This field's [`Handle<Image>`](bevy_asset::Handle) will be used to look up the matching [`Texture`](crate::render::render_resource::Texture)
+///    GPU resource, which will be bound as a texture in shaders. The field will be assumed to implement [`Into<Option<Handle<Image>>>`]. In practice,
+///    most fields should be a [`Handle<Image>`](bevy_asset::Handle) or [`Option<Handle<Image>>`]. If the value of an [`Option<Handle<Image>>`] is
+///    [`None`], the [`crate::render::texture::FallbackImage`] resource will be used instead. This attribute can be used in conjunction with a `sampler` binding attribute
+///    (with a different binding index) if a binding of the sampler for the [`Image`](bevy_image::Image) is also required.
+///
+/// | Arguments             | Values                                                                  | Default              |
+/// |-----------------------|-------------------------------------------------------------------------|----------------------|
+/// | `dimension` = "..."   | `"1d"`, `"2d"`, `"2d_array"`, `"3d"`, `"cube"`, `"cube_array"`          | `"2d"`               |
+/// | `sample_type` = "..." | `"float"`, `"depth"`, `"s_int"` or `"u_int"`                            | `"float"`            |
+/// | `filterable` = ...    | `true`, `false`                                                         | `true`               |
+/// | `multisampled` = ...  | `true`, `false`                                                         | `false`              |
+/// | `visibility(...)`     | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `vertex`, `fragment` |
+///
+/// ## `storage_texture(BINDING_INDEX, arguments)`
+///
+/// * This field's [`Handle<Image>`](bevy_asset::Handle) will be used to look up the matching [`Texture`](crate::render::render_resource::Texture)
+///   GPU resource, which will be bound as a storage texture in shaders. The field will be assumed to implement [`Into<Option<Handle<Image>>>`]. In practice,
+///   most fields should be a [`Handle<Image>`](bevy_asset::Handle) or [`Option<Handle<Image>>`]. If the value of an [`Option<Handle<Image>>`] is
+///   [`None`], the [`crate::render::texture::FallbackImage`] resource will be used instead.
+///
+/// | Arguments              | Values                                                                                     | Default       |
+/// |------------------------|--------------------------------------------------------------------------------------------|---------------|
+/// | `dimension` = "..."    | `"1d"`, `"2d"`, `"2d_array"`, `"3d"`, `"cube"`, `"cube_array"`                             | `"2d"`        |
+/// | `image_format` = ...   | any member of [`TextureFormat`](crate::render::render_resource::TextureFormat)             | `Rgba8Unorm`  |
+/// | `access` = ...         | any member of [`StorageTextureAccess`](crate::render::render_resource::StorageTextureAccess) | `ReadWrite`   |
+/// | `visibility(...)`      | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute`                    | `compute`     |
+///
+/// ## `sampler(BINDING_INDEX, arguments)`
+///
+/// * This field's [`Handle<Image>`](bevy_asset::Handle) will be used to look up the matching [`Sampler`] GPU
+///   resource, which will be bound as a sampler in shaders. The field will be assumed to implement [`Into<Option<Handle<Image>>>`]. In practice,
+///   most fields should be a [`Handle<Image>`](bevy_asset::Handle) or [`Option<Handle<Image>>`]. If the value of an [`Option<Handle<Image>>`] is
+///   [`None`], the [`crate::render::texture::FallbackImage`] resource will be used instead. This attribute can be used in conjunction with a `texture` binding attribute
+///   (with a different binding index) if a binding of the texture for the [`Image`](bevy_image::Image) is also required.
+///
+/// | Arguments              | Values                                                                  | Default                |
+/// |------------------------|-------------------------------------------------------------------------|------------------------|
+/// | `sampler_type` = "..." | `"filtering"`, `"non_filtering"`, `"comparison"`.                       |  `"filtering"`         |
+/// | `visibility(...)`      | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` |   `vertex`, `fragment` |
+///
+/// ## `storage(BINDING_INDEX, arguments)`
+///
+/// * The field's [`Handle<ShaderStorageBuffer>`](bevy_asset::Handle) will be used to
+///   look up the matching [`Buffer`] GPU resource, which will be bound as a storage
+///   buffer in shaders. If the `buffer` argument is used, the field is expected
+///   to be a raw buffer, and that buffer will be bound as a storage buffer in
+///   shaders. In bindless mode, a `binding_array()` argument that specifies the
+///   binding number of the resulting storage buffer binding array must be present.
+///
+/// | Arguments              | Values                                                                  | Default                |
+/// |------------------------|-------------------------------------------------------------------------|------------------------|
+/// | `visibility(...)`      | `all`, `none`, or a list-combination of `vertex`, `fragment`, `compute` | `vertex`, `fragment`   |
+/// | `read_only`            | if present then value is true, otherwise false                          | `false`                |
+/// | `buffer`               | if present then the field will be assumed to be a raw wgpu buffer       |                        |
+/// | `binding_array(...)`   | the binding number of the binding array, for bindless mode              | bindless mode disabled |
+///
+/// Note that fields without field-level binding attributes will be ignored.
+/// ```
+/// # use crate::render::{render_resource::AsBindGroup};
+/// # use bevy_color::LinearRgba;
+/// # use bevy_asset::Handle;
+/// #[derive(AsBindGroup)]
+/// struct CoolMaterial {
+///     #[uniform(0)]
+///     color: LinearRgba,
+///     this_field_is_ignored: String,
+/// }
+/// ```
+///
+///  As mentioned above, [`Option<Handle<Image>>`] is also supported:
+/// ```
+/// # use bevy_asset::Handle;
+/// # use bevy_color::LinearRgba;
+/// # use bevy_image::Image;
+/// # use crate::render::render_resource::AsBindGroup;
+/// #[derive(AsBindGroup)]
+/// struct CoolMaterial {
+///     #[uniform(0)]
+///     color: LinearRgba,
+///     #[texture(1)]
+///     #[sampler(2)]
+///     color_texture: Option<Handle<Image>>,
+/// }
+/// ```
+/// This is useful if you want a texture to be optional. When the value is [`None`], the [`crate::render::texture::FallbackImage`] will be used for the binding instead, which defaults
+/// to "pure white".
+///
+/// Field uniforms with the same index will be combined into a single binding:
+/// ```
+/// # use crate::render::{render_resource::AsBindGroup};
+/// # use bevy_color::LinearRgba;
+/// #[derive(AsBindGroup)]
+/// struct CoolMaterial {
+///     #[uniform(0)]
+///     color: LinearRgba,
+///     #[uniform(0)]
+///     roughness: f32,
+/// }
+/// ```
+///
+/// In WGSL shaders, the binding would look like this:
+/// ```wgsl
+/// struct CoolMaterial {
+///     color: vec4<f32>,
+///     roughness: f32,
+/// };
+///
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var<uniform> material: CoolMaterial;
+/// ```
+///
+/// Some less common scenarios will require "struct-level" attributes. These are the currently supported struct-level attributes:
+/// ## `uniform(BINDING_INDEX, ConvertedShaderType)`
+///
+/// * This also creates a [`Buffer`] using [`ShaderType`] and binds it as a
+///   uniform, much like the field-level `uniform` attribute. The difference is
+///   that the entire [`AsBindGroup`] value is converted to `ConvertedShaderType`,
+///   which must implement [`ShaderType`], instead of a specific field
+///   implementing [`ShaderType`]. This is useful if more complicated conversion
+///   logic is required, or when using bindless mode (see below). The conversion
+///   is done using the [`AsBindGroupShaderType<ConvertedShaderType>`] trait,
+///   which is automatically implemented if `&Self` implements
+///   [`Into<ConvertedShaderType>`]. Outside of bindless mode, only use
+///   [`AsBindGroupShaderType`] if access to resources like
+///   [`RenderAssets<GpuImage>`] is required.
+///
+/// * In bindless mode (see `bindless(COUNT)`), this attribute becomes
+///   `uniform(BINDLESS_INDEX, ConvertedShaderType,
+///   binding_array(BINDING_INDEX))`. The resulting uniform buffers will be
+///   available in the shader as a binding array at the given `BINDING_INDEX`. The
+///   `BINDLESS_INDEX` specifies the offset of the buffer in the bindless index
+///   table.
+///
+///   For example, suppose that the material slot is stored in a variable named
+///   `slot`, the bindless index table is named `material_indices`, and that the
+///   first field (index 0) of the bindless index table type is named
+///   `material`. Then specifying `#[uniform(0, StandardMaterialUniform,
+///   binding_array(10))]` will create a binding array buffer declared in the
+///   shader as `var<storage> material_array:
+///   binding_array<StandardMaterialUniform>` and accessible as
+///   `material_array[material_indices[slot].material]`.
+///
+/// ## `data(BINDING_INDEX, ConvertedShaderType, binding_array(BINDING_INDEX))`
+///
+/// * This is very similar to `uniform(BINDING_INDEX, ConvertedShaderType,
+///   binding_array(BINDING_INDEX))` and in fact is identical if bindless mode
+///   isn't being used. The difference is that, in bindless mode, the `data`
+///   attribute produces a single buffer containing an array, not an array of
+///   buffers. For example, suppose you had the following declaration:
+///
+/// ```ignore
+/// #[uniform(0, StandardMaterialUniform, binding_array(10))]
+/// struct StandardMaterial { ... }
+/// ```
+///
+/// In bindless mode, this will produce a binding matching the following WGSL
+/// declaration:
+///
+/// ```wgsl
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(10) var<storage> material_array: binding_array<StandardMaterial>;
+/// ```
+///
+/// On the other hand, if you write this declaration:
+///
+/// ```ignore
+/// #[data(0, StandardMaterialUniform, binding_array(10))]
+/// struct StandardMaterial { ... }
+/// ```
+///
+/// Then Bevy produces a binding that matches this WGSL declaration instead:
+///
+/// ```wgsl
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(10) var<storage> material_array: array<StandardMaterial>;
+/// ```
+///
+/// * Just as with the structure-level `uniform` attribute, Bevy converts the
+///   entire [`AsBindGroup`] to `ConvertedShaderType`, using the
+///   [`AsBindGroupShaderType<ConvertedShaderType>`] trait.
+///
+/// * In non-bindless mode, the structure-level `data` attribute is the same as
+///   the structure-level `uniform` attribute and produces a single uniform buffer
+///   in the shader. The above example would result in a binding that looks like
+///   this in WGSL in non-bindless mode:
+///
+/// ```wgsl
+/// @group(#{MATERIAL_BIND_GROUP}) @binding(0) var<uniform> material: StandardMaterial;
+/// ```
+///
+/// * For efficiency reasons, `data` is generally preferred over `uniform`
+///   unless you need to place your data in individual buffers.
+///
+/// ## `bind_group_data(DataType)`
+///
+/// * The [`AsBindGroup`] type will be converted to some `DataType` using [`Into<DataType>`] and stored
+///   as [`AsBindGroup::Data`] as part of the [`AsBindGroup::as_bind_group`] call. This is useful if data needs to be stored alongside
+///   the generated bind group, such as a unique identifier for a material's bind group. The most common use case for this attribute
+///   is "shader pipeline specialization". See [`SpecializedRenderPipeline`](crate::render::render_resource::SpecializedRenderPipeline).
+///
+/// ## `bindless`
+///
+/// * This switch enables *bindless resources*, which changes the way Bevy
+///   supplies resources (textures, and samplers) to the shader.  When bindless
+///   resources are enabled, and the current platform supports them, Bevy will
+///   allocate textures, and samplers into *binding arrays*, separated based on
+///   type and will supply your shader with indices into those arrays.
+/// * Bindless textures and samplers are placed into the appropriate global
+///   array defined in `bevy_render::bindless` (`bindless.wgsl`).
+/// * Bevy doesn't currently support bindless buffers, except for those created
+///   with the `uniform(BINDLESS_INDEX, ConvertedShaderType,
+///   binding_array(BINDING_INDEX))` attribute. If you need to include a buffer in
+///   your object, and you can't create the data in that buffer with the `uniform`
+///   attribute, consider a non-bindless object instead.
+/// * If bindless mode is enabled, the `BINDLESS` definition will be
+///   available. Because not all platforms support bindless resources, you
+///   should check for the presence of this definition via `#ifdef` and fall
+///   back to standard bindings if it isn't present.
+/// * By default, in bindless mode, binding 0 becomes the *bindless index
+///   table*, which is an array of structures, each of which contains as many
+///   fields of type `u32` as the highest binding number in the structure
+///   annotated with `#[derive(AsBindGroup)]`. Again by default, the *i*th field
+///   of the bindless index table contains the index of the resource with binding
+///   *i* within the appropriate binding array.
+/// * In the case of materials, the index of the applicable table within the
+///   bindless index table list corresponding to the mesh currently being drawn
+///   can be retrieved with
+///   `mesh[in.instance_index].material_and_lightmap_bind_group_slot & 0xffffu`.
+/// * You can limit the size of the bindless slabs to N resources with the
+///   `limit(N)` declaration. For example, `#[bindless(limit(16))]` ensures that
+///   each slab will have no more than 16 total resources in it. If you don't
+///   specify a limit, Bevy automatically picks a reasonable one for the current
+///   platform.
+/// * The `index_table(range(M..N), binding(B))` declaration allows you to
+///   customize the layout of the bindless index table. This is useful for
+///   materials that are composed of multiple bind groups, such as
+///   `ExtendedMaterial`. In such cases, there will be multiple bindless index
+///   tables, so they can't both be assigned to binding 0 or their bindings will
+///   conflict.
+///   - The `binding(B)` attribute of the `index_table` attribute allows you to
+///     customize the binding (`@binding(B)`, in the shader) at which the index
+///     table will be bound.
+///   - The `range(M..N)` attribute of the `index_table` attribute allows you to
+///     change the mapping from the field index in the bindless index table to the
+///     bindless index. Instead of the field at index *i* being mapped to the
+///     bindless index *i*, with the `range(M..N)` attribute the field at index
+///     *i* in the bindless index table is mapped to the bindless index *i* + M.
+///     The size of the index table will be set to N - M. Note that this may
+///     result in the table being too small to contain all the bindless bindings.
+/// * The purpose of bindless mode is to improve performance by reducing
+///   state changes. By grouping resources together into binding arrays, Bevy
+///   doesn't have to modify GPU state as often, decreasing API and driver
+///   overhead.
+/// * See the `shaders/shader_material_bindless` example for an example of how
+///   to use bindless mode. See the `shaders/extended_material_bindless` example
+///   for a more exotic example of bindless mode that demonstrates the
+///   `index_table` attribute.
+/// * The following diagram illustrates how bindless mode works using a subset
+///   of `StandardMaterial`:
+///
+/// ```text
+///      Shader Bindings                          Sampler Binding Array
+///     +----+-----------------------------+     +-----------+-----------+-----+
+/// +---|  0 | material_indices            |  +->| sampler 0 | sampler 1 | ... |
+/// |   +----+-----------------------------+  |  +-----------+-----------+-----+
+/// |   |  1 | bindless_samplers_filtering +--+        ^
+/// |   +----+-----------------------------+           +-------------------------------+
+/// |   | .. |            ...              |                                           |
+/// |   +----+-----------------------------+      Texture Binding Array                |
+/// |   |  5 | bindless_textures_2d        +--+  +-----------+-----------+-----+       |
+/// |   +----+-----------------------------+  +->| texture 0 | texture 1 | ... |       |
+/// |   | .. |            ...              |     +-----------+-----------+-----+       |
+/// |   +----+-----------------------------+           ^                               |
+/// |   + 10 | material_array              +--+        +---------------------------+   |
+/// |   +----+-----------------------------+  |                                    |   |
+/// |                                         |   Buffer Binding Array             |   |
+/// |                                         |  +----------+----------+-----+     |   |
+/// |                                         +->| buffer 0 | buffer 1 | ... |     |   |
+/// |    Material Bindless Indices               +----------+----------+-----+     |   |
+/// |   +----+-----------------------------+          ^                            |   |
+/// +-->|  0 | material                    +----------+                            |   |
+///     +----+-----------------------------+                                       |   |
+///     |  1 | base_color_texture          +---------------------------------------+   |
+///     +----+-----------------------------+                                           |
+///     |  2 | base_color_sampler          +-------------------------------------------+
+///     +----+-----------------------------+
+///     | .. |            ...              |
+///     +----+-----------------------------+
+/// ```
+///
+/// The previous `CoolMaterial` example illustrating "combining multiple field-level uniform attributes with the same binding index" can
+/// also be equivalently represented with a single struct-level uniform attribute:
+/// ```
+/// # use crate::render::{render_resource::{AsBindGroup, ShaderType}};
+/// # use bevy_color::LinearRgba;
+/// #[derive(AsBindGroup)]
+/// #[uniform(0, CoolMaterialUniform)]
+/// struct CoolMaterial {
+///     color: LinearRgba,
+///     roughness: f32,
+/// }
+///
+/// #[derive(ShaderType)]
+/// struct CoolMaterialUniform {
+///     color: LinearRgba,
+///     roughness: f32,
+/// }
+///
+/// impl From<&CoolMaterial> for CoolMaterialUniform {
+///     fn from(material: &CoolMaterial) -> CoolMaterialUniform {
+///         CoolMaterialUniform {
+///             color: material.color,
+///             roughness: material.roughness,
+///         }
+///     }
+/// }
+/// ```
+///
+/// Setting `bind_group_data` looks like this:
+/// ```
+/// # use crate::render::{render_resource::AsBindGroup};
+/// # use bevy_color::LinearRgba;
+/// #[derive(AsBindGroup)]
+/// #[bind_group_data(CoolMaterialKey)]
+/// struct CoolMaterial {
+///     #[uniform(0)]
+///     color: LinearRgba,
+///     is_shaded: bool,
+/// }
+///
+/// // Material keys are intended to be small, cheap to hash, and
+/// // uniquely identify a specific material permutation.
+/// #[repr(C)]
+/// #[derive(Copy, Clone, Hash, Eq, PartialEq)]
+/// struct CoolMaterialKey {
+///     is_shaded: bool,
+/// }
+///
+/// impl From<&CoolMaterial> for CoolMaterialKey {
+///     fn from(material: &CoolMaterial) -> CoolMaterialKey {
+///         CoolMaterialKey {
+///             is_shaded: material.is_shaded,
+///         }
+///     }
+/// }
+/// ```
+pub trait AsBindGroup {
+    /// Extra data kept next to the "prepared" bind group.
+    type Data: Send + Sync;
+
+    /// System parameter whose item is handed to [`AsBindGroup::as_bind_group`]
+    /// and [`AsBindGroup::unprepared_bind_group`].
+    type Param: SystemParam + 'static;
+
+    /// How many slots each bind group has when bindless mode is enabled.
+    ///
+    /// Returns `None` (the default) when this bind group doesn't use bindless.
+    ///
+    /// The *actual* slot count can differ from this value because of platform
+    /// limitations. For example, when bindless resources aren't supported on
+    /// the platform, the actual slot count will be 1.
+    fn bindless_slot_count() -> Option<BindlessSlabResourceLimit> {
+        None
+    }
+
+    /// Whether the hardware *actually* supports bindless textures for this
+    /// type, with device and driver capabilities taken into account.
+    ///
+    /// The return value is meaningless for types that don't use bindless
+    /// textures. The default implementation returns `true`.
+    fn bindless_supported(_: &RenderDevice) -> bool {
+        true
+    }
+
+    /// Optional label passed to the render device by the default
+    /// implementations of [`AsBindGroup::as_bind_group`] and
+    /// [`AsBindGroup::bind_group_layout`]. Defaults to `None`.
+    fn label() -> Option<&'static str> {
+        None
+    }
+
+    /// Builds a bind group for `self` that matches the layout defined in
+    /// [`AsBindGroup::bind_group_layout`].
+    ///
+    /// The default implementation collects the resources from
+    /// [`AsBindGroup::unprepared_bind_group`] (without forcing bindless off)
+    /// and asks `render_device` to create the bind group from them.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any [`AsBindGroupError`] returned by
+    /// [`AsBindGroup::unprepared_bind_group`].
+    fn as_bind_group(
+        &self,
+        layout: &BindGroupLayout,
+        render_device: &RenderDevice,
+        param: &mut SystemParamItem<'_, '_, Self::Param>,
+    ) -> Result<PreparedBindGroup, AsBindGroupError> {
+        let unprepared = Self::unprepared_bind_group(self, layout, render_device, param, false)?;
+        let bindings = unprepared.bindings;
+
+        // One wgpu entry per owned resource, borrowing from `bindings`.
+        let mut entries = Vec::with_capacity(bindings.len());
+        for (index, binding) in bindings.iter() {
+            entries.push(BindGroupEntry {
+                binding: *index,
+                resource: binding.get_binding(),
+            });
+        }
+
+        let bind_group = render_device.create_bind_group(Self::label(), layout, &entries);
+
+        Ok(PreparedBindGroup {
+            bindings,
+            bind_group,
+        })
+    }
+
+    /// Produces the [`AsBindGroup::Data`] value for `self`.
+    fn bind_group_data(&self) -> Self::Data;
+
+    /// Returns the (binding index, `OwnedBindingResource`) pairs for `self`.
+    ///
+    /// When an `OwnedBindingResource` is not available (as is currently the
+    /// case for bindless texture arrays), an implementor may return
+    /// `AsBindGroupError::CreateBindGroupDirectly` here and define
+    /// `as_bind_group` directly instead. Doing so may prevent certain
+    /// features, such as bindless mode, from working correctly.
+    ///
+    /// Pass `force_no_bindless = true` to require that bindless textures
+    /// *not* be used. `ExtendedMaterial` does this to make sure the base
+    /// material doesn't use bindless mode if the extension doesn't.
+    fn unprepared_bind_group(
+        &self,
+        layout: &BindGroupLayout,
+        render_device: &RenderDevice,
+        param: &mut SystemParamItem<'_, '_, Self::Param>,
+        force_no_bindless: bool,
+    ) -> Result<UnpreparedBindGroup, AsBindGroupError>;
+
+    /// Creates the bind group layout that matches every bind group returned
+    /// by [`AsBindGroup::as_bind_group`].
+    ///
+    /// The default implementation uses [`AsBindGroup::label`] and the entries
+    /// from [`AsBindGroup::bind_group_layout_entries`] with
+    /// `force_no_bindless` set to `false`.
+    fn bind_group_layout(render_device: &RenderDevice) -> BindGroupLayout
+    where
+        Self: Sized,
+    {
+        let entries = Self::bind_group_layout_entries(render_device, false);
+        render_device.create_bind_group_layout(Self::label(), &entries)
+    }
+
+    /// Returns the bind group layout entries for this type.
+    ///
+    /// Pass `force_no_bindless = true` to require that bindless textures
+    /// *not* be used. `ExtendedMaterial` does this to make sure the base
+    /// material doesn't use bindless mode if the extension doesn't.
+    fn bind_group_layout_entries(
+        render_device: &RenderDevice,
+        force_no_bindless: bool,
+    ) -> Vec<BindGroupLayoutEntry>
+    where
+        Self: Sized;
+
+    /// Returns the [`BindlessDescriptor`] for this type, if it has one.
+    /// The default implementation returns `None`.
+    fn bindless_descriptor() -> Option<BindlessDescriptor> {
+        None
+    }
+}
+
+/// An error that occurs during [`AsBindGroup::as_bind_group`] calls.
+#[derive(Debug, Error)]
+pub enum AsBindGroupError {
+    /// The bind group could not be generated. Try again next frame.
+    #[error("The bind group could not be generated")]
+    RetryNextUpdate,
+    /// The resources cannot be returned as an [`UnpreparedBindGroup`]; the
+    /// bind group has to be created through `as_bind_group()` instead.
+    #[error("Create the bind group via `as_bind_group()` instead")]
+    CreateBindGroupDirectly,
+    /// The image sampler provided at a binding index does not match the
+    /// sampler type(s) required there. The fields are, in order: the binding
+    /// index, the provided sampler, and the required sampler type(s).
+    #[error("At binding index {0}, the provided image sampler `{1}` does not match the required sampler type(s) `{2}`.")]
+    InvalidSamplerType(u32, String, String),
+}
+
+/// A prepared bind group returned as a result of [`AsBindGroup::as_bind_group`].
+pub struct PreparedBindGroup {
+    /// The owned resources the bind group was created from.
+    pub bindings: BindingResources,
+    /// The bind group created by the render device.
+    pub bind_group: BindGroup,
+}
+
+/// A collection of `OwnedBindingResource`s, keyed by the target binding index,
+/// as returned by [`AsBindGroup::unprepared_bind_group`].
+pub struct UnpreparedBindGroup {
+    /// The (binding index, resource) pairs that make up the bind group.
+    pub bindings: BindingResources,
+}
+
+/// A list of (binding index, binding resource) pairs, used as part of
+/// [`PreparedBindGroup`] and [`UnpreparedBindGroup`].
+#[derive(Deref, DerefMut)]
+pub struct BindingResources(pub Vec<(u32, OwnedBindingResource)>);
+
+/// An owned binding resource of any type (ex: a [`Buffer`], [`TextureView`], etc).
+/// This is used by types like [`PreparedBindGroup`] to hold a single list of all
+/// render resources used by bindings.
+#[derive(Debug)]
+pub enum OwnedBindingResource {
+    /// An owned buffer.
+    Buffer(Buffer),
+    /// An owned texture view together with its view dimension.
+    TextureView(TextureViewDimension, TextureView),
+    /// An owned sampler together with its sampler binding type.
+    Sampler(SamplerBindingType, Sampler),
+    /// Raw bytes that still have to be packed into a buffer; this variant has
+    /// no binding of its own (see [`OwnedBindingResource::get_binding`]).
+    Data(OwnedData),
+}
+
+/// Data that will be copied into a GPU buffer.
+///
+/// This corresponds to the `#[data]` attribute in `AsBindGroup`.
+/// The wrapped `Vec<u8>` holds the raw bytes.
+#[derive(Debug, Deref, DerefMut)]
+pub struct OwnedData(pub Vec<u8>);
+
+impl OwnedBindingResource {
+    /// Borrows this [`OwnedBindingResource`] as a [`BindingResource`].
+    ///
+    /// # Panics
+    ///
+    /// Panics when called on [`OwnedBindingResource::Data`]: [`OwnedData`]
+    /// doesn't itself correspond to any binding and instead requires the
+    /// `MaterialBindGroupAllocator` to pack it into a buffer.
+    pub fn get_binding(&self) -> BindingResource<'_> {
+        match self {
+            Self::Data(_) => panic!("`OwnedData` has no binding resource"),
+            Self::Sampler(_, sampler) => BindingResource::Sampler(sampler),
+            Self::TextureView(_, view) => BindingResource::TextureView(view),
+            Self::Buffer(buffer) => buffer.as_entire_binding(),
+        }
+    }
+}
+
+/// Converts a value to a [`ShaderType`] for use in a bind group.
+///
+/// This is automatically implemented for every `T` whose reference `&T`
+/// implements [`Into`] the shader type.
+/// Generally normal [`Into`] / [`From`] impls should be preferred, but
+/// sometimes additional runtime metadata is required.
+/// This exists largely to make some [`AsBindGroup`] use cases easier.
+pub trait AsBindGroupShaderType<T: ShaderType> {
+    /// Return the `T` [`ShaderType`] for `self`. When used in [`AsBindGroup`]
+    /// derives, it is safe to assume that all images in `self` exist.
+    ///
+    /// `images` gives access to the prepared GPU images for implementations
+    /// that need them.
+    fn as_bind_group_shader_type(&self, images: &RenderAssets<GpuImage>) -> T;
+}
+
+/// Blanket implementation: any `T` whose reference converts into the shader
+/// type `U` gets [`AsBindGroupShaderType`] for free. The image assets are not
+/// consulted.
+impl<T, U> AsBindGroupShaderType<U> for T
+where
+    U: ShaderType,
+    for<'a> &'a T: Into<U>,
+{
+    #[inline]
+    fn as_bind_group_shader_type(&self, _images: &RenderAssets<GpuImage>) -> U {
+        <&T as Into<U>>::into(self)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use bevy_asset::Handle;
+    use bevy_image::Image;
+
+    // Compile-only check that `#[derive(AsBindGroup)]` accepts every
+    // `visibility(...)` form on `#[texture]`: `all`, `none`, each single
+    // stage, and each combination of stages. There are no runtime assertions;
+    // the test passes if the struct below compiles.
+    #[test]
+    fn texture_visibility() {
+        #[expect(
+            dead_code,
+            reason = "This is a derive macro compilation test. It will not be constructed."
+        )]
+        #[derive(AsBindGroup)]
+        pub struct TextureVisibilityTest {
+            #[texture(0, visibility(all))]
+            pub all: Handle<Image>,
+            #[texture(1, visibility(none))]
+            pub none: Handle<Image>,
+            #[texture(2, visibility(fragment))]
+            pub fragment: Handle<Image>,
+            #[texture(3, visibility(vertex))]
+            pub vertex: Handle<Image>,
+            #[texture(4, visibility(compute))]
+            pub compute: Handle<Image>,
+            #[texture(5, visibility(vertex, fragment))]
+            pub vertex_fragment: Handle<Image>,
+            #[texture(6, visibility(vertex, compute))]
+            pub vertex_compute: Handle<Image>,
+            #[texture(7, visibility(fragment, compute))]
+            pub fragment_compute: Handle<Image>,
+            #[texture(8, visibility(vertex, fragment, compute))]
+            pub vertex_fragment_compute: Handle<Image>,
+        }
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/bind_group_entries.rs
+++ b/crates/libmarathon/src/render/render_resource/bind_group_entries.rs
@@ -0,0 +1,322 @@
+use variadics_please::all_tuples_with_size;
+use wgpu::{BindGroupEntry, BindingResource};
+
+use super::{Sampler, TextureView};
+
+/// Helper for constructing bindgroups.
+///
+/// Allows constructing the descriptor's entries as:
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &BindGroupEntries::with_indices((
+///         (2, &my_sampler),
+///         (3, my_uniform),
+///     )),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &[
+///         BindGroupEntry {
+///             binding: 2,
+///             resource: BindingResource::Sampler(&my_sampler),
+///         },
+///         BindGroupEntry {
+///             binding: 3,
+///             resource: my_uniform,
+///         },
+///     ],
+/// );
+/// ```
+///
+/// or
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &BindGroupEntries::sequential((
+///         &my_sampler,
+///         my_uniform,
+///     )),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &[
+///         BindGroupEntry {
+///             binding: 0,
+///             resource: BindingResource::Sampler(&my_sampler),
+///         },
+///         BindGroupEntry {
+///             binding: 1,
+///             resource: my_uniform,
+///         },
+///     ],
+/// );
+/// ```
+///
+/// or
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &BindGroupEntries::single(my_uniform),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group(
+///     "my_bind_group",
+///     &my_layout,
+///     &[
+///         BindGroupEntry {
+///             binding: 0,
+///             resource: my_uniform,
+///         },
+///     ],
+/// );
+/// ```
+pub struct BindGroupEntries<'b, const N: usize = 1> {
+    // The `N` entries, in the order the resources were supplied.
+    entries: [BindGroupEntry<'b>; N],
+}
+
+impl<'b, const N: usize> BindGroupEntries<'b, N> {
+    /// Builds the entries from `resources`, assigning binding indices in
+    /// order starting at 0.
+    #[inline]
+    pub fn sequential(resources: impl IntoBindingArray<'b, N>) -> Self {
+        let mut next_binding = 0u32;
+        let entries = resources.into_array().map(|resource| {
+            let entry = BindGroupEntry {
+                binding: next_binding,
+                resource,
+            };
+            next_binding += 1;
+            entry
+        });
+        Self { entries }
+    }
+
+    /// Builds the entries from `(binding index, resource)` pairs, using each
+    /// pair's index as the entry's binding.
+    #[inline]
+    pub fn with_indices(indexed_resources: impl IntoIndexedBindingArray<'b, N>) -> Self {
+        let entries = indexed_resources.into_array().map(|pair| BindGroupEntry {
+            binding: pair.0,
+            resource: pair.1,
+        });
+        Self { entries }
+    }
+}
+
+impl<'b> BindGroupEntries<'b, 1> {
+    /// Returns a one-element entry array that binds `resource` at index 0.
+    pub fn single(resource: impl IntoBinding<'b>) -> [BindGroupEntry<'b>; 1] {
+        let entry = BindGroupEntry {
+            binding: 0,
+            resource: resource.into_binding(),
+        };
+        [entry]
+    }
+}
+
+/// Lets a `BindGroupEntries` be used wherever a slice of entries is expected.
+impl<'b, const N: usize> core::ops::Deref for BindGroupEntries<'b, N> {
+    type Target = [BindGroupEntry<'b>];
+
+    fn deref(&self) -> &Self::Target {
+        self.entries.as_slice()
+    }
+}
+
+/// Conversion of a value into a wgpu [`BindingResource`] with lifetime `'a`.
+pub trait IntoBinding<'a> {
+    /// Consumes `self` and returns the corresponding [`BindingResource`].
+    fn into_binding(self) -> BindingResource<'a>;
+}
+
+/// A reference to the crate's `TextureView` binds as
+/// `BindingResource::TextureView`.
+impl<'a> IntoBinding<'a> for &'a TextureView {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::TextureView(self)
+    }
+}
+
+/// A reference to a raw `wgpu::TextureView` binds as
+/// `BindingResource::TextureView`.
+impl<'a> IntoBinding<'a> for &'a wgpu::TextureView {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::TextureView(self)
+    }
+}
+
+/// A slice of `wgpu::TextureView` references binds as
+/// `BindingResource::TextureViewArray`.
+impl<'a> IntoBinding<'a> for &'a [&'a wgpu::TextureView] {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::TextureViewArray(self)
+    }
+}
+
+/// A reference to the crate's `Sampler` binds as `BindingResource::Sampler`.
+impl<'a> IntoBinding<'a> for &'a Sampler {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::Sampler(self)
+    }
+}
+
+/// A slice of `wgpu::Sampler` references binds as
+/// `BindingResource::SamplerArray`.
+impl<'a> IntoBinding<'a> for &'a [&'a wgpu::Sampler] {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::SamplerArray(self)
+    }
+}
+
+/// A `BindingResource` is already a binding and is returned unchanged.
+impl<'a> IntoBinding<'a> for BindingResource<'a> {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        self
+    }
+}
+
+/// A `wgpu::BufferBinding` binds as `BindingResource::Buffer`.
+impl<'a> IntoBinding<'a> for wgpu::BufferBinding<'a> {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::Buffer(self)
+    }
+}
+
+/// A slice of `wgpu::BufferBinding`s binds as `BindingResource::BufferArray`.
+impl<'a> IntoBinding<'a> for &'a [wgpu::BufferBinding<'a>] {
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        BindingResource::BufferArray(self)
+    }
+}
+
+/// Conversion of a value into a fixed-size array of `N` [`BindingResource`]s.
+///
+/// Implemented below for tuples of [`IntoBinding`] values.
+pub trait IntoBindingArray<'b, const N: usize> {
+    /// Consumes `self` and returns the `N` binding resources in order.
+    fn into_array(self) -> [BindingResource<'b>; N];
+}
+
+// Implements `IntoBindingArray<$N>` for a tuple whose elements all implement
+// `IntoBinding`: the tuple is destructured and each element is converted with
+// `into_binding()`, preserving order.
+macro_rules! impl_to_binding_slice {
+    ($N: expr, $(#[$meta:meta])* $(($T: ident, $I: ident)),*) => {
+        $(#[$meta])*
+        impl<'b, $($T: IntoBinding<'b>),*> IntoBindingArray<'b, $N> for ($($T,)*) {
+            #[inline]
+            fn into_array(self) -> [BindingResource<'b>; $N] {
+                let ($($I,)*) = self;
+                [$($I.into_binding(), )*]
+            }
+        }
+    }
+}
+
+// Instantiates the macro above for tuple sizes given by the `1, 32` arguments.
+all_tuples_with_size!(
+    #[doc(fake_variadic)]
+    impl_to_binding_slice,
+    1,
+    32,
+    T,
+    s
+);
+
+/// Conversion of a value into a fixed-size array of `N`
+/// `(binding index, BindingResource)` pairs.
+///
+/// Implemented below for tuples of `(u32, impl IntoBinding)` pairs.
+pub trait IntoIndexedBindingArray<'b, const N: usize> {
+    /// Consumes `self` and returns the `N` indexed binding resources in order.
+    fn into_array(self) -> [(u32, BindingResource<'b>); N];
+}
+
+// Implements `IntoIndexedBindingArray<$N>` for a tuple of `(u32, T)` pairs
+// where every `T` implements `IntoBinding`: each pair keeps its index and has
+// its resource converted with `into_binding()`, preserving order.
+macro_rules! impl_to_indexed_binding_slice {
+    ($N: expr, $(($T: ident, $S: ident, $I: ident)),*) => {
+        impl<'b, $($T: IntoBinding<'b>),*> IntoIndexedBindingArray<'b, $N> for ($((u32, $T),)*) {
+            #[inline]
+            fn into_array(self) -> [(u32, BindingResource<'b>); $N] {
+                let ($(($S, $I),)*) = self;
+                [$(($S, $I.into_binding())), *]
+            }
+        }
+    }
+}
+
+// Instantiates the macro above for tuple sizes given by the `1, 32` arguments.
+all_tuples_with_size!(impl_to_indexed_binding_slice, 1, 32, T, n, s);
+
+/// A growable list of [`BindGroupEntry`]s, backed by a `Vec`.
+///
+/// Unlike [`BindGroupEntries`], whose length is a const generic, this list can
+/// be extended after creation with `extend_sequential` / `extend_with_indices`.
+pub struct DynamicBindGroupEntries<'b> {
+    // The entries collected so far, in insertion order.
+    entries: Vec<BindGroupEntry<'b>>,
+}
+
+/// The default value is an empty list, identical to
+/// [`DynamicBindGroupEntries::new`].
+impl<'b> Default for DynamicBindGroupEntries<'b> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<'b> DynamicBindGroupEntries<'b> {
+    /// Creates a list from `entries`, assigning binding indices in order
+    /// starting at 0.
+    pub fn sequential<const N: usize>(entries: impl IntoBindingArray<'b, N>) -> Self {
+        Self {
+            entries: entries
+                .into_array()
+                .into_iter()
+                .enumerate()
+                .map(|(ix, resource)| BindGroupEntry {
+                    // `ix` is bounded by the array length `N`.
+                    binding: ix as u32,
+                    resource,
+                })
+                .collect(),
+        }
+    }
+
+    /// Appends `entries`, assigning binding indices in order starting one past
+    /// the binding of the most recently added entry.
+    ///
+    /// If the list is empty (for example right after [`Self::new`]), numbering
+    /// starts at 0 instead of panicking.
+    pub fn extend_sequential<const N: usize>(
+        mut self,
+        entries: impl IntoBindingArray<'b, N>,
+    ) -> Self {
+        // Continue after the last entry; an empty list starts at binding 0.
+        let start = self.entries.last().map_or(0, |entry| entry.binding + 1);
+        self.entries.extend(
+            entries
+                .into_array()
+                .into_iter()
+                .enumerate()
+                .map(|(ix, resource)| BindGroupEntry {
+                    binding: start + ix as u32,
+                    resource,
+                }),
+        );
+        self
+    }
+
+    /// Creates a list from `(binding index, resource)` pairs, using each
+    /// pair's index as the entry's binding.
+    pub fn new_with_indices<const N: usize>(entries: impl IntoIndexedBindingArray<'b, N>) -> Self {
+        Self {
+            entries: entries
+                .into_array()
+                .into_iter()
+                .map(|(binding, resource)| BindGroupEntry { binding, resource })
+                .collect(),
+        }
+    }
+
+    /// Creates an empty list.
+    pub fn new() -> Self {
+        Self {
+            entries: Vec::new(),
+        }
+    }
+
+    /// Appends `(binding index, resource)` pairs, using each pair's index as
+    /// the entry's binding.
+    pub fn extend_with_indices<const N: usize>(
+        mut self,
+        entries: impl IntoIndexedBindingArray<'b, N>,
+    ) -> Self {
+        self.entries.extend(
+            entries
+                .into_array()
+                .into_iter()
+                .map(|(binding, resource)| BindGroupEntry { binding, resource }),
+        );
+        self
+    }
+}
+
+/// Lets a `DynamicBindGroupEntries` be used wherever a slice of entries is
+/// expected.
+impl<'b> core::ops::Deref for DynamicBindGroupEntries<'b> {
+    type Target = [BindGroupEntry<'b>];
+
+    fn deref(&self) -> &Self::Target {
+        self.entries.as_slice()
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/bind_group_layout.rs
+++ b/crates/libmarathon/src/render/render_resource/bind_group_layout.rs
@@ -0,0 +1,81 @@
+use crate::render::{define_atomic_id, renderer::RenderDevice, renderer::WgpuWrapper};
+use bevy_ecs::system::Res;
+use bevy_platform::sync::OnceLock;
+use core::ops::Deref;
+
+// Declares the `BindGroupLayoutId` type through the crate's `define_atomic_id!`
+// macro; a fresh id is obtained below with `BindGroupLayoutId::new()`.
+define_atomic_id!(BindGroupLayoutId);
+
+/// Bind group layouts define the interface of resources (e.g. buffers, textures, samplers)
+/// for a shader. The actual resource binding is done via a [`BindGroup`](super::BindGroup).
+///
+/// This is a lightweight thread-safe wrapper around wgpu's own [`BindGroupLayout`](wgpu::BindGroupLayout),
+/// which can be cloned as needed to workaround lifetime management issues. It may be converted
+/// from and dereferences to wgpu's [`BindGroupLayout`](wgpu::BindGroupLayout).
+///
+/// Can be created via [`RenderDevice::create_bind_group_layout`](crate::render::renderer::RenderDevice::create_bind_group_layout).
+#[derive(Clone, Debug)]
+pub struct BindGroupLayout {
+    // Identity of this layout; equality and hashing use only this id.
+    id: BindGroupLayoutId,
+    // The wrapped wgpu bind group layout.
+    value: WgpuWrapper<wgpu::BindGroupLayout>,
+}
+
+/// Two layouts are equal exactly when their ids are equal; the wrapped wgpu
+/// value is not compared.
+impl PartialEq for BindGroupLayout {
+    fn eq(&self, other: &Self) -> bool {
+        let (lhs, rhs) = (self.id, other.id);
+        lhs == rhs
+    }
+}
+
+impl Eq for BindGroupLayout {}
+
+/// Hashes only the inner value of the id, matching the id-based `PartialEq`.
+impl core::hash::Hash for BindGroupLayout {
+    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
+        core::hash::Hash::hash(&self.id.0, state);
+    }
+}
+
+impl BindGroupLayout {
+    /// Returns the [`BindGroupLayoutId`] representing the unique ID of the bind group layout.
+    #[inline]
+    pub fn id(&self) -> BindGroupLayoutId {
+        self.id
+    }
+
+    /// Returns a reference to the wrapped wgpu [`BindGroupLayout`](wgpu::BindGroupLayout).
+    #[inline]
+    pub fn value(&self) -> &wgpu::BindGroupLayout {
+        &self.value
+    }
+}
+
+/// Wraps a raw wgpu layout, giving it a newly created [`BindGroupLayoutId`].
+impl From<wgpu::BindGroupLayout> for BindGroupLayout {
+    fn from(value: wgpu::BindGroupLayout) -> Self {
+        let id = BindGroupLayoutId::new();
+        let value = WgpuWrapper::new(value);
+        Self { id, value }
+    }
+}
+
+/// Dereferences to the wrapped wgpu [`BindGroupLayout`](wgpu::BindGroupLayout),
+/// so its methods can be called directly on this wrapper.
+impl Deref for BindGroupLayout {
+    type Target = wgpu::BindGroupLayout;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+// Process-wide layout with no entries. It is set once by
+// `init_empty_bind_group_layout` and read by `empty_bind_group_layout`.
+static EMPTY_BIND_GROUP_LAYOUT: OnceLock<BindGroupLayout> = OnceLock::new();
+
+/// Creates the entry-less bind group layout and stores it in
+/// `EMPTY_BIND_GROUP_LAYOUT`.
+///
+/// # Panics
+///
+/// Panics if the layout has already been stored, i.e. if this runs more than
+/// once.
+pub(crate) fn init_empty_bind_group_layout(render_device: Res<RenderDevice>) {
+    EMPTY_BIND_GROUP_LAYOUT
+        .set(render_device.create_bind_group_layout(Some("empty_bind_group_layout"), &[]))
+        .expect("init_empty_bind_group_layout was called more than once");
+}
+
+/// Returns a clone of the shared entry-less bind group layout.
+///
+/// # Panics
+///
+/// Panics if `init_empty_bind_group_layout` has not stored the layout yet.
+pub fn empty_bind_group_layout() -> BindGroupLayout {
+    EMPTY_BIND_GROUP_LAYOUT
+        .get()
+        .cloned()
+        .expect("init_empty_bind_group_layout was not called")
+}
--- a/crates/libmarathon/src/render/render_resource/bind_group_layout_entries.rs
+++ b/crates/libmarathon/src/render/render_resource/bind_group_layout_entries.rs
@@ -0,0 +1,592 @@
+use core::num::NonZero;
+use variadics_please::all_tuples_with_size;
+use wgpu::{BindGroupLayoutEntry, BindingType, ShaderStages};
+
+/// Helper for constructing bind group layouts.
+///
+/// Allows constructing the layout's entries as:
+/// ```ignore (render_device cannot be easily accessed)
+/// let layout = render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &BindGroupLayoutEntries::with_indices(
+///         // The layout entries will only be visible in the fragment stage
+///         ShaderStages::FRAGMENT,
+///         (
+///             // Screen texture
+///             (2, texture_2d(TextureSampleType::Float { filterable: true })),
+///             // Sampler
+///             (3, sampler(SamplerBindingType::Filtering)),
+///         ),
+///     ),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// let layout = render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &[
+///         // Screen texture
+///         BindGroupLayoutEntry {
+///             binding: 2,
+///             visibility: ShaderStages::FRAGMENT,
+///             ty: BindingType::Texture {
+///                 sample_type: TextureSampleType::Float { filterable: true },
+///                 view_dimension: TextureViewDimension::D2,
+///                 multisampled: false,
+///             },
+///             count: None,
+///         },
+///         // Sampler
+///         BindGroupLayoutEntry {
+///             binding: 3,
+///             visibility: ShaderStages::FRAGMENT,
+///             ty: BindingType::Sampler(SamplerBindingType::Filtering),
+///             count: None,
+///         },
+///     ],
+/// );
+/// ```
+///
+/// or
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &BindGroupLayoutEntries::sequential(
+///         ShaderStages::FRAGMENT,
+///         (
+///             // Screen texture
+///             texture_2d(TextureSampleType::Float { filterable: true }),
+///             // Sampler
+///             sampler(SamplerBindingType::Filtering),
+///         ),
+///     ),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// let layout = render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &[
+///         // Screen texture
+///         BindGroupLayoutEntry {
+///             binding: 0,
+///             visibility: ShaderStages::FRAGMENT,
+///             ty: BindingType::Texture {
+///                 sample_type: TextureSampleType::Float { filterable: true },
+///                 view_dimension: TextureViewDimension::D2,
+///                 multisampled: false,
+///             },
+///             count: None,
+///         },
+///         // Sampler
+///         BindGroupLayoutEntry {
+///             binding: 1,
+///             visibility: ShaderStages::FRAGMENT,
+///             ty: BindingType::Sampler(SamplerBindingType::Filtering),
+///             count: None,
+///         },
+///     ],
+/// );
+/// ```
+///
+/// or
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &BindGroupLayoutEntries::single(
+///         ShaderStages::FRAGMENT,
+///         texture_2d(TextureSampleType::Float { filterable: true }),
+///     ),
+/// );
+/// ```
+///
+/// instead of
+///
+/// ```ignore (render_device cannot be easily accessed)
+/// let layout = render_device.create_bind_group_layout(
+///     "my_bind_group_layout",
+///     &[
+///         BindGroupLayoutEntry {
+///             binding: 0,
+///             visibility: ShaderStages::FRAGMENT,
+///             ty: BindingType::Texture {
+///                 sample_type: TextureSampleType::Float { filterable: true },
+///                 view_dimension: TextureViewDimension::D2,
+///                 multisampled: false,
+///             },
+///             count: None,
+///         },
+///     ],
+/// );
+/// ```
+pub struct BindGroupLayoutEntries<const N: usize> {
+    // The `N` finished layout entries, in the order they were supplied.
+    entries: [BindGroupLayoutEntry; N],
+}
+
+/// A bind group layout entry that does not yet have a binding index.
+///
+/// It carries the binding type plus an optional visibility and count. Calling
+/// [`BindGroupLayoutEntryBuilder::build`] supplies the binding index and a
+/// default visibility and produces the final [`BindGroupLayoutEntry`].
+#[derive(Clone, Copy)]
+pub struct BindGroupLayoutEntryBuilder {
+    // The type of resource bound at this entry.
+    ty: BindingType,
+    // Explicit visibility; when `None`, `build` uses its default visibility.
+    visibility: Option<ShaderStages>,
+    // Copied verbatim into the built entry's `count` field.
+    count: Option<NonZero<u32>>,
+}
+
+impl BindGroupLayoutEntryBuilder {
+    /// Sets an explicit visibility, overriding the default passed to
+    /// [`BindGroupLayoutEntryBuilder::build`].
+    pub fn visibility(mut self, visibility: ShaderStages) -> Self {
+        self.visibility = Some(visibility);
+        self
+    }
+
+    /// Sets the `count` of the built entry.
+    pub fn count(mut self, count: NonZero<u32>) -> Self {
+        self.count = Some(count);
+        self
+    }
+
+    /// Produces the final entry at index `binding`. `default_visibility` is
+    /// used only when no explicit visibility was set on the builder.
+    pub fn build(&self, binding: u32, default_visibility: ShaderStages) -> BindGroupLayoutEntry {
+        BindGroupLayoutEntry {
+            binding,
+            ty: self.ty,
+            visibility: self.visibility.unwrap_or(default_visibility),
+            count: self.count,
+        }
+    }
+}
+
+impl<const N: usize> BindGroupLayoutEntries<N> {
+    /// Builds the entries from `entries_ext`, assigning binding indices in
+    /// order starting at 0. `default_visibility` applies to every entry that
+    /// has no explicit visibility.
+    #[inline]
+    pub fn sequential(
+        default_visibility: ShaderStages,
+        entries_ext: impl IntoBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        let mut next_binding = 0u32;
+        let entries = entries_ext.into_array().map(|builder| {
+            let entry = builder.build(next_binding, default_visibility);
+            next_binding += 1;
+            entry
+        });
+        Self { entries }
+    }
+
+    /// Builds the entries from `(binding index, entry)` pairs, using each
+    /// pair's index as the binding. `default_visibility` applies to every
+    /// entry that has no explicit visibility.
+    #[inline]
+    pub fn with_indices(
+        default_visibility: ShaderStages,
+        indexed_entries: impl IntoIndexedBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        let entries = indexed_entries
+            .into_array()
+            .map(|pair| pair.1.build(pair.0, default_visibility));
+        Self { entries }
+    }
+}
+
+impl BindGroupLayoutEntries<1> {
+    /// Returns a one-element layout entry array for `resource` at binding 0.
+    /// `visibility` is used unless `resource` carries its own visibility.
+    pub fn single(
+        visibility: ShaderStages,
+        resource: impl IntoBindGroupLayoutEntryBuilder,
+    ) -> [BindGroupLayoutEntry; 1] {
+        let builder = resource.into_bind_group_layout_entry_builder();
+        [builder.build(0, visibility)]
+    }
+}
+
+/// Lets a `BindGroupLayoutEntries` be used wherever a slice of layout entries
+/// is expected.
+impl<const N: usize> core::ops::Deref for BindGroupLayoutEntries<N> {
+    type Target = [BindGroupLayoutEntry];
+
+    fn deref(&self) -> &Self::Target {
+        self.entries.as_slice()
+    }
+}
+
+/// Conversion of a value into a [`BindGroupLayoutEntryBuilder`].
+pub trait IntoBindGroupLayoutEntryBuilder {
+    /// Consumes `self` and returns the corresponding builder.
+    fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder;
+}
+
+/// A bare `BindingType` becomes a builder with no explicit visibility and no
+/// count.
+impl IntoBindGroupLayoutEntryBuilder for BindingType {
+    fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder {
+        let ty = self;
+        BindGroupLayoutEntryBuilder {
+            ty,
+            visibility: None,
+            count: None,
+        }
+    }
+}
+
+/// A raw wgpu entry keeps its type, visibility and count, but its binding
+/// index is dropped. A warning is logged unless that index is `u32::MAX`.
+impl IntoBindGroupLayoutEntryBuilder for BindGroupLayoutEntry {
+    fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder {
+        let BindGroupLayoutEntry {
+            binding,
+            visibility,
+            ty,
+            count,
+            ..
+        } = self;
+        if binding != u32::MAX {
+            tracing::warn!("The BindGroupLayoutEntries api ignores the binding index when converting a raw wgpu::BindGroupLayoutEntry. You can ignore this warning by setting it to u32::MAX.");
+        }
+        BindGroupLayoutEntryBuilder {
+            ty,
+            visibility: Some(visibility),
+            count,
+        }
+    }
+}
+
+/// A builder is already a builder and is returned unchanged.
+impl IntoBindGroupLayoutEntryBuilder for BindGroupLayoutEntryBuilder {
+    fn into_bind_group_layout_entry_builder(self) -> BindGroupLayoutEntryBuilder {
+        self
+    }
+}
+
+/// Conversion of a value into a fixed-size array of `N`
+/// [`BindGroupLayoutEntryBuilder`]s.
+pub trait IntoBindGroupLayoutEntryBuilderArray<const N: usize> {
+    /// Consumes `self` and returns the `N` builders in order.
+    fn into_array(self) -> [BindGroupLayoutEntryBuilder; N];
+}
+// Implements `IntoBindGroupLayoutEntryBuilderArray<$N>` for a tuple whose
+// elements all implement `IntoBindGroupLayoutEntryBuilder`: the tuple is
+// destructured and each element is converted in order.
+macro_rules! impl_to_binding_type_slice {
+    ($N: expr, $(#[$meta:meta])* $(($T: ident, $I: ident)),*) => {
+        $(#[$meta])*
+        impl<$($T: IntoBindGroupLayoutEntryBuilder),*> IntoBindGroupLayoutEntryBuilderArray<$N> for ($($T,)*) {
+            #[inline]
+            fn into_array(self) -> [BindGroupLayoutEntryBuilder; $N] {
+                let ($($I,)*) = self;
+                [$($I.into_bind_group_layout_entry_builder(), )*]
+            }
+        }
+    }
+}
+// Instantiates the macro above for tuple sizes given by the `1, 32` arguments.
+all_tuples_with_size!(
+    #[doc(fake_variadic)]
+    impl_to_binding_type_slice,
+    1,
+    32,
+    T,
+    s
+);
+
+/// Conversion of a value into a fixed-size array of `N`
+/// `(binding index, BindGroupLayoutEntryBuilder)` pairs.
+pub trait IntoIndexedBindGroupLayoutEntryBuilderArray<const N: usize> {
+    /// Consumes `self` and returns the `N` indexed builders in order.
+    fn into_array(self) -> [(u32, BindGroupLayoutEntryBuilder); N];
+}
+// Implements `IntoIndexedBindGroupLayoutEntryBuilderArray<$N>` for a tuple of
+// `(u32, T)` pairs where every `T` implements
+// `IntoBindGroupLayoutEntryBuilder`: each pair keeps its index and has its
+// second element converted, preserving order.
+macro_rules! impl_to_indexed_binding_type_slice {
+    ($N: expr, $(($T: ident, $S: ident, $I: ident)),*) => {
+        impl<$($T: IntoBindGroupLayoutEntryBuilder),*> IntoIndexedBindGroupLayoutEntryBuilderArray<$N> for ($((u32, $T),)*) {
+            #[inline]
+            fn into_array(self) -> [(u32, BindGroupLayoutEntryBuilder); $N] {
+                let ($(($S, $I),)*) = self;
+                [$(($S, $I.into_bind_group_layout_entry_builder())), *]
+            }
+        }
+    }
+}
+// Instantiates the macro above for tuple sizes given by the `1, 32` arguments.
+all_tuples_with_size!(impl_to_indexed_binding_type_slice, 1, 32, T, n, s);
+
+/// An array of raw wgpu layout entries converts element by element; see the
+/// `BindGroupLayoutEntry` conversion for how each binding index is treated.
+impl<const N: usize> IntoBindGroupLayoutEntryBuilderArray<N> for [BindGroupLayoutEntry; N] {
+    fn into_array(self) -> [BindGroupLayoutEntryBuilder; N] {
+        self.map(|entry| entry.into_bind_group_layout_entry_builder())
+    }
+}
+
+/// A growable list of [`BindGroupLayoutEntry`]s, backed by a `Vec`.
+///
+/// Unlike [`BindGroupLayoutEntries`], whose length is a const generic, this
+/// list can be extended after creation.
+pub struct DynamicBindGroupLayoutEntries {
+    // Visibility applied to every entry that has no explicit visibility.
+    default_visibility: ShaderStages,
+    // The layout entries collected so far, in insertion order.
+    entries: Vec<BindGroupLayoutEntry>,
+}
+
+impl DynamicBindGroupLayoutEntries {
+    /// Creates a list from `entries`, assigning binding indices in order
+    /// starting at 0. `default_visibility` is remembered for later extensions.
+    pub fn sequential<const N: usize>(
+        default_visibility: ShaderStages,
+        entries: impl IntoBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        Self {
+            default_visibility,
+            entries: entries
+                .into_array()
+                .into_iter()
+                .enumerate()
+                // `ix` is bounded by the array length `N`.
+                .map(|(ix, resource)| resource.build(ix as u32, default_visibility))
+                .collect(),
+        }
+    }
+
+    /// Appends `entries`, assigning binding indices in order starting one past
+    /// the binding of the most recently added entry.
+    ///
+    /// If the list is empty (for example right after [`Self::new`]), numbering
+    /// starts at 0 instead of panicking.
+    pub fn extend_sequential<const N: usize>(
+        mut self,
+        entries: impl IntoBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        // Continue after the last entry; an empty list starts at binding 0.
+        let start = self.entries.last().map_or(0, |entry| entry.binding + 1);
+        self.entries.extend(
+            entries
+                .into_array()
+                .into_iter()
+                .enumerate()
+                .map(|(ix, resource)| resource.build(start + ix as u32, self.default_visibility)),
+        );
+        self
+    }
+
+    /// Creates a list from `(binding index, entry)` pairs, using each pair's
+    /// index as the binding. `default_visibility` is remembered for later
+    /// extensions.
+    pub fn new_with_indices<const N: usize>(
+        default_visibility: ShaderStages,
+        entries: impl IntoIndexedBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        Self {
+            default_visibility,
+            entries: entries
+                .into_array()
+                .into_iter()
+                .map(|(binding, resource)| resource.build(binding, default_visibility))
+                .collect(),
+        }
+    }
+
+    /// Creates an empty list that will use `default_visibility` for entries
+    /// added later without an explicit visibility.
+    pub fn new(default_visibility: ShaderStages) -> Self {
+        Self {
+            default_visibility,
+            entries: Vec::new(),
+        }
+    }
+
+    /// Appends `(binding index, entry)` pairs, using each pair's index as the
+    /// binding.
+    pub fn extend_with_indices<const N: usize>(
+        mut self,
+        entries: impl IntoIndexedBindGroupLayoutEntryBuilderArray<N>,
+    ) -> Self {
+        self.entries.extend(
+            entries
+                .into_array()
+                .into_iter()
+                .map(|(binding, resource)| resource.build(binding, self.default_visibility)),
+        );
+        self
+    }
+}
+
+/// Lets a `DynamicBindGroupLayoutEntries` be used wherever a slice of layout
+/// entries is expected.
+impl core::ops::Deref for DynamicBindGroupLayoutEntries {
+    type Target = [BindGroupLayoutEntry];
+
+    fn deref(&self) -> &Self::Target {
+        self.entries.as_slice()
+    }
+}
+
+/// Convenience constructors that produce a [`BindGroupLayoutEntryBuilder`] for
+/// each commonly used binding type.
+pub mod binding_types {
+    use crate::render::render_resource::{
+        BufferBindingType, SamplerBindingType, TextureSampleType, TextureViewDimension,
+    };
+    use core::num::NonZero;
+    use encase::ShaderType;
+    use wgpu::{StorageTextureAccess, TextureFormat};
+
+    use super::*;
+
+    /// Builds a buffer binding of kind `ty`; shared by the buffer helpers below.
+    fn buffer_entry(
+        ty: BufferBindingType,
+        has_dynamic_offset: bool,
+        min_binding_size: Option<NonZero<u64>>,
+    ) -> BindGroupLayoutEntryBuilder {
+        let binding = BindingType::Buffer {
+            ty,
+            has_dynamic_offset,
+            min_binding_size,
+        };
+        binding.into_bind_group_layout_entry_builder()
+    }
+
+    /// Builds a sampled-texture binding; shared by the texture helpers below.
+    ///
+    /// The arguments are passed through unchanged: no check is made here that
+    /// the combination of `view_dimension` and `multisampled` is one the
+    /// backend accepts.
+    fn texture_entry(
+        sample_type: TextureSampleType,
+        view_dimension: TextureViewDimension,
+        multisampled: bool,
+    ) -> BindGroupLayoutEntryBuilder {
+        let binding = BindingType::Texture {
+            sample_type,
+            view_dimension,
+            multisampled,
+        };
+        binding.into_bind_group_layout_entry_builder()
+    }
+
+    /// Builds a storage-texture binding; shared by the `texture_storage_*`
+    /// helpers below.
+    fn storage_texture_entry(
+        format: TextureFormat,
+        access: StorageTextureAccess,
+        view_dimension: TextureViewDimension,
+    ) -> BindGroupLayoutEntryBuilder {
+        let binding = BindingType::StorageTexture {
+            access,
+            format,
+            view_dimension,
+        };
+        binding.into_bind_group_layout_entry_builder()
+    }
+
+    /// Builds an acceleration-structure binding with the given `vertex_return`
+    /// flag.
+    fn acceleration_structure_entry(vertex_return: bool) -> BindGroupLayoutEntryBuilder {
+        let binding = BindingType::AccelerationStructure { vertex_return };
+        binding.into_bind_group_layout_entry_builder()
+    }
+
+    /// A read-write storage buffer whose minimum binding size is
+    /// `T::min_size()`.
+    pub fn storage_buffer<T: ShaderType>(has_dynamic_offset: bool) -> BindGroupLayoutEntryBuilder {
+        let min_binding_size = Some(T::min_size());
+        storage_buffer_sized(has_dynamic_offset, min_binding_size)
+    }
+
+    /// A read-write storage buffer with an explicit minimum binding size.
+    pub fn storage_buffer_sized(
+        has_dynamic_offset: bool,
+        min_binding_size: Option<NonZero<u64>>,
+    ) -> BindGroupLayoutEntryBuilder {
+        buffer_entry(
+            BufferBindingType::Storage { read_only: false },
+            has_dynamic_offset,
+            min_binding_size,
+        )
+    }
+
+    /// A read-only storage buffer whose minimum binding size is
+    /// `T::min_size()`.
+    pub fn storage_buffer_read_only<T: ShaderType>(
+        has_dynamic_offset: bool,
+    ) -> BindGroupLayoutEntryBuilder {
+        let min_binding_size = Some(T::min_size());
+        storage_buffer_read_only_sized(has_dynamic_offset, min_binding_size)
+    }
+
+    /// A read-only storage buffer with an explicit minimum binding size.
+    pub fn storage_buffer_read_only_sized(
+        has_dynamic_offset: bool,
+        min_binding_size: Option<NonZero<u64>>,
+    ) -> BindGroupLayoutEntryBuilder {
+        buffer_entry(
+            BufferBindingType::Storage { read_only: true },
+            has_dynamic_offset,
+            min_binding_size,
+        )
+    }
+
+    /// A uniform buffer whose minimum binding size is `T::min_size()`.
+    pub fn uniform_buffer<T: ShaderType>(has_dynamic_offset: bool) -> BindGroupLayoutEntryBuilder {
+        let min_binding_size = Some(T::min_size());
+        uniform_buffer_sized(has_dynamic_offset, min_binding_size)
+    }
+
+    /// A uniform buffer with an explicit minimum binding size.
+    pub fn uniform_buffer_sized(
+        has_dynamic_offset: bool,
+        min_binding_size: Option<NonZero<u64>>,
+    ) -> BindGroupLayoutEntryBuilder {
+        buffer_entry(
+            BufferBindingType::Uniform,
+            has_dynamic_offset,
+            min_binding_size,
+        )
+    }
+
+    /// A single-sampled texture viewed as `D1`.
+    pub fn texture_1d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D1, false)
+    }
+
+    /// A single-sampled texture viewed as `D2`.
+    pub fn texture_2d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D2, false)
+    }
+
+    /// A multisampled texture viewed as `D2`.
+    pub fn texture_2d_multisampled(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D2, true)
+    }
+
+    /// A single-sampled texture viewed as `D2Array`.
+    pub fn texture_2d_array(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D2Array, false)
+    }
+
+    /// A multisampled texture viewed as `D2Array`.
+    pub fn texture_2d_array_multisampled(
+        sample_type: TextureSampleType,
+    ) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D2Array, true)
+    }
+
+    /// A single-sampled `D2` texture with the `Depth` sample type.
+    pub fn texture_depth_2d() -> BindGroupLayoutEntryBuilder {
+        let depth = texture_2d(TextureSampleType::Depth);
+        depth.into_bind_group_layout_entry_builder()
+    }
+
+    /// A multisampled `D2` texture with the `Depth` sample type.
+    pub fn texture_depth_2d_multisampled() -> BindGroupLayoutEntryBuilder {
+        let depth = texture_2d_multisampled(TextureSampleType::Depth);
+        depth.into_bind_group_layout_entry_builder()
+    }
+
+    /// A single-sampled texture viewed as `Cube`.
+    pub fn texture_cube(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::Cube, false)
+    }
+
+    /// A multisampled texture viewed as `Cube`.
+    pub fn texture_cube_multisampled(
+        sample_type: TextureSampleType,
+    ) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::Cube, true)
+    }
+
+    /// A single-sampled texture viewed as `CubeArray`.
+    pub fn texture_cube_array(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::CubeArray, false)
+    }
+
+    /// A multisampled texture viewed as `CubeArray`.
+    pub fn texture_cube_array_multisampled(
+        sample_type: TextureSampleType,
+    ) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::CubeArray, true)
+    }
+
+    /// A single-sampled texture viewed as `D3`.
+    pub fn texture_3d(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D3, false)
+    }
+
+    /// A multisampled texture viewed as `D3`.
+    pub fn texture_3d_multisampled(sample_type: TextureSampleType) -> BindGroupLayoutEntryBuilder {
+        texture_entry(sample_type, TextureViewDimension::D3, true)
+    }
+
+    /// A sampler of the given binding type.
+    pub fn sampler(sampler_binding_type: SamplerBindingType) -> BindGroupLayoutEntryBuilder {
+        let binding = BindingType::Sampler(sampler_binding_type);
+        binding.into_bind_group_layout_entry_builder()
+    }
+
+    /// A storage texture viewed as `D2`.
+    pub fn texture_storage_2d(
+        format: TextureFormat,
+        access: StorageTextureAccess,
+    ) -> BindGroupLayoutEntryBuilder {
+        storage_texture_entry(format, access, TextureViewDimension::D2)
+    }
+
+    /// A storage texture viewed as `D2Array`.
+    pub fn texture_storage_2d_array(
+        format: TextureFormat,
+        access: StorageTextureAccess,
+    ) -> BindGroupLayoutEntryBuilder {
+        storage_texture_entry(format, access, TextureViewDimension::D2Array)
+    }
+
+    /// A storage texture viewed as `D3`.
+    pub fn texture_storage_3d(
+        format: TextureFormat,
+        access: StorageTextureAccess,
+    ) -> BindGroupLayoutEntryBuilder {
+        storage_texture_entry(format, access, TextureViewDimension::D3)
+    }
+
+    /// An acceleration structure with `vertex_return` disabled.
+    pub fn acceleration_structure() -> BindGroupLayoutEntryBuilder {
+        acceleration_structure_entry(false)
+    }
+
+    /// An acceleration structure with `vertex_return` enabled.
+    pub fn acceleration_structure_vertex_return() -> BindGroupLayoutEntryBuilder {
+        acceleration_structure_entry(true)
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/bindless.rs
+++ b/crates/libmarathon/src/render/render_resource/bindless.rs
@@ -0,0 +1,374 @@
+//! Types and functions relating to bindless resources.
+
+use std::borrow::Cow;
+use core::{
+    num::{NonZeroU32, NonZeroU64},
+    ops::Range,
+};
+
+use bevy_derive::{Deref, DerefMut};
+use wgpu::{
+    BindGroupLayoutEntry, SamplerBindingType, ShaderStages, TextureSampleType, TextureViewDimension,
+};
+
+use crate::render::render_resource::binding_types::storage_buffer_read_only_sized;
+
+use super::binding_types::{
+    sampler, texture_1d, texture_2d, texture_2d_array, texture_3d, texture_cube, texture_cube_array,
+};
+
+/// The default value for the number of resources that can be stored in a slab
+/// on this platform: 64 on macOS and iOS, 2048 everywhere else.
+///
+/// See the documentation for [`BindlessSlabResourceLimit`] for more
+/// information.
+pub const AUTO_BINDLESS_SLAB_RESOURCE_LIMIT: u32 =
+    if cfg!(any(target_os = "macos", target_os = "ios")) {
+        64
+    } else {
+        2048
+    };
+
+/// The binding numbers for the built-in binding arrays of each bindless
+/// resource type. In the case of materials, the material allocator manages
+/// these binding arrays.
+///
+/// `bindless.wgsl` contains declarations of these arrays for use in your
+/// shaders. If you change these, make sure to update that file as well.
+/// Keep the rows sorted by [`BindlessResourceType`] (its derived `Ord`):
+/// [`BindlessResourceType::binding_number`] binary-searches this table.
+pub static BINDING_NUMBERS: [(BindlessResourceType, BindingNumber); 9] = [
+    (BindlessResourceType::SamplerFiltering, BindingNumber(1)),
+    (BindlessResourceType::SamplerNonFiltering, BindingNumber(2)),
+    (BindlessResourceType::SamplerComparison, BindingNumber(3)),
+    (BindlessResourceType::Texture1d, BindingNumber(4)),
+    (BindlessResourceType::Texture2d, BindingNumber(5)),
+    (BindlessResourceType::Texture2dArray, BindingNumber(6)),
+    (BindlessResourceType::Texture3d, BindingNumber(7)),
+    (BindlessResourceType::TextureCube, BindingNumber(8)),
+    (BindlessResourceType::TextureCubeArray, BindingNumber(9)),
+];
+
+/// The maximum number of resources that can be stored in a slab.
+///
+/// This limit primarily exists in order to work around `wgpu` performance
+/// problems involving large numbers of bindless resources. Also, some
+/// platforms, such as Metal, currently enforce limits on the number of
+/// resources in use.
+///
+/// This corresponds to `LIMIT` in the `#[bindless(LIMIT)]` attribute when
+/// deriving [`crate::render::render_resource::AsBindGroup`].
+#[derive(Clone, Copy, Default, PartialEq, Debug)]
+pub enum BindlessSlabResourceLimit {
+    /// Allows the renderer to choose a reasonable value for the resource limit
+    /// based on the platform (see [`AUTO_BINDLESS_SLAB_RESOURCE_LIMIT`]).
+    ///
+    /// This value has been tuned, so you should default to this value unless
+    /// you have special platform-specific considerations that prevent you from
+    /// using it.
+    #[default]
+    Auto,
+
+    /// A custom value for the resource limit.
+    ///
+    /// Bevy will allocate no more than this number of resources in a slab,
+    /// unless exceeding this value is necessary in order to allocate at all
+    /// (i.e. unless the number of bindless resources in your bind group exceeds
+    /// this value), in which case Bevy can exceed it.
+    Custom(u32),
+}
+
+/// Information about the bindless resources in this object.
+///
+/// The material bind group allocator uses this descriptor in order to create
+/// and maintain bind groups. The fields within this bindless descriptor are
+/// [`Cow`]s in order to support both the common case in which the fields are
+/// simply `static` constants and the more unusual case in which the fields are
+/// dynamically generated efficiently. An example of the latter case is
+/// `ExtendedMaterial`, which needs to assemble a bindless descriptor from those
+/// of the base material and the material extension at runtime.
+///
+/// This structure will only be present if this object is bindless.
+pub struct BindlessDescriptor {
+    /// The bindless resource types that this object uses, in order of bindless
+    /// index.
+    ///
+    /// The resource assigned to binding index 0 will be at index 0, the
+    /// resource assigned to binding index 1 will be at index 1 in this array,
+    /// and so on. Unused binding indices are set to [`BindlessResourceType::None`].
+    pub resources: Cow<'static, [BindlessResourceType]>,
+    /// The [`BindlessBufferDescriptor`] for each bindless buffer that this
+    /// object uses.
+    ///
+    /// The order of this array is irrelevant.
+    pub buffers: Cow<'static, [BindlessBufferDescriptor]>,
+    /// The [`BindlessIndexTableDescriptor`]s describing each bindless index
+    /// table.
+    ///
+    /// This list must be sorted by the first bindless index.
+    pub index_tables: Cow<'static, [BindlessIndexTableDescriptor]>,
+}
+
+/// The type of potentially-bindless resource; declaration order defines the derived `Ord`.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+pub enum BindlessResourceType {
+    /// No bindless resource.
+    ///
+    /// This is used as a placeholder to fill holes in the
+    /// [`BindlessDescriptor::resources`] list.
+    None,
+    /// A storage buffer.
+    Buffer,
+    /// A filtering sampler.
+    SamplerFiltering,
+    /// A non-filtering sampler (nearest neighbor).
+    SamplerNonFiltering,
+    /// A comparison sampler (typically used for shadow maps).
+    SamplerComparison,
+    /// A 1D texture.
+    Texture1d,
+    /// A 2D texture.
+    Texture2d,
+    /// A 2D texture array.
+    ///
+    /// Note that this differs from a binding array. 2D texture arrays must all
+    /// have the same size and format.
+    Texture2dArray,
+    /// A 3D texture.
+    Texture3d,
+    /// A cubemap texture.
+    TextureCube,
+    /// A cubemap texture array.
+    ///
+    /// Note that this differs from a binding array. Cubemap texture arrays must
+    /// all have the same size and format.
+    TextureCubeArray,
+    /// Multiple instances of plain old data concatenated into a single buffer.
+    ///
+    /// This corresponds to the `#[data]` declaration in
+    /// [`crate::render::render_resource::AsBindGroup`].
+    ///
+    /// Note that this resource doesn't itself map to a GPU-level binding
+    /// resource and instead depends on the `MaterialBindGroupAllocator` to
+    /// create a binding resource for it.
+    DataBuffer,
+}
+
+/// Describes a bindless buffer.
+///
+/// Unlike samplers and textures, each buffer in a bind group gets its own
+/// unique bind group entry. That is, there isn't any `bindless_buffers` binding
+/// array to go along with `bindless_textures_2d`,
+/// `bindless_samplers_filtering`, etc. Therefore, this descriptor contains two
+/// indices: the *binding number* and the *bindless index*. The binding number
+/// is the `@binding` number used in the shader, while the bindless index is the
+/// index of the buffer in the bindless index table (which is itself
+/// conventionally bound to binding number 0).
+///
+/// When declaring the buffer in a derived implementation of
+/// [`crate::render::render_resource::AsBindGroup`] with syntax like
+/// `#[uniform(BINDLESS_INDEX, StandardMaterialUniform,
+/// bindless(BINDING_NUMBER))]`, the bindless index is `BINDLESS_INDEX`, and the
+/// binding number is `BINDING_NUMBER`. Note the order.
+#[derive(Clone, Copy, Debug)]
+pub struct BindlessBufferDescriptor {
+    /// The actual binding number of the buffer.
+    ///
+    /// This is declared with `@binding` in WGSL. When deriving
+    /// [`crate::render::render_resource::AsBindGroup`], this is the `BINDING_NUMBER` in
+    /// `#[uniform(BINDLESS_INDEX, StandardMaterialUniform,
+    /// bindless(BINDING_NUMBER))]`.
+    pub binding_number: BindingNumber,
+    /// The index of the buffer in the bindless index table.
+    ///
+    /// In the shader, this is the index into the table bound to binding 0. When
+    /// deriving [`crate::render::render_resource::AsBindGroup`], this is the
+    /// `BINDLESS_INDEX` in `#[uniform(BINDLESS_INDEX, StandardMaterialUniform,
+    /// bindless(BINDING_NUMBER))]`.
+    pub bindless_index: BindlessIndex,
+    /// The size of the buffer in bytes, if known.
+    pub size: Option<usize>,
+}
+
+/// Describes the layout of the bindless index table, which maps bindless
+/// indices to indices within the binding arrays.
+#[derive(Clone)]
+pub struct BindlessIndexTableDescriptor {
+    /// The half-open range (end excluded) of bindless indices that this descriptor covers.
+    pub indices: Range<BindlessIndex>,
+    /// The binding at which the index table itself will be bound.
+    ///
+    /// By default, this is binding 0, but it can be changed with the
+    /// `#[bindless(index_table(binding(B)))]` attribute.
+    pub binding_number: BindingNumber,
+}
+
+/// The index of the actual binding in the bind group, stored as a raw `u32`.
+///
+/// This is the value specified in WGSL as `@binding`.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Deref, DerefMut)]
+pub struct BindingNumber(pub u32);
+
+/// The index in the bindless index table, stored as a raw `u32`.
+///
+/// This table is conventionally bound to binding number 0.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Hash, Debug, Deref, DerefMut)]
+pub struct BindlessIndex(pub u32);
+
+/// Creates the bind group layout entries common to all shaders that use
+/// bindless bind groups.
+///
+/// * `bindless_index_table_length` is the number of `u32` slots in the
+///   bindless index table; it determines the minimum binding size of the
+///   table's storage buffer (no minimum is set when it is 0).
+/// * `bindless_slab_resource_limit` specifies the resolved
+///   [`BindlessSlabResourceLimit`] value and becomes the length of every
+///   binding array.
+/// * `bindless_index_table_binding_number` is the binding at which the index
+///   table itself is bound.
+///
+/// The returned entries are the index table followed by the binding arrays at
+/// bindings 1 through 9, in the same order as [`BINDING_NUMBERS`].
+///
+/// # Panics
+///
+/// Panics if `bindless_slab_resource_limit` is zero.
+pub fn create_bindless_bind_group_layout_entries(
+    bindless_index_table_length: u32,
+    bindless_slab_resource_limit: u32,
+    bindless_index_table_binding_number: BindingNumber,
+) -> Vec<BindGroupLayoutEntry> {
+    let bindless_slab_resource_limit =
+        NonZeroU32::new(bindless_slab_resource_limit).expect("Bindless slot count must be nonzero");
+
+    // Every entry is visible to the same set of shader stages.
+    let stages = ShaderStages::FRAGMENT | ShaderStages::VERTEX | ShaderStages::COMPUTE;
+    // All of the texture binding arrays hold filterable float textures.
+    let filterable_float = TextureSampleType::Float { filterable: true };
+    // The index table stores one `u32` per bindless index.
+    let index_table_size = u64::from(bindless_index_table_length) * size_of::<u32>() as u64;
+
+    // The maximum size of a binding array is the
+    // `bindless_slab_resource_limit`, which would occur if all of the bindless
+    // resources were of the same type. So we create our binding arrays with
+    // that size.
+
+    vec![
+        // Start with the bindless index table, bound to the caller-supplied
+        // binding number (conventionally 0).
+        storage_buffer_read_only_sized(false, NonZeroU64::new(index_table_size))
+            .build(*bindless_index_table_binding_number, stages),
+        // Continue with the common bindless resource arrays.
+        sampler(SamplerBindingType::Filtering)
+            .count(bindless_slab_resource_limit)
+            .build(1, stages),
+        sampler(SamplerBindingType::NonFiltering)
+            .count(bindless_slab_resource_limit)
+            .build(2, stages),
+        sampler(SamplerBindingType::Comparison)
+            .count(bindless_slab_resource_limit)
+            .build(3, stages),
+        texture_1d(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(4, stages),
+        texture_2d(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(5, stages),
+        texture_2d_array(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(6, stages),
+        texture_3d(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(7, stages),
+        texture_cube(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(8, stages),
+        texture_cube_array(filterable_float)
+            .count(bindless_slab_resource_limit)
+            .build(9, stages),
+    ]
+}
+
+impl BindlessSlabResourceLimit {
+    /// Returns the concrete resource limit: the platform default
+    /// ([`AUTO_BINDLESS_SLAB_RESOURCE_LIMIT`]) for `Auto`, or the wrapped
+    /// value for `Custom`.
+    pub fn resolve(&self) -> u32 {
+        match self {
+            Self::Auto => AUTO_BINDLESS_SLAB_RESOURCE_LIMIT,
+            Self::Custom(limit) => *limit,
+        }
+    }
+}
+
+impl BindlessResourceType {
+    /// Looks up the binding number of the common binding array for this
+    /// resource type.
+    ///
+    /// For example, `BindlessResourceType::Texture2d` yields 5, matching the
+    /// `@group(2) @binding(5) var bindless_textures_2d:
+    /// binding_array<texture_2d<f32>>` declaration in `bindless.wgsl`.
+    ///
+    /// Resource types without an entry in [`BINDING_NUMBERS`] have no fixed
+    /// binding number, and `None` is returned for them.
+    ///
+    /// The result is a static reference to the binding number rather than the
+    /// number itself. This is to conform to an idiosyncratic API in `wgpu`
+    /// whereby binding numbers for binding arrays are taken by `&u32`
+    /// *reference*, not by `u32` value.
+    pub fn binding_number(&self) -> Option<&'static BindingNumber> {
+        // The binary search is only valid while `BINDING_NUMBERS` stays sorted
+        // by resource type.
+        BINDING_NUMBERS
+            .binary_search_by_key(self, |&(resource_type, _)| resource_type)
+            .ok()
+            .map(|row| &BINDING_NUMBERS[row].1)
+    }
+}
+
+/// Maps each texture view dimension to the matching bindless texture type.
+impl From<TextureViewDimension> for BindlessResourceType {
+    fn from(dimension: TextureViewDimension) -> Self {
+        use TextureViewDimension as Dim;
+
+        match dimension {
+            Dim::D1 => Self::Texture1d,
+            Dim::D2 => Self::Texture2d,
+            Dim::D2Array => Self::Texture2dArray,
+            Dim::D3 => Self::Texture3d,
+            Dim::Cube => Self::TextureCube,
+            Dim::CubeArray => Self::TextureCubeArray,
+        }
+    }
+}
+
+/// Maps each sampler binding type to the matching bindless sampler type.
+impl From<SamplerBindingType> for BindlessResourceType {
+    fn from(binding_type: SamplerBindingType) -> Self {
+        use SamplerBindingType as Kind;
+
+        match binding_type {
+            Kind::Filtering => Self::SamplerFiltering,
+            Kind::NonFiltering => Self::SamplerNonFiltering,
+            Kind::Comparison => Self::SamplerComparison,
+        }
+    }
+}
+
+impl From<u32> for BindlessIndex {
+    /// Wraps a raw `u32` as a [`BindlessIndex`].
+    fn from(raw: u32) -> Self {
+        BindlessIndex(raw)
+    }
+}
+
+impl From<u32> for BindingNumber {
+    /// Wraps a raw `u32` as a [`BindingNumber`].
+    fn from(raw: u32) -> Self {
+        BindingNumber(raw)
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/buffer.rs
+++ b/crates/libmarathon/src/render/render_resource/buffer.rs
@@ -0,0 +1,95 @@
+use crate::render::define_atomic_id;
+use crate::render::renderer::WgpuWrapper;
+use core::ops::{Bound, Deref, RangeBounds};
+
+define_atomic_id!(BufferId);
+
+/// A [`wgpu::Buffer`] paired with a [`BufferId`] that identifies it.
+///
+/// Cloning a `Buffer` keeps the same ID.
+#[derive(Clone, Debug)]
+pub struct Buffer {
+    id: BufferId,
+    value: WgpuWrapper<wgpu::Buffer>,
+}
+
+impl Buffer {
+    /// Returns the ID of this buffer.
+    #[inline]
+    pub fn id(&self) -> BufferId {
+        self.id
+    }
+
+    /// Returns a slice of this buffer covering `bounds`, recording the slice's
+    /// offset and size alongside the underlying [`wgpu::BufferSlice`].
+    ///
+    /// An unbounded start maps to offset 0 and an unbounded end maps to the
+    /// size of the buffer.
+    pub fn slice(&self, bounds: impl RangeBounds<wgpu::BufferAddress>) -> BufferSlice<'_> {
+        // need to compute and store this manually because wgpu doesn't export offset and size on wgpu::BufferSlice
+        let offset = match bounds.start_bound() {
+            Bound::Unbounded => 0,
+            Bound::Included(&first) => first,
+            Bound::Excluded(&before_first) => before_first + 1,
+        };
+        // Exclusive end of the slice, measured from the start of the buffer.
+        let end = match bounds.end_bound() {
+            Bound::Unbounded => self.value.size(),
+            Bound::Included(&last) => last + 1,
+            Bound::Excluded(&past_last) => past_last,
+        };
+        let size = end - offset;
+        BufferSlice {
+            id: self.id,
+            offset,
+            size,
+            value: self.value.slice(bounds),
+        }
+    }
+
+    /// Unmaps the underlying buffer by forwarding to `unmap` on the wrapped
+    /// [`wgpu::Buffer`].
+    #[inline]
+    pub fn unmap(&self) {
+        self.value.unmap();
+    }
+}
+
+impl From<wgpu::Buffer> for Buffer {
+    /// Wraps `value` and tags it with a newly created [`BufferId`].
+    fn from(value: wgpu::Buffer) -> Self {
+        let id = BufferId::new();
+        let value = WgpuWrapper::new(value);
+        Self { id, value }
+    }
+}
+
+/// Gives direct access to the methods of the wrapped [`wgpu::Buffer`].
+impl Deref for Buffer {
+    type Target = wgpu::Buffer;
+
+    #[inline]
+    fn deref(&self) -> &wgpu::Buffer {
+        &self.value
+    }
+}
+
+/// A [`wgpu::BufferSlice`] together with the ID of the buffer it was taken
+/// from and the offset and size that were computed when it was created.
+#[derive(Clone, Debug)]
+pub struct BufferSlice<'a> {
+    id: BufferId,
+    offset: wgpu::BufferAddress,
+    value: wgpu::BufferSlice<'a>,
+    size: wgpu::BufferAddress,
+}
+
+impl BufferSlice<'_> {
+    /// Returns the ID of the buffer this slice was taken from.
+    #[inline]
+    pub fn id(&self) -> BufferId {
+        self.id
+    }
+
+    /// Returns the offset at which this slice starts within its buffer.
+    #[inline]
+    pub fn offset(&self) -> wgpu::BufferAddress {
+        self.offset
+    }
+
+    /// Returns the size of this slice.
+    #[inline]
+    pub fn size(&self) -> wgpu::BufferAddress {
+        self.size
+    }
+}
+
+/// Gives direct access to the methods of the wrapped [`wgpu::BufferSlice`].
+impl<'a> Deref for BufferSlice<'a> {
+    type Target = wgpu::BufferSlice<'a>;
+
+    #[inline]
+    fn deref(&self) -> &wgpu::BufferSlice<'a> {
+        &self.value
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/buffer_vec.rs
+++ b/crates/libmarathon/src/render/render_resource/buffer_vec.rs
@@ -0,0 +1,587 @@
+use core::{iter, marker::PhantomData};
+
+use crate::render::{
+    render_resource::Buffer,
+    renderer::{RenderDevice, RenderQueue},
+};
+use bytemuck::{must_cast_slice, NoUninit};
+use encase::{
+    internal::{WriteInto, Writer},
+    ShaderType,
+};
+use thiserror::Error;
+use wgpu::{BindingResource, BufferAddress, BufferUsages};
+
+use super::GpuArrayBufferable;
+
+/// A structure for storing raw bytes that have already been properly formatted
+/// for use by the GPU.
+///
+/// "Properly formatted" means that item data already meets the alignment and padding
+/// requirements for how it will be used on the GPU. The item type must implement [`NoUninit`]
+/// for its data representation to be directly copyable.
+///
+/// Index, vertex, and instance-rate vertex buffers have no alignment nor padding requirements and
+/// so this helper type is a good choice for them.
+///
+/// The contained data is stored in system RAM. Calling [`reserve`](RawBufferVec::reserve)
+/// allocates VRAM from the [`RenderDevice`].
+/// [`write_buffer`](RawBufferVec::write_buffer) queues copying of the data
+/// from system RAM to VRAM.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`]
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+pub struct RawBufferVec<T: NoUninit> {
+    /// CPU-side copy of the items.
+    values: Vec<T>,
+    /// GPU buffer; `None` until [`reserve`](RawBufferVec::reserve) creates one.
+    buffer: Option<Buffer>,
+    /// Number of items the GPU buffer was last allocated for.
+    capacity: usize,
+    /// `size_of::<T>()`, cached at construction.
+    item_size: usize,
+    /// Usages requested at construction; `COPY_DST` is added when the buffer is created.
+    buffer_usage: BufferUsages,
+    /// Optional debug label passed to the buffer descriptor.
+    label: Option<String>,
+    /// Set when the label changes so that the next `reserve` re-creates the buffer.
+    changed: bool,
+}
+
+impl<T: NoUninit> RawBufferVec<T> {
+    /// Creates a new [`RawBufferVec`] with the given [`BufferUsages`].
+    pub const fn new(buffer_usage: BufferUsages) -> Self {
+        Self {
+            values: Vec::new(),
+            buffer: None,
+            capacity: 0,
+            item_size: size_of::<T>(),
+            buffer_usage,
+            label: None,
+            changed: false,
+        }
+    }
+
+    /// Returns a handle to the buffer, if the data has been uploaded.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns the binding for the buffer if the data has been uploaded.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        Some(BindingResource::Buffer(
+            self.buffer()?.as_entire_buffer_binding(),
+        ))
+    }
+
+    /// Returns the amount of space that the GPU will use before reallocating.
+    ///
+    /// The value is counted in items, not bytes.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.capacity
+    }
+
+    /// Returns the number of items that have been pushed to this buffer.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.values.len()
+    }
+
+    /// Returns true if the buffer is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.values.is_empty()
+    }
+
+    /// Adds a new value and returns its index.
+    pub fn push(&mut self, value: T) -> usize {
+        let index = self.values.len();
+        self.values.push(value);
+        index
+    }
+
+    /// Moves all CPU-side values out of `other` and appends them to this buffer,
+    /// leaving `other`'s values empty.
+    pub fn append(&mut self, other: &mut RawBufferVec<T>) {
+        self.values.append(&mut other.values);
+    }
+
+    /// Returns the value at the given index.
+    pub fn get(&self, index: u32) -> Option<&T> {
+        self.values.get(index as usize)
+    }
+
+    /// Sets the value at the given index.
+    ///
+    /// The index must be less than [`RawBufferVec::len`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    pub fn set(&mut self, index: u32, value: T) {
+        self.values[index as usize] = value;
+    }
+
+    /// Preallocates space for `count` elements in the internal CPU-side buffer.
+    ///
+    /// Unlike [`RawBufferVec::reserve`], this doesn't have any effect on the GPU buffer.
+    pub fn reserve_internal(&mut self, count: usize) {
+        self.values.reserve(count);
+    }
+
+    /// Changes the debugging label of the buffer.
+    ///
+    /// The next time the buffer is updated (via [`reserve`](Self::reserve)), Bevy will inform
+    /// the driver of the new label.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let label = label.map(str::to_string);
+
+        if label != self.label {
+            self.changed = true;
+        }
+
+        self.label = label;
+    }
+
+    /// Returns the label
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Creates a [`Buffer`] on the [`RenderDevice`] with size
+    /// at least `size_of::<T>() * capacity`, unless such a buffer already exists.
+    ///
+    /// If a [`Buffer`] exists, but is too small, references to it will be discarded,
+    /// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s
+    /// that are no longer referenced will be deleted by the [`RenderDevice`]
+    /// once it is done using them (typically 1-2 frames).
+    ///
+    /// A new [`Buffer`] is also created when the label changed since the last
+    /// allocation and the requested size is non-zero.
+    ///
+    /// In addition to any [`BufferUsages`] provided when
+    /// the `RawBufferVec` was created, the buffer on the [`RenderDevice`]
+    /// is marked as [`BufferUsages::COPY_DST`](BufferUsages).
+    pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
+        let size = self.item_size * capacity;
+        if capacity > self.capacity || (self.changed && size > 0) {
+            self.capacity = capacity;
+            self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
+                label: self.label.as_deref(),
+                size: size as BufferAddress,
+                usage: BufferUsages::COPY_DST | self.buffer_usage,
+                mapped_at_creation: false,
+            }));
+            self.changed = false;
+        }
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// Before queuing the write, a [`reserve`](RawBufferVec::reserve) operation
+    /// is executed. Nothing happens if no values have been pushed.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        if self.values.is_empty() {
+            return;
+        }
+        self.reserve(self.values.len(), device);
+        if let Some(buffer) = &self.buffer {
+            let bytes: &[u8] = must_cast_slice(&self.values);
+            queue.write_buffer(buffer, 0, bytes);
+        }
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If the buffer is not initialized on the GPU or the range is bigger than the capacity it will
+    /// return an error. You'll need to either reserve a new buffer which will lose data on the GPU
+    /// or create a new buffer and copy the old data to it.
+    ///
+    /// This will only write the data contained in the given range. It is useful if you only want
+    /// to update a part of the buffer.
+    ///
+    /// `range` is expressed in items (indices into the pushed values), not in bytes.
+    ///
+    /// # Errors
+    ///
+    /// * [`WriteBufferRangeError::NoValuesToUpload`] if no values have been pushed.
+    /// * [`WriteBufferRangeError::RangeBiggerThanBuffer`] if `range` ends past the reserved
+    ///   capacity or does not lie within the pushed values.
+    /// * [`WriteBufferRangeError::BufferNotInitialized`] if no GPU buffer exists yet.
+    pub fn write_buffer_range(
+        &mut self,
+        render_queue: &RenderQueue,
+        range: core::ops::Range<usize>,
+    ) -> Result<(), WriteBufferRangeError> {
+        if self.values.is_empty() {
+            return Err(WriteBufferRangeError::NoValuesToUpload);
+        }
+        // Both `range` and `capacity` count items, so they are compared directly;
+        // scaling the capacity by the item size would let oversized ranges through.
+        if range.end > self.capacity {
+            return Err(WriteBufferRangeError::RangeBiggerThanBuffer);
+        }
+        let Some(buffer) = &self.buffer else {
+            return Err(WriteBufferRangeError::BufferNotInitialized);
+        };
+        // Checked lookup: a range outside the CPU-side values is reported as an
+        // error instead of panicking.
+        let Some(items) = self.values.get(range.clone()) else {
+            return Err(WriteBufferRangeError::RangeBiggerThanBuffer);
+        };
+        // Cast only the bytes we need to write
+        let bytes: &[u8] = must_cast_slice(items);
+        render_queue.write_buffer(buffer, (range.start * self.item_size) as u64, bytes);
+        Ok(())
+    }
+
+    /// Reduces the length of the buffer.
+    pub fn truncate(&mut self, len: usize) {
+        self.values.truncate(len);
+    }
+
+    /// Removes all elements from the buffer.
+    pub fn clear(&mut self) {
+        self.values.clear();
+    }
+
+    /// Removes and returns the last element in the buffer.
+    pub fn pop(&mut self) -> Option<T> {
+        self.values.pop()
+    }
+
+    /// Returns a shared reference to the CPU-side values.
+    pub fn values(&self) -> &Vec<T> {
+        &self.values
+    }
+
+    /// Returns a mutable reference to the CPU-side values.
+    pub fn values_mut(&mut self) -> &mut Vec<T> {
+        &mut self.values
+    }
+}
+
+impl<T> RawBufferVec<T>
+where
+    T: NoUninit + Default,
+{
+    /// Stores `value` at `index`, first padding the CPU-side values with
+    /// `T::default()` until `index` is in bounds.
+    pub fn grow_set(&mut self, index: u32, value: T) {
+        let slot = index as usize;
+        let required_len = slot + 1;
+        if required_len > self.values.len() {
+            self.values.resize_with(required_len, T::default);
+        }
+        self.values[slot] = value;
+    }
+}
+
+/// Extends the CPU-side values of a [`RawBufferVec`] from an iterator.
+impl<T: NoUninit> Extend<T> for RawBufferVec<T> {
+    /// Appends every item yielded by `iter` to the CPU-side values.
+    #[inline]
+    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
+        self.values.extend(iter);
+    }
+}
+
+/// Like [`RawBufferVec`], but doesn't require that the data type `T` be
+/// [`NoUninit`].
+///
+/// This is a high-performance data structure that you should use whenever
+/// possible if your data is more complex than is suitable for [`RawBufferVec`].
+/// The [`ShaderType`] trait from the `encase` library is used to ensure that
+/// the data is correctly aligned for use by the GPU.
+///
+/// For performance reasons, unlike [`RawBufferVec`], this type doesn't allow
+/// CPU access to the data after it's been added via [`BufferVec::push`]. If you
+/// need CPU access to the data, consider another type, such as
+/// [`StorageBuffer`][super::StorageBuffer].
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`]
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+pub struct BufferVec<T>
+where
+    T: ShaderType + WriteInto,
+{
+    /// CPU-side bytes of all pushed items; each item occupies `T::min_size()` bytes.
+    data: Vec<u8>,
+    /// GPU buffer; `None` until [`reserve`](BufferVec::reserve) creates one.
+    buffer: Option<Buffer>,
+    /// Number of items the GPU buffer was last allocated for.
+    capacity: usize,
+    /// Usages requested at construction; `COPY_DST` is added when the buffer is created.
+    buffer_usage: BufferUsages,
+    /// Optional debug label passed to the buffer descriptor.
+    label: Option<String>,
+    /// Set when the label changes so that the next `reserve` re-creates the buffer.
+    label_changed: bool,
+    /// Ties the container to its item type without storing a `T`.
+    phantom: PhantomData<T>,
+}
+
+impl<T> BufferVec<T>
+where
+    T: ShaderType + WriteInto,
+{
+    /// Creates a new [`BufferVec`] with the given [`BufferUsages`].
+    pub const fn new(buffer_usage: BufferUsages) -> Self {
+        Self {
+            data: Vec::new(),
+            buffer: None,
+            capacity: 0,
+            buffer_usage,
+            label: None,
+            label_changed: false,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Returns a handle to the buffer, if the data has been uploaded.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns the binding for the buffer if the data has been uploaded.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        Some(BindingResource::Buffer(
+            self.buffer()?.as_entire_buffer_binding(),
+        ))
+    }
+
+    /// Returns the amount of space that the GPU will use before reallocating.
+    ///
+    /// The value is counted in items, not bytes.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.capacity
+    }
+
+    /// Returns the number of items that have been pushed to this buffer.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.data.len() / u64::from(T::min_size()) as usize
+    }
+
+    /// Returns true if the buffer is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    /// Adds a new value and returns its index.
+    pub fn push(&mut self, value: T) -> usize {
+        let element_size = u64::from(T::min_size()) as usize;
+        let offset = self.data.len();
+
+        // TODO: Consider using unsafe code to push uninitialized, to prevent
+        // the zeroing. It shows up in profiles.
+        self.data.extend(iter::repeat_n(0, element_size));
+
+        // Take a slice of the new data for `write_into` to use. This is
+        // important: it hoists the bounds check up here so that the compiler
+        // can eliminate all the bounds checks that `write_into` will emit.
+        let mut dest = &mut self.data[offset..(offset + element_size)];
+        value.write_into(&mut Writer::new(&value, &mut dest, 0).unwrap());
+
+        offset / element_size
+    }
+
+    /// Changes the debugging label of the buffer.
+    ///
+    /// The next time the buffer is updated (via [`Self::reserve`]), Bevy will inform
+    /// the driver of the new label.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let label = label.map(str::to_string);
+
+        if label != self.label {
+            self.label_changed = true;
+        }
+
+        self.label = label;
+    }
+
+    /// Returns the label
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Creates a [`Buffer`] on the [`RenderDevice`] with size
+    /// `T::min_size() * capacity`, unless such a buffer already exists.
+    ///
+    /// If a [`Buffer`] exists, but is too small, references to it will be discarded,
+    /// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s
+    /// that are no longer referenced will be deleted by the [`RenderDevice`]
+    /// once it is done using them (typically 1-2 frames).
+    ///
+    /// A new [`Buffer`] is also created when the label changed since the last allocation.
+    ///
+    /// In addition to any [`BufferUsages`] provided when
+    /// the `BufferVec` was created, the buffer on the [`RenderDevice`]
+    /// is marked as [`BufferUsages::COPY_DST`](BufferUsages).
+    pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
+        if capacity <= self.capacity && !self.label_changed {
+            return;
+        }
+
+        self.capacity = capacity;
+        let size = u64::from(T::min_size()) as usize * capacity;
+        self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
+            label: self.label.as_deref(),
+            size: size as BufferAddress,
+            usage: BufferUsages::COPY_DST | self.buffer_usage,
+            mapped_at_creation: false,
+        }));
+        self.label_changed = false;
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// Before queuing the write, a [`reserve`](BufferVec::reserve) operation is
+    /// executed. Nothing happens if no values have been pushed.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        if self.data.is_empty() {
+            return;
+        }
+
+        self.reserve(self.data.len() / u64::from(T::min_size()) as usize, device);
+
+        let Some(buffer) = &self.buffer else { return };
+        queue.write_buffer(buffer, 0, &self.data);
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If the buffer is not initialized on the GPU or the range is bigger than the capacity it will
+    /// return an error. You'll need to either reserve a new buffer which will lose data on the GPU
+    /// or create a new buffer and copy the old data to it.
+    ///
+    /// This will only write the data contained in the given range. It is useful if you only want
+    /// to update a part of the buffer.
+    ///
+    /// `range` is expressed in items (the indices returned by [`BufferVec::push`]), not in
+    /// bytes, matching [`RawBufferVec::write_buffer_range`].
+    ///
+    /// # Errors
+    ///
+    /// * [`WriteBufferRangeError::NoValuesToUpload`] if no values have been pushed.
+    /// * [`WriteBufferRangeError::RangeBiggerThanBuffer`] if `range` ends past the reserved
+    ///   capacity or does not lie within the pushed values.
+    /// * [`WriteBufferRangeError::BufferNotInitialized`] if no GPU buffer exists yet.
+    pub fn write_buffer_range(
+        &mut self,
+        render_queue: &RenderQueue,
+        range: core::ops::Range<usize>,
+    ) -> Result<(), WriteBufferRangeError> {
+        if self.data.is_empty() {
+            return Err(WriteBufferRangeError::NoValuesToUpload);
+        }
+        // Both `range` and `capacity` count items, so they are compared directly.
+        if range.end > self.capacity {
+            return Err(WriteBufferRangeError::RangeBiggerThanBuffer);
+        }
+        let Some(buffer) = &self.buffer else {
+            return Err(WriteBufferRangeError::BufferNotInitialized);
+        };
+        // Convert the item range to bytes once, so that the CPU-side slice and the
+        // GPU-side offset are derived from the same units.
+        let item_size = u64::from(T::min_size()) as usize;
+        let byte_start = range.start * item_size;
+        let byte_end = range.end * item_size;
+        // Checked lookup: a range outside the CPU-side data is reported as an
+        // error instead of panicking.
+        let Some(bytes) = self.data.get(byte_start..byte_end) else {
+            return Err(WriteBufferRangeError::RangeBiggerThanBuffer);
+        };
+        render_queue.write_buffer(buffer, byte_start as u64, bytes);
+        Ok(())
+    }
+
+    /// Reduces the length of the buffer.
+    ///
+    /// `len` is the number of items to keep.
+    pub fn truncate(&mut self, len: usize) {
+        self.data.truncate(u64::from(T::min_size()) as usize * len);
+    }
+
+    /// Removes all elements from the buffer.
+    pub fn clear(&mut self) {
+        self.data.clear();
+    }
+}
+
+/// Like a [`BufferVec`], but only reserves space on the GPU for elements
+/// instead of initializing them CPU-side.
+///
+/// This type is useful when you're accumulating "output slots" for a GPU
+/// compute shader to write into.
+///
+/// The type `T` need not be [`NoUninit`], unlike [`RawBufferVec`]; it only has to
+/// be [`GpuArrayBufferable`].
+pub struct UninitBufferVec<T>
+where
+    T: GpuArrayBufferable,
+{
+    /// GPU buffer; `None` until [`reserve`](UninitBufferVec::reserve) creates one.
+    buffer: Option<Buffer>,
+    /// Number of element slots handed out so far.
+    len: usize,
+    /// Number of elements the GPU buffer was last allocated for.
+    capacity: usize,
+    /// `size_of::<T>()`, cached at construction.
+    item_size: usize,
+    /// Usages requested at construction; `COPY_DST` is added when the buffer is created.
+    buffer_usage: BufferUsages,
+    /// Optional debug label passed to the buffer descriptor.
+    label: Option<String>,
+    /// When set, the next `reserve` re-creates the buffer even if it is big enough.
+    label_changed: bool,
+    /// Ties the container to its element type without storing a `T`.
+    phantom: PhantomData<T>,
+}
+
+impl<T> UninitBufferVec<T>
+where
+    T: GpuArrayBufferable,
+{
+    /// Builds an empty [`UninitBufferVec`] that will request the given
+    /// [`BufferUsages`] once its GPU buffer is created.
+    pub const fn new(buffer_usage: BufferUsages) -> Self {
+        Self {
+            buffer: None,
+            len: 0,
+            capacity: 0,
+            item_size: size_of::<T>(),
+            buffer_usage,
+            label: None,
+            label_changed: false,
+            phantom: PhantomData,
+        }
+    }
+
+    /// The GPU buffer, or `None` if it has not been allocated yet.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// A binding covering the entire GPU buffer, or `None` if it has not been
+    /// allocated yet.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        self.buffer()
+            .map(|allocated| BindingResource::Buffer(allocated.as_entire_buffer_binding()))
+    }
+
+    /// Claims a single element slot and returns its index.
+    pub fn add(&mut self) -> usize {
+        self.add_multiple(1)
+    }
+
+    /// Claims `count` consecutive element slots and returns the index of the
+    /// first of them.
+    pub fn add_multiple(&mut self, count: usize) -> usize {
+        let first_slot = self.len;
+        self.len = first_slot + count;
+        first_slot
+    }
+
+    /// Whether no element slots have been claimed from this [`UninitBufferVec`].
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Forgets every claimed element slot; the GPU buffer is left untouched.
+    pub fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    /// The number of element slots claimed so far.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Allocates a GPU buffer with room for `capacity` elements.
+    ///
+    /// Nothing is allocated when the current buffer is already large enough,
+    /// unless the label has been flagged as changed.
+    pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
+        if capacity > self.capacity || self.label_changed {
+            self.capacity = capacity;
+            let byte_size = self.item_size * capacity;
+            let descriptor = wgpu::BufferDescriptor {
+                label: self.label.as_deref(),
+                size: byte_size as BufferAddress,
+                usage: BufferUsages::COPY_DST | self.buffer_usage,
+                mapped_at_creation: false,
+            };
+            self.buffer = Some(device.create_buffer(&descriptor));
+            self.label_changed = false;
+        }
+    }
+
+    /// Allocates the GPU buffer so that it can hold every slot claimed so far.
+    ///
+    /// Does nothing when no slots have been claimed.
+    pub fn write_buffer(&mut self, device: &RenderDevice) {
+        if self.is_empty() {
+            return;
+        }
+        self.reserve(self.len, device);
+    }
+}
+
+/// Error returned when `write_buffer_range` fails
+///
+/// See [`RawBufferVec::write_buffer_range`] [`BufferVec::write_buffer_range`]
+#[derive(Debug, Eq, PartialEq, Copy, Clone, Error)]
+pub enum WriteBufferRangeError {
+    /// The requested range does not fit within the buffer's reserved capacity.
+    #[error("the range is bigger than the capacity of the buffer")]
+    RangeBiggerThanBuffer,
+    /// No GPU buffer has been created yet, so there is nothing to write into.
+    #[error("the gpu buffer is not initialized")]
+    BufferNotInitialized,
+    /// The CPU-side storage is empty.
+    #[error("there are no values to upload")]
+    NoValuesToUpload,
+}
--- a/crates/libmarathon/src/render/render_resource/gpu_array_buffer.rs
+++ b/crates/libmarathon/src/render/render_resource/gpu_array_buffer.rs
@@ -0,0 +1,118 @@
+use super::{
+    binding_types::{storage_buffer_read_only, uniform_buffer_sized},
+    BindGroupLayoutEntryBuilder, BufferVec,
+};
+use crate::render::{
+    render_resource::batched_uniform_buffer::BatchedUniformBuffer,
+    renderer::{RenderDevice, RenderQueue},
+};
+use bevy_ecs::{prelude::Component, resource::Resource};
+use core::marker::PhantomData;
+use encase::{private::WriteInto, ShaderSize, ShaderType};
+use nonmax::NonMaxU32;
+use wgpu::{BindingResource, BufferUsages};
+
+/// Trait for types able to go in a [`GpuArrayBuffer`].
+///
+/// This is a marker trait with no methods of its own; it only bundles the
+/// [`ShaderType`], [`ShaderSize`], [`WriteInto`] and [`Clone`] bounds.
+pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}
+
+// Blanket impl: every type that satisfies the bounds is automatically `GpuArrayBufferable`.
+impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {}
+
+/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
+///
+/// On platforms that support storage buffers, this is equivalent to
+/// [`BufferVec<T>`]. Otherwise, this falls back to a dynamic offset
+/// uniform buffer with the largest array of T that fits within a uniform buffer
+/// binding (within reasonable limits).
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`]
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+#[derive(Resource)]
+pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
+    /// Fallback used when the device reports no storage buffers per shader stage.
+    Uniform(BatchedUniformBuffer<T>),
+    /// Storage-buffer backed variant, used whenever storage buffers are available.
+    Storage(BufferVec<T>),
+}
+
+impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
+    /// Creates an empty buffer, picking the variant from the device limits:
+    /// the uniform fallback when no storage buffers are available per shader
+    /// stage, the storage variant otherwise.
+    pub fn new(device: &RenderDevice) -> Self {
+        let limits = device.limits();
+        match limits.max_storage_buffers_per_shader_stage {
+            0 => Self::Uniform(BatchedUniformBuffer::new(&limits)),
+            _ => Self::Storage(BufferVec::new(BufferUsages::STORAGE)),
+        }
+    }
+
+    /// Clears whichever buffer backs this array.
+    pub fn clear(&mut self) {
+        match self {
+            Self::Uniform(uniform) => uniform.clear(),
+            Self::Storage(storage) => storage.clear(),
+        }
+    }
+
+    /// Appends `value` and returns the index under which it was stored.
+    ///
+    /// For the storage variant the returned index carries no dynamic offset.
+    pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> {
+        match self {
+            Self::Uniform(uniform) => uniform.push(value),
+            Self::Storage(storage) => GpuArrayBufferIndex {
+                index: storage.push(value) as u32,
+                dynamic_offset: None,
+                element_type: PhantomData,
+            },
+        }
+    }
+
+    /// Forwards to the `write_buffer` method of whichever buffer backs this array.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        match self {
+            Self::Uniform(uniform) => uniform.write_buffer(device, queue),
+            Self::Storage(storage) => storage.write_buffer(device, queue),
+        }
+    }
+
+    /// Returns the bind group layout entry builder matching the variant that
+    /// [`GpuArrayBuffer::new`] would select for `device`.
+    pub fn binding_layout(device: &RenderDevice) -> BindGroupLayoutEntryBuilder {
+        let storage_buffers_unavailable =
+            device.limits().max_storage_buffers_per_shader_stage == 0;
+        if storage_buffers_unavailable {
+            // BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use
+            // None here and let wgpu figure out the size.
+            uniform_buffer_sized(true, None)
+        } else {
+            storage_buffer_read_only::<T>(false)
+        }
+    }
+
+    /// Returns the binding of whichever buffer backs this array, if it has one.
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        match self {
+            Self::Uniform(uniform) => uniform.binding(),
+            Self::Storage(storage) => storage.binding(),
+        }
+    }
+
+    /// Returns the uniform batch size when the uniform fallback applies to
+    /// `device`, and `None` when storage buffers are available.
+    pub fn batch_size(device: &RenderDevice) -> Option<u32> {
+        let limits = device.limits();
+        (limits.max_storage_buffers_per_shader_stage == 0)
+            .then(|| BatchedUniformBuffer::<T>::batch_size(&limits) as u32)
+    }
+}
+
+/// An index into a [`GpuArrayBuffer`] for a given element.
+#[derive(Component, Clone)]
+pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
+    /// The index to use in a shader into the array.
+    pub index: u32,
+    /// The dynamic offset to use when setting the bind group in a pass.
+    /// Only used on platforms that don't support storage buffers.
+    /// [`GpuArrayBuffer::push`] sets this to `None` for the storage variant.
+    pub dynamic_offset: Option<NonMaxU32>,
+    /// Ties the index to its element type without storing a `T`.
+    pub element_type: PhantomData<T>,
+}
--- a/crates/libmarathon/src/render/render_resource/mod.rs
+++ b/crates/libmarathon/src/render/render_resource/mod.rs
@@ -0,0 +1,75 @@
+mod batched_uniform_buffer;
+mod bind_group;
+mod bind_group_entries;
+mod bind_group_layout;
+mod bind_group_layout_entries;
+mod bindless;
+mod buffer;
+mod buffer_vec;
+mod gpu_array_buffer;
+mod pipeline;
+mod pipeline_cache;
+mod pipeline_specializer;
+pub mod resource_macros;
+mod specializer;
+mod storage_buffer;
+mod texture;
+mod uniform_buffer;
+
+pub use bind_group::*;
+pub use bind_group_entries::*;
+pub use bind_group_layout::*;
+pub use bind_group_layout_entries::*;
+pub use bindless::*;
+pub use buffer::*;
+pub use buffer_vec::*;
+pub use gpu_array_buffer::*;
+pub use pipeline::*;
+pub use pipeline_cache::*;
+pub use pipeline_specializer::*;
+pub use specializer::*;
+pub use storage_buffer::*;
+pub use texture::*;
+pub use uniform_buffer::*;
+
+// TODO: decide where re-exports should go
+pub use wgpu::{
+    util::{
+        BufferInitDescriptor, DispatchIndirectArgs, DrawIndexedIndirectArgs, DrawIndirectArgs,
+        TextureDataOrder,
+    },
+    AccelerationStructureFlags, AccelerationStructureGeometryFlags,
+    AccelerationStructureUpdateMode, AdapterInfo as WgpuAdapterInfo, AddressMode, AstcBlock,
+    AstcChannel, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
+    BindGroupLayoutEntry, BindingResource, BindingType, Blas, BlasBuildEntry, BlasGeometries,
+    BlasGeometrySizeDescriptors, BlasTriangleGeometry, BlasTriangleGeometrySizeDescriptor,
+    BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferAsyncError,
+    BufferBinding, BufferBindingType, BufferDescriptor, BufferSize, BufferUsages, ColorTargetState,
+    ColorWrites, CommandEncoder, CommandEncoderDescriptor, CompareFunction, ComputePass,
+    ComputePassDescriptor, ComputePipelineDescriptor as RawComputePipelineDescriptor,
+    CreateBlasDescriptor, CreateTlasDescriptor, DepthBiasState, DepthStencilState, DownlevelFlags,
+    Extent3d, Face, Features as WgpuFeatures, FilterMode, FragmentState as RawFragmentState,
+    FrontFace, ImageSubresourceRange, IndexFormat, Limits as WgpuLimits, LoadOp, MapMode,
+    MultisampleState, Operations, Origin3d, PipelineCompilationOptions, PipelineLayout,
+    PipelineLayoutDescriptor, PollType, PolygonMode, PrimitiveState, PrimitiveTopology,
+    PushConstantRange, RenderPassColorAttachment, RenderPassDepthStencilAttachment,
+    RenderPassDescriptor, RenderPipelineDescriptor as RawRenderPipelineDescriptor,
+    Sampler as WgpuSampler, SamplerBindingType, SamplerBindingType as WgpuSamplerBindingType,
+    SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource, ShaderStages,
+    StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp,
+    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
+    TextureDescriptor, TextureDimension, TextureFormat, TextureFormatFeatureFlags,
+    TextureFormatFeatures, TextureSampleType, TextureUsages, TextureView as WgpuTextureView,
+    TextureViewDescriptor, TextureViewDimension, Tlas, TlasInstance, VertexAttribute,
+    VertexBufferLayout as RawVertexBufferLayout, VertexFormat, VertexState as RawVertexState,
+    VertexStepMode, COPY_BUFFER_ALIGNMENT,
+};
+
+/// Re-exports everything from the `encase` crate together with the `ShaderType`
+/// item from `bevy_encase_derive`.
+pub mod encase {
+    pub use bevy_encase_derive::ShaderType;
+    pub use encase::*;
+}
+
+pub use self::encase::{ShaderSize, ShaderType};
+
+pub use naga::ShaderStage;
--- a/crates/libmarathon/src/render/render_resource/pipeline.rs
+++ b/crates/libmarathon/src/render/render_resource/pipeline.rs
@@ -0,0 +1,183 @@
+use super::empty_bind_group_layout;
+use crate::render::renderer::WgpuWrapper;
+use crate::render::{define_atomic_id, render_resource::BindGroupLayout};
+use std::borrow::Cow;
+use bevy_asset::Handle;
+use bevy_mesh::VertexBufferLayout;
+use bevy_shader::{Shader, ShaderDefVal};
+use core::iter;
+use core::ops::Deref;
+use thiserror::Error;
+use wgpu::{
+    ColorTargetState, DepthStencilState, MultisampleState, PrimitiveState, PushConstantRange,
+};
+
+define_atomic_id!(RenderPipelineId);
+
+/// A [`RenderPipeline`] represents a graphics pipeline and its stages (shaders), bindings and vertex buffers.
+///
+/// May be converted from and dereferences to a wgpu [`RenderPipeline`](wgpu::RenderPipeline).
+/// Can be created via [`RenderDevice::create_render_pipeline`](crate::render::renderer::RenderDevice::create_render_pipeline).
+#[derive(Clone, Debug)]
+pub struct RenderPipeline {
+    /// Id assigned when the pipeline is wrapped.
+    id: RenderPipelineId,
+    /// The wrapped wgpu pipeline.
+    value: WgpuWrapper<wgpu::RenderPipeline>,
+}
+
+impl RenderPipeline {
+    /// Returns the [`RenderPipelineId`].
+    #[inline]
+    pub fn id(&self) -> RenderPipelineId {
+        self.id
+    }
+}
+
+/// Wraps a raw [`wgpu::RenderPipeline`] and pairs it with a new [`RenderPipelineId`].
+impl From<wgpu::RenderPipeline> for RenderPipeline {
+    fn from(value: wgpu::RenderPipeline) -> Self {
+        let id = RenderPipelineId::new();
+        let value = WgpuWrapper::new(value);
+        Self { id, value }
+    }
+}
+
+/// Lets a [`RenderPipeline`] be used wherever a `&wgpu::RenderPipeline` is expected.
+impl Deref for RenderPipeline {
+    type Target = wgpu::RenderPipeline;
+
+    /// Returns a reference to the wrapped [`wgpu::RenderPipeline`].
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+define_atomic_id!(ComputePipelineId);
+
+/// A [`ComputePipeline`] represents a compute pipeline and its single shader stage.
+///
+/// May be converted from and dereferences to a wgpu [`ComputePipeline`](wgpu::ComputePipeline).
+/// Can be created via [`RenderDevice::create_compute_pipeline`](crate::render::renderer::RenderDevice::create_compute_pipeline).
+#[derive(Clone, Debug)]
+pub struct ComputePipeline {
+    /// Id assigned when the pipeline is wrapped.
+    id: ComputePipelineId,
+    /// The wrapped wgpu pipeline.
+    value: WgpuWrapper<wgpu::ComputePipeline>,
+}
+
+impl ComputePipeline {
+    /// Returns the [`ComputePipelineId`] that was assigned when this pipeline was wrapped.
+    #[inline]
+    pub fn id(&self) -> ComputePipelineId {
+        self.id
+    }
+}
+
+/// Wraps a raw [`wgpu::ComputePipeline`] and pairs it with a new [`ComputePipelineId`].
+impl From<wgpu::ComputePipeline> for ComputePipeline {
+    fn from(value: wgpu::ComputePipeline) -> Self {
+        let id = ComputePipelineId::new();
+        let value = WgpuWrapper::new(value);
+        Self { id, value }
+    }
+}
+
+/// Lets a [`ComputePipeline`] be used wherever a `&wgpu::ComputePipeline` is expected.
+impl Deref for ComputePipeline {
+    type Target = wgpu::ComputePipeline;
+
+    /// Returns a reference to the wrapped [`wgpu::ComputePipeline`].
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+/// Describes a render (graphics) pipeline.
+///
+/// Note that the derived [`Default`] leaves `zero_initialize_workgroup_memory` at
+/// `false` (the default for `bool`).
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct RenderPipelineDescriptor {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Option<Cow<'static, str>>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Vec<BindGroupLayout>,
+    /// The push constant ranges for this pipeline.
+    /// Supply an empty vector if the pipeline doesn't use push constants.
+    pub push_constant_ranges: Vec<PushConstantRange>,
+    /// The compiled vertex stage, its entry point, and the input buffers layout.
+    pub vertex: VertexState,
+    /// The properties of the pipeline at the primitive assembly and rasterization level.
+    pub primitive: PrimitiveState,
+    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
+    pub depth_stencil: Option<DepthStencilState>,
+    /// The multi-sampling properties of the pipeline.
+    pub multisample: MultisampleState,
+    /// The compiled fragment stage, its entry point, and the color targets.
+    pub fragment: Option<FragmentState>,
+    /// Whether to zero-initialize workgroup memory by default. If you're not sure, set this to true.
+    /// If this is false, reading from workgroup variables before writing to them will result in garbage values.
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+/// Error returned by [`RenderPipelineDescriptor::fragment_mut`] when the descriptor's
+/// `fragment` field is `None`.
+#[derive(Copy, Clone, Debug, Error)]
+#[error("RenderPipelineDescriptor has no FragmentState configured")]
+pub struct NoFragmentStateError;
+
+impl RenderPipelineDescriptor {
+    /// Returns a mutable reference to the fragment state.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`NoFragmentStateError`] if `fragment` is `None`.
+    pub fn fragment_mut(&mut self) -> Result<&mut FragmentState, NoFragmentStateError> {
+        self.fragment.as_mut().ok_or(NoFragmentStateError)
+    }
+
+    /// Sets the bind group layout at `index`.
+    ///
+    /// If `index` is past the end of `layout`, the gap is first filled with the
+    /// value returned by `empty_bind_group_layout()`.
+    pub fn set_layout(&mut self, index: usize, layout: BindGroupLayout) {
+        filling_set_at(&mut self.layout, index, empty_bind_group_layout(), layout);
+    }
+}
+
+/// Describes the vertex stage in a render pipeline.
+#[derive(Clone, Debug, Eq, PartialEq, Default)]
+pub struct VertexState {
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    /// The shader defs ([`ShaderDefVal`]) associated with this stage.
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// The format of any vertex buffers used with this pipeline.
+    pub buffers: Vec<VertexBufferLayout>,
+}
+
+/// Describes the fragment process in a render pipeline.
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+pub struct FragmentState {
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    /// The shader defs ([`ShaderDefVal`]) associated with this stage.
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// The color state of the render targets.
+    pub targets: Vec<Option<ColorTargetState>>,
+}
+
+impl FragmentState {
+    /// Sets the color target at `index` to `Some(target)`.
+    ///
+    /// If `index` is past the end of `targets`, the gap is first filled with `None`.
+    pub fn set_target(&mut self, index: usize, target: ColorTargetState) {
+        filling_set_at(&mut self.targets, index, None, Some(target));
+    }
+}
+
+/// Describes a compute pipeline.
+///
+/// Note that the derived [`Default`] leaves `zero_initialize_workgroup_memory` at
+/// `false` (the default for `bool`).
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+pub struct ComputePipelineDescriptor {
+    /// Optional label of the pipeline.
+    pub label: Option<Cow<'static, str>>,
+    /// The bind group layouts for this pipeline.
+    pub layout: Vec<BindGroupLayout>,
+    /// The push constant ranges for this pipeline.
+    pub push_constant_ranges: Vec<PushConstantRange>,
+    /// The compiled shader module for this stage.
+    pub shader: Handle<Shader>,
+    /// The shader defs ([`ShaderDefVal`]) associated with this stage.
+    pub shader_defs: Vec<ShaderDefVal>,
+    /// The name of the entry point in the compiled shader, or `None` if the default entry point
+    /// is used.
+    pub entry_point: Option<Cow<'static, str>>,
+    /// Whether to zero-initialize workgroup memory by default. If you're not sure, set this to true.
+    /// If this is false, reading from workgroup variables before writing to them will result in garbage values.
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+// Stores `value` at `index`. When `index` lies past the end of `vec`, the vector
+// is first grown with clones of `filler` so that `index` becomes its last slot.
+fn filling_set_at<T: Clone>(vec: &mut Vec<T>, index: usize, filler: T, value: T) {
+    let required_len = index + 1;
+    if required_len > vec.len() {
+        vec.resize(required_len, filler);
+    }
+    vec[index] = value;
+}
--- a/crates/libmarathon/src/render/render_resource/pipeline_cache.rs
+++ b/crates/libmarathon/src/render/render_resource/pipeline_cache.rs
@@ -0,0 +1,831 @@
+use crate::render::{
+    render_resource::*,
+    renderer::{RenderAdapter, RenderDevice, WgpuWrapper},
+    Extract,
+};
+use std::{borrow::Cow, sync::Arc};
+use bevy_asset::{AssetEvent, AssetId, Assets, Handle};
+use bevy_ecs::{
+    message::MessageReader,
+    resource::Resource,
+    system::{Res, ResMut},
+};
+use bevy_platform::collections::{HashMap, HashSet};
+use bevy_shader::{
+    CachedPipelineId, PipelineCacheError, Shader, ShaderCache, ShaderCacheSource, ShaderDefVal,
+    ValidateShader,
+};
+use bevy_tasks::Task;
+use bevy_utils::default;
+use core::{future::Future, hash::Hash, mem};
+use std::sync::{Mutex, PoisonError};
+use tracing::error;
+use wgpu::{PipelineCompilationOptions, VertexBufferLayout as RawVertexBufferLayout};
+
+/// A descriptor for a [`Pipeline`].
+///
+/// Used to store a heterogeneous collection of render and compute pipeline descriptors together.
+#[derive(Debug)]
+pub enum PipelineDescriptor {
+    /// Descriptor of a render pipeline.
+    RenderPipelineDescriptor(Box<RenderPipelineDescriptor>),
+    /// Descriptor of a compute pipeline.
+    ComputePipelineDescriptor(Box<ComputePipelineDescriptor>),
+}
+
+/// A pipeline defining the data layout and shader logic for a specific GPU task.
+///
+/// Used to store a heterogeneous collection of render and compute pipelines together.
+#[derive(Debug)]
+pub enum Pipeline {
+    /// A created render pipeline.
+    RenderPipeline(RenderPipeline),
+    /// A created compute pipeline.
+    ComputePipeline(ComputePipeline),
+}
+
+/// Index of a cached render pipeline in a [`PipelineCache`].
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
+pub struct CachedRenderPipelineId(CachedPipelineId);
+
+impl CachedRenderPipelineId {
+    /// Placeholder index that refers to no cached render pipeline; handy for initializing a
+    /// variable before a real ID is known.
+    pub const INVALID: Self = Self(usize::MAX);
+
+    /// Returns the raw index wrapped by this ID.
+    #[inline]
+    pub fn id(&self) -> usize {
+        let Self(raw_index) = *self;
+        raw_index
+    }
+}
+
+/// Index of a cached compute pipeline in a [`PipelineCache`].
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+pub struct CachedComputePipelineId(CachedPipelineId);
+
+impl CachedComputePipelineId {
+    /// Placeholder index that refers to no cached compute pipeline; handy for initializing a
+    /// variable before a real ID is known.
+    pub const INVALID: Self = Self(usize::MAX);
+
+    /// Returns the raw index wrapped by this ID.
+    #[inline]
+    pub fn id(&self) -> usize {
+        let Self(raw_index) = *self;
+        raw_index
+    }
+}
+
+/// An entry of a [`PipelineCache`]: the descriptor a pipeline was queued with, together with
+/// the current state of its creation.
+pub struct CachedPipeline {
+    /// The descriptor the pipeline is (re)created from.
+    pub descriptor: PipelineDescriptor,
+    /// Where the pipeline currently is in its creation lifecycle.
+    pub state: CachedPipelineState,
+}
+
+/// State of a cached pipeline inserted into a [`PipelineCache`].
+#[cfg_attr(
+    not(target_arch = "wasm32"),
+    expect(
+        clippy::large_enum_variant,
+        reason = "See https://github.com/bevyengine/bevy/issues/19220"
+    )
+)]
+#[derive(Debug)]
+pub enum CachedPipelineState {
+    /// Creation of the pipeline GPU object has been requested but not started yet.
+    Queued,
+    /// A task creating the pipeline GPU object is in flight.
+    Creating(Task<Result<Pipeline, PipelineCacheError>>),
+    /// Creation succeeded; the pipeline GPU object is available.
+    Ok(Pipeline),
+    /// Creation of the pipeline GPU object failed with the contained error.
+    Err(PipelineCacheError),
+}
+
+impl CachedPipelineState {
+    /// Borrows the created pipeline, panicking when there is none.
+    ///
+    /// # Returns
+    ///
+    /// A reference to the pipeline GPU object held by the `Ok` state.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the state is `Queued` or `Creating` (the pipeline is not compiled yet), or
+    /// when it is `Err`, in which case the panic message is the stored error.
+    pub fn unwrap(&self) -> &Pipeline {
+        match self {
+            Self::Ok(pipeline) => pipeline,
+            Self::Err(err) => panic!("{}", err),
+            Self::Creating(..) => {
+                panic!("Pipeline has not been compiled yet. It is still in the 'Creating' state.")
+            }
+            Self::Queued => {
+                panic!("Pipeline has not been compiled yet. It is still in the 'Queued' state.")
+            }
+        }
+    }
+}
+
+/// Key of a cached pipeline layout: the IDs of its bind group layouts plus its push constant
+/// ranges.
+type LayoutCacheKey = (Vec<BindGroupLayoutId>, Vec<PushConstantRange>);
+/// Pipeline layouts that were already created, keyed by [`LayoutCacheKey`].
+#[derive(Default)]
+struct LayoutCache {
+    layouts: HashMap<LayoutCacheKey, Arc<WgpuWrapper<PipelineLayout>>>,
+}
+
+impl LayoutCache {
+    /// Returns the pipeline layout for the given bind group layouts and push constant ranges.
+    ///
+    /// On a cache miss the layout is created on `render_device` and stored; on a hit the stored
+    /// layout is shared.
+    fn get(
+        &mut self,
+        render_device: &RenderDevice,
+        bind_group_layouts: &[BindGroupLayout],
+        push_constant_ranges: Vec<PushConstantRange>,
+    ) -> Arc<WgpuWrapper<PipelineLayout>> {
+        let key: LayoutCacheKey = (
+            bind_group_layouts.iter().map(BindGroupLayout::id).collect(),
+            push_constant_ranges,
+        );
+        let cached_layout = self.layouts.entry(key).or_insert_with_key(|(_, ranges)| {
+            let raw_layouts: Vec<_> = bind_group_layouts
+                .iter()
+                .map(BindGroupLayout::value)
+                .collect();
+            let layout_descriptor = PipelineLayoutDescriptor {
+                bind_group_layouts: &raw_layouts,
+                push_constant_ranges: ranges,
+                ..default()
+            };
+            Arc::new(WgpuWrapper::new(
+                render_device.create_pipeline_layout(&layout_descriptor),
+            ))
+        });
+        cached_layout.clone()
+    }
+}
+
+/// Creates a GPU shader module from `shader_source` on `render_device`.
+///
+/// Module creation is wrapped in a wgpu validation error scope. If the result of that scope is
+/// already available when polled once and it reports a validation error, the error description
+/// is returned as [`PipelineCacheError::CreateShaderModule`]; otherwise the module is returned.
+///
+/// `validate_shader` selects between the validating creation path and the unchecked (`unsafe`)
+/// one.
+///
+/// # Panics
+///
+/// Panics (via `unimplemented!`) when given a SPIR-V source while the `shader_format_spirv`
+/// feature is disabled.
+#[expect(
+    clippy::result_large_err,
+    reason = "See https://github.com/bevyengine/bevy/issues/19220"
+)]
+fn load_module(
+    render_device: &RenderDevice,
+    shader_source: ShaderCacheSource,
+    validate_shader: &ValidateShader,
+) -> Result<WgpuWrapper<ShaderModule>, PipelineCacheError> {
+    // Translate the cache's source representation into the wgpu one.
+    let shader_source = match shader_source {
+        #[cfg(feature = "shader_format_spirv")]
+        ShaderCacheSource::SpirV(data) => wgpu::util::make_spirv(data),
+        #[cfg(not(feature = "shader_format_spirv"))]
+        ShaderCacheSource::SpirV(_) => {
+            unimplemented!("Enable feature \"shader_format_spirv\" to use SPIR-V shaders")
+        }
+        ShaderCacheSource::Wgsl(src) => ShaderSource::Wgsl(Cow::Owned(src)),
+        #[cfg(not(feature = "decoupled_naga"))]
+        ShaderCacheSource::Naga(src) => ShaderSource::Naga(Cow::Owned(src)),
+    };
+    let module_descriptor = ShaderModuleDescriptor {
+        label: None,
+        source: shader_source,
+    };
+
+    // Open a validation error scope around module creation; it is popped right after.
+    render_device
+        .wgpu_device()
+        .push_error_scope(wgpu::ErrorFilter::Validation);
+
+    let shader_module = WgpuWrapper::new(match validate_shader {
+        ValidateShader::Enabled => {
+            render_device.create_and_validate_shader_module(module_descriptor)
+        }
+        // SAFETY: we are interfacing with shader code, which may contain undefined behavior,
+        // such as indexing out of bounds.
+        // The checks required are prohibitively expensive and a poor default for game engines.
+        ValidateShader::Disabled => unsafe {
+            render_device.create_shader_module(module_descriptor)
+        },
+    });
+
+    let error = render_device.wgpu_device().pop_error_scope();
+
+    // `now_or_never` will return Some if the future is ready and None otherwise.
+    // On native platforms, wgpu will yield the error immediately while on wasm it may take longer since the browser APIs are asynchronous.
+    // So to keep the complexity of the ShaderCache low, we will only catch this error early on native platforms,
+    // and on wasm the error will be handled by wgpu and crash the application.
+    if let Some(Some(wgpu::Error::Validation { description, .. })) =
+        bevy_tasks::futures::now_or_never(error)
+    {
+        return Err(PipelineCacheError::CreateShaderModule(description));
+    }
+
+    Ok(shader_module)
+}
+
+/// Cache for render and compute pipelines.
+///
+/// The cache stores existing render and compute pipelines allocated on the GPU, as well as
+/// pending creation. Pipelines inserted into the cache are identified by a unique ID, which
+/// can be used to retrieve the actual GPU object once it's ready. The creation of the GPU
+/// pipeline object is deferred to the [`RenderSystems::Render`] step, just before the render
+/// graph starts being processed, as this requires access to the GPU.
+///
+/// Note that the cache does not perform automatic deduplication of identical pipelines. It is
+/// up to the user not to insert the same pipeline twice to avoid wasting GPU resources.
+///
+/// [`RenderSystems::Render`]: crate::render::RenderSystems::Render
+#[derive(Resource)]
+pub struct PipelineCache {
+    /// Pipeline layouts created so far; the `Arc` is cloned into pipeline creation jobs.
+    layout_cache: Arc<Mutex<LayoutCache>>,
+    /// Shader module cache; the `Arc` is cloned into pipeline creation jobs.
+    shader_cache: Arc<Mutex<ShaderCache<WgpuWrapper<ShaderModule>, RenderDevice>>>,
+    /// Device on which layouts, shader modules and pipelines are created.
+    device: RenderDevice,
+    /// Pipelines that `process_queue` has taken over, indexed by their cached pipeline ID.
+    pipelines: Vec<CachedPipeline>,
+    /// IDs of pipelines that `process_queue` will (re)visit on its next run.
+    waiting_pipelines: HashSet<CachedPipelineId>,
+    /// Pipelines queued through `&self` that `process_queue` has not yet moved into `pipelines`.
+    new_pipelines: Mutex<Vec<CachedPipeline>>,
+    /// Shader defs appended to every shader handed to the shader cache by `extract_shaders`.
+    global_shader_defs: Vec<ShaderDefVal>,
+    /// If `true`, disables asynchronous pipeline compilation.
+    /// This has no effect on macOS, wasm, or without the `multi_threaded` feature.
+    synchronous_pipeline_compilation: bool,
+}
+
+impl PipelineCache {
+    /// Returns an iterator over the pipelines in the pipeline cache.
+    ///
+    /// Pipelines that were queued but not yet picked up by [`PipelineCache::process_queue()`]
+    /// are not part of the iteration.
+    pub fn pipelines(&self) -> impl Iterator<Item = &CachedPipeline> {
+        self.pipelines.iter()
+    }
+
+    /// Returns an iterator over the IDs of all currently waiting pipelines.
+    pub fn waiting_pipelines(&self) -> impl Iterator<Item = CachedPipelineId> + '_ {
+        self.waiting_pipelines.iter().copied()
+    }
+
+    /// Builds an empty pipeline cache bound to `device`.
+    ///
+    /// The device features and the downlevel capability flags of `render_adapter` are handed to
+    /// the shader cache. A set of global shader defs is assembled up front: platform-dependent
+    /// feature toggles followed by `AVAILABLE_STORAGE_BUFFER_BINDINGS`, taken from the device
+    /// limits. `synchronous_pipeline_compilation` is stored as given.
+    pub fn new(
+        device: RenderDevice,
+        render_adapter: RenderAdapter,
+        synchronous_pipeline_compilation: bool,
+    ) -> Self {
+        let mut global_shader_defs: Vec<ShaderDefVal> = Vec::new();
+
+        #[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
+        {
+            for def in [
+                "NO_ARRAY_TEXTURES_SUPPORT",
+                "NO_CUBE_ARRAY_TEXTURES_SUPPORT",
+                "SIXTEEN_BYTE_ALIGNMENT",
+            ] {
+                global_shader_defs.push(def.into());
+            }
+        }
+
+        if cfg!(target_abi = "sim") {
+            global_shader_defs.push("NO_CUBE_ARRAY_TEXTURES_SUPPORT".into());
+        }
+
+        let storage_buffer_bindings = device.limits().max_storage_buffers_per_shader_stage;
+        global_shader_defs.push(ShaderDefVal::UInt(
+            "AVAILABLE_STORAGE_BUFFER_BINDINGS".to_string(),
+            storage_buffer_bindings,
+        ));
+
+        // Built before `device` is moved into the struct below.
+        let shader_cache = ShaderCache::new(
+            device.features(),
+            render_adapter.get_downlevel_capabilities().flags,
+            load_module,
+        );
+
+        Self {
+            layout_cache: default(),
+            shader_cache: Arc::new(Mutex::new(shader_cache)),
+            device,
+            pipelines: default(),
+            waiting_pipelines: default(),
+            new_pipelines: default(),
+            global_shader_defs,
+            synchronous_pipeline_compilation,
+        }
+    }
+
+    /// Looks up the creation state of a cached render pipeline.
+    ///
+    /// An ID that is not (yet) present in the processed pipelines is reported as
+    /// [`CachedPipelineState::Queued`].
+    ///
+    /// See [`PipelineCache::queue_render_pipeline()`].
+    #[inline]
+    pub fn get_render_pipeline_state(&self, id: CachedRenderPipelineId) -> &CachedPipelineState {
+        match self.pipelines.get(id.0) {
+            Some(cached) => &cached.state,
+            // Not in `pipelines` yet, so it is still sitting in `new_pipelines`.
+            None => &CachedPipelineState::Queued,
+        }
+    }
+
+    /// Looks up the creation state of a cached compute pipeline.
+    ///
+    /// An ID that is not (yet) present in the processed pipelines is reported as
+    /// [`CachedPipelineState::Queued`].
+    ///
+    /// See [`PipelineCache::queue_compute_pipeline()`].
+    #[inline]
+    pub fn get_compute_pipeline_state(&self, id: CachedComputePipelineId) -> &CachedPipelineState {
+        match self.pipelines.get(id.0) {
+            Some(cached) => &cached.state,
+            // Not in `pipelines` yet, so it is still sitting in `new_pipelines`.
+            None => &CachedPipelineState::Queued,
+        }
+    }
+
+    /// Returns the descriptor a cached render pipeline was queued with.
+    ///
+    /// See [`PipelineCache::queue_render_pipeline()`].
+    ///
+    /// **Note**: Be careful calling this method. It panics when `id` refers to a pipeline that
+    /// has been queued but not yet processed by [`PipelineCache::process_queue()`].
+    #[inline]
+    pub fn get_render_pipeline_descriptor(
+        &self,
+        id: CachedRenderPipelineId,
+    ) -> &RenderPipelineDescriptor {
+        let PipelineDescriptor::RenderPipelineDescriptor(descriptor) =
+            &self.pipelines[id.0].descriptor
+        else {
+            unreachable!()
+        };
+        descriptor
+    }
+
+    /// Returns the descriptor a cached compute pipeline was queued with.
+    ///
+    /// See [`PipelineCache::queue_compute_pipeline()`].
+    ///
+    /// **Note**: Be careful calling this method. It panics when `id` refers to a pipeline that
+    /// has been queued but not yet processed by [`PipelineCache::process_queue()`].
+    #[inline]
+    pub fn get_compute_pipeline_descriptor(
+        &self,
+        id: CachedComputePipelineId,
+    ) -> &ComputePipelineDescriptor {
+        let PipelineDescriptor::ComputePipelineDescriptor(descriptor) =
+            &self.pipelines[id.0].descriptor
+        else {
+            unreachable!()
+        };
+        descriptor
+    }
+
+    /// Fetches the render pipeline GPU object behind a cached ID, if it is ready.
+    ///
+    /// # Returns
+    ///
+    /// `Some` with the render pipeline when it was created successfully; `None` when the ID is
+    /// not known yet, the pipeline is still pending, or its creation failed. Use
+    /// [`PipelineCache::get_render_pipeline_state()`] to tell these cases apart.
+    #[inline]
+    pub fn get_render_pipeline(&self, id: CachedRenderPipelineId) -> Option<&RenderPipeline> {
+        let cached = self.pipelines.get(id.0)?;
+        match &cached.state {
+            CachedPipelineState::Ok(Pipeline::RenderPipeline(pipeline)) => Some(pipeline),
+            _ => None,
+        }
+    }
+
+    /// Blocks until the creation task of a render pipeline, if one is running, has finished.
+    ///
+    /// When `id` is not among the processed pipelines yet, the queue is processed first. A
+    /// pipeline in the `Creating` state is then driven to completion and its state replaced by
+    /// the outcome; any other state is left untouched.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` is still out of range after the queue has been processed.
+    #[inline]
+    pub fn block_on_render_pipeline(&mut self, id: CachedRenderPipelineId) {
+        let not_processed_yet = id.0 >= self.pipelines.len();
+        if not_processed_yet {
+            self.process_queue();
+        }
+
+        let cached = &mut self.pipelines[id.0];
+        let CachedPipelineState::Creating(task) = &mut cached.state else {
+            return;
+        };
+        let outcome = bevy_tasks::block_on(task);
+        cached.state = match outcome {
+            Ok(pipeline) => CachedPipelineState::Ok(pipeline),
+            Err(err) => CachedPipelineState::Err(err),
+        };
+    }
+
+    /// Fetches the compute pipeline GPU object behind a cached ID, if it is ready.
+    ///
+    /// # Returns
+    ///
+    /// `Some` with the compute pipeline when it was created successfully; `None` when the ID is
+    /// not known yet, the pipeline is still pending, or its creation failed. Use
+    /// [`PipelineCache::get_compute_pipeline_state()`] to tell these cases apart.
+    #[inline]
+    pub fn get_compute_pipeline(&self, id: CachedComputePipelineId) -> Option<&ComputePipeline> {
+        let cached = self.pipelines.get(id.0)?;
+        match &cached.state {
+            CachedPipelineState::Ok(Pipeline::ComputePipeline(pipeline)) => Some(pipeline),
+            _ => None,
+        }
+    }
+
+    /// Queues a render pipeline for creation and returns its cache ID.
+    ///
+    /// Every call adds a new entry; no attempt is made to deduplicate against pipelines that
+    /// are already cached.
+    ///
+    /// # Returns
+    ///
+    /// The ID of the newly queued render pipeline. Pass it to [`get_render_pipeline_state()`]
+    /// to query the creation state, and to [`get_render_pipeline()`] to obtain the GPU pipeline
+    /// once it is ready.
+    ///
+    /// [`get_render_pipeline_state()`]: PipelineCache::get_render_pipeline_state
+    /// [`get_render_pipeline()`]: PipelineCache::get_render_pipeline
+    pub fn queue_render_pipeline(
+        &self,
+        descriptor: RenderPipelineDescriptor,
+    ) -> CachedRenderPipelineId {
+        let mut pending = match self.new_pipelines.lock() {
+            Ok(guard) => guard,
+            Err(poisoned) => poisoned.into_inner(),
+        };
+        // IDs continue after the processed pipelines and those still pending.
+        let next_index = self.pipelines.len() + pending.len();
+        pending.push(CachedPipeline {
+            state: CachedPipelineState::Queued,
+            descriptor: PipelineDescriptor::RenderPipelineDescriptor(Box::new(descriptor)),
+        });
+        CachedRenderPipelineId(next_index)
+    }
+
+    /// Queues a compute pipeline for creation and returns its cache ID.
+    ///
+    /// Every call adds a new entry; no attempt is made to deduplicate against pipelines that
+    /// are already cached.
+    ///
+    /// # Returns
+    ///
+    /// The ID of the newly queued compute pipeline. Pass it to [`get_compute_pipeline_state()`]
+    /// to query the creation state, and to [`get_compute_pipeline()`] to obtain the GPU
+    /// pipeline once it is ready.
+    ///
+    /// [`get_compute_pipeline_state()`]: PipelineCache::get_compute_pipeline_state
+    /// [`get_compute_pipeline()`]: PipelineCache::get_compute_pipeline
+    pub fn queue_compute_pipeline(
+        &self,
+        descriptor: ComputePipelineDescriptor,
+    ) -> CachedComputePipelineId {
+        let mut pending = match self.new_pipelines.lock() {
+            Ok(guard) => guard,
+            Err(poisoned) => poisoned.into_inner(),
+        };
+        // IDs continue after the processed pipelines and those still pending.
+        let next_index = self.pipelines.len() + pending.len();
+        pending.push(CachedPipeline {
+            state: CachedPipelineState::Queued,
+            descriptor: PipelineDescriptor::ComputePipelineDescriptor(Box::new(descriptor)),
+        });
+        CachedComputePipelineId(next_index)
+    }
+
+    /// Registers `shader` under `id` in the shader cache and re-queues every pipeline the
+    /// shader cache reports back for it.
+    fn set_shader(&mut self, id: AssetId<Shader>, shader: Shader) {
+        let mut shader_cache = self.shader_cache.lock().unwrap();
+        for pipeline_id in shader_cache.set_shader(id, shader) {
+            let cached = &mut self.pipelines[pipeline_id];
+            cached.state = CachedPipelineState::Queued;
+            self.waiting_pipelines.insert(pipeline_id);
+        }
+    }
+
+    /// Removes `shader` from the shader cache and re-queues every pipeline the shader cache
+    /// reports back for it.
+    fn remove_shader(&mut self, shader: AssetId<Shader>) {
+        let mut shader_cache = self.shader_cache.lock().unwrap();
+        for pipeline_id in shader_cache.remove(shader) {
+            let cached = &mut self.pipelines[pipeline_id];
+            cached.state = CachedPipelineState::Queued;
+            self.waiting_pipelines.insert(pipeline_id);
+        }
+    }
+
+    /// Builds the creation job for render pipeline `id` from `descriptor` and hands it to
+    /// `create_pipeline_task`, returning the state that call produces.
+    ///
+    /// The job locks the shader cache and the layout cache, resolves the vertex module and the
+    /// optional fragment module, obtains the pipeline layout (skipped when both the bind group
+    /// layouts and the push constant ranges are empty), releases both locks and finally creates
+    /// the pipeline on the device. An error from the shader cache ends the job with that error.
+    fn start_create_render_pipeline(
+        &mut self,
+        id: CachedPipelineId,
+        descriptor: RenderPipelineDescriptor,
+    ) -> CachedPipelineState {
+        // Owned clones, so the `'static` job does not borrow from `self`.
+        let device = self.device.clone();
+        let shader_cache = self.shader_cache.clone();
+        let layout_cache = self.layout_cache.clone();
+
+        create_pipeline_task(
+            async move {
+                let mut shader_cache = shader_cache.lock().unwrap();
+                let mut layout_cache = layout_cache.lock().unwrap();
+
+                let vertex_module = match shader_cache.get(
+                    &device,
+                    id,
+                    descriptor.vertex.shader.id(),
+                    &descriptor.vertex.shader_defs,
+                ) {
+                    Ok(module) => module,
+                    Err(err) => return Err(err),
+                };
+
+                // `Some` exactly when the descriptor has a fragment stage.
+                let fragment_module = match &descriptor.fragment {
+                    Some(fragment) => {
+                        match shader_cache.get(
+                            &device,
+                            id,
+                            fragment.shader.id(),
+                            &fragment.shader_defs,
+                        ) {
+                            Ok(module) => Some(module),
+                            Err(err) => return Err(err),
+                        }
+                    }
+                    None => None,
+                };
+
+                // No explicit layout is passed on when there is nothing to describe.
+                let layout =
+                    if descriptor.layout.is_empty() && descriptor.push_constant_ranges.is_empty() {
+                        None
+                    } else {
+                        Some(layout_cache.get(
+                            &device,
+                            &descriptor.layout,
+                            descriptor.push_constant_ranges.to_vec(),
+                        ))
+                    };
+
+                // Release both cache locks before creating the pipeline.
+                drop((shader_cache, layout_cache));
+
+                let vertex_buffer_layouts = descriptor
+                    .vertex
+                    .buffers
+                    .iter()
+                    .map(|layout| RawVertexBufferLayout {
+                        array_stride: layout.array_stride,
+                        attributes: &layout.attributes,
+                        step_mode: layout.step_mode,
+                    })
+                    .collect::<Vec<_>>();
+
+                // `fragment_module` is `Some` whenever `descriptor.fragment` is (see above),
+                // so the `unwrap` below cannot fail.
+                let fragment_data = descriptor.fragment.as_ref().map(|fragment| {
+                    (
+                        fragment_module.unwrap(),
+                        fragment.entry_point.as_deref(),
+                        fragment.targets.as_slice(),
+                    )
+                });
+
+                // TODO: Expose the rest of this somehow
+                let compilation_options = PipelineCompilationOptions {
+                    constants: &[],
+                    zero_initialize_workgroup_memory: descriptor.zero_initialize_workgroup_memory,
+                };
+
+                let descriptor = RawRenderPipelineDescriptor {
+                    multiview: None,
+                    depth_stencil: descriptor.depth_stencil.clone(),
+                    label: descriptor.label.as_deref(),
+                    layout: layout.as_ref().map(|layout| -> &PipelineLayout { layout }),
+                    multisample: descriptor.multisample,
+                    primitive: descriptor.primitive,
+                    vertex: RawVertexState {
+                        buffers: &vertex_buffer_layouts,
+                        entry_point: descriptor.vertex.entry_point.as_deref(),
+                        module: &vertex_module,
+                        // TODO: Should this be the same as the fragment compilation options?
+                        compilation_options: compilation_options.clone(),
+                    },
+                    fragment: fragment_data
+                        .as_ref()
+                        .map(|(module, entry_point, targets)| RawFragmentState {
+                            entry_point: entry_point.as_deref(),
+                            module,
+                            targets,
+                            // TODO: Should this be the same as the vertex compilation options?
+                            compilation_options,
+                        }),
+                    cache: None,
+                };
+
+                Ok(Pipeline::RenderPipeline(
+                    device.create_render_pipeline(&descriptor),
+                ))
+            },
+            self.synchronous_pipeline_compilation,
+        )
+    }
+
+    /// Builds the creation job for compute pipeline `id` from `descriptor` and hands it to
+    /// `create_pipeline_task`, returning the state that call produces.
+    ///
+    /// The job locks the shader cache and the layout cache, resolves the compute module,
+    /// obtains the pipeline layout (skipped when both the bind group layouts and the push
+    /// constant ranges are empty), releases both locks and finally creates the pipeline on the
+    /// device. An error from the shader cache ends the job with that error.
+    fn start_create_compute_pipeline(
+        &mut self,
+        id: CachedPipelineId,
+        descriptor: ComputePipelineDescriptor,
+    ) -> CachedPipelineState {
+        // Owned clones, so the `'static` job does not borrow from `self`.
+        let device = self.device.clone();
+        let shader_cache = self.shader_cache.clone();
+        let layout_cache = self.layout_cache.clone();
+
+        create_pipeline_task(
+            async move {
+                let mut shader_cache = shader_cache.lock().unwrap();
+                let mut layout_cache = layout_cache.lock().unwrap();
+
+                let compute_module = match shader_cache.get(
+                    &device,
+                    id,
+                    descriptor.shader.id(),
+                    &descriptor.shader_defs,
+                ) {
+                    Ok(module) => module,
+                    Err(err) => return Err(err),
+                };
+
+                // No explicit layout is passed on when there is nothing to describe.
+                let layout =
+                    if descriptor.layout.is_empty() && descriptor.push_constant_ranges.is_empty() {
+                        None
+                    } else {
+                        Some(layout_cache.get(
+                            &device,
+                            &descriptor.layout,
+                            descriptor.push_constant_ranges.to_vec(),
+                        ))
+                    };
+
+                // Release both cache locks before creating the pipeline.
+                drop((shader_cache, layout_cache));
+
+                let descriptor = RawComputePipelineDescriptor {
+                    label: descriptor.label.as_deref(),
+                    layout: layout.as_ref().map(|layout| -> &PipelineLayout { layout }),
+                    module: &compute_module,
+                    entry_point: descriptor.entry_point.as_deref(),
+                    // TODO: Expose the rest of this somehow
+                    compilation_options: PipelineCompilationOptions {
+                        constants: &[],
+                        zero_initialize_workgroup_memory: descriptor
+                            .zero_initialize_workgroup_memory,
+                    },
+                    cache: None,
+                };
+
+                Ok(Pipeline::ComputePipeline(
+                    device.create_compute_pipeline(&descriptor),
+                ))
+            },
+            self.synchronous_pipeline_compilation,
+        )
+    }
+
+    /// Works through the pipeline queue, advancing every pending pipeline where possible.
+    ///
+    /// Newly queued pipelines are moved into the cache first; then every waiting pipeline is
+    /// processed once. This normally runs automatically during the [`RenderSystems::Render`]
+    /// step, but it may also be called by hand to force creation at another time.
+    ///
+    /// [`RenderSystems::Render`]: crate::render::RenderSystems::Render
+    pub fn process_queue(&mut self) {
+        // Both collections are moved out of `self` so that single entries can be borrowed
+        // mutably while `process_pipeline` borrows `self` as a whole.
+        let mut pipelines = mem::take(&mut self.pipelines);
+        let mut waiting_pipelines = mem::take(&mut self.waiting_pipelines);
+
+        {
+            let mut queued = self
+                .new_pipelines
+                .lock()
+                .unwrap_or_else(PoisonError::into_inner);
+            for new_pipeline in queued.drain(..) {
+                // The index the pipeline is about to occupy is its ID.
+                waiting_pipelines.insert(pipelines.len());
+                pipelines.push(new_pipeline);
+            }
+        }
+
+        for id in waiting_pipelines {
+            self.process_pipeline(&mut pipelines[id], id);
+        }
+
+        self.pipelines = pipelines;
+    }
+
+    /// Advances the creation state of one cached pipeline by a single step.
+    ///
+    /// - `Queued`: creation is started from a clone of the stored descriptor.
+    /// - `Creating`: the task is checked once; a finished task replaces the state with its
+    ///   result.
+    /// - `Err`: "shader not loaded" / "import not yet available" errors put the pipeline back
+    ///   into `Queued`; the other errors are logged and the pipeline is left as is.
+    /// - `Ok`: nothing to do.
+    ///
+    /// Unless the function returns early, `id` is re-inserted into `waiting_pipelines` so the
+    /// pipeline is visited again by the next `process_queue` run.
+    fn process_pipeline(&mut self, cached_pipeline: &mut CachedPipeline, id: usize) {
+        match &mut cached_pipeline.state {
+            CachedPipelineState::Queued => {
+                cached_pipeline.state = match &cached_pipeline.descriptor {
+                    PipelineDescriptor::RenderPipelineDescriptor(descriptor) => {
+                        self.start_create_render_pipeline(id, *descriptor.clone())
+                    }
+                    PipelineDescriptor::ComputePipelineDescriptor(descriptor) => {
+                        self.start_create_compute_pipeline(id, *descriptor.clone())
+                    }
+                };
+            }
+
+            CachedPipelineState::Creating(task) => match bevy_tasks::futures::check_ready(task) {
+                Some(Ok(pipeline)) => {
+                    cached_pipeline.state = CachedPipelineState::Ok(pipeline);
+                    return;
+                }
+                // Falls through to the re-insert below, so the `Err` arm handles it next run.
+                Some(Err(err)) => cached_pipeline.state = CachedPipelineState::Err(err),
+                // Task not finished yet: check again on the next run.
+                _ => (),
+            },
+
+            CachedPipelineState::Err(err) => match err {
+                // Retry
+                PipelineCacheError::ShaderNotLoaded(_)
+                | PipelineCacheError::ShaderImportNotYetAvailable => {
+                    cached_pipeline.state = CachedPipelineState::Queued;
+                }
+
+                // Shader could not be processed ... retrying won't help
+                PipelineCacheError::ProcessShaderError(err) => {
+                    let error_detail =
+                        err.emit_to_string(&self.shader_cache.lock().unwrap().composer);
+                    // Pipeline context is only logged when `VERBOSE_SHADER_ERROR` is set to
+                    // something other than an empty string, "0" or "false".
+                    if std::env::var("VERBOSE_SHADER_ERROR")
+                        .is_ok_and(|v| !(v.is_empty() || v == "0" || v == "false"))
+                    {
+                        error!("{}", pipeline_error_context(cached_pipeline));
+                    }
+                    error!("failed to process shader error:\n{}", error_detail);
+                    return;
+                }
+                PipelineCacheError::CreateShaderModule(description) => {
+                    error!("failed to create shader module: {}", description);
+                    return;
+                }
+            },
+
+            CachedPipelineState::Ok(_) => return,
+        }
+
+        // Retry
+        self.waiting_pipelines.insert(id);
+    }
+
+    /// System wrapper that runs [`PipelineCache::process_queue()`] on the cache resource.
+    pub(crate) fn process_pipeline_queue_system(mut cache: ResMut<Self>) {
+        cache.process_queue();
+    }
+
+    /// Applies the extracted shader asset events to the cache.
+    ///
+    /// For `Added` and `Modified` events the shader (if present in `shaders`) is cloned, the
+    /// cache's global shader defs are appended to its own, and the result is passed to
+    /// `set_shader`. `Removed` events go to `remove_shader`. `Unused` and
+    /// `LoadedWithDependencies` events are currently ignored.
+    pub(crate) fn extract_shaders(
+        mut cache: ResMut<Self>,
+        shaders: Extract<Res<Assets<Shader>>>,
+        mut events: Extract<MessageReader<AssetEvent<Shader>>>,
+    ) {
+        for event in events.read() {
+            #[expect(
+                clippy::match_same_arms,
+                reason = "LoadedWithDependencies is marked as a TODO, so it's likely this will no longer lint soon."
+            )]
+            match event {
+                // PERF: Instead of blocking waiting for the shader cache lock, try again next frame if the lock is currently held
+                AssetEvent::Added { id } | AssetEvent::Modified { id } => {
+                    if let Some(shader) = shaders.get(*id) {
+                        // Work on a copy so the defs can be extended without touching the asset.
+                        let mut shader = shader.clone();
+                        shader.shader_defs.extend(cache.global_shader_defs.clone());
+
+                        cache.set_shader(*id, shader);
+                    }
+                }
+                AssetEvent::Removed { id } => cache.remove_shader(*id),
+                AssetEvent::Unused { .. } => {}
+                AssetEvent::LoadedWithDependencies { .. } => {
+                    // TODO: handle this
+                }
+            }
+        }
+    }
+}
+
+/// Renders a human-readable description of the shader stages of `cached_pipeline`, for use in
+/// error logs.
+///
+/// Each stage is described as `<shader path>:<entry point>` followed by a line listing its
+/// shader defs. Boolean defs are listed by name and only when `true`; integer defs are listed
+/// as `name = value`; any other def is left out.
+fn pipeline_error_context(cached_pipeline: &CachedPipeline) -> String {
+    // Describes a single shader stage.
+    fn describe_stage(
+        shader: &Handle<Shader>,
+        entry: &Option<Cow<'static, str>>,
+        shader_defs: &[ShaderDefVal],
+    ) -> String {
+        let source = shader
+            .path()
+            .map(|path| path.path().to_string_lossy().to_string())
+            .unwrap_or_default();
+        let entry = entry
+            .as_ref()
+            .map(|name| name.to_string())
+            .unwrap_or_default();
+
+        let mut listed_defs = Vec::new();
+        for def in shader_defs {
+            match def {
+                ShaderDefVal::Bool(k, true) => listed_defs.push(k.to_string()),
+                ShaderDefVal::Int(k, v) => listed_defs.push(format!("{k} = {v}")),
+                ShaderDefVal::UInt(k, v) => listed_defs.push(format!("{k} = {v}")),
+                _ => {}
+            }
+        }
+        let shader_defs = listed_defs.join(", ");
+
+        format!("{source}:{entry}\nshader defs: {shader_defs}")
+    }
+
+    match &cached_pipeline.descriptor {
+        PipelineDescriptor::ComputePipelineDescriptor(desc) => {
+            describe_stage(&desc.shader, &desc.entry_point, &desc.shader_defs)
+        }
+        PipelineDescriptor::RenderPipelineDescriptor(desc) => {
+            let vert = &desc.vertex;
+            let vert_str = describe_stage(&vert.shader, &vert.entry_point, &vert.shader_defs);
+            match desc.fragment.as_ref() {
+                // Without a fragment stage only the bare vertex description is returned.
+                None => vert_str,
+                Some(frag) => {
+                    let frag_str =
+                        describe_stage(&frag.shader, &frag.entry_point, &frag.shader_defs);
+                    format!("vertex {vert_str}\nfragment {frag_str}")
+                }
+            }
+        }
+    }
+}
+
+/// Turns a pipeline creation job into a pipeline state.
+///
+/// With `sync` set, the job is run to completion on the calling thread and the state is the
+/// job's result. Otherwise the job is spawned on the async compute task pool and the state is
+/// `Creating`, holding the spawned task.
+#[cfg(all(
+    not(target_arch = "wasm32"),
+    not(target_os = "macos"),
+    feature = "multi_threaded"
+))]
+fn create_pipeline_task(
+    task: impl Future<Output = Result<Pipeline, PipelineCacheError>> + Send + 'static,
+    sync: bool,
+) -> CachedPipelineState {
+    if sync {
+        return match bevy_tasks::block_on(task) {
+            Ok(pipeline) => CachedPipelineState::Ok(pipeline),
+            Err(err) => CachedPipelineState::Err(err),
+        };
+    }
+
+    let spawned = bevy_tasks::AsyncComputeTaskPool::get().spawn(task);
+    CachedPipelineState::Creating(spawned)
+}
+
+/// Turns a pipeline creation job into a pipeline state.
+///
+/// In this configuration the job is always run to completion on the calling thread; the
+/// `_sync` flag is ignored.
+#[cfg(any(
+    target_arch = "wasm32",
+    target_os = "macos",
+    not(feature = "multi_threaded")
+))]
+fn create_pipeline_task(
+    task: impl Future<Output = Result<Pipeline, PipelineCacheError>> + Send + 'static,
+    _sync: bool,
+) -> CachedPipelineState {
+    bevy_tasks::block_on(task).map_or_else(CachedPipelineState::Err, CachedPipelineState::Ok)
+}
--- a/crates/libmarathon/src/render/render_resource/pipeline_specializer.rs
+++ b/crates/libmarathon/src/render/render_resource/pipeline_specializer.rs
@@ -0,0 +1,259 @@
+use crate::render::render_resource::{
+    CachedComputePipelineId, CachedRenderPipelineId, ComputePipelineDescriptor, PipelineCache,
+    RenderPipelineDescriptor,
+};
+use bevy_ecs::resource::Resource;
+use bevy_mesh::{MeshVertexBufferLayoutRef, MissingVertexAttributeError, VertexBufferLayout};
+use bevy_platform::{
+    collections::{
+        hash_map::{Entry, RawEntryMut, VacantEntry},
+        HashMap,
+    },
+    hash::FixedHasher,
+};
+use bevy_utils::default;
+use core::{fmt::Debug, hash::Hash};
+use thiserror::Error;
+use tracing::error;
+
+/// A trait that allows constructing different variants of a render pipeline from a key.
+///
+/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key
+/// contains no data then you don't need to specialize. For example, if you are using the
+/// [`AsBindGroup`](crate::render::render_resource::AsBindGroup) without the `#[bind_group_data]`
+/// attribute, you don't need to specialize. Instead, create the pipeline directly from
+/// [`PipelineCache`] and store its ID.
+///
+/// See [`SpecializedRenderPipelines`] for more info.
+pub trait SpecializedRenderPipeline {
+    /// The key that defines each "variant" of the render pipeline.
+    type Key: Clone + Hash + PartialEq + Eq;
+
+    /// Construct a new render pipeline based on the provided key.
+    fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor;
+}
+
+/// A convenience cache for creating different variants of a render pipeline based on some key.
+///
+/// Some render pipelines may need to be configured differently depending on the exact situation.
+/// This cache allows constructing different render pipelines for each situation based on a key,
+/// making it easy to A) construct the necessary pipelines, and B) reuse already constructed
+/// pipelines.
+///
+/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key
+/// contains no data then you don't need to specialize. For example, if you are using the
+/// [`AsBindGroup`](crate::render::render_resource::AsBindGroup) without the `#[bind_group_data]`
+/// attribute, you don't need to specialize. Instead, create the pipeline directly from
+/// [`PipelineCache`] and store its ID.
+#[derive(Resource)]
+pub struct SpecializedRenderPipelines<S: SpecializedRenderPipeline> {
+    /// Maps each key that has been specialized so far to the ID of its queued pipeline.
+    cache: HashMap<S::Key, CachedRenderPipelineId>,
+}
+
+impl<S: SpecializedRenderPipeline> Default for SpecializedRenderPipelines<S> {
+    /// Creates an empty cache; nothing is queued until `specialize` is called.
+    fn default() -> Self {
+        let cache = HashMap::default();
+        Self { cache }
+    }
+}
+
+impl<S: SpecializedRenderPipeline> SpecializedRenderPipelines<S> {
+    /// Get or create a specialized instance of the pipeline corresponding to `key`.
+    ///
+    /// On a cache miss the descriptor is built by `pipeline_specializer` and queued on `cache`;
+    /// on a hit the previously stored ID is returned without calling the specializer.
+    pub fn specialize(
+        &mut self,
+        cache: &PipelineCache,
+        pipeline_specializer: &S,
+        key: S::Key,
+    ) -> CachedRenderPipelineId {
+        match self.cache.entry(key.clone()) {
+            Entry::Occupied(slot) => *slot.get(),
+            Entry::Vacant(slot) => {
+                let descriptor = pipeline_specializer.specialize(key);
+                let id = cache.queue_render_pipeline(descriptor);
+                *slot.insert(id)
+            }
+        }
+    }
+}
+
+/// A trait that allows constructing different variants of a compute pipeline from a key.
+///
+/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key
+/// contains no data then you don't need to specialize. For example, if you are using the
+/// [`AsBindGroup`](crate::render::render_resource::AsBindGroup) without the `#[bind_group_data]`
+/// attribute, you don't need to specialize. Instead, create the pipeline directly from
+/// [`PipelineCache`] and store its ID.
+///
+/// See [`SpecializedComputePipelines`] for more info.
+pub trait SpecializedComputePipeline {
+    /// The key that defines each "variant" of the compute pipeline.
+    type Key: Clone + Hash + PartialEq + Eq;
+
+    /// Construct a new compute pipeline based on the provided key.
+    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor;
+}
+
+/// A convenience cache for creating different variants of a compute pipeline based on some key.
+///
+/// Some compute pipelines may need to be configured differently depending on the exact situation.
+/// This cache allows constructing different compute pipelines for each situation based on a key,
+/// making it easy to A) construct the necessary pipelines, and B) reuse already constructed
+/// pipelines.
+///
+/// Note: This is intended for modifying your pipeline descriptor on the basis of a key. If your key
+/// contains no data then you don't need to specialize. For example, if you are using the
+/// [`AsBindGroup`](crate::render::render_resource::AsBindGroup) without the `#[bind_group_data]`
+/// attribute, you don't need to specialize. Instead, create the pipeline directly from
+/// [`PipelineCache`] and store its ID.
+#[derive(Resource)]
+pub struct SpecializedComputePipelines<S: SpecializedComputePipeline> {
+    /// Maps each key that has been specialized so far to the ID of its queued pipeline.
+    cache: HashMap<S::Key, CachedComputePipelineId>,
+}
+
+impl<S: SpecializedComputePipeline> Default for SpecializedComputePipelines<S> {
+    /// Creates an empty cache; nothing is queued until `specialize` is called.
+    fn default() -> Self {
+        let cache = HashMap::default();
+        Self { cache }
+    }
+}
+
+impl<S: SpecializedComputePipeline> SpecializedComputePipelines<S> {
+    /// Get or create a specialized instance of the pipeline corresponding to `key`.
+    ///
+    /// On a cache miss the descriptor is built by `specialize_pipeline` and queued on `cache`;
+    /// on a hit the previously stored ID is returned without calling the specializer.
+    pub fn specialize(
+        &mut self,
+        cache: &PipelineCache,
+        specialize_pipeline: &S,
+        key: S::Key,
+    ) -> CachedComputePipelineId {
+        match self.cache.entry(key.clone()) {
+            Entry::Occupied(slot) => *slot.get(),
+            Entry::Vacant(slot) => {
+                let descriptor = specialize_pipeline.specialize(key);
+                let id = cache.queue_compute_pipeline(descriptor);
+                *slot.insert(id)
+            }
+        }
+    }
+}
+
+/// A trait that allows constructing different variants of a render pipeline from a key and the
+/// particular mesh's vertex buffer layout.
+///
+/// See [`SpecializedMeshPipelines`] for more info.
+pub trait SpecializedMeshPipeline {
+    /// The key that defines each "variant" of the render pipeline.
+    type Key: Clone + Hash + PartialEq + Eq;
+
+    /// Construct a new render pipeline based on the provided key and vertex layout.
+    ///
+    /// The returned pipeline descriptor should have a single vertex buffer, which is derived from
+    /// `layout`. [`SpecializedMeshPipelines`] reads the first vertex buffer of the returned
+    /// descriptor to deduplicate pipelines.
+    ///
+    /// # Errors
+    ///
+    /// Implementations return a [`SpecializedMeshPipelineError`] when a descriptor cannot be
+    /// built for `layout`.
+    fn specialize(
+        &self,
+        key: Self::Key,
+        layout: &MeshVertexBufferLayoutRef,
+    ) -> Result<RenderPipelineDescriptor, SpecializedMeshPipelineError>;
+}
+
+/// A cache of different variants of a render pipeline based on a key and the particular mesh's
+/// vertex buffer layout.
+#[derive(Resource)]
+pub struct SpecializedMeshPipelines<S: SpecializedMeshPipeline> {
+    /// Fast-path cache, keyed by the mesh layout together with the specialization key.
+    mesh_layout_cache: HashMap<(MeshVertexBufferLayoutRef, S::Key), CachedRenderPipelineId>,
+    /// Slow-path cache, keyed by the final vertex buffer layout of the specialized descriptor,
+    /// so that different mesh layouts producing the same vertex buffer layout share a pipeline.
+    vertex_layout_cache: VertexLayoutCache<S>,
+}
+
+/// Two-level map used by [`SpecializedMeshPipelines`]: the outer key is the final
+/// [`VertexBufferLayout`] of a specialized descriptor, the inner key is the specialization key.
+type VertexLayoutCache<S> = HashMap<
+    VertexBufferLayout,
+    HashMap<<S as SpecializedMeshPipeline>::Key, CachedRenderPipelineId>,
+>;
+
+impl<S: SpecializedMeshPipeline> Default for SpecializedMeshPipelines<S> {
+    /// Creates the cache with both lookup tables empty.
+    fn default() -> Self {
+        let mesh_layout_cache = HashMap::default();
+        let vertex_layout_cache = HashMap::default();
+        Self {
+            mesh_layout_cache,
+            vertex_layout_cache,
+        }
+    }
+}
+
+impl<S: SpecializedMeshPipeline> SpecializedMeshPipelines<S> {
+    /// Get or create a render pipeline specialized for `key` and the mesh's vertex buffer
+    /// layout.
+    ///
+    /// A hit in the `(layout, key)` cache returns the stored ID immediately. On a miss the
+    /// specializer is invoked and the result is deduplicated by the descriptor's final
+    /// [`VertexBufferLayout`], so different mesh layouts that specialize to the same vertex
+    /// buffer layout share one pipeline.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error returned by `pipeline_specializer`, after setting its
+    /// `pipeline_type` to the type name of `S`.
+    ///
+    /// # Panics
+    ///
+    /// The slow path indexes `descriptor.vertex.buffers[0]`, so a specializer that returns a
+    /// descriptor without any vertex buffer will panic here.
+    #[inline]
+    pub fn specialize(
+        &mut self,
+        cache: &PipelineCache,
+        pipeline_specializer: &S,
+        key: S::Key,
+        layout: &MeshVertexBufferLayoutRef,
+    ) -> Result<CachedRenderPipelineId, SpecializedMeshPipelineError> {
+        return match self.mesh_layout_cache.entry((layout.clone(), key.clone())) {
+            Entry::Occupied(entry) => Ok(*entry.into_mut()),
+            Entry::Vacant(entry) => specialize_slow(
+                &mut self.vertex_layout_cache,
+                cache,
+                pipeline_specializer,
+                key,
+                layout,
+                entry,
+            ),
+        };
+
+        // Kept out of line and marked `#[cold]` so the inlined fast path above stays small.
+        #[cold]
+        fn specialize_slow<S>(
+            vertex_layout_cache: &mut VertexLayoutCache<S>,
+            cache: &PipelineCache,
+            specialize_pipeline: &S,
+            key: S::Key,
+            layout: &MeshVertexBufferLayoutRef,
+            entry: VacantEntry<
+                (MeshVertexBufferLayoutRef, S::Key),
+                CachedRenderPipelineId,
+                FixedHasher,
+            >,
+        ) -> Result<CachedRenderPipelineId, SpecializedMeshPipelineError>
+        where
+            S: SpecializedMeshPipeline,
+        {
+            let descriptor = specialize_pipeline
+                .specialize(key.clone(), layout)
+                .map_err(|mut err| {
+                    // Record which pipeline type failed so the error message is actionable.
+                    let SpecializedMeshPipelineError::MissingVertexAttribute(missing) = &mut err;
+                    missing.pipeline_type = Some(core::any::type_name::<S>());
+                    err
+                })?;
+            // Different MeshVertexBufferLayouts can produce the same final VertexBufferLayout
+            // We want compatible vertex buffer layouts to use the same pipelines, so we must "deduplicate" them
+            let layout_map = match vertex_layout_cache
+                .raw_entry_mut()
+                .from_key(&descriptor.vertex.buffers[0])
+            {
+                RawEntryMut::Occupied(entry) => entry.into_mut(),
+                RawEntryMut::Vacant(entry) => {
+                    entry
+                        .insert(descriptor.vertex.buffers[0].clone(), Default::default())
+                        .1
+                }
+            };
+            Ok(*entry.insert(match layout_map.entry(key) {
+                Entry::Occupied(entry) => {
+                    // Debug builds verify that the shared pipeline really matches what this
+                    // layout would have produced on its own.
+                    if cfg!(debug_assertions) {
+                        let stored_descriptor = cache.get_render_pipeline_descriptor(*entry.get());
+                        if stored_descriptor != &descriptor {
+                            error!(
+                                "The cached pipeline descriptor for {} is not \
+                                    equal to the generated descriptor for the given key. \
+                                    This means the SpecializePipeline implementation uses \
+                                    'unused' MeshVertexBufferLayout information to specialize \
+                                    the pipeline. This is not allowed because it would invalidate \
+                                    the pipeline cache.",
+                                core::any::type_name::<S>()
+                            );
+                        }
+                    }
+                    *entry.into_mut()
+                }
+                Entry::Vacant(entry) => *entry.insert(cache.queue_render_pipeline(descriptor)),
+            }))
+        }
+    }
+}
+
+/// Error returned when a mesh pipeline cannot be specialized for a given vertex buffer layout.
+#[derive(Error, Debug)]
+pub enum SpecializedMeshPipelineError {
+    /// Wraps a [`MissingVertexAttributeError`]; the message is forwarded unchanged
+    /// (`transparent`).
+    #[error(transparent)]
+    MissingVertexAttribute(#[from] MissingVertexAttributeError),
+}
--- a/crates/libmarathon/src/render/render_resource/resource_macros.rs
+++ b/crates/libmarathon/src/render/render_resource/resource_macros.rs
@@ -0,0 +1,39 @@
+/// Defines a `Copy` newtype around `NonZero<u32>` whose `new()` hands out process-wide unique,
+/// monotonically increasing IDs starting at 1.
+///
+/// Conversions to and from `core::num::NonZero<u32>` are generated as well.
+#[macro_export]
+macro_rules! define_atomic_id {
+    ($atomic_id_type:ident) => {
+        #[derive(Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord, Debug)]
+        pub struct $atomic_id_type(core::num::NonZero<u32>);
+
+        impl $atomic_id_type {
+            /// Returns a fresh ID that differs from every ID returned before.
+            ///
+            /// # Panics
+            ///
+            /// Panics once the `u32` counter is exhausted. The counter never wraps, so every
+            /// later call panics as well instead of silently reusing earlier IDs.
+            #[expect(
+                clippy::new_without_default,
+                reason = "Implementing the `Default` trait on atomic IDs would imply that two `<AtomicIdType>::default()` equal each other. By only implementing `new()`, we indicate that each atomic ID created will be unique."
+            )]
+            pub fn new() -> Self {
+                use core::sync::atomic::{AtomicU32, Ordering};
+
+                static COUNTER: AtomicU32 = AtomicU32::new(1);
+
+                // `checked_add` refuses to wrap: a plain `fetch_add` would roll over to 0,
+                // panic once, and then start handing out duplicates from 1 again.
+                let id = COUNTER
+                    .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
+                        current.checked_add(1)
+                    })
+                    .ok()
+                    .and_then(core::num::NonZero::<u32>::new)
+                    .unwrap_or_else(|| {
+                        panic!(
+                            "The system ran out of unique `{}`s.",
+                            stringify!($atomic_id_type)
+                        );
+                    });
+                Self(id)
+            }
+        }
+
+        impl From<$atomic_id_type> for core::num::NonZero<u32> {
+            fn from(value: $atomic_id_type) -> Self {
+                value.0
+            }
+        }
+
+        impl From<core::num::NonZero<u32>> for $atomic_id_type {
+            fn from(value: core::num::NonZero<u32>) -> Self {
+                Self(value)
+            }
+        }
+    };
+}
--- a/crates/libmarathon/src/render/render_resource/specializer.rs
+++ b/crates/libmarathon/src/render/render_resource/specializer.rs
@@ -0,0 +1,353 @@
+use super::{
+    CachedComputePipelineId, CachedRenderPipelineId, ComputePipeline, ComputePipelineDescriptor,
+    PipelineCache, RenderPipeline, RenderPipelineDescriptor,
+};
+use bevy_ecs::error::BevyError;
+use bevy_platform::{
+    collections::{
+        hash_map::{Entry, VacantEntry},
+        HashMap,
+    },
+    hash::FixedHasher,
+};
+use core::{hash::Hash, marker::PhantomData};
+use tracing::error;
+use variadics_please::all_tuples;
+
+pub use macros::{Specializer, SpecializerKey};
+
+/// Defines a type that is able to be "specialized" and cached by creating and transforming
+/// its descriptor type. This is implemented for [`RenderPipeline`] and [`ComputePipeline`], and
+/// likely will not have much utility for other types.
+///
+/// See docs on [`Specializer`] for more info.
+pub trait Specializable {
+    /// The descriptor that specializers mutate and that is handed to the [`PipelineCache`].
+    type Descriptor: PartialEq + Clone + Send + Sync;
+    /// The ID returned by the [`PipelineCache`] when a descriptor is queued.
+    type CachedId: Clone + Send + Sync;
+    /// Queues `descriptor` on `pipeline_cache` and returns the ID of the queued entry.
+    fn queue(pipeline_cache: &PipelineCache, descriptor: Self::Descriptor) -> Self::CachedId;
+    /// Returns the descriptor that `pipeline_cache` stores for `id`.
+    fn get_descriptor(pipeline_cache: &PipelineCache, id: Self::CachedId) -> &Self::Descriptor;
+}
+
+/// Render pipelines are specialized through [`RenderPipelineDescriptor`]s and identified by
+/// [`CachedRenderPipelineId`]s.
+impl Specializable for RenderPipeline {
+    type Descriptor = RenderPipelineDescriptor;
+    type CachedId = CachedRenderPipelineId;
+
+    fn queue(
+        pipeline_cache: &PipelineCache,
+        descriptor: RenderPipelineDescriptor,
+    ) -> CachedRenderPipelineId {
+        pipeline_cache.queue_render_pipeline(descriptor)
+    }
+
+    fn get_descriptor(
+        pipeline_cache: &PipelineCache,
+        id: Self::CachedId,
+    ) -> &RenderPipelineDescriptor {
+        pipeline_cache.get_render_pipeline_descriptor(id)
+    }
+}
+
+/// Compute pipelines are specialized through [`ComputePipelineDescriptor`]s and identified by
+/// [`CachedComputePipelineId`]s.
+impl Specializable for ComputePipeline {
+    type Descriptor = ComputePipelineDescriptor;
+    type CachedId = CachedComputePipelineId;
+
+    fn queue(
+        pipeline_cache: &PipelineCache,
+        descriptor: ComputePipelineDescriptor,
+    ) -> CachedComputePipelineId {
+        pipeline_cache.queue_compute_pipeline(descriptor)
+    }
+
+    fn get_descriptor(
+        pipeline_cache: &PipelineCache,
+        id: Self::CachedId,
+    ) -> &ComputePipelineDescriptor {
+        pipeline_cache.get_compute_pipeline_descriptor(id)
+    }
+}
+
+/// Defines a type capable of "specializing" values of a type T.
+///
+/// Specialization is the process of generating variants of a type T
+/// from small hashable keys, and specializers themselves can be
+/// thought of as [pure functions] from the key type to `T`, that
+/// [memoize] their results based on the key.
+///
+/// <div class="warning">
+/// Because specialization is designed for use with render and compute
+/// pipelines, specializers act on <i>descriptors</i> of <code>T</code> rather
+/// than produce <code>T</code> itself, but the above comparison is still valid.
+/// </div>
+///
+/// Since compiling render and compute pipelines can be so slow,
+/// specialization allows a Bevy app to detect when it would compile
+/// a duplicate pipeline and reuse what's already in the cache. While
+/// pipelines could all be memoized hashing each whole descriptor, this
+/// would be much slower and could still create duplicates. In contrast,
+/// memoizing groups of *related* pipelines based on a small hashable
+/// key is much faster. See the docs on [`SpecializerKey`] for more info.
+///
+/// ## Composing Specializers
+///
+/// This trait can be derived with `#[derive(Specializer)]` for structs whose
+/// fields all implement [`Specializer`]. This allows for composing multiple
+/// specializers together, and makes encapsulation and separating concerns
+/// between specializers much nicer. One could make individual specializers
+/// for common operations and place them in entirely separate modules, then
+/// compose them together with a single `#[derive]`.
+///
+/// ```rust
+/// # use bevy_ecs::error::BevyError;
+/// # use crate::render::render_resource::Specializer;
+/// # use crate::render::render_resource::SpecializerKey;
+/// # use crate::render::render_resource::RenderPipeline;
+/// # use crate::render::render_resource::RenderPipelineDescriptor;
+/// struct A;
+/// struct B;
+/// #[derive(Copy, Clone, PartialEq, Eq, Hash, SpecializerKey)]
+/// struct BKey { contrived_number: u32 };
+///
+/// impl Specializer<RenderPipeline> for A {
+///     type Key = ();
+///
+///     fn specialize(
+///         &self,
+///         key: (),
+///         descriptor: &mut RenderPipelineDescriptor
+///     ) -> Result<(), BevyError>  {
+/// #       let _ = descriptor;
+///         // mutate the descriptor here
+///         Ok(key)
+///     }
+/// }
+///
+/// impl Specializer<RenderPipeline> for B {
+///     type Key = BKey;
+///
+///     fn specialize(
+///         &self,
+///         key: BKey,
+///         descriptor: &mut RenderPipelineDescriptor
+///     ) -> Result<BKey, BevyError> {
+/// #       let _ = descriptor;
+///         // mutate the descriptor here
+///         Ok(key)
+///     }
+/// }
+///
+/// #[derive(Specializer)]
+/// #[specialize(RenderPipeline)]
+/// struct C {
+///     #[key(default)]
+///     a: A,
+///     b: B,
+/// }
+///
+/// /*
+/// The generated implementation:
+/// impl Specializer<RenderPipeline> for C {
+///     type Key = BKey;
+///     fn specialize(
+///         &self,
+///         key: Self::Key,
+///         descriptor: &mut RenderPipelineDescriptor
+///     ) -> Result<Canonical<Self::Key>, BevyError> {
+///         let _ = self.a.specialize((), descriptor)?;
+///         let key = self.b.specialize(key, descriptor)?;
+///         Ok(key)
+///     }
+/// }
+/// */
+/// ```
+///
+/// The key type for a composed specializer will be a tuple of the keys
+/// of each field, and their specialization logic will be applied in field
+/// order. Since derive macros can't have generic parameters, the derive macro
+/// requires an additional `#[specialize(..targets)]` attribute to specify a
+/// list of types to target for the implementation. `#[specialize(all)]` is
+/// also allowed, and will generate a fully generic implementation at the cost
+/// of slightly worse error messages.
+///
+/// Additionally, each field can optionally take a `#[key]` attribute to
+/// specify a "key override". This will hide that field's key from being
+/// exposed by the wrapper, and always use the value given by the attribute.
+/// Values for this attribute may either be `default` which will use the key's
+/// [`Default`] implementation, or a valid rust expression of the key type.
+///
+/// [pure functions]: https://en.wikipedia.org/wiki/Pure_function
+/// [memoize]: https://en.wikipedia.org/wiki/Memoization
+pub trait Specializer<T: Specializable>: Send + Sync + 'static {
+    /// The key that selects which variant of `T` to produce.
+    type Key: SpecializerKey;
+    /// Mutates `descriptor` in place according to `key` and returns the canonical form of the
+    /// key, which [`Variants`] uses to detect duplicate descriptors.
+    ///
+    /// # Errors
+    ///
+    /// Implementations may return a [`BevyError`]; [`Variants::specialize`] propagates it and
+    /// caches nothing for that key.
+    fn specialize(
+        &self,
+        key: Self::Key,
+        descriptor: &mut T::Descriptor,
+    ) -> Result<Canonical<Self::Key>, BevyError>;
+}
+
+// TODO: update docs for `SpecializerKey` with a more concrete example
+// once we've migrated mesh layout specialization
+
+/// Defines a type that is able to be used as a key for [`Specializer`]s
+///
+/// <div class = "warning">
+/// <strong>Most types should implement this trait with the included derive macro.</strong> <br/>
+/// This generates a "canonical" key type, with <code>IS_CANONICAL = true</code>, and <code>Canonical = Self</code>
+/// </div>
+///
+/// ## What's a "canonical" key?
+///
+/// The specialization API memoizes pipelines based on the hash of each key, but this
+/// can still produce duplicates. For example, if one used a list of vertex attributes
+/// as a key, even if all the same attributes were present they could be in any order.
+/// In each case, though the keys would be "different" they would produce the same
+/// pipeline.
+///
+/// To address this, during specialization keys are processed into a [canonical]
+/// (or "standard") form that represents the actual descriptor that was produced.
+/// In the previous example, that would be the final `VertexBufferLayout` contained
+/// by the pipeline descriptor. This new key is used by [`Variants`] to
+/// perform additional checks for duplicates, but only if required. If a key is
+/// canonical from the start, then there's no need.
+///
+/// For implementors: the main property of a canonical key is that if two keys hash
+/// differently, they should nearly always produce different descriptors.
+///
+/// [canonical]: https://en.wikipedia.org/wiki/Canonicalization
+pub trait SpecializerKey: Clone + Hash + Eq {
+    /// Denotes whether this key is canonical or not. This should be `true`
+    /// if and only if `Canonical = Self`.
+    ///
+    /// When this is `true`, [`Variants`] skips its secondary (canonical-key) cache entirely.
+    const IS_CANONICAL: bool;
+
+    /// The canonical key type to convert this into during specialization.
+    type Canonical: Hash + Eq;
+}
+
+/// Shorthand for the canonical key type associated with the [`SpecializerKey`] `T`.
+pub type Canonical<T> = <T as SpecializerKey>::Canonical;
+
+/// The unit specializer: takes no key data and leaves the descriptor untouched.
+impl<T: Specializable> Specializer<T> for () {
+    type Key = ();
+
+    fn specialize(
+        &self,
+        _key: (),
+        _descriptor: &mut T::Descriptor,
+    ) -> Result<Canonical<Self::Key>, BevyError> {
+        Ok(())
+    }
+}
+
+/// `PhantomData` fields take no key data and leave the descriptor untouched.
+impl<T: Specializable, V: Send + Sync + 'static> Specializer<T> for PhantomData<V> {
+    type Key = ();
+
+    fn specialize(
+        &self,
+        _key: (),
+        _descriptor: &mut T::Descriptor,
+    ) -> Result<Canonical<Self::Key>, BevyError> {
+        Ok(())
+    }
+}
+
+// Implements `SpecializerKey` for a tuple of keys. The tuple is canonical only when every
+// element is canonical (the `true $(&& ..)*` fold also makes the empty tuple canonical), and
+// its canonical form is the tuple of the elements' canonical forms.
+macro_rules! impl_specialization_key_tuple {
+    ($(#[$meta:meta])* $($T:ident),*) => {
+        $(#[$meta])*
+        impl <$($T: SpecializerKey),*> SpecializerKey for ($($T,)*) {
+            const IS_CANONICAL: bool = true $(&& <$T as SpecializerKey>::IS_CANONICAL)*;
+            type Canonical = ($(Canonical<$T>,)*);
+        }
+    };
+}
+
+// Instantiate the impl for tuples; the `0` and `12` arguments are the arity bounds passed to
+// `all_tuples!`.
+all_tuples!(
+    #[doc(fake_variadic)]
+    impl_specialization_key_tuple,
+    0,
+    12,
+    T
+);
+
+/// A cache for variants of a resource type created by a specializer.
+/// At most one resource will be created for each key.
+pub struct Variants<T: Specializable, S: Specializer<T>> {
+    /// The specializer that turns the base descriptor into a variant for a given key.
+    specializer: S,
+    /// The starting descriptor; a copy of it is specialized for every new key.
+    base_descriptor: T::Descriptor,
+    /// Maps every key seen so far to the ID of its queued resource.
+    primary_cache: HashMap<S::Key, T::CachedId>,
+    /// Maps canonical keys to IDs; only used when `S::Key` is not itself canonical.
+    secondary_cache: HashMap<Canonical<S::Key>, T::CachedId>,
+}
+
+impl<T: Specializable, S: Specializer<T>> Variants<T, S> {
+    /// Creates a new [`Variants`] from a [`Specializer`] and a base descriptor.
+    #[inline]
+    pub fn new(specializer: S, base_descriptor: T::Descriptor) -> Self {
+        Self {
+            specializer,
+            base_descriptor,
+            primary_cache: Default::default(),
+            secondary_cache: Default::default(),
+        }
+    }
+
+    /// Specializes a resource given the [`Specializer`]'s key type.
+    ///
+    /// Returns the cached ID when `key` has been seen before. Otherwise a copy of the base
+    /// descriptor is specialized, queued on `pipeline_cache` (unless an equivalent canonical
+    /// key already has an entry), and the resulting ID is cached for `key`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by the specializer; nothing is cached in that case.
+    #[inline]
+    pub fn specialize(
+        &mut self,
+        pipeline_cache: &PipelineCache,
+        key: S::Key,
+    ) -> Result<T::CachedId, BevyError> {
+        let entry = self.primary_cache.entry(key.clone());
+        match entry {
+            Entry::Occupied(entry) => Ok(entry.get().clone()),
+            Entry::Vacant(entry) => Self::specialize_slow(
+                &self.specializer,
+                self.base_descriptor.clone(),
+                pipeline_cache,
+                key,
+                entry,
+                &mut self.secondary_cache,
+            ),
+        }
+    }
+
+    /// Cache-miss path of [`Variants::specialize`], kept out of line so the hit path stays small.
+    #[cold]
+    fn specialize_slow(
+        specializer: &S,
+        base_descriptor: T::Descriptor,
+        pipeline_cache: &PipelineCache,
+        key: S::Key,
+        primary_entry: VacantEntry<S::Key, T::CachedId, FixedHasher>,
+        secondary_cache: &mut HashMap<Canonical<S::Key>, T::CachedId>,
+    ) -> Result<T::CachedId, BevyError> {
+        // `base_descriptor` is already an owned copy made by the caller and `key` is not needed
+        // after specialization (the vacant entry holds its own copy), so both are moved rather
+        // than cloned a second time.
+        let mut descriptor = base_descriptor;
+        let canonical_key = specializer.specialize(key, &mut descriptor)?;
+
+        // if the whole key is canonical, the secondary cache isn't needed.
+        if <S::Key as SpecializerKey>::IS_CANONICAL {
+            return Ok(primary_entry
+                .insert(<T as Specializable>::queue(pipeline_cache, descriptor))
+                .clone());
+        }
+
+        let id = match secondary_cache.entry(canonical_key) {
+            Entry::Occupied(entry) => {
+                // Debug builds verify that two keys sharing a canonical key really produce the
+                // same descriptor.
+                if cfg!(debug_assertions) {
+                    let stored_descriptor =
+                        <T as Specializable>::get_descriptor(pipeline_cache, entry.get().clone());
+                    if &descriptor != stored_descriptor {
+                        error!(
+                            "Invalid Specializer<{}> impl for {}: the cached descriptor \
+                            is not equal to the generated descriptor for the given key. \
+                            This means the Specializer implementation uses unused information \
+                            from the key to specialize the pipeline. This is not allowed \
+                            because it would invalidate the cache.",
+                            core::any::type_name::<T>(),
+                            core::any::type_name::<S>()
+                        );
+                    }
+                }
+                entry.into_mut().clone()
+            }
+            Entry::Vacant(entry) => entry
+                .insert(<T as Specializable>::queue(pipeline_cache, descriptor))
+                .clone(),
+        };
+
+        primary_entry.insert(id.clone());
+        Ok(id)
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/storage_buffer.rs
+++ b/crates/libmarathon/src/render/render_resource/storage_buffer.rs
@@ -0,0 +1,285 @@
+use core::marker::PhantomData;
+
+use super::Buffer;
+use crate::render::renderer::{RenderDevice, RenderQueue};
+use encase::{
+    internal::WriteInto, DynamicStorageBuffer as DynamicStorageBufferWrapper, ShaderType,
+    StorageBuffer as StorageBufferWrapper,
+};
+use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferSize, BufferUsages};
+
+use super::IntoBinding;
+
+/// Stores data to be transferred to the GPU and made accessible to shaders as a storage buffer.
+///
+/// Storage buffers can be made available to shaders in some combination of read/write mode, and can store large amounts of data.
+/// Note however that WebGL2 does not support storage buffers, so consider alternative options in this case.
+///
+/// Storage buffers can store runtime-sized arrays, but only if they are the last field in a structure.
+///
+/// The contained data is stored in system RAM. [`write_buffer`](StorageBuffer::write_buffer) queues
+/// copying of the data from system RAM to VRAM. Storage buffers must conform to [std430 alignment/padding requirements], which
+/// is automatically enforced by this structure.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`]
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+///
+/// [std430 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-storage
+pub struct StorageBuffer<T: ShaderType> {
+    /// The CPU-side value that `write_buffer` serializes.
+    value: T,
+    /// Scratch byte buffer that `value` is serialized into before upload.
+    scratch: StorageBufferWrapper<Vec<u8>>,
+    /// The GPU-side buffer; `None` until `write_buffer` has created one.
+    buffer: Option<Buffer>,
+    /// Optional label passed to the device when the GPU buffer is created.
+    label: Option<String>,
+    /// Set by `set_label` and `add_usages`; forces the GPU buffer to be recreated on the next
+    /// `write_buffer`.
+    changed: bool,
+    /// Usage flags used when the GPU buffer is created.
+    buffer_usage: BufferUsages,
+    /// Size in bytes of the most recent write, used as the size of the binding.
+    last_written_size: Option<BufferSize>,
+}
+
+impl<T: ShaderType> From<T> for StorageBuffer<T> {
+    /// Wraps `value` in a storage buffer that has no GPU allocation yet and uses the default
+    /// `COPY_DST | STORAGE` usages.
+    fn from(value: T) -> Self {
+        let scratch = StorageBufferWrapper::new(Vec::new());
+        let buffer_usage = BufferUsages::COPY_DST | BufferUsages::STORAGE;
+        Self {
+            value,
+            scratch,
+            buffer: None,
+            label: None,
+            changed: false,
+            buffer_usage,
+            last_written_size: None,
+        }
+    }
+}
+
+impl<T: ShaderType + Default> Default for StorageBuffer<T> {
+    /// Equivalent to wrapping `T::default()` via the `From<T>` conversion, which initializes
+    /// every other field to the same values.
+    fn default() -> Self {
+        Self::from(T::default())
+    }
+}
+
+impl<T: ShaderType + WriteInto> StorageBuffer<T> {
+    /// Returns the GPU-side buffer, or `None` if `write_buffer` has not created one yet.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns a binding that covers the most recently written bytes, or `None` if there is no
+    /// GPU-side buffer yet.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        let buffer = self.buffer()?;
+        let binding = BufferBinding {
+            buffer,
+            offset: 0,
+            size: self.last_written_size,
+        };
+        Some(BindingResource::Buffer(binding))
+    }
+
+    /// Replaces the CPU-side value. The GPU copy is only updated by `write_buffer`.
+    pub fn set(&mut self, value: T) {
+        self.value = value;
+    }
+
+    /// Returns a shared reference to the CPU-side value.
+    pub fn get(&self) -> &T {
+        &self.value
+    }
+
+    /// Returns a mutable reference to the CPU-side value.
+    pub fn get_mut(&mut self) -> &mut T {
+        &mut self.value
+    }
+
+    /// Sets the label used when the GPU buffer is created. Changing the label marks the buffer
+    /// for recreation on the next `write_buffer`.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let new_label = label.map(str::to_string);
+        self.changed |= new_label != self.label;
+        self.label = new_label;
+    }
+
+    /// Returns the current label, if any.
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::STORAGE`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.changed = true;
+        self.buffer_usage |= usage;
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
+    /// allocated does not have enough capacity, a new GPU-side buffer is created. The same happens
+    /// when the label or usages changed since the last write.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        self.scratch.write(&self.value).unwrap();
+
+        let size = self.scratch.as_ref().len() as u64;
+        // A missing buffer counts as zero capacity.
+        let capacity = self.buffer.as_deref().map_or(0, wgpu::Buffer::size);
+        let must_recreate = self.changed || capacity < size;
+
+        if must_recreate {
+            let descriptor = BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            };
+            self.buffer = Some(device.create_buffer_with_data(&descriptor));
+            self.changed = false;
+        } else if let Some(buffer) = self.buffer.as_ref() {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+
+        self.last_written_size = BufferSize::new(size);
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a StorageBuffer<T> {
+    /// Converts the buffer into its binding.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no GPU-side buffer exists yet, i.e. `binding()` returns `None`.
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        let binding = self.binding();
+        binding.expect("Failed to get buffer")
+    }
+}
+
+/// Stores data to be transferred to the GPU and made accessible to shaders as a dynamic storage buffer.
+///
+/// This is just a [`StorageBuffer`], but also allows you to set dynamic offsets.
+///
+/// Dynamic storage buffers can be made available to shaders in some combination of read/write mode, and can store large amounts
+/// of data. Note however that WebGL2 does not support storage buffers, so consider alternative options in this case. Dynamic
+/// storage buffers support multiple separate bindings at dynamic byte offsets and so have a
+/// [`push`](DynamicStorageBuffer::push) method.
+///
+/// The contained data is stored in system RAM. [`write_buffer`](DynamicStorageBuffer::write_buffer)
+/// queues copying of the data from system RAM to VRAM. The data within a storage buffer binding must conform to
+/// [std430 alignment/padding requirements]. `DynamicStorageBuffer` takes care of serializing the inner type to conform to
+/// these requirements. Each item [`push`](DynamicStorageBuffer::push)ed into this structure
+/// will additionally be aligned to meet dynamic offset alignment requirements.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render::render_resource::BufferVec)
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`StorageBuffer`]
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+///
+/// [std430 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-storage
+pub struct DynamicStorageBuffer<T: ShaderType> {
+    scratch: DynamicStorageBufferWrapper<Vec<u8>>,
+    buffer: Option<Buffer>,
+    label: Option<String>,
+    changed: bool,
+    buffer_usage: BufferUsages,
+    last_written_size: Option<BufferSize>,
+    // No `T` is stored; the marker only ties the buffer to its element type.
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<T: ShaderType> Default for DynamicStorageBuffer<T> {
+    /// Creates an empty buffer with no GPU-side allocation, no label, and the
+    /// `COPY_DST | STORAGE` usage flags.
+    fn default() -> Self {
+        let scratch = DynamicStorageBufferWrapper::new(Vec::new());
+        let buffer_usage = BufferUsages::STORAGE | BufferUsages::COPY_DST;
+
+        Self {
+            scratch,
+            buffer_usage,
+            buffer: None,
+            label: None,
+            last_written_size: None,
+            changed: false,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T: ShaderType + WriteInto> DynamicStorageBuffer<T> {
+    /// Returns the GPU-side buffer, or `None` if none has been created yet.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns a binding that starts at offset 0 and uses the size recorded by the last
+    /// [`write_buffer`](Self::write_buffer) call, or `None` if there is no GPU-side buffer.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        let buffer = self.buffer()?;
+        let binding = BufferBinding {
+            buffer,
+            offset: 0,
+            size: self.last_written_size,
+        };
+        Some(BindingResource::Buffer(binding))
+    }
+
+    /// Returns `true` if no bytes are currently staged on the CPU side.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.scratch.as_ref().is_empty()
+    }
+
+    /// Serializes `value` into the CPU-side staging bytes and returns the offset reported by the
+    /// underlying writer.
+    #[inline]
+    pub fn push(&mut self, value: T) -> u32 {
+        let offset = self.scratch.write(&value).unwrap();
+        // Plain narrowing cast: an offset that does not fit in `u32` is truncated.
+        offset as u32
+    }
+
+    /// Sets the label used for the GPU-side buffer, marking the buffer as changed if the label differs.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let new_label = label.map(str::to_string);
+        self.changed |= self.label != new_label;
+        self.label = new_label;
+    }
+
+    /// Returns the current label, if any.
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::STORAGE`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.changed = true;
+        self.buffer_usage.insert(usage);
+    }
+
+    /// Uploads the staged bytes to the GPU using the [`RenderDevice`] and the provided [`RenderQueue`].
+    ///
+    /// A new GPU-side buffer is created when the allocated capacity (zero if no buffer exists) is smaller
+    /// than the staged data, or when the buffer was marked as changed and there is data to upload.
+    /// Otherwise an existing buffer is overwritten in place starting at offset 0.
+    #[inline]
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        let size = self.scratch.as_ref().len() as u64;
+        let allocated = self.buffer.as_deref().map_or(0, wgpu::Buffer::size);
+        let recreate_for_change = self.changed && size > 0;
+
+        if allocated < size || recreate_for_change {
+            let descriptor = BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            };
+            self.buffer = Some(device.create_buffer_with_data(&descriptor));
+            self.changed = false;
+        } else if let Some(buffer) = self.buffer.as_ref() {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+
+        // Record how many bytes were just staged; `binding` uses this as the binding size.
+        self.last_written_size = BufferSize::new(size);
+    }
+
+    /// Discards all staged bytes and resets the write offset to 0. The GPU-side buffer is left untouched.
+    #[inline]
+    pub fn clear(&mut self) {
+        let staged = self.scratch.as_mut();
+        staged.clear();
+        self.scratch.set_offset(0);
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a DynamicStorageBuffer<T> {
+    /// Converts the buffer into its [`BindingResource`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if [`DynamicStorageBuffer::binding`] returns `None`, i.e. no GPU-side buffer exists.
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        match self.binding() {
+            Some(binding) => binding,
+            None => panic!("Failed to get buffer"),
+        }
+    }
+}
--- a/crates/libmarathon/src/render/render_resource/texture.rs
+++ b/crates/libmarathon/src/render/render_resource/texture.rs
@@ -0,0 +1,166 @@
+use crate::render::define_atomic_id;
+use crate::render::renderer::WgpuWrapper;
+use bevy_derive::{Deref, DerefMut};
+use bevy_ecs::resource::Resource;
+use core::ops::Deref;
+
+// Declares the `TextureId` type used to identify individual `Texture`s.
+define_atomic_id!(TextureId);
+
+/// A GPU-accessible texture.
+///
+/// May be converted from and dereferences to a wgpu [`Texture`](wgpu::Texture).
+/// Can be created via [`RenderDevice::create_texture`](crate::render::renderer::RenderDevice::create_texture).
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`](crate::render::render_resource::DynamicUniformBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`UniformBuffer`](crate::render::render_resource::UniformBuffer)
+#[derive(Clone, Debug)]
+pub struct Texture {
+    /// Identifier created when the texture is wrapped (see the `From<wgpu::Texture>` impl).
+    id: TextureId,
+    /// The wrapped wgpu texture.
+    value: WgpuWrapper<wgpu::Texture>,
+}
+
+impl Texture {
+    /// Returns the [`TextureId`] of this texture.
+    #[inline]
+    pub fn id(&self) -> TextureId {
+        let Self { id, .. } = self;
+        *id
+    }
+
+    /// Creates a [`TextureView`] of this texture from the given descriptor.
+    pub fn create_view(&self, desc: &wgpu::TextureViewDescriptor) -> TextureView {
+        let raw_view = self.value.create_view(desc);
+        TextureView::from(raw_view)
+    }
+}
+
+impl From<wgpu::Texture> for Texture {
+    /// Wraps a raw wgpu texture and gives it a newly created [`TextureId`].
+    fn from(texture: wgpu::Texture) -> Self {
+        let id = TextureId::new();
+        let value = WgpuWrapper::new(texture);
+        Self { id, value }
+    }
+}
+
+impl Deref for Texture {
+    type Target = wgpu::Texture;
+
+    /// Borrows the wrapped wgpu texture.
+    #[inline]
+    fn deref(&self) -> &wgpu::Texture {
+        &self.value
+    }
+}
+
+// Declares the `TextureViewId` type used to identify individual `TextureView`s.
+define_atomic_id!(TextureViewId);
+
+/// Describes a [`Texture`] with its associated metadata required by a pipeline or [`BindGroup`](super::BindGroup).
+///
+/// May be converted from and dereferences to a wgpu [`TextureView`](wgpu::TextureView).
+#[derive(Clone, Debug)]
+pub struct TextureView {
+    /// Identifier created when the view is wrapped (see the `From<wgpu::TextureView>` impl).
+    id: TextureViewId,
+    /// The wrapped wgpu texture view.
+    value: WgpuWrapper<wgpu::TextureView>,
+}
+
+/// A wrapper around a wgpu [`SurfaceTexture`](wgpu::SurfaceTexture).
+///
+/// May be converted from and dereferences to the wgpu type; [`present`](SurfaceTexture::present)
+/// consumes the wrapper and presents the wrapped texture.
+pub struct SurfaceTexture {
+    /// The wrapped wgpu surface texture.
+    value: WgpuWrapper<wgpu::SurfaceTexture>,
+}
+
+impl SurfaceTexture {
+    /// Consumes the wrapper and presents the underlying [`wgpu::SurfaceTexture`].
+    pub fn present(self) {
+        let surface_texture = self.value.into_inner();
+        surface_texture.present();
+    }
+}
+
+impl TextureView {
+    /// Returns the [`TextureViewId`] of this view.
+    #[inline]
+    pub fn id(&self) -> TextureViewId {
+        let Self { id, .. } = self;
+        *id
+    }
+}
+
+impl From<wgpu::TextureView> for TextureView {
+    /// Wraps a raw wgpu texture view and gives it a newly created [`TextureViewId`].
+    fn from(view: wgpu::TextureView) -> Self {
+        let id = TextureViewId::new();
+        let value = WgpuWrapper::new(view);
+        Self { id, value }
+    }
+}
+
+impl From<wgpu::SurfaceTexture> for SurfaceTexture {
+    /// Wraps a raw wgpu surface texture.
+    fn from(surface_texture: wgpu::SurfaceTexture) -> Self {
+        let value = WgpuWrapper::new(surface_texture);
+        Self { value }
+    }
+}
+
+impl Deref for TextureView {
+    type Target = wgpu::TextureView;
+
+    /// Borrows the wrapped wgpu texture view.
+    #[inline]
+    fn deref(&self) -> &wgpu::TextureView {
+        &self.value
+    }
+}
+
+impl Deref for SurfaceTexture {
+    type Target = wgpu::SurfaceTexture;
+
+    /// Borrows the wrapped wgpu surface texture.
+    #[inline]
+    fn deref(&self) -> &wgpu::SurfaceTexture {
+        &self.value
+    }
+}
+
+// Declares the `SamplerId` type used to identify individual `Sampler`s.
+define_atomic_id!(SamplerId);
+
+/// A Sampler defines how a pipeline will sample from a [`TextureView`].
+/// They define image filters (including anisotropy) and address (wrapping) modes, among other things.
+///
+/// May be converted from and dereferences to a wgpu [`Sampler`](wgpu::Sampler).
+/// Can be created via [`RenderDevice::create_sampler`](crate::render::renderer::RenderDevice::create_sampler).
+#[derive(Clone, Debug)]
+pub struct Sampler {
+    /// Identifier created when the sampler is wrapped (see the `From<wgpu::Sampler>` impl).
+    id: SamplerId,
+    /// The wrapped wgpu sampler.
+    value: WgpuWrapper<wgpu::Sampler>,
+}
+
+impl Sampler {
+    /// Returns the [`SamplerId`] of this sampler.
+    #[inline]
+    pub fn id(&self) -> SamplerId {
+        let Self { id, .. } = self;
+        *id
+    }
+}
+
+impl From<wgpu::Sampler> for Sampler {
+    /// Wraps a raw wgpu sampler and gives it a newly created [`SamplerId`].
+    fn from(sampler: wgpu::Sampler) -> Self {
+        let id = SamplerId::new();
+        let value = WgpuWrapper::new(sampler);
+        Self { id, value }
+    }
+}
+
+impl Deref for Sampler {
+    type Target = wgpu::Sampler;
+
+    /// Borrows the wrapped wgpu sampler.
+    #[inline]
+    fn deref(&self) -> &wgpu::Sampler {
+        &self.value
+    }
+}
+
+/// A rendering resource for the default image sampler which is set during renderer
+/// initialization.
+///
+/// The [`ImagePlugin`](bevy_image::ImagePlugin) can be set during app initialization to change the default
+/// image sampler.
+///
+/// This is a newtype around [`Sampler`]; the derived `Deref`/`DerefMut` expose the wrapped sampler.
+#[derive(Resource, Debug, Clone, Deref, DerefMut)]
+pub struct DefaultImageSampler(pub(crate) Sampler);
--- a/crates/libmarathon/src/render/render_resource/uniform_buffer.rs
+++ b/crates/libmarathon/src/render/render_resource/uniform_buffer.rs
@@ -0,0 +1,402 @@
+use core::{marker::PhantomData, num::NonZero};
+
+use crate::render::{
+    render_resource::Buffer,
+    renderer::{RenderDevice, RenderQueue},
+};
+use encase::{
+    internal::{AlignmentValue, BufferMut, WriteInto},
+    DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
+    UniformBuffer as UniformBufferWrapper,
+};
+use wgpu::{
+    util::BufferInitDescriptor, BindingResource, BufferBinding, BufferDescriptor, BufferUsages,
+};
+
+use super::IntoBinding;
+
+/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer.
+///
+/// Uniform buffers are available to shaders on a read-only basis. Uniform buffers are commonly used to make available to shaders
+/// parameters that are constant during shader execution, and are best used for data that is relatively small in size as they are
+/// only guaranteed to support up to 16kB per binding.
+///
+/// The contained data is stored in system RAM. [`write_buffer`](UniformBuffer::write_buffer) queues
+/// copying of the data from system RAM to VRAM. Data in uniform buffers must follow [std140 alignment/padding requirements],
+/// which is automatically enforced by this structure. Per the WGPU spec, uniform buffers cannot store runtime-sized arrays
+/// (vectors), or structures with fields that are vectors.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`DynamicUniformBuffer`]
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render::render_resource::Texture)
+///
+/// [std140 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-uniform
+pub struct UniformBuffer<T: ShaderType> {
+    /// The CPU-side value that [`write_buffer`](UniformBuffer::write_buffer) serializes and uploads.
+    value: T,
+    /// CPU-side staging bytes holding the serialized form of `value`.
+    scratch: UniformBufferWrapper<Vec<u8>>,
+    /// GPU-side buffer; `None` until `write_buffer` creates one.
+    buffer: Option<Buffer>,
+    /// Optional label handed to the GPU-side buffer when it is created.
+    label: Option<String>,
+    /// Set when the label or usage flags are modified; makes `write_buffer` re-create the buffer.
+    changed: bool,
+    /// Usage flags applied when the GPU-side buffer is created.
+    buffer_usage: BufferUsages,
+}
+
+impl<T: ShaderType> From<T> for UniformBuffer<T> {
+    /// Wraps `value` in a uniform buffer with no GPU-side allocation, no label, and the
+    /// `COPY_DST | UNIFORM` usage flags.
+    fn from(value: T) -> Self {
+        let scratch = UniformBufferWrapper::new(Vec::new());
+        let buffer_usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
+
+        Self {
+            value,
+            scratch,
+            buffer_usage,
+            buffer: None,
+            label: None,
+            changed: false,
+        }
+    }
+}
+
+impl<T: ShaderType + Default> Default for UniformBuffer<T> {
+    /// Creates a uniform buffer holding `T::default()`, with no GPU-side allocation, no label,
+    /// and the `COPY_DST | UNIFORM` usage flags.
+    fn default() -> Self {
+        // The `From<T>` impl builds exactly the same initial state around a given value.
+        Self::from(T::default())
+    }
+}
+
+impl<T: ShaderType + WriteInto> UniformBuffer<T> {
+    /// Returns the GPU-side buffer, or `None` if none has been created yet.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns a binding covering the entire GPU-side buffer, or `None` if there is no buffer.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        self.buffer()
+            .map(|buffer| BindingResource::Buffer(buffer.as_entire_buffer_binding()))
+    }
+
+    /// Set the data the buffer stores.
+    pub fn set(&mut self, value: T) {
+        *self.get_mut() = value;
+    }
+
+    /// Returns a shared reference to the stored value.
+    pub fn get(&self) -> &T {
+        &self.value
+    }
+
+    /// Returns a mutable reference to the stored value.
+    pub fn get_mut(&mut self) -> &mut T {
+        &mut self.value
+    }
+
+    /// Sets the label used for the GPU-side buffer, marking the buffer as changed if the label differs.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let new_label = label.map(str::to_string);
+        self.changed |= self.label != new_label;
+        self.label = new_label;
+    }
+
+    /// Returns the current label, if any.
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::UNIFORM`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.changed = true;
+        self.buffer_usage.insert(usage);
+    }
+
+    /// Serializes the stored value and uploads it to the GPU using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If no GPU-side buffer exists yet, or the label or usage flags changed since the last upload, a new
+    /// buffer is created and initialized with the serialized data. Otherwise the existing buffer is
+    /// overwritten starting at offset 0.
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        self.scratch.write(&self.value).unwrap();
+
+        let needs_new_buffer = self.changed || self.buffer.is_none();
+        if needs_new_buffer {
+            let descriptor = BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            };
+            self.buffer = Some(device.create_buffer_with_data(&descriptor));
+            self.changed = false;
+        } else if let Some(buffer) = self.buffer.as_ref() {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a UniformBuffer<T> {
+    /// Converts the buffer into a binding that covers the entire GPU-side buffer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no GPU-side buffer exists.
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        let buffer = self.buffer().expect("Failed to get buffer");
+        let whole_buffer = buffer.as_entire_buffer_binding();
+        whole_buffer.into_binding()
+    }
+}
+
+/// Stores data to be transferred to the GPU and made accessible to shaders as a dynamic uniform buffer.
+///
+/// Dynamic uniform buffers are available to shaders on a read-only basis. Dynamic uniform buffers are commonly used to make
+/// available to shaders runtime-sized arrays of parameters that are otherwise constant during shader execution, and are best
+/// suited to data that is relatively small in size as they are only guaranteed to support up to 16kB per binding.
+///
+/// The contained data is stored in system RAM. [`write_buffer`](DynamicUniformBuffer::write_buffer) queues
+/// copying of the data from system RAM to VRAM. Data in uniform buffers must follow [std140 alignment/padding requirements],
+/// which is automatically enforced by this structure. Per the WGPU spec, uniform buffers cannot store runtime-sized arrays
+/// (vectors), or structures with fields that are vectors.
+///
+/// Other options for storing GPU-accessible data are:
+/// * [`BufferVec`](crate::render::render_resource::BufferVec)
+/// * [`DynamicStorageBuffer`](crate::render::render_resource::DynamicStorageBuffer)
+/// * [`GpuArrayBuffer`](crate::render::render_resource::GpuArrayBuffer)
+/// * [`RawBufferVec`](crate::render::render_resource::RawBufferVec)
+/// * [`StorageBuffer`](crate::render::render_resource::StorageBuffer)
+/// * [`Texture`](crate::render::render_resource::Texture)
+/// * [`UniformBuffer`]
+///
+/// [std140 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-uniform
+pub struct DynamicUniformBuffer<T: ShaderType> {
+    /// CPU-side staging bytes that [`push`](DynamicUniformBuffer::push) serializes values into.
+    scratch: DynamicUniformBufferWrapper<Vec<u8>>,
+    /// GPU-side buffer; `None` until `write_buffer` or `get_writer` creates one.
+    buffer: Option<Buffer>,
+    /// Optional label handed to the GPU-side buffer when it is created.
+    label: Option<String>,
+    /// Set when the label or usage flags are modified; consulted when deciding whether to re-create the buffer.
+    changed: bool,
+    /// Usage flags applied when the GPU-side buffer is created.
+    buffer_usage: BufferUsages,
+    /// Ties the buffer to its element type `T` without storing a `T`.
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<T: ShaderType> Default for DynamicUniformBuffer<T> {
+    /// Creates an empty buffer with no GPU-side allocation, no label, and the
+    /// `COPY_DST | UNIFORM` usage flags.
+    fn default() -> Self {
+        let scratch = DynamicUniformBufferWrapper::new(Vec::new());
+        let buffer_usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
+
+        Self {
+            scratch,
+            buffer_usage,
+            buffer: None,
+            label: None,
+            changed: false,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
+    /// Creates an empty buffer whose CPU-side encase writer is constructed with the given `alignment`.
+    ///
+    /// Apart from the alignment, the initial state matches [`Default::default`].
+    pub fn new_with_alignment(alignment: u64) -> Self {
+        let scratch = DynamicUniformBufferWrapper::new_with_alignment(Vec::new(), alignment);
+        let buffer_usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
+
+        Self {
+            scratch,
+            buffer_usage,
+            buffer: None,
+            label: None,
+            changed: false,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Returns the GPU-side buffer, or `None` if none has been created yet.
+    #[inline]
+    pub fn buffer(&self) -> Option<&Buffer> {
+        self.buffer.as_ref()
+    }
+
+    /// Returns a binding that starts at offset 0 and spans `T::min_size()` bytes, or `None` if there is
+    /// no GPU-side buffer.
+    #[inline]
+    pub fn binding(&self) -> Option<BindingResource<'_>> {
+        let buffer = self.buffer()?;
+        let binding = BufferBinding {
+            buffer,
+            offset: 0,
+            size: Some(T::min_size()),
+        };
+        Some(BindingResource::Buffer(binding))
+    }
+
+    /// Returns `true` if no bytes are currently staged on the CPU side.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.scratch.as_ref().is_empty()
+    }
+
+    /// Push data into the `DynamicUniformBuffer`'s internal vector (residing on system RAM).
+    ///
+    /// Returns the offset reported by the underlying writer, narrowed to `u32`.
+    #[inline]
+    pub fn push(&mut self, value: &T) -> u32 {
+        let offset = self.scratch.write(value).unwrap();
+        // Plain narrowing cast: an offset that does not fit in `u32` is truncated.
+        offset as u32
+    }
+
+    /// Sets the label used for the GPU-side buffer, marking the buffer as changed if the label differs.
+    pub fn set_label(&mut self, label: Option<&str>) {
+        let new_label = label.map(str::to_string);
+        self.changed |= self.label != new_label;
+        self.label = new_label;
+    }
+
+    /// Returns the current label, if any.
+    pub fn get_label(&self) -> Option<&str> {
+        self.label.as_deref()
+    }
+
+    /// Add more [`BufferUsages`] to the buffer.
+    ///
+    /// This method only allows addition of flags to the default usage flags.
+    ///
+    /// The default values for buffer usage are `BufferUsages::COPY_DST` and `BufferUsages::UNIFORM`.
+    pub fn add_usages(&mut self, usage: BufferUsages) {
+        self.changed = true;
+        self.buffer_usage.insert(usage);
+    }
+
+    /// Creates a writer that can be used to directly write elements into the target buffer.
+    ///
+    /// This method uses less memory and performs fewer memory copies than using [`push`] and [`write_buffer`].
+    ///
+    /// `max_count` *must* be greater than or equal to the number of elements that are to be written to the buffer, or
+    /// the writer will panic while writing.  Dropping the writer will schedule the buffer write into the provided
+    /// [`RenderQueue`].
+    ///
+    /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
+    /// allocated does not have enough capacity to hold `max_count` elements, a new GPU-side buffer is created.
+    ///
+    /// Returns `None` if there is no allocated GPU-side buffer, and `max_count` is 0.
+    ///
+    /// [`push`]: Self::push
+    /// [`write_buffer`]: Self::write_buffer
+    #[inline]
+    pub fn get_writer<'a>(
+        &'a mut self,
+        max_count: usize,
+        device: &RenderDevice,
+        queue: &'a RenderQueue,
+    ) -> Option<DynamicUniformBufferWriter<'a, T>> {
+        let alignment = if cfg!(target_abi = "sim") {
+            // On iOS simulator on silicon macs, metal validation check that the host OS alignment
+            // is respected, but the device reports the correct value for iOS, which is smaller.
+            // Use the larger value.
+            // See https://github.com/gfx-rs/wgpu/issues/7057 - remove if it's not needed anymore.
+            AlignmentValue::new(256)
+        } else {
+            let limits = device.limits();
+            AlignmentValue::new(limits.min_uniform_buffer_offset_alignment as u64)
+        };
+
+        // Each element occupies its minimum size rounded up to the offset alignment.
+        let element_stride = alignment.round_up(T::min_size().get());
+        let size = element_stride.checked_mul(max_count as u64).unwrap();
+        let mut capacity = self.buffer.as_deref().map_or(0, wgpu::Buffer::size);
+
+        if capacity < size || (self.changed && size > 0) {
+            let descriptor = BufferDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                size,
+                mapped_at_creation: false,
+            };
+            let new_buffer = device.create_buffer(&descriptor);
+            capacity = new_buffer.size();
+            self.buffer = Some(new_buffer);
+            self.changed = false;
+        }
+
+        let buffer = self.buffer.as_deref()?;
+        // A zero-sized buffer cannot be written through a view, so bail out with `None`.
+        let whole_buffer = NonZero::<u64>::new(buffer.size())?;
+        let buffer_view = queue.write_buffer_with(buffer, 0, whole_buffer).unwrap();
+
+        let target = QueueWriteBufferViewWrapper {
+            capacity: capacity as usize,
+            buffer_view,
+        };
+        Some(DynamicUniformBufferWriter {
+            buffer: encase::DynamicUniformBuffer::new_with_alignment(target, alignment.get()),
+            _marker: PhantomData,
+        })
+    }
+
+    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
+    /// and the provided [`RenderQueue`].
+    ///
+    /// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
+    /// allocated does not have enough capacity, a new GPU-side buffer is created. A new buffer is also created when
+    /// the label or usage flags changed and there is data to upload.
+    #[inline]
+    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
+        let size = self.scratch.as_ref().len() as u64;
+        let allocated = self.buffer.as_deref().map_or(0, wgpu::Buffer::size);
+        let recreate_for_change = self.changed && size > 0;
+
+        if allocated < size || recreate_for_change {
+            let descriptor = BufferInitDescriptor {
+                label: self.label.as_deref(),
+                usage: self.buffer_usage,
+                contents: self.scratch.as_ref(),
+            };
+            self.buffer = Some(device.create_buffer_with_data(&descriptor));
+            self.changed = false;
+        } else if let Some(buffer) = self.buffer.as_ref() {
+            queue.write_buffer(buffer, 0, self.scratch.as_ref());
+        }
+    }
+
+    /// Discards all staged bytes and resets the write offset to 0. The GPU-side buffer is left untouched.
+    #[inline]
+    pub fn clear(&mut self) {
+        let staged = self.scratch.as_mut();
+        staged.clear();
+        self.scratch.set_offset(0);
+    }
+}
+
+/// A writer that can be used to directly write elements into the target buffer.
+///
+/// For more information, see [`DynamicUniformBuffer::get_writer`].
+pub struct DynamicUniformBufferWriter<'a, T> {
+    /// encase writer that serializes directly into the queue's write-buffer view.
+    buffer: encase::DynamicUniformBuffer<QueueWriteBufferViewWrapper<'a>>,
+    /// Ties the writer to its element type `T` without storing a `T`.
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<'a, T: ShaderType + WriteInto> DynamicUniformBufferWriter<'a, T> {
+    /// Serializes `value` into the target buffer and returns the offset reported by the
+    /// underlying writer, narrowed to `u32`.
+    pub fn write(&mut self, value: &T) -> u32 {
+        let offset = self.buffer.write(value).unwrap();
+        // Plain narrowing cast: an offset that does not fit in `u32` is truncated.
+        offset as u32
+    }
+}
+
+/// A wrapper to work around the orphan rule so that [`wgpu::QueueWriteBufferView`] can implement
+/// [`BufferMut`].
+struct QueueWriteBufferViewWrapper<'a> {
+    /// The wrapped view that writes are forwarded to.
+    buffer_view: wgpu::QueueWriteBufferView<'a>,
+    // Must be kept separately and cannot be retrieved from buffer_view, as the read-only access will
+    // invoke a panic.
+    capacity: usize,
+}
+
+// Forwards encase's `BufferMut` operations to the wrapped `wgpu::QueueWriteBufferView`.
+impl<'a> BufferMut for QueueWriteBufferViewWrapper<'a> {
+    /// Reports the separately stored capacity rather than querying `buffer_view`.
+    #[inline]
+    fn capacity(&self) -> usize {
+        self.capacity
+    }
+
+    /// Writes the `N` bytes of `val` at `offset` by delegating to `buffer_view`.
+    #[inline]
+    fn write<const N: usize>(&mut self, offset: usize, val: &[u8; N]) {
+        self.buffer_view.write(offset, val);
+    }
+
+    /// Writes the bytes of `val` at `offset` by delegating to `buffer_view`.
+    #[inline]
+    fn write_slice(&mut self, offset: usize, val: &[u8]) {
+        self.buffer_view.write_slice(offset, val);
+    }
+}
+
+impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a DynamicUniformBuffer<T> {
+    /// Converts the buffer into its [`BindingResource`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if [`DynamicUniformBuffer::binding`] returns `None`, i.e. no GPU-side buffer exists.
+    #[inline]
+    fn into_binding(self) -> BindingResource<'a> {
+        // Use the same descriptive panic message as the other buffer `IntoBinding` impls
+        // instead of a bare `unwrap`.
+        self.binding().expect("Failed to get buffer")
+    }
+}