diff --git a/render/wgpu/shaders/bitmap.frag b/render/wgpu/shaders/bitmap.frag index 932ff6c50..dd9f4d8e8 100644 --- a/render/wgpu/shaders/bitmap.frag +++ b/render/wgpu/shaders/bitmap.frag @@ -1,16 +1,17 @@ #version 450 // Push constants: matrix + color -layout(push_constant) uniform FragmentPushConstants { - layout(offset = 64) vec4 mult_color; +layout(set = 1, binding = 0) uniform Transforms { + mat4 world_matrix; + vec4 mult_color; vec4 add_color; }; // Set 2: bitmap -layout(set = 1, binding = 1) uniform texture2D t_color; +layout(set = 2, binding = 1) uniform texture2D t_color; // Set 3: sampler -layout(set = 2, binding = 0) uniform sampler s_color; +layout(set = 3, binding = 0) uniform sampler s_color; layout(location=0) in vec2 frag_uv; diff --git a/render/wgpu/shaders/bitmap.frag.spv b/render/wgpu/shaders/bitmap.frag.spv index 4ab7d8b06..6551e1dd3 100644 Binary files a/render/wgpu/shaders/bitmap.frag.spv and b/render/wgpu/shaders/bitmap.frag.spv differ diff --git a/render/wgpu/shaders/color.frag b/render/wgpu/shaders/color.frag index 4177ae376..fe00146ac 100644 --- a/render/wgpu/shaders/color.frag +++ b/render/wgpu/shaders/color.frag @@ -3,8 +3,9 @@ layout(location=0) in vec4 frag_color; // Push constants: matrix + color -layout(push_constant) uniform FragmentPushConstants { - layout(offset = 64) vec4 mult_color; +layout(set = 1, binding = 0) uniform Transforms { + mat4 world_matrix; + vec4 mult_color; vec4 add_color; }; diff --git a/render/wgpu/shaders/color.frag.spv b/render/wgpu/shaders/color.frag.spv index 996ab4e1b..2be98865b 100644 Binary files a/render/wgpu/shaders/color.frag.spv and b/render/wgpu/shaders/color.frag.spv differ diff --git a/render/wgpu/shaders/color.vert b/render/wgpu/shaders/color.vert index 496c4cf0b..246dc00e4 100644 --- a/render/wgpu/shaders/color.vert +++ b/render/wgpu/shaders/color.vert @@ -6,8 +6,10 @@ layout(set = 0, binding = 0) uniform Globals { }; // Push constants: matrix + color -layout(push_constant) uniform 
VertexPushConstants { +layout(set = 1, binding = 0) uniform Transforms { mat4 world_matrix; + vec4 mult_color; + vec4 add_color; }; layout(location = 0) in vec2 position; diff --git a/render/wgpu/shaders/color.vert.spv b/render/wgpu/shaders/color.vert.spv index a13c707a3..4357fd7ea 100644 Binary files a/render/wgpu/shaders/color.vert.spv and b/render/wgpu/shaders/color.vert.spv differ diff --git a/render/wgpu/shaders/gradient.frag b/render/wgpu/shaders/gradient.frag index c793893c2..6a0c7f4f9 100644 --- a/render/wgpu/shaders/gradient.frag +++ b/render/wgpu/shaders/gradient.frag @@ -1,13 +1,14 @@ #version 450 // Push constants: matrix + color -layout(push_constant) uniform FragmentPushConstants { - layout(offset = 64) vec4 mult_color; +layout(set = 1, binding = 0) uniform Transforms { + mat4 world_matrix; + vec4 mult_color; vec4 add_color; }; // Set 1: gradient -layout(std430, set = 1, binding = 1) readonly buffer Gradient { +layout(std430, set = 2, binding = 1) readonly buffer Gradient { vec4 u_colors[16]; float u_ratios[16]; int u_gradient_type; diff --git a/render/wgpu/shaders/gradient.frag.spv b/render/wgpu/shaders/gradient.frag.spv index 53898ddbe..dad2c557d 100644 Binary files a/render/wgpu/shaders/gradient.frag.spv and b/render/wgpu/shaders/gradient.frag.spv differ diff --git a/render/wgpu/shaders/texture.vert b/render/wgpu/shaders/texture.vert index 9651531ea..e689c01df 100644 --- a/render/wgpu/shaders/texture.vert +++ b/render/wgpu/shaders/texture.vert @@ -1,17 +1,19 @@ #version 450 // Set 0: globals -layout(set = 0, binding = 0) uniform Globals { +layout(set = 0, binding = 0) uniform Transforms { mat4 view_matrix; }; // Push constants: matrix + color -layout(push_constant) uniform VertexPushConstants { +layout(set = 1, binding = 0) uniform DynamicUniforms { mat4 world_matrix; + vec4 mult_color; + vec4 add_color; }; // Set 1: bitmap or gradient -layout(set = 1, binding = 0) uniform Texture { +layout(set = 2, binding = 0) uniform Texture { mat4 u_matrix; }; 
diff --git a/render/wgpu/shaders/texture.vert.spv b/render/wgpu/shaders/texture.vert.spv index d6f41e25b..924e87de9 100644 Binary files a/render/wgpu/shaders/texture.vert.spv and b/render/wgpu/shaders/texture.vert.spv differ diff --git a/render/wgpu/src/globals.rs b/render/wgpu/src/globals.rs index f5d6e004b..42d9a5000 100644 --- a/render/wgpu/src/globals.rs +++ b/render/wgpu/src/globals.rs @@ -1,3 +1,4 @@ +//use super::utils::create_debug_label; use bytemuck::{Pod, Zeroable}; use wgpu::util::DeviceExt; diff --git a/render/wgpu/src/lib.rs b/render/wgpu/src/lib.rs index f6b13aaab..e9f5c7e2c 100644 --- a/render/wgpu/src/lib.rs +++ b/render/wgpu/src/lib.rs @@ -30,12 +30,14 @@ mod bitmaps; mod globals; mod pipelines; pub mod target; +mod uniform_buffer; #[cfg(feature = "clap")] pub mod clap; use crate::bitmaps::BitmapSamplers; use crate::globals::Globals; +use crate::uniform_buffer::UniformBuffer; use std::collections::HashMap; use std::path::Path; pub use wgpu; @@ -43,8 +45,10 @@ pub use wgpu; pub struct Descriptors { pub device: wgpu::Device, pub info: wgpu::AdapterInfo, + pub limits: wgpu::Limits, queue: wgpu::Queue, globals: Globals, + uniform_buffers: UniformBuffer, pipelines: Pipelines, bitmap_samplers: BitmapSamplers, msaa_sample_count: u32, @@ -56,23 +60,45 @@ impl Descriptors { queue: wgpu::Queue, info: wgpu::AdapterInfo, ) -> Result { + let limits = device.limits(); // TODO: Allow this to be set from command line/settings file. 
let msaa_sample_count = 4; - let bitmap_samplers = BitmapSamplers::new(&device); let globals = Globals::new(&device); + let uniform_buffer_layout_label = create_debug_label!("Uniform buffer bind group layout"); + let uniform_buffer_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }], + label: uniform_buffer_layout_label.as_deref(), + }); + let uniform_buffers = UniformBuffer::new( + uniform_buffer_layout, + limits.min_uniform_buffer_offset_alignment, + ); let pipelines = Pipelines::new( &device, msaa_sample_count, bitmap_samplers.layout(), globals.layout(), + uniform_buffers.layout(), )?; Ok(Self { device, info, + limits, queue, globals, + uniform_buffers, pipelines, bitmap_samplers, msaa_sample_count, @@ -99,7 +125,7 @@ pub struct WgpuRenderBackend { #[allow(dead_code)] struct Frame<'a, T: RenderTarget> { - frame_data: Box<(wgpu::CommandEncoder, T::Frame)>, + frame_data: Box<(wgpu::CommandEncoder, T::Frame, wgpu::CommandEncoder)>, // TODO: This is a self-reference to the above, so we // use some unsafe to cast the lifetime away. 
We know this @@ -129,6 +155,7 @@ pub enum MaskState { #[derive(Copy, Clone, Debug, Pod, Zeroable)] struct Transforms { world_matrix: [[f32; 4]; 4], + color_adjustments: ColorAdjustments, } #[repr(C)] @@ -376,13 +403,8 @@ impl WgpuRenderBackend { let (device, queue) = block_on(adapter.request_device( &wgpu::DeviceDescriptor { label: None, - features: wgpu::Features::PUSH_CONSTANTS, - limits: wgpu::Limits { - max_push_constant_size: (std::mem::size_of::() - + std::mem::size_of::()) - as u32, - ..Default::default() - }, + features: wgpu::Features::empty(), + ..Default::default() }, trace_path, ))?; @@ -808,6 +830,7 @@ impl RenderBackend for WgpuRenderBackend { fn begin_frame(&mut self, clear: Color) { self.mask_state = MaskState::NoMask; self.num_masks = 0; + self.descriptors.uniform_buffers.reset(); let frame_output = match self.target.get_next_texture() { Ok(frame) => frame, @@ -830,7 +853,14 @@ impl RenderBackend for WgpuRenderBackend { .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: label.as_deref(), }); - let mut frame_data = Box::new((draw_encoder, frame_output)); + let uniform_encoder_label = create_debug_label!("Uniform upload command encoder"); + let uniform_encoder = + self.descriptors + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: uniform_encoder_label.as_deref(), + }); + let mut frame_data = Box::new((draw_encoder, frame_output, uniform_encoder)); self.descriptors .globals @@ -917,24 +947,26 @@ impl RenderBackend for WgpuRenderBackend { .bitmap_pipelines .pipeline_for(self.mask_state), ); - frame.render_pass.set_push_constants( - wgpu::ShaderStages::VERTEX, - 0, - bytemuck::cast_slice(&[Transforms { world_matrix }]), - ); - frame.render_pass.set_push_constants( - wgpu::ShaderStages::FRAGMENT, - std::mem::size_of::() as u32, - bytemuck::cast_slice(&[ColorAdjustments::from(transform.color_transform)]), - ); frame .render_pass .set_bind_group(0, self.descriptors.globals.bind_group(), &[]); + + 
self.descriptors.uniform_buffers.write_uniforms( + &self.descriptors.device, + &mut frame.frame_data.2, + &mut frame.render_pass, + 1, + &Transforms { + world_matrix, + color_adjustments: ColorAdjustments::from(transform.color_transform), + }, + ); + frame .render_pass - .set_bind_group(1, &texture.bind_group, &[]); + .set_bind_group(2, &texture.bind_group, &[]); frame.render_pass.set_bind_group( - 2, + 3, self.descriptors .bitmap_samplers .get_bind_group(false, smoothing), @@ -988,6 +1020,17 @@ impl RenderBackend for WgpuRenderBackend { .render_pass .set_bind_group(0, self.descriptors.globals.bind_group(), &[]); + self.descriptors.uniform_buffers.write_uniforms( + &self.descriptors.device, + &mut frame.frame_data.2, + &mut frame.render_pass, + 1, + &Transforms { + world_matrix, + color_adjustments: ColorAdjustments::from(transform.color_transform), + }, + ); + for draw in &mesh.draws { match &draw.draw_type { DrawType::Color => { @@ -1005,7 +1048,7 @@ impl RenderBackend for WgpuRenderBackend { .gradient_pipelines .pipeline_for(self.mask_state), ); - frame.render_pass.set_bind_group(1, bind_group, &[]); + frame.render_pass.set_bind_group(2, bind_group, &[]); } DrawType::Bitmap { is_repeating, @@ -1019,9 +1062,9 @@ impl RenderBackend for WgpuRenderBackend { .bitmap_pipelines .pipeline_for(self.mask_state), ); - frame.render_pass.set_bind_group(1, bind_group, &[]); + frame.render_pass.set_bind_group(2, bind_group, &[]); frame.render_pass.set_bind_group( - 2, + 3, self.descriptors .bitmap_samplers .get_bind_group(*is_repeating, *is_smoothed), @@ -1030,16 +1073,6 @@ impl RenderBackend for WgpuRenderBackend { } } - frame.render_pass.set_push_constants( - wgpu::ShaderStages::VERTEX, - 0, - bytemuck::cast_slice(&[Transforms { world_matrix }]), - ); - frame.render_pass.set_push_constants( - wgpu::ShaderStages::FRAGMENT, - std::mem::size_of::() as u32, - bytemuck::cast_slice(&[ColorAdjustments::from(transform.color_transform)]), - ); frame .render_pass .set_vertex_buffer(0, 
draw.vertex_buffer.slice(..)); @@ -1097,23 +1130,24 @@ impl RenderBackend for WgpuRenderBackend { .pipeline_for(self.mask_state), ); - frame.render_pass.set_push_constants( - wgpu::ShaderStages::VERTEX, - 0, - bytemuck::cast_slice(&[Transforms { world_matrix }]), - ); - frame.render_pass.set_push_constants( - wgpu::ShaderStages::FRAGMENT, - std::mem::size_of::() as u32, - bytemuck::cast_slice(&[ColorAdjustments { - mult_color, - add_color, - }]), - ); - frame .render_pass .set_bind_group(0, self.descriptors.globals.bind_group(), &[]); + + self.descriptors.uniform_buffers.write_uniforms( + &self.descriptors.device, + &mut frame.frame_data.2, + &mut frame.render_pass, + 1, + &Transforms { + world_matrix, + color_adjustments: ColorAdjustments { + mult_color, + add_color, + }, + }, + ); + frame .render_pass .set_vertex_buffer(0, self.quad_vbo.slice(..)); @@ -1140,12 +1174,13 @@ impl RenderBackend for WgpuRenderBackend { if let Some(frame) = self.current_frame.take() { // Finalize render pass. drop(frame.render_pass); - + self.descriptors.uniform_buffers.finish(); let draw_encoder = frame.frame_data.0; + let uniform_encoder = frame.frame_data.2; self.target.submit( &self.descriptors.device, &self.descriptors.queue, - vec![draw_encoder.finish()], + vec![uniform_encoder.finish(), draw_encoder.finish()], frame.frame_data.1, ); } diff --git a/render/wgpu/src/pipelines.rs b/render/wgpu/src/pipelines.rs index 862dffca7..144d14b56 100644 --- a/render/wgpu/src/pipelines.rs +++ b/render/wgpu/src/pipelines.rs @@ -30,6 +30,7 @@ impl Pipelines { msaa_sample_count: u32, sampler_layout: &wgpu::BindGroupLayout, globals_layout: &wgpu::BindGroupLayout, + dynamic_uniforms_layout: &wgpu::BindGroupLayout, ) -> Result { // TODO: Naga validation errors when encountering push constants currently. // Disable validation for now. Remove this when Naga can swallow it. 
@@ -68,6 +69,7 @@ impl Pipelines { msaa_sample_count, &vertex_buffers_description, globals_layout, + dynamic_uniforms_layout, ); let bitmap_bind_layout_label = create_debug_label!("Bitmap shape bind group layout"); @@ -106,6 +108,7 @@ impl Pipelines { &vertex_buffers_description, sampler_layout, globals_layout, + dynamic_uniforms_layout, &bitmap_bind_layout, ); @@ -144,6 +147,7 @@ impl Pipelines { msaa_sample_count, &vertex_buffers_description, globals_layout, + dynamic_uniforms_layout, &gradient_bind_layout, ); @@ -206,24 +210,13 @@ fn create_color_pipelines( msaa_sample_count: u32, vertex_buffers_description: &[wgpu::VertexBufferLayout<'_>], globals_layout: &wgpu::BindGroupLayout, + dynamic_uniforms_layout: &wgpu::BindGroupLayout, ) -> ShapePipeline { - let transforms_size = std::mem::size_of::() as u32; - let colors_size = std::mem::size_of::() as u32; - let pipeline_layout_label = create_debug_label!("Color shape pipeline layout"); let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: pipeline_layout_label.as_deref(), - bind_group_layouts: &[globals_layout], - push_constant_ranges: &[ - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::VERTEX, - range: 0..transforms_size, - }, - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::FRAGMENT, - range: transforms_size..transforms_size + colors_size, - }, - ], + bind_group_layouts: &[globals_layout, dynamic_uniforms_layout], + push_constant_ranges: &[], }); let mask_pipelines = enum_map! 
{ @@ -381,22 +374,19 @@ fn create_bitmap_pipeline( vertex_buffers_layout: &[wgpu::VertexBufferLayout<'_>], sampler_layout: &wgpu::BindGroupLayout, globals_layout: &wgpu::BindGroupLayout, + dynamic_uniforms_layout: &wgpu::BindGroupLayout, bitmap_bind_layout: &wgpu::BindGroupLayout, ) -> ShapePipeline { let pipeline_layout_label = create_debug_label!("Bitmap shape pipeline layout"); let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: pipeline_layout_label.as_deref(), - bind_group_layouts: &[globals_layout, bitmap_bind_layout, sampler_layout], - push_constant_ranges: &[ - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::VERTEX, - range: 0..64, - }, - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::FRAGMENT, - range: 64..96, - }, + bind_group_layouts: &[ + globals_layout, + dynamic_uniforms_layout, + bitmap_bind_layout, + sampler_layout, ], + push_constant_ranges: &[], }); let mask_pipelines = enum_map! { @@ -552,22 +542,18 @@ fn create_gradient_pipeline( msaa_sample_count: u32, vertex_buffers_layout: &[wgpu::VertexBufferLayout<'_>], globals_layout: &wgpu::BindGroupLayout, + dynamic_uniforms_layout: &wgpu::BindGroupLayout, gradient_bind_layout: &wgpu::BindGroupLayout, ) -> ShapePipeline { let pipeline_layout_label = create_debug_label!("Gradient shape pipeline layout"); let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: pipeline_layout_label.as_deref(), - bind_group_layouts: &[globals_layout, gradient_bind_layout], - push_constant_ranges: &[ - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::VERTEX, - range: 0..64, - }, - wgpu::PushConstantRange { - stages: wgpu::ShaderStages::FRAGMENT, - range: 64..96, - }, + bind_group_layouts: &[ + globals_layout, + dynamic_uniforms_layout, + gradient_bind_layout, ], + push_constant_ranges: &[], }); let mask_pipelines = enum_map! 
{ diff --git a/render/wgpu/src/uniform_buffer.rs b/render/wgpu/src/uniform_buffer.rs new file mode 100644 index 000000000..551748d87 --- /dev/null +++ b/render/wgpu/src/uniform_buffer.rs @@ -0,0 +1,148 @@ +use bytemuck::Pod; +use futures::{ + executor::{LocalPool, LocalSpawner}, + task::LocalSpawnExt, +}; +use std::{convert::TryInto, marker::PhantomData, mem}; +use wgpu::util::StagingBelt; + +/// A simple chunked bump allocator for managing dynamic uniforms that change per-draw. +/// Each draw call may use `UniformBuffer::write_uniforms` to queue +/// the upload of uniform data to the GPU. +pub struct UniformBuffer { + blocks: Vec, + buffer_layout: wgpu::BindGroupLayout, + staging_belt: StagingBelt, + executor: LocalPool, + spawner: LocalSpawner, + aligned_uniforms_size: u32, + cur_block: usize, + cur_offset: u32, + _phantom: PhantomData, +} + +impl UniformBuffer { + /// The size of each block. + /// Uniforms are copied into each block until it reaches capacity, at which point a new + /// block will be allocated. + const BLOCK_SIZE: u32 = 65536; + + /// The uniform data size for a single draw call. + const UNIFORMS_SIZE: u64 = mem::size_of::() as u64; + + /// Creates a new `UniformBuffer` with the given uniform layout. + pub fn new(buffer_layout: wgpu::BindGroupLayout, uniform_alignment: u32) -> Self { + // Create local executor for uniform uploads. + let executor = LocalPool::new(); + let spawner = executor.spawner(); + + // Calculate alignment of uniforms. + let align_mask = uniform_alignment - 1; + let aligned_uniforms_size = (Self::UNIFORMS_SIZE as u32 + align_mask) & !align_mask; + + Self { + blocks: Vec::with_capacity(8), + buffer_layout, + executor, + spawner, + staging_belt: StagingBelt::new(u64::from(Self::BLOCK_SIZE) / 2), + aligned_uniforms_size, + cur_block: 0, + cur_offset: 0, + _phantom: PhantomData, + } + } + + /// Returns the bind group layout for the uniforms in this buffer. 
+ pub fn layout(&self) -> &wgpu::BindGroupLayout { + &self.buffer_layout + } + + /// Resets the buffer and staging belt. + /// Should be called at the start of a frame. + pub fn reset(&mut self) { + self.cur_block = 0; + self.cur_offset = 0; + let _ = self.spawner.spawn_local(self.staging_belt.recall()); + self.executor.run_until_stalled(); + } + + /// Enqueue `data` for upload into the given command encoder, and set the bind group on `render_pass` + /// to use the uniform data. + pub fn write_uniforms<'a>( + &'a mut self, + device: &wgpu::Device, + command_encoder: &mut wgpu::CommandEncoder, + render_pass: &mut wgpu::RenderPass<'a>, + bind_group_index: u32, + data: &T, + ) { + // Allocate a new block if we've exceeded our capacity. + if self.cur_block >= self.blocks.len() { + self.allocate_block(device); + } + let block = &self.blocks[self.cur_block]; + + // Copy the data into the buffer via the staging belt. + self.staging_belt + .write_buffer( + command_encoder, + &block.buffer, + self.cur_offset.into(), + Self::UNIFORMS_SIZE.try_into().unwrap(), + device, + ) + .copy_from_slice(bytemuck::cast_slice(std::slice::from_ref(data))); + + // Set the bind group to the final uniform location. + render_pass.set_bind_group(bind_group_index, &block.bind_group, &[self.cur_offset]); + + // Advance offset. + self.cur_offset += self.aligned_uniforms_size; + // Advance to next buffer if we are out of room in this buffer. + if Self::BLOCK_SIZE - self.cur_offset < self.aligned_uniforms_size { + self.cur_block += 1; + self.cur_offset = 0; + } + } + + /// Should be called at the end of a frame. + pub fn finish(&mut self) { + self.staging_belt.finish(); + } + + /// Adds a newly allocated buffer to the block list, and returns it. 
+ fn allocate_block(&mut self, device: &wgpu::Device) -> &Block { + let buffer_label = create_debug_label!("Dynamic buffer"); + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: buffer_label.as_deref(), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + size: Self::BLOCK_SIZE.into(), + mapped_at_creation: false, + }); + + let bind_group_label = create_debug_label!("Dynamic buffer bind group"); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: bind_group_label.as_deref(), + layout: &self.buffer_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &buffer, + offset: 0, + size: wgpu::BufferSize::new(std::mem::size_of::() as u64), + }), + }], + }); + + self.blocks.push(Block { buffer, bind_group }); + self.blocks.last().unwrap() + } +} + +/// A block of GPU memory that will contain our uniforms. +#[derive(Debug)] +struct Block { + buffer: wgpu::Buffer, + bind_group: wgpu::BindGroup, +}