wgpu: Use uniforms instead of push constants

This commit is contained in:
Mike Welsh 2021-09-07 13:25:56 -07:00
parent 0cbdad1ff2
commit decc38a45c
14 changed files with 275 additions and 98 deletions

View File

@ -1,16 +1,17 @@
#version 450
// Push constants: matrix + color
layout(push_constant) uniform FragmentPushConstants {
layout(offset = 64) vec4 mult_color;
layout(set = 1, binding = 0) uniform Transforms {
mat4 world_matrix;
vec4 mult_color;
vec4 add_color;
};
// Set 2: bitmap
layout(set = 1, binding = 1) uniform texture2D t_color;
layout(set = 2, binding = 1) uniform texture2D t_color;
// Set 3: sampler
layout(set = 2, binding = 0) uniform sampler s_color;
layout(set = 3, binding = 0) uniform sampler s_color;
layout(location=0) in vec2 frag_uv;

Binary file not shown.

View File

@ -3,8 +3,9 @@
layout(location=0) in vec4 frag_color;
// Push constants: matrix + color
layout(push_constant) uniform FragmentPushConstants {
layout(offset = 64) vec4 mult_color;
layout(set = 1, binding = 0) uniform Transforms {
mat4 world_matrix;
vec4 mult_color;
vec4 add_color;
};

Binary file not shown.

View File

@ -6,8 +6,10 @@ layout(set = 0, binding = 0) uniform Globals {
};
// Push constants: matrix + color
layout(push_constant) uniform VertexPushConstants {
layout(set = 1, binding = 0) uniform Transforms {
mat4 world_matrix;
vec4 mult_color;
vec4 add_color;
};
layout(location = 0) in vec2 position;

Binary file not shown.

View File

@ -1,13 +1,14 @@
#version 450
// Push constants: matrix + color
layout(push_constant) uniform FragmentPushConstants {
layout(offset = 64) vec4 mult_color;
layout(set = 1, binding = 0) uniform Transforms {
mat4 world_matrix;
vec4 mult_color;
vec4 add_color;
};
// Set 1: gradient
layout(std430, set = 1, binding = 1) readonly buffer Gradient {
layout(std430, set = 2, binding = 1) readonly buffer Gradient {
vec4 u_colors[16];
float u_ratios[16];
int u_gradient_type;

View File

@ -1,17 +1,19 @@
#version 450
// Set 0: globals
layout(set = 0, binding = 0) uniform Globals {
layout(set = 0, binding = 0) uniform Transforms {
mat4 view_matrix;
};
// Push constants: matrix + color
layout(push_constant) uniform VertexPushConstants {
layout(set = 1, binding = 0) uniform DynamicUniforms {
mat4 world_matrix;
vec4 mult_color;
vec4 add_color;
};
// Set 1: bitmap or gradient
layout(set = 1, binding = 0) uniform Texture {
layout(set = 2, binding = 0) uniform Texture {
mat4 u_matrix;
};

Binary file not shown.

View File

@ -1,3 +1,4 @@
//use super::utils::create_debug_label;
use bytemuck::{Pod, Zeroable};
use wgpu::util::DeviceExt;

View File

@ -30,12 +30,14 @@ mod bitmaps;
mod globals;
mod pipelines;
pub mod target;
mod uniform_buffer;
#[cfg(feature = "clap")]
pub mod clap;
use crate::bitmaps::BitmapSamplers;
use crate::globals::Globals;
use crate::uniform_buffer::UniformBuffer;
use std::collections::HashMap;
use std::path::Path;
pub use wgpu;
@ -43,8 +45,10 @@ pub use wgpu;
pub struct Descriptors {
pub device: wgpu::Device,
pub info: wgpu::AdapterInfo,
pub limits: wgpu::Limits,
queue: wgpu::Queue,
globals: Globals,
uniform_buffers: UniformBuffer<Transforms>,
pipelines: Pipelines,
bitmap_samplers: BitmapSamplers,
msaa_sample_count: u32,
@ -56,23 +60,45 @@ impl Descriptors {
queue: wgpu::Queue,
info: wgpu::AdapterInfo,
) -> Result<Self, Error> {
let limits = device.limits();
// TODO: Allow this to be set from command line/settings file.
let msaa_sample_count = 4;
let bitmap_samplers = BitmapSamplers::new(&device);
let globals = Globals::new(&device);
let uniform_buffer_layout_label = create_debug_label!("Uniform buffer bind group layout");
let uniform_buffer_layout =
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: true,
min_binding_size: None,
},
count: None,
}],
label: uniform_buffer_layout_label.as_deref(),
});
let uniform_buffers = UniformBuffer::new(
uniform_buffer_layout,
limits.min_uniform_buffer_offset_alignment,
);
let pipelines = Pipelines::new(
&device,
msaa_sample_count,
bitmap_samplers.layout(),
globals.layout(),
uniform_buffers.layout(),
)?;
Ok(Self {
device,
info,
limits,
queue,
globals,
uniform_buffers,
pipelines,
bitmap_samplers,
msaa_sample_count,
@ -99,7 +125,7 @@ pub struct WgpuRenderBackend<T: RenderTarget> {
#[allow(dead_code)]
struct Frame<'a, T: RenderTarget> {
frame_data: Box<(wgpu::CommandEncoder, T::Frame)>,
frame_data: Box<(wgpu::CommandEncoder, T::Frame, wgpu::CommandEncoder)>,
// TODO: This is a self-reference to the above, so we
// use some unsafe to cast the lifetime away. We know this
@ -129,6 +155,7 @@ pub enum MaskState {
#[derive(Copy, Clone, Debug, Pod, Zeroable)]
struct Transforms {
world_matrix: [[f32; 4]; 4],
color_adjustments: ColorAdjustments,
}
#[repr(C)]
@ -376,13 +403,8 @@ impl<T: RenderTarget> WgpuRenderBackend<T> {
let (device, queue) = block_on(adapter.request_device(
&wgpu::DeviceDescriptor {
label: None,
features: wgpu::Features::PUSH_CONSTANTS,
limits: wgpu::Limits {
max_push_constant_size: (std::mem::size_of::<Transforms>()
+ std::mem::size_of::<ColorAdjustments>())
as u32,
..Default::default()
},
features: wgpu::Features::empty(),
..Default::default()
},
trace_path,
))?;
@ -808,6 +830,7 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
fn begin_frame(&mut self, clear: Color) {
self.mask_state = MaskState::NoMask;
self.num_masks = 0;
self.descriptors.uniform_buffers.reset();
let frame_output = match self.target.get_next_texture() {
Ok(frame) => frame,
@ -830,7 +853,14 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: label.as_deref(),
});
let mut frame_data = Box::new((draw_encoder, frame_output));
let uniform_encoder_label = create_debug_label!("Uniform upload command encoder");
let uniform_encoder =
self.descriptors
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: uniform_encoder_label.as_deref(),
});
let mut frame_data = Box::new((draw_encoder, frame_output, uniform_encoder));
self.descriptors
.globals
@ -917,24 +947,26 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.bitmap_pipelines
.pipeline_for(self.mask_state),
);
frame.render_pass.set_push_constants(
wgpu::ShaderStages::VERTEX,
0,
bytemuck::cast_slice(&[Transforms { world_matrix }]),
);
frame.render_pass.set_push_constants(
wgpu::ShaderStages::FRAGMENT,
std::mem::size_of::<Transforms>() as u32,
bytemuck::cast_slice(&[ColorAdjustments::from(transform.color_transform)]),
);
frame
.render_pass
.set_bind_group(0, self.descriptors.globals.bind_group(), &[]);
self.descriptors.uniform_buffers.write_uniforms(
&self.descriptors.device,
&mut frame.frame_data.2,
&mut frame.render_pass,
1,
&Transforms {
world_matrix,
color_adjustments: ColorAdjustments::from(transform.color_transform),
},
);
frame
.render_pass
.set_bind_group(1, &texture.bind_group, &[]);
.set_bind_group(2, &texture.bind_group, &[]);
frame.render_pass.set_bind_group(
2,
3,
self.descriptors
.bitmap_samplers
.get_bind_group(false, smoothing),
@ -988,6 +1020,17 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.render_pass
.set_bind_group(0, self.descriptors.globals.bind_group(), &[]);
self.descriptors.uniform_buffers.write_uniforms(
&self.descriptors.device,
&mut frame.frame_data.2,
&mut frame.render_pass,
1,
&Transforms {
world_matrix,
color_adjustments: ColorAdjustments::from(transform.color_transform),
},
);
for draw in &mesh.draws {
match &draw.draw_type {
DrawType::Color => {
@ -1005,7 +1048,7 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.gradient_pipelines
.pipeline_for(self.mask_state),
);
frame.render_pass.set_bind_group(1, bind_group, &[]);
frame.render_pass.set_bind_group(2, bind_group, &[]);
}
DrawType::Bitmap {
is_repeating,
@ -1019,9 +1062,9 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.bitmap_pipelines
.pipeline_for(self.mask_state),
);
frame.render_pass.set_bind_group(1, bind_group, &[]);
frame.render_pass.set_bind_group(2, bind_group, &[]);
frame.render_pass.set_bind_group(
2,
3,
self.descriptors
.bitmap_samplers
.get_bind_group(*is_repeating, *is_smoothed),
@ -1030,16 +1073,6 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
}
}
frame.render_pass.set_push_constants(
wgpu::ShaderStages::VERTEX,
0,
bytemuck::cast_slice(&[Transforms { world_matrix }]),
);
frame.render_pass.set_push_constants(
wgpu::ShaderStages::FRAGMENT,
std::mem::size_of::<Transforms>() as u32,
bytemuck::cast_slice(&[ColorAdjustments::from(transform.color_transform)]),
);
frame
.render_pass
.set_vertex_buffer(0, draw.vertex_buffer.slice(..));
@ -1097,23 +1130,24 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
.pipeline_for(self.mask_state),
);
frame.render_pass.set_push_constants(
wgpu::ShaderStages::VERTEX,
0,
bytemuck::cast_slice(&[Transforms { world_matrix }]),
);
frame.render_pass.set_push_constants(
wgpu::ShaderStages::FRAGMENT,
std::mem::size_of::<Transforms>() as u32,
bytemuck::cast_slice(&[ColorAdjustments {
mult_color,
add_color,
}]),
);
frame
.render_pass
.set_bind_group(0, self.descriptors.globals.bind_group(), &[]);
self.descriptors.uniform_buffers.write_uniforms(
&self.descriptors.device,
&mut frame.frame_data.2,
&mut frame.render_pass,
1,
&Transforms {
world_matrix,
color_adjustments: ColorAdjustments {
mult_color,
add_color,
},
},
);
frame
.render_pass
.set_vertex_buffer(0, self.quad_vbo.slice(..));
@ -1140,12 +1174,13 @@ impl<T: RenderTarget + 'static> RenderBackend for WgpuRenderBackend<T> {
if let Some(frame) = self.current_frame.take() {
// Finalize render pass.
drop(frame.render_pass);
self.descriptors.uniform_buffers.finish();
let draw_encoder = frame.frame_data.0;
let uniform_encoder = frame.frame_data.2;
self.target.submit(
&self.descriptors.device,
&self.descriptors.queue,
vec![draw_encoder.finish()],
vec![uniform_encoder.finish(), draw_encoder.finish()],
frame.frame_data.1,
);
}

View File

@ -30,6 +30,7 @@ impl Pipelines {
msaa_sample_count: u32,
sampler_layout: &wgpu::BindGroupLayout,
globals_layout: &wgpu::BindGroupLayout,
dynamic_uniforms_layout: &wgpu::BindGroupLayout,
) -> Result<Self, Error> {
// TODO: Naga validation errors when encountering push constants currently.
// Disable validation for now. Remove this when Naga can swallow it.
@ -68,6 +69,7 @@ impl Pipelines {
msaa_sample_count,
&vertex_buffers_description,
globals_layout,
dynamic_uniforms_layout,
);
let bitmap_bind_layout_label = create_debug_label!("Bitmap shape bind group layout");
@ -106,6 +108,7 @@ impl Pipelines {
&vertex_buffers_description,
sampler_layout,
globals_layout,
dynamic_uniforms_layout,
&bitmap_bind_layout,
);
@ -144,6 +147,7 @@ impl Pipelines {
msaa_sample_count,
&vertex_buffers_description,
globals_layout,
dynamic_uniforms_layout,
&gradient_bind_layout,
);
@ -206,24 +210,13 @@ fn create_color_pipelines(
msaa_sample_count: u32,
vertex_buffers_description: &[wgpu::VertexBufferLayout<'_>],
globals_layout: &wgpu::BindGroupLayout,
dynamic_uniforms_layout: &wgpu::BindGroupLayout,
) -> ShapePipeline {
let transforms_size = std::mem::size_of::<crate::Transforms>() as u32;
let colors_size = std::mem::size_of::<crate::ColorAdjustments>() as u32;
let pipeline_layout_label = create_debug_label!("Color shape pipeline layout");
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: pipeline_layout_label.as_deref(),
bind_group_layouts: &[globals_layout],
push_constant_ranges: &[
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::VERTEX,
range: 0..transforms_size,
},
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::FRAGMENT,
range: transforms_size..transforms_size + colors_size,
},
],
bind_group_layouts: &[globals_layout, dynamic_uniforms_layout],
push_constant_ranges: &[],
});
let mask_pipelines = enum_map! {
@ -381,22 +374,19 @@ fn create_bitmap_pipeline(
vertex_buffers_layout: &[wgpu::VertexBufferLayout<'_>],
sampler_layout: &wgpu::BindGroupLayout,
globals_layout: &wgpu::BindGroupLayout,
dynamic_uniforms_layout: &wgpu::BindGroupLayout,
bitmap_bind_layout: &wgpu::BindGroupLayout,
) -> ShapePipeline {
let pipeline_layout_label = create_debug_label!("Bitmap shape pipeline layout");
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: pipeline_layout_label.as_deref(),
bind_group_layouts: &[globals_layout, bitmap_bind_layout, sampler_layout],
push_constant_ranges: &[
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::VERTEX,
range: 0..64,
},
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::FRAGMENT,
range: 64..96,
},
bind_group_layouts: &[
globals_layout,
dynamic_uniforms_layout,
bitmap_bind_layout,
sampler_layout,
],
push_constant_ranges: &[],
});
let mask_pipelines = enum_map! {
@ -552,22 +542,18 @@ fn create_gradient_pipeline(
msaa_sample_count: u32,
vertex_buffers_layout: &[wgpu::VertexBufferLayout<'_>],
globals_layout: &wgpu::BindGroupLayout,
dynamic_uniforms_layout: &wgpu::BindGroupLayout,
gradient_bind_layout: &wgpu::BindGroupLayout,
) -> ShapePipeline {
let pipeline_layout_label = create_debug_label!("Gradient shape pipeline layout");
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: pipeline_layout_label.as_deref(),
bind_group_layouts: &[globals_layout, gradient_bind_layout],
push_constant_ranges: &[
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::VERTEX,
range: 0..64,
},
wgpu::PushConstantRange {
stages: wgpu::ShaderStages::FRAGMENT,
range: 64..96,
},
bind_group_layouts: &[
globals_layout,
dynamic_uniforms_layout,
gradient_bind_layout,
],
push_constant_ranges: &[],
});
let mask_pipelines = enum_map! {

View File

@ -0,0 +1,148 @@
use bytemuck::Pod;
use futures::{
executor::{LocalPool, LocalSpawner},
task::LocalSpawnExt,
};
use std::{convert::TryInto, marker::PhantomData, mem};
use wgpu::util::StagingBelt;
/// A simple chunked bump allocator for managing dynamic uniforms that change per-draw.
/// Each draw call may use `UniformBuffer::write_uniforms` to queue the upload of its
/// uniform data to the GPU and to bind that data on the render pass.
pub struct UniformBuffer<T: Pod> {
/// GPU blocks that uniforms are written into; a new one is allocated by
/// `write_uniforms` whenever `cur_block` runs past the end of this list.
blocks: Vec<Block>,
/// Bind group layout returned by `layout` and used to create each block's bind group.
buffer_layout: wgpu::BindGroupLayout,
/// Staging belt through which uniform data is copied into the blocks.
staging_belt: StagingBelt,
/// Local executor that drives the staging belt's recall future in `reset`.
executor: LocalPool,
/// Spawner handle for `executor`.
spawner: LocalSpawner,
/// Size of `T` in bytes, rounded up to the uniform alignment passed to `new`.
/// This is the distance between two consecutive uniform writes within a block.
aligned_uniforms_size: u32,
/// Index into `blocks` of the block that the next write goes to.
cur_block: usize,
/// Byte offset within the current block that the next write goes to.
cur_offset: u32,
/// Marks the uniform type `T` as used; no `T` value is stored in this struct.
_phantom: PhantomData<T>,
}
impl<T: Pod> UniformBuffer<T> {
    /// The size of each block, in bytes.
    ///
    /// Uniforms are copied into each block until it reaches capacity, at which point a new
    /// block will be allocated.
    const BLOCK_SIZE: u32 = 65536;

    /// The uniform data size for a single draw call, in bytes.
    const UNIFORMS_SIZE: u64 = mem::size_of::<T>() as u64;

    /// Creates a new `UniformBuffer` with the given uniform layout.
    ///
    /// `uniform_alignment` is the alignment, in bytes, that every dynamic offset handed to
    /// the render pass is rounded up to (typically the device's
    /// `min_uniform_buffer_offset_alignment` limit).
    ///
    /// # Panics
    ///
    /// Panics if `uniform_alignment` is not a non-zero power of two, or if the aligned size
    /// of `T` does not fit inside a single block.
    pub fn new(buffer_layout: wgpu::BindGroupLayout, uniform_alignment: u32) -> Self {
        // The mask arithmetic below is only valid for a non-zero power of two. A zero
        // alignment would otherwise underflow (debug) or yield an aligned size of 0 (release).
        assert!(
            uniform_alignment.is_power_of_two(),
            "uniform alignment must be a non-zero power of two, got {}",
            uniform_alignment
        );

        // Create local executor for uniform uploads.
        let executor = LocalPool::new();
        let spawner = executor.spawner();

        // Round the size of `T` up to the next multiple of the alignment.
        let align_mask = uniform_alignment - 1;
        let uniforms_size: u32 = Self::UNIFORMS_SIZE
            .try_into()
            .expect("uniform data size must fit in a u32");
        let aligned_uniforms_size = (uniforms_size
            .checked_add(align_mask)
            .expect("aligned uniform data size must fit in a u32"))
            & !align_mask;

        // Every uniform must fit in a block; this also guarantees that the write cursor
        // can never move past the end of a block in `write_uniforms`.
        assert!(
            aligned_uniforms_size <= Self::BLOCK_SIZE,
            "uniform data ({} bytes after alignment) does not fit in a {} byte block",
            aligned_uniforms_size,
            Self::BLOCK_SIZE
        );

        Self {
            blocks: Vec::with_capacity(8),
            buffer_layout,
            executor,
            spawner,
            // Staging chunks are half the size of a block.
            staging_belt: StagingBelt::new(u64::from(Self::BLOCK_SIZE) / 2),
            aligned_uniforms_size,
            cur_block: 0,
            cur_offset: 0,
            _phantom: PhantomData,
        }
    }

    /// Returns the bind group layout for the uniforms in this buffer.
    pub fn layout(&self) -> &wgpu::BindGroupLayout {
        &self.buffer_layout
    }

    /// Resets the buffer and staging belt.
    ///
    /// Rewinds the write cursor to the start of the first block, so already allocated
    /// blocks are reused, and recalls the staging belt's buffers.
    /// Should be called at the start of a frame.
    pub fn reset(&mut self) {
        self.cur_block = 0;
        self.cur_offset = 0;

        // Best effort: a spawn failure is deliberately ignored here. The recall future is
        // polled until it cannot make further progress without blocking.
        let _ = self.spawner.spawn_local(self.staging_belt.recall());
        self.executor.run_until_stalled();
    }

    /// Enqueue `data` for upload into the given command encoder, and set the bind group on
    /// `render_pass` to use the uniform data.
    ///
    /// The copy is recorded into `command_encoder`, and the bind group at
    /// `bind_group_index` is set with a dynamic offset pointing at the written data.
    /// A new block is allocated on `device` when all existing blocks are full.
    ///
    /// # Panics
    ///
    /// Panics if `T` is a zero-sized type.
    pub fn write_uniforms<'a>(
        &'a mut self,
        device: &wgpu::Device,
        command_encoder: &mut wgpu::CommandEncoder,
        render_pass: &mut wgpu::RenderPass<'a>,
        bind_group_index: u32,
        data: &T,
    ) {
        // Allocate a new block if we've exceeded our capacity.
        if self.cur_block >= self.blocks.len() {
            self.allocate_block(device);
        }
        let block = &self.blocks[self.cur_block];

        // Copy the data into the buffer via the staging belt.
        let uniforms_size = wgpu::BufferSize::new(Self::UNIFORMS_SIZE)
            .expect("uniform data type must not be zero-sized");
        self.staging_belt
            .write_buffer(
                command_encoder,
                &block.buffer,
                self.cur_offset.into(),
                uniforms_size,
                device,
            )
            .copy_from_slice(bytemuck::bytes_of(data));

        // Set the bind group to the final uniform location.
        render_pass.set_bind_group(bind_group_index, &block.bind_group, &[self.cur_offset]);

        // Advance offset.
        self.cur_offset += self.aligned_uniforms_size;

        // Advance to next buffer if we are out of room in this buffer.
        // `saturating_sub` keeps this check free of underflow in every case.
        let remaining = Self::BLOCK_SIZE.saturating_sub(self.cur_offset);
        if remaining < self.aligned_uniforms_size {
            self.cur_block += 1;
            self.cur_offset = 0;
        }
    }

    /// Closes the staging belt's buffers for this frame.
    /// Should be called at the end of a frame.
    pub fn finish(&mut self) {
        self.staging_belt.finish();
    }

    /// Adds a newly allocated buffer to the block list, and returns it.
    ///
    /// The buffer is `BLOCK_SIZE` bytes large. Its bind group covers a single `T` at
    /// binding 0; the position inside the block is selected per draw through the dynamic
    /// offset passed in `write_uniforms`.
    fn allocate_block(&mut self, device: &wgpu::Device) -> &Block {
        let buffer_label = create_debug_label!("Dynamic buffer");
        let buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: buffer_label.as_deref(),
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
            size: Self::BLOCK_SIZE.into(),
            mapped_at_creation: false,
        });

        let bind_group_label = create_debug_label!("Dynamic buffer bind group");
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: bind_group_label.as_deref(),
            layout: &self.buffer_layout,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
                resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
                    buffer: &buffer,
                    offset: 0,
                    size: wgpu::BufferSize::new(Self::UNIFORMS_SIZE),
                }),
            }],
        });

        self.blocks.push(Block { buffer, bind_group });
        self.blocks.last().unwrap()
    }
}
/// A block of GPU memory that will contain our uniforms.
///
/// Blocks are created by `UniformBuffer::allocate_block`.
#[derive(Debug)]
struct Block {
/// GPU buffer of `UniformBuffer::BLOCK_SIZE` bytes, created with `UNIFORM | COPY_DST` usage.
buffer: wgpu::Buffer,
/// Bind group exposing `buffer` at binding 0; set on the render pass together with a
/// dynamic offset in `UniformBuffer::write_uniforms`.
bind_group: wgpu::BindGroup,
}