Reworked wgpu buffers, updated the glow side to properly store transform locations, attempted to fix visibility modifiers, and implemented some of the feedback received in the initial PR.

This commit is contained in:
shan 2022-10-04 18:24:46 -07:00
parent 5d0fffc626
commit 6e7b3ced0b
20 changed files with 411 additions and 417 deletions

View file

@ -132,7 +132,7 @@ impl Backend {
);
}
if !layer.meshes.0.is_empty() {
if !layer.meshes.is_empty() {
let scaled = transformation
* Transformation::scale(scale_factor, scale_factor);

View file

@ -1,91 +1,124 @@
//! Utilities for static buffer operations.
use bytemuck::{Pod, Zeroable};
use std::marker::PhantomData;
use std::mem;
//128 triangles/indices
const DEFAULT_STATIC_BUFFER_COUNT: wgpu::BufferAddress = 128;
/// A generic buffer struct useful for items which have no alignment requirements
/// (e.g. Vertex, Index buffers) and are set once and never changed until destroyed.
///
/// This buffer is mapped to the GPU on creation, so must be initialized with the correct capacity.
#[derive(Debug)]
pub(crate) struct StaticBuffer {
//stored sequentially per mesh iteration
pub(crate) struct StaticBuffer<T> {
//stored sequentially per mesh iteration; refers to the offset index in the GPU buffer
offsets: Vec<wgpu::BufferAddress>,
label: &'static str,
usages: wgpu::BufferUsages,
gpu: wgpu::Buffer,
//the static size of the buffer
size: wgpu::BufferAddress,
_data: PhantomData<T>,
}
impl StaticBuffer {
impl<T: Pod + Zeroable> StaticBuffer<T> {
/// Initialize a new static buffer.
pub fn new(
device: &wgpu::Device,
label: &'static str,
size: u64,
usage: wgpu::BufferUsages,
total_offsets: usize,
usages: wgpu::BufferUsages,
) -> Self {
let size = (mem::size_of::<T>() as u64) * DEFAULT_STATIC_BUFFER_COUNT;
Self {
offsets: Vec::with_capacity(total_offsets),
gpu: device.create_buffer(&wgpu::BufferDescriptor {
label: Some(label),
size,
usage,
mapped_at_creation: true,
}),
offsets: Vec::new(),
label,
usages,
gpu: Self::gpu_buffer(device, label, size, usages),
size,
_data: Default::default(),
}
}
/// Resolves pending write operations & unmaps buffer from host memory.
pub fn flush(&self) {
(&self.gpu).unmap();
fn gpu_buffer(
device: &wgpu::Device,
label: &'static str,
size: wgpu::BufferAddress,
usage: wgpu::BufferUsages,
) -> wgpu::Buffer {
device.create_buffer(&wgpu::BufferDescriptor {
label: Some(label),
size,
usage,
mapped_at_creation: false,
})
}
/// Returns whether or not the buffer needs to be recreated. This can happen whenever the mesh
/// data is re-submitted.
pub fn needs_recreate(&self, new_size: usize) -> bool {
self.size != new_size as u64
}
/// Returns whether or not the buffer needs to be recreated. This can happen whenever mesh data
/// changes & a redraw is requested.
pub fn recreate_if_needed(
&mut self,
device: &wgpu::Device,
new_count: usize,
) -> bool {
let size =
wgpu::BufferAddress::from((mem::size_of::<T>() * new_count) as u64);
/// Writes the current vertex data to the gpu buffer with a memcpy & stores its offset.
pub fn write(&mut self, offset: u64, content: &[u8]) {
//offset has to be divisible by 8 for alignment reasons
let actual_offset = if offset % 8 != 0 {
offset + 4
if self.size <= size {
self.offsets.clear();
self.size = size;
self.gpu = Self::gpu_buffer(device, self.label, size, self.usages);
true
} else {
offset
};
false
}
}
let mut buffer = self
.gpu
.slice(actual_offset..(actual_offset + content.len() as u64))
.get_mapped_range_mut();
buffer.copy_from_slice(content);
self.offsets.push(actual_offset);
/// Writes the current vertex data to the gpu buffer if it is currently writable with a memcpy &
/// stores its offset.
///
/// This will return either the offset of the written bytes, or `None` if the GPU buffer is not
/// currently writable.
pub fn write(
&mut self,
device: &wgpu::Device,
staging_belt: &mut wgpu::util::StagingBelt,
encoder: &mut wgpu::CommandEncoder,
offset: u64,
content: &[T],
) -> u64 {
let bytes = bytemuck::cast_slice(content);
let bytes_size = bytes.len() as u64;
if let Some(buffer_size) = wgpu::BufferSize::new(bytes_size as u64) {
//offset has to be divisible by 8 for alignment reasons
let actual_offset = if offset % 8 != 0 { offset + 4 } else { offset };
let mut buffer = staging_belt.write_buffer(
encoder,
&self.gpu,
actual_offset,
buffer_size,
device,
);
buffer.copy_from_slice(bytes);
self.offsets.push(actual_offset);
}
bytes_size
}
fn offset_at(&self, index: usize) -> &wgpu::BufferAddress {
self.offsets
.get(index)
.expect(&format!("Offset index {} is not in range.", index))
.expect("Offset at index does not exist.")
}
/// Returns the slice calculated from the offset stored at the given index.
/// e.g. to calculate the slice for the 2nd mesh in the layer, this would be the offset at index
/// e.g. to calculate the slice for the 2nd mesh in the layer, this would be the offset at index
/// 1 that we stored earlier when writing.
pub fn slice_from_index<T>(
&self,
index: usize,
) -> wgpu::BufferSlice<'_> {
pub fn slice_from_index(&self, index: usize) -> wgpu::BufferSlice<'_> {
self.gpu.slice(self.offset_at(index)..)
}
}
/// Returns true if the current buffer doesn't exist & needs to be created, or if it's too small
/// for the new content.
pub(crate) fn needs_recreate(
buffer: &Option<StaticBuffer>,
new_size: usize,
) -> bool {
match buffer {
None => true,
Some(buf) => buf.needs_recreate(new_size),
}
}

View file

@ -50,7 +50,6 @@ impl DynamicBufferType {
}
}
//TODO think about making cpu & gpu buffers optional
pub(crate) struct DynamicBuffer<T: ShaderType> {
offsets: Vec<wgpu::DynamicOffset>,
cpu: DynamicBufferType,
@ -183,7 +182,7 @@ impl<T: ShaderType + WriteInto> DynamicBuffer<T> {
let offset = self
.offsets
.get(index)
.expect(&format!("Index {} not found in offsets.", index))
.expect("Index not found in offsets.")
.clone();
offset

View file

@ -3,11 +3,11 @@ use crate::{settings, Transformation};
use core::fmt;
use std::fmt::Formatter;
use iced_graphics::layer::Meshes;
use iced_graphics::layer::{attribute_count_of, Mesh};
use iced_graphics::shader::Shader;
use iced_graphics::Size;
use crate::buffers::buffer::{needs_recreate, StaticBuffer};
use crate::buffers::buffer::StaticBuffer;
use crate::triangle::gradient::GradientPipeline;
use crate::triangle::solid::SolidPipeline;
pub use iced_graphics::triangle::{Mesh2D, Vertex2D};
@ -20,10 +20,9 @@ mod solid;
#[derive(Debug)]
pub(crate) struct Pipeline {
blit: Option<msaa::Blit>,
// these are optional so we don't allocate any memory to the GPU if
// application has no triangle meshes.
vertex_buffer: Option<StaticBuffer>,
index_buffer: Option<StaticBuffer>,
vertex_buffer: StaticBuffer<Vertex2D>,
index_buffer: StaticBuffer<u32>,
index_strides: Vec<u32>,
pipelines: TrianglePipelines,
}
@ -69,8 +68,17 @@ impl Pipeline {
) -> Pipeline {
Pipeline {
blit: antialiasing.map(|a| msaa::Blit::new(device, format, a)),
vertex_buffer: None,
index_buffer: None,
vertex_buffer: StaticBuffer::new(
device,
"iced_wgpu::triangle vertex buffer",
wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
),
index_buffer: StaticBuffer::new(
device,
"iced_wgpu::triangle vertex buffer",
wgpu::BufferUsages::INDEX | wgpu::BufferUsages::COPY_DST,
),
index_strides: Vec::new(),
pipelines: TrianglePipelines {
solid: SolidPipeline::new(device, format, antialiasing),
gradient: GradientPipeline::new(device, format, antialiasing),
@ -88,177 +96,152 @@ impl Pipeline {
target_size: Size<u32>,
transformation: Transformation,
scale_factor: f32,
meshes: &Meshes<'_>,
meshes: &[Mesh<'_>],
) {
//count the total number of vertices & indices we need to handle
let (total_vertices, total_indices) = meshes.attribute_count();
//count the total amount of vertices & indices we need to handle
let (total_vertices, total_indices) = attribute_count_of(meshes);
//Only create buffers if they need to be re-sized or don't exist
if needs_recreate(&self.vertex_buffer, total_vertices) {
//mapped to GPU at creation with total vertices
self.vertex_buffer = Some(StaticBuffer::new(
// Then we ensure the current attribute buffers are big enough, resizing if necessary
// with wgpu this means recreating the buffer.
//We are not currently using the return value of these functions as we have no system in
//place to calculate mesh diff, or to know whether or not that would be more performant for
//the majority of use cases. Therefore we will write GPU data every frame (for now).
let _ = self.vertex_buffer.recreate_if_needed(device, total_vertices);
let _ = self.index_buffer.recreate_if_needed(device, total_indices);
//prepare dynamic buffers & data store for writing
self.index_strides.clear();
self.pipelines.clear();
let mut vertex_offset = 0;
let mut index_offset = 0;
for mesh in meshes {
let transform = transformation
* Transformation::translate(mesh.origin.x, mesh.origin.y);
//write to both buffers
let new_vertex_offset = self.vertex_buffer.write(
device,
"iced_wgpu::triangle vertex buffer",
//TODO: a more reasonable default to prevent frequent resizing calls
// before this was 10_000
(std::mem::size_of::<Vertex2D>() * total_vertices) as u64,
wgpu::BufferUsages::VERTEX,
meshes.0.len(),
))
}
staging_belt,
encoder,
vertex_offset,
&mesh.buffers.vertices,
);
if needs_recreate(&self.index_buffer, total_indices) {
//mapped to GPU at creation with total indices
self.index_buffer = Some(StaticBuffer::new(
let new_index_offset = self.index_buffer.write(
device,
"iced_wgpu::triangle index buffer",
//TODO: a more reasonable default to prevent frequent resizing calls
// before this was 10_000
(std::mem::size_of::<Vertex2D>() * total_indices) as u64,
wgpu::BufferUsages::INDEX,
meshes.0.len(),
));
}
staging_belt,
encoder,
index_offset,
&mesh.buffers.indices,
);
if let Some(vertex_buffer) = &mut self.vertex_buffer {
if let Some(index_buffer) = &mut self.index_buffer {
let mut offset_v = 0;
let mut offset_i = 0;
//TODO: store this more efficiently
let mut indices_lengths = Vec::with_capacity(meshes.0.len());
vertex_offset = vertex_offset + new_vertex_offset;
index_offset = index_offset + new_index_offset;
//iterate through meshes to write all attribute data
for mesh in meshes.0.iter() {
let transform = transformation
* Transformation::translate(
mesh.origin.x,
mesh.origin.y,
);
self.index_strides.push(mesh.buffers.indices.len() as u32);
let vertices = bytemuck::cast_slice(&mesh.buffers.vertices);
let indices = bytemuck::cast_slice(&mesh.buffers.indices);
//TODO: it's (probably) more efficient to reduce this write command and
// iterate first and then upload
vertex_buffer.write(offset_v, vertices);
index_buffer.write(offset_i, indices);
offset_v += vertices.len() as u64;
offset_i += indices.len() as u64;
indices_lengths.push(mesh.buffers.indices.len());
match mesh.shader {
Shader::Solid(color) => {
self.pipelines.solid.push(transform, color);
}
Shader::Gradient(gradient) => {
self.pipelines.gradient.push(transform, gradient);
}
}
//push uniform data to CPU buffers
match mesh.shader {
Shader::Solid(color) => {
self.pipelines.solid.push(transform, color);
}
Shader::Gradient(gradient) => {
self.pipelines.gradient.push(transform, gradient);
}
}
}
//done writing to gpu buffer, unmap from host memory since we don't need it
//anymore
vertex_buffer.flush();
index_buffer.flush();
//write uniform data to GPU
self.pipelines.write(device, staging_belt, encoder);
//resize & memcpy uniforms from CPU buffers to GPU buffers for all pipelines
self.pipelines.write(device, staging_belt, encoder);
//configure the render pass now that the data is uploaded to the GPU
{
//configure antialiasing pass
let (attachment, resolve_target, load) = if let Some(blit) =
&mut self.blit
{
let (attachment, resolve_target) =
blit.targets(device, target_size.width, target_size.height);
//configure the render pass now that the data is uploaded to the GPU
{
//configure antialiasing pass
let (attachment, resolve_target, load) =
if let Some(blit) = &mut self.blit {
let (attachment, resolve_target) = blit.targets(
device,
target_size.width,
target_size.height,
);
(
attachment,
Some(resolve_target),
wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
)
} else {
(target, None, wgpu::LoadOp::Load)
};
(
attachment,
Some(resolve_target),
wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
)
} else {
(target, None, wgpu::LoadOp::Load)
};
let mut render_pass = encoder.begin_render_pass(
&wgpu::RenderPassDescriptor {
label: Some("iced_wgpu::triangle render pass"),
color_attachments: &[Some(
wgpu::RenderPassColorAttachment {
view: attachment,
resolve_target,
ops: wgpu::Operations { load, store: true },
},
)],
depth_stencil_attachment: None,
let mut render_pass =
encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("iced_wgpu::triangle render pass"),
color_attachments: &[Some(
wgpu::RenderPassColorAttachment {
view: attachment,
resolve_target,
ops: wgpu::Operations { load, store: true },
},
);
)],
depth_stencil_attachment: None,
});
//TODO: do this a better way; store it in the respective pipelines perhaps
// to be more readable
let mut num_solids = 0;
let mut num_gradients = 0;
//TODO I can't figure out a clean way to encapsulate these into their appropriate
// structs without displeasing the borrow checker due to the lifetime requirements of
// render_pass & using a mutable reference to each pipeline in a loop...
let mut num_solids = 0;
let mut num_gradients = 0;
//TODO: try to avoid this extra iteration if possible
for index in 0..meshes.0.len() {
let clip_bounds =
(meshes.0[index].clip_bounds * scale_factor).snap();
for (index, mesh) in meshes.iter().enumerate() {
let clip_bounds = (mesh.clip_bounds * scale_factor).snap();
render_pass.set_scissor_rect(
clip_bounds.x,
clip_bounds.y,
clip_bounds.width,
clip_bounds.height,
);
match meshes.0[index].shader {
Shader::Solid(_) => {
self.pipelines.solid.configure_render_pass(
&mut render_pass,
num_solids,
);
num_solids += 1;
}
Shader::Gradient(_) => {
self.pipelines.gradient.configure_render_pass(
&mut render_pass,
num_gradients,
);
num_gradients += 1;
}
}
render_pass.set_index_buffer(
index_buffer.slice_from_index::<u32>(index),
wgpu::IndexFormat::Uint32,
);
render_pass.set_vertex_buffer(
0,
vertex_buffer.slice_from_index::<Vertex2D>(index),
);
render_pass.draw_indexed(
0..(indices_lengths[index] as u32),
0,
0..1,
render_pass.set_scissor_rect(
clip_bounds.x,
clip_bounds.y,
clip_bounds.width,
clip_bounds.height,
);
match mesh.shader {
Shader::Solid(_) => {
self.pipelines.solid.configure_render_pass(
&mut render_pass,
num_solids,
);
num_solids += 1;
}
}
Shader::Gradient(_) => {
self.pipelines.gradient.configure_render_pass(
&mut render_pass,
num_gradients,
);
num_gradients += 1;
}
};
render_pass.set_vertex_buffer(
0,
self.vertex_buffer.slice_from_index(index),
);
render_pass.set_index_buffer(
self.index_buffer.slice_from_index(index),
wgpu::IndexFormat::Uint32,
);
render_pass.draw_indexed(
0..(self.index_strides[index] as u32),
0,
0..1,
);
}
}
if let Some(blit) = &mut self.blit {
blit.draw(encoder, target);
}
//cleanup
self.pipelines.clear();
}
}

View file

@ -253,13 +253,13 @@ impl GradientPipeline {
pub fn configure_render_pass<'a>(
&'a self,
render_pass: &mut wgpu::RenderPass<'a>,
index: usize,
count: usize,
) {
render_pass.set_pipeline(&self.pipeline);
render_pass.set_bind_group(
0,
&self.bind_group,
&[self.uniform_buffer.offset_at_index(index)],
);
&[self.uniform_buffer.offset_at_index(count)],
)
}
}

View file

@ -8,15 +8,15 @@ use encase::ShaderType;
use glam::Vec4;
use iced_graphics::Transformation;
pub(super) struct SolidPipeline {
pub struct SolidPipeline {
pipeline: wgpu::RenderPipeline,
pub(super) buffer: DynamicBuffer<SolidUniforms>,
pub(crate) buffer: DynamicBuffer<SolidUniforms>,
bind_group_layout: wgpu::BindGroupLayout,
bind_group: wgpu::BindGroup,
}
#[derive(Debug, Clone, Copy, ShaderType)]
pub(super) struct SolidUniforms {
pub struct SolidUniforms {
transform: glam::Mat4,
color: Vec4,
}
@ -156,14 +156,13 @@ impl SolidPipeline {
pub fn configure_render_pass<'a>(
&'a self,
render_pass: &mut wgpu::RenderPass<'a>,
index: usize,
count: usize,
) {
render_pass.set_pipeline(&self.pipeline);
render_pass.set_bind_group(
0,
&self.bind_group,
&[self.buffer.offset_at_index(index)],
);
&[self.buffer.offset_at_index(count)],
)
}
}
}