Merge pull request #2357 from iced-rs/wgpu/use-staging-belt

Use a `StagingBelt` in `iced_wgpu` for regular buffer uploads
2024-03-30 23:49:26 +01:00 · 2024-03-30 23:49:26 +01:00 · c7a4fad4a2
commit c7a4fad4a2
parent 5071e3d231 4c74bebc70
15 changed files with 156 additions and 53 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -129,7 +129,7 @@ cosmic-text = "0.10"
 dark-light = "1.0"
 futures = "0.3"
 glam = "0.25"
-glyphon = "0.5"
+glyphon = { git = "https://github.com/hecrj/glyphon.git", rev = "ceed55403ce53e120ce9d1fae17dcfe388726118" }
 guillotiere = "0.6"
 half = "2.2"
 image = "0.24"
@ -155,7 +155,6 @@ thiserror = "1.0"
 tiny-skia = "0.11"
 tokio = "1.0"
 tracing = "0.1"
-xxhash-rust = { version = "0.8", features = ["xxh3"] }
 unicode-segmentation = "1.0"
 wasm-bindgen-futures = "0.4"
 wasm-timer = "0.2"
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@ -21,10 +21,10 @@ log.workspace = true
 num-traits.workspace = true
 once_cell.workspace = true
 palette.workspace = true
+rustc-hash.workspace = true
 smol_str.workspace = true
 thiserror.workspace = true
 web-time.workspace = true
-xxhash-rust.workspace = true

 dark-light.workspace = true
 dark-light.optional = true
--- a/core/src/hasher.rs
+++ b/core/src/hasher.rs
@ -1,7 +1,7 @@
 /// The hasher used to compare layouts.
 #[allow(missing_debug_implementations)] // Doesn't really make sense to have debug on the hasher state anyways.
 #[derive(Default)]
-pub struct Hasher(xxhash_rust::xxh3::Xxh3);
+pub struct Hasher(rustc_hash::FxHasher);

 impl core::hash::Hasher for Hasher {
    fn write(&mut self, bytes: &[u8]) {
--- a/graphics/Cargo.toml
+++ b/graphics/Cargo.toml
@ -34,7 +34,6 @@ raw-window-handle.workspace = true
 rustc-hash.workspace = true
 thiserror.workspace = true
 unicode-segmentation.workspace = true
-xxhash-rust.workspace = true

 image.workspace = true
 image.optional = true
--- a/graphics/src/text/cache.rs
+++ b/graphics/src/text/cache.rs
@ -2,9 +2,9 @@
 use crate::core::{Font, Size};
 use crate::text;

-use rustc_hash::{FxHashMap, FxHashSet};
+use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
 use std::collections::hash_map;
-use std::hash::{BuildHasher, Hash, Hasher};
+use std::hash::{Hash, Hasher};

 /// A store of recently used sections of text.
 #[allow(missing_debug_implementations)]
@ -13,11 +13,8 @@ pub struct Cache {
    entries: FxHashMap<KeyHash, Entry>,
    aliases: FxHashMap<KeyHash, KeyHash>,
    recently_used: FxHashSet<KeyHash>,
-    hasher: HashBuilder,
 }

-type HashBuilder = xxhash_rust::xxh3::Xxh3Builder;
-
 impl Cache {
    /// Creates a new empty [`Cache`].
    pub fn new() -> Self {
@ -35,7 +32,7 @@ impl Cache {
        font_system: &mut cosmic_text::FontSystem,
        key: Key<'_>,
    ) -> (KeyHash, &mut Entry) {
-        let hash = key.hash(self.hasher.build_hasher());
+        let hash = key.hash(FxHasher::default());

        if let Some(hash) = self.aliases.get(&hash) {
            let _ = self.recently_used.insert(*hash);
@ -77,7 +74,7 @@ impl Cache {
            ] {
                if key.bounds != bounds {
                    let _ = self.aliases.insert(
-                        Key { bounds, ..key }.hash(self.hasher.build_hasher()),
+                        Key { bounds, ..key }.hash(FxHasher::default()),
                        hash,
                    );
                }
--- a/tiny_skia/Cargo.toml
+++ b/tiny_skia/Cargo.toml
@ -25,7 +25,6 @@ log.workspace = true
 rustc-hash.workspace = true
 softbuffer.workspace = true
 tiny-skia.workspace = true
-xxhash-rust.workspace = true

 resvg.workspace = true
 resvg.optional = true
--- a/wgpu/src/backend.rs
+++ b/wgpu/src/backend.rs
@ -1,3 +1,4 @@
+use crate::buffer;
 use crate::core::{Color, Size, Transformation};
 use crate::graphics::backend;
 use crate::graphics::color;
@ -30,6 +31,7 @@ pub struct Backend {
    pipeline_storage: pipeline::Storage,
    #[cfg(any(feature = "image", feature = "svg"))]
    image_pipeline: image::Pipeline,
+    staging_belt: wgpu::util::StagingBelt,
 }

 impl Backend {
@ -61,6 +63,13 @@ impl Backend {

            #[cfg(any(feature = "image", feature = "svg"))]
            image_pipeline,
+
+            // TODO: Resize belt smartly (?)
+            // It would be great if the `StagingBelt` API exposed methods
+            // for introspection to detect when a resize may be worth it.
+            staging_belt: wgpu::util::StagingBelt::new(
+                buffer::MAX_WRITE_SIZE as u64,
+            ),
        }
    }

@ -105,6 +114,8 @@ impl Backend {
            &layers,
        );

+        self.staging_belt.finish();
+
        self.render(
            device,
            encoder,
@ -123,12 +134,20 @@ impl Backend {
        self.image_pipeline.end_frame();
    }

+    /// Recalls staging memory for future uploads.
+    ///
+    /// This method should be called after the command encoder
+    /// has been submitted.
+    pub fn recall(&mut self) {
+        self.staging_belt.recall();
+    }
+
    fn prepare(
        &mut self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        format: wgpu::TextureFormat,
-        _encoder: &mut wgpu::CommandEncoder,
+        encoder: &mut wgpu::CommandEncoder,
        scale_factor: f32,
        target_size: Size<u32>,
        transformation: Transformation,
@ -144,7 +163,8 @@ impl Backend {
            if !layer.quads.is_empty() {
                self.quad_pipeline.prepare(
                    device,
-                    queue,
+                    encoder,
+                    &mut self.staging_belt,
                    &layer.quads,
                    transformation,
                    scale_factor,
@ -157,7 +177,8 @@ impl Backend {

                self.triangle_pipeline.prepare(
                    device,
-                    queue,
+                    encoder,
+                    &mut self.staging_belt,
                    &layer.meshes,
                    scaled,
                );
@ -171,8 +192,8 @@ impl Backend {

                    self.image_pipeline.prepare(
                        device,
-                        queue,
-                        _encoder,
+                        encoder,
+                        &mut self.staging_belt,
                        &layer.images,
                        scaled,
                        scale_factor,
@ -184,6 +205,7 @@ impl Backend {
                self.text_pipeline.prepare(
                    device,
                    queue,
+                    encoder,
                    &layer.text,
                    layer.bounds,
                    scale_factor,
--- a/wgpu/src/buffer.rs
+++ b/wgpu/src/buffer.rs
@ -1,6 +1,13 @@
 use std::marker::PhantomData;
+use std::num::NonZeroU64;
 use std::ops::RangeBounds;

+pub const MAX_WRITE_SIZE: usize = 100 * 1024;
+
+#[allow(unsafe_code)]
+const MAX_WRITE_SIZE_U64: NonZeroU64 =
+    unsafe { NonZeroU64::new_unchecked(MAX_WRITE_SIZE as u64) };
+
 #[derive(Debug)]
 pub struct Buffer<T> {
    label: &'static str,
@ -61,12 +68,46 @@ impl<T: bytemuck::Pod> Buffer<T> {
    /// Returns the size of the written bytes.
    pub fn write(
        &mut self,
-        queue: &wgpu::Queue,
+        device: &wgpu::Device,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        offset: usize,
        contents: &[T],
    ) -> usize {
        let bytes: &[u8] = bytemuck::cast_slice(contents);
-        queue.write_buffer(&self.raw, offset as u64, bytes);
+        let mut bytes_written = 0;
+
+        // Split write into multiple chunks if necessary
+        while bytes_written + MAX_WRITE_SIZE < bytes.len() {
+            belt.write_buffer(
+                encoder,
+                &self.raw,
+                (offset + bytes_written) as u64,
+                MAX_WRITE_SIZE_U64,
+                device,
+            )
+            .copy_from_slice(
+                &bytes[bytes_written..bytes_written + MAX_WRITE_SIZE],
+            );
+
+            bytes_written += MAX_WRITE_SIZE;
+        }
+
+        // Callers must pass non-empty `contents`: the loop above then leaves
+        // `bytes_written < bytes.len()`, so at least one byte remains here
+        let bytes_left = ((bytes.len() - bytes_written) as u64)
+            .try_into()
+            .expect("non-empty write");
+
+        // Write them
+        belt.write_buffer(
+            encoder,
+            &self.raw,
+            (offset + bytes_written) as u64,
+            bytes_left,
+            device,
+        )
+        .copy_from_slice(&bytes[bytes_written..]);

        self.offsets.push(offset as u64);

--- a/wgpu/src/image.rs
+++ b/wgpu/src/image.rs
@ -83,21 +83,31 @@ impl Layer {
    fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        nearest_instances: &[Instance],
        linear_instances: &[Instance],
        transformation: Transformation,
    ) {
-        queue.write_buffer(
+        let uniforms = Uniforms {
+            transform: transformation.into(),
+        };
+
+        let bytes = bytemuck::bytes_of(&uniforms);
+
+        belt.write_buffer(
+            encoder,
            &self.uniforms,
            0,
-            bytemuck::bytes_of(&Uniforms {
-                transform: transformation.into(),
-            }),
-        );
+            (bytes.len() as u64).try_into().expect("Sized uniforms"),
+            device,
+        )
+        .copy_from_slice(bytes);

-        self.nearest.upload(device, queue, nearest_instances);
-        self.linear.upload(device, queue, linear_instances);
+        self.nearest
+            .upload(device, encoder, belt, nearest_instances);
+
+        self.linear.upload(device, encoder, belt, linear_instances);
    }

    fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) {
@ -158,7 +168,8 @@ impl Data {
    fn upload(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        instances: &[Instance],
    ) {
        self.instance_count = instances.len();
@ -168,7 +179,7 @@ impl Data {
        }

        let _ = self.instances.resize(device, instances.len());
-        let _ = self.instances.write(queue, 0, instances);
+        let _ = self.instances.write(device, encoder, belt, 0, instances);
    }

    fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) {
@ -383,8 +394,8 @@ impl Pipeline {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        images: &[layer::Image],
        transformation: Transformation,
        _scale: f32,
@ -501,7 +512,8 @@ impl Pipeline {

        layer.prepare(
            device,
-            queue,
+            encoder,
+            belt,
            nearest_instances,
            linear_instances,
            transformation,
--- a/wgpu/src/quad.rs
+++ b/wgpu/src/quad.rs
@ -57,7 +57,8 @@ impl Pipeline {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        quads: &Batch,
        transformation: Transformation,
        scale: f32,
@ -67,7 +68,7 @@ impl Pipeline {
        }

        let layer = &mut self.layers[self.prepare_layer];
-        layer.prepare(device, queue, quads, transformation, scale);
+        layer.prepare(device, encoder, belt, quads, transformation, scale);

        self.prepare_layer += 1;
    }
@ -162,7 +163,8 @@ impl Layer {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        quads: &Batch,
        transformation: Transformation,
        scale: f32,
@ -171,15 +173,25 @@ impl Layer {
        let _ = info_span!("Wgpu::Quad", "PREPARE").entered();

        let uniforms = Uniforms::new(transformation, scale);
+        let bytes = bytemuck::bytes_of(&uniforms);

-        queue.write_buffer(
+        belt.write_buffer(
+            encoder,
            &self.constants_buffer,
            0,
-            bytemuck::bytes_of(&uniforms),
-        );
+            (bytes.len() as u64).try_into().expect("Sized uniforms"),
+            device,
+        )
+        .copy_from_slice(bytes);

-        self.solid.prepare(device, queue, &quads.solids);
-        self.gradient.prepare(device, queue, &quads.gradients);
+        if !quads.solids.is_empty() {
+            self.solid.prepare(device, encoder, belt, &quads.solids);
+        }
+
+        if !quads.gradients.is_empty() {
+            self.gradient
+                .prepare(device, encoder, belt, &quads.gradients);
+        }
    }
 }

--- a/wgpu/src/quad/gradient.rs
+++ b/wgpu/src/quad/gradient.rs
@ -46,11 +46,12 @@ impl Layer {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        instances: &[Gradient],
    ) {
        let _ = self.instances.resize(device, instances.len());
-        let _ = self.instances.write(queue, 0, instances);
+        let _ = self.instances.write(device, encoder, belt, 0, instances);

        self.instance_count = instances.len();
    }
--- a/wgpu/src/quad/solid.rs
+++ b/wgpu/src/quad/solid.rs
@ -40,11 +40,12 @@ impl Layer {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        instances: &[Solid],
    ) {
        let _ = self.instances.resize(device, instances.len());
-        let _ = self.instances.write(queue, 0, instances);
+        let _ = self.instances.write(device, encoder, belt, 0, instances);

        self.instance_count = instances.len();
    }
--- a/wgpu/src/text.rs
+++ b/wgpu/src/text.rs
@ -53,6 +53,7 @@ impl Pipeline {
        &mut self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
        sections: &[Text<'_>],
        layer_bounds: Rectangle,
        scale_factor: f32,
@ -262,6 +263,7 @@ impl Pipeline {
        let result = renderer.prepare(
            device,
            queue,
+            encoder,
            font_system,
            &mut self.atlas,
            glyphon::Resolution {
--- a/wgpu/src/triangle.rs
+++ b/wgpu/src/triangle.rs
@ -48,7 +48,8 @@ impl Layer {
    fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        solid: &solid::Pipeline,
        gradient: &gradient::Pipeline,
        meshes: &[Mesh<'_>],
@ -103,33 +104,47 @@ impl Layer {
            let uniforms =
                Uniforms::new(transformation * mesh.transformation());

-            index_offset +=
-                self.index_buffer.write(queue, index_offset, indices);
+            index_offset += self.index_buffer.write(
+                device,
+                encoder,
+                belt,
+                index_offset,
+                indices,
+            );
+
            self.index_strides.push(indices.len() as u32);

            match mesh {
                Mesh::Solid { buffers, .. } => {
                    solid_vertex_offset += self.solid.vertices.write(
-                        queue,
+                        device,
+                        encoder,
+                        belt,
                        solid_vertex_offset,
                        &buffers.vertices,
                    );

                    solid_uniform_offset += self.solid.uniforms.write(
-                        queue,
+                        device,
+                        encoder,
+                        belt,
                        solid_uniform_offset,
                        &[uniforms],
                    );
                }
                Mesh::Gradient { buffers, .. } => {
                    gradient_vertex_offset += self.gradient.vertices.write(
-                        queue,
+                        device,
+                        encoder,
+                        belt,
                        gradient_vertex_offset,
                        &buffers.vertices,
                    );

                    gradient_uniform_offset += self.gradient.uniforms.write(
-                        queue,
+                        device,
+                        encoder,
+                        belt,
                        gradient_uniform_offset,
                        &[uniforms],
                    );
@ -237,7 +252,8 @@ impl Pipeline {
    pub fn prepare(
        &mut self,
        device: &wgpu::Device,
-        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        belt: &mut wgpu::util::StagingBelt,
        meshes: &[Mesh<'_>],
        transformation: Transformation,
    ) {
@ -252,7 +268,8 @@ impl Pipeline {
        let layer = &mut self.layers[self.prepare_layer];
        layer.prepare(
            device,
-            queue,
+            encoder,
+            belt,
            &self.solid,
            &self.gradient,
            meshes,
--- a/wgpu/src/window/compositor.rs
+++ b/wgpu/src/window/compositor.rs
@ -243,6 +243,7 @@ pub fn present<T: AsRef<str>>(

            // Submit work
            let _submission = compositor.queue.submit(Some(encoder.finish()));
+            backend.recall();
            frame.present();

            Ok(())