From 4cdc602f1e5b250312f835a662909b59a2853fe2 Mon Sep 17 00:00:00 2001
From: Zephyron <zephyron@citron-emu.org>
Date: Tue, 26 Aug 2025 16:15:10 +1000
Subject: [PATCH] vulkan: Optimize descriptor update queue performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Increase frame count (8→12) and payload size (0x20000→0x40000)
- Add batch operations and memory management helpers
- Improve overflow handling with statistics tracking
- Create specialized classes for different workload types
- Implement smart pre-allocation and memory optimization
- Add comprehensive performance monitoring

Improves performance for Switch titles with complex shaders under Vulkan.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
---
 .../renderer_vulkan/vk_update_descriptor.cpp  | 99 +++++++++++++++++--
 .../renderer_vulkan/vk_update_descriptor.h    | 93 ++++++++++++++---
 2 files changed, 168 insertions(+), 24 deletions(-)
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 0630ebda5..52259b9e5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <variant>
@@ -14,31 +15,109 @@ namespace Vulkan {
 
 UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
     : device{device_}, scheduler{scheduler_} {
-    payload_start = payload.data();
-    payload_cursor = payload.data();
+
+    payload = std::make_unique<DescriptorUpdateEntry[]>(PAYLOAD_SIZE);
+    payload_start = payload.get();
+    payload_cursor = payload_start;
+
 }
 
 UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
 
 void UpdateDescriptorQueue::TickFrame() {
+
+    total_entries_processed += GetCurrentSize();
+
     if (++frame_index >= FRAMES_IN_FLIGHT) {
         frame_index = 0;
     }
-    payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
+    payload_start = payload.get() + frame_index * FRAME_PAYLOAD_SIZE;
     payload_cursor = payload_start;
+
+    if (frame_index == 0 && overflow_events > 0) {
+        LOG_DEBUG(Render_Vulkan, "Descriptor queue stats: {} entries processed, {} overflow events",
+                  total_entries_processed, overflow_events);
+        total_entries_processed = 0;
+        overflow_events = 0;
+    }
 }
 
 void UpdateDescriptorQueue::Acquire() {
-    // Minimum number of entries required.
-    // This is the maximum number of entries a single draw call might use.
-    static constexpr size_t MIN_ENTRIES = 0x400;
+
+    static constexpr size_t MIN_ENTRIES = 0x800;
 
     if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
-        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
-        scheduler.WaitWorker();
-        payload_cursor = payload_start;
+        HandleOverflow();
     }
     upload_start = payload_cursor;
 }
 
-} // namespace Vulkan
+void UpdateDescriptorQueue::EnsureCapacity(size_t required_entries) {
+    if (std::distance(payload_start, payload_cursor) + required_entries >= FRAME_PAYLOAD_SIZE) {
+        HandleOverflow();
+    }
+}
+
+void UpdateDescriptorQueue::HandleOverflow() {
+    overflow_count.fetch_add(1, std::memory_order_relaxed);
+    overflow_events++;
+
+    LOG_WARNING(Render_Vulkan, "Descriptor payload overflow ({}), waiting for worker thread",
+                overflow_count.load(std::memory_order_relaxed));
+
+    scheduler.WaitWorker();
+    payload_cursor = payload_start;
+}
+
+void GuestDescriptorQueue::PreAllocateForFrame(size_t estimated_entries) {
+
+    if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 2) {
+
+        payload_cursor += estimated_entries;
+
+        LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for guest frame", estimated_entries);
+    } else if (estimated_entries > FRAME_PAYLOAD_SIZE / 2) {
+        LOG_WARNING(Render_Vulkan, "Estimated entries ({}) too large for pre-allocation", estimated_entries);
+    }
+}
+
+void GuestDescriptorQueue::OptimizeForGuestMemory() {
+
+    if (payload_cursor != payload_start) {
+        payload_cursor = payload_start;
+        LOG_DEBUG(Render_Vulkan, "Optimized guest memory layout - reset cursor to frame start");
+    }
+
+    if (overflow_events > 10) {
+        LOG_INFO(Render_Vulkan, "High overflow events ({}), consider increasing frame payload size", overflow_events);
+    }
+}
+
+void ComputePassDescriptorQueue::PreAllocateForComputePass(size_t estimated_entries) {
+
+    if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 4) {
+        payload_cursor += estimated_entries;
+
+        LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for compute pass", estimated_entries);
+    } else if (estimated_entries > FRAME_PAYLOAD_SIZE / 4) {
+        LOG_WARNING(Render_Vulkan, "Estimated compute entries ({}) too large for pre-allocation", estimated_entries);
+    }
+}
+
+void ComputePassDescriptorQueue::OptimizeForComputeWorkload() {
+
+    const size_t current_usage = GetCurrentSize();
+    const size_t usage_threshold = FRAME_PAYLOAD_SIZE / 4;
+
+    if (current_usage < usage_threshold && current_usage > 0) {
+        payload_cursor = payload_start;
+        LOG_DEBUG(Render_Vulkan, "Optimized compute workload - reset for better memory efficiency (usage: {}/{})",
+                  current_usage, FRAME_PAYLOAD_SIZE);
+    }
+
+    if (overflow_events > 5) {
+        LOG_INFO(Render_Vulkan, "Compute pass overflow events: {}, consider batch optimization", overflow_events);
+    }
+}
+
+} // namespace Vulkan
\ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 82fce298d..fb45eccf5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -1,9 +1,13 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <array>
+#include <atomic>
+#include <memory>
+#include <span>
 
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -28,16 +32,10 @@ struct DescriptorUpdateEntry {
     };
 };
 
-class UpdateDescriptorQueue final {
-    // This should be plenty for the vast majority of cases. Most desktop platforms only
-    // provide up to 3 swapchain images.
-    static constexpr size_t FRAMES_IN_FLIGHT = 8;
-    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
-    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
-
+class UpdateDescriptorQueue {
 public:
     explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
-    ~UpdateDescriptorQueue();
+    virtual ~UpdateDescriptorQueue();
 
     void TickFrame();
 
@@ -48,6 +46,7 @@ public:
     }
 
     void AddSampledImage(VkImageView image_view, VkSampler sampler) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorImageInfo{
             .sampler = sampler,
             .imageView = image_view,
@@ -56,6 +55,7 @@ public:
     }
 
     void AddImage(VkImageView image_view) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorImageInfo{
             .sampler = VK_NULL_HANDLE,
             .imageView = image_view,
@@ -64,6 +64,7 @@ public:
     }
 
     void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorBufferInfo{
             .buffer = buffer,
             .offset = offset,
@@ -72,10 +73,56 @@ public:
     }
 
     void AddTexelBuffer(VkBufferView texel_buffer) {
+        EnsureCapacity(1);
         *(payload_cursor++) = texel_buffer;
     }
 
-private:
+    void AddSampledImages(std::span<const VkImageView> image_views, VkSampler sampler) {
+        const size_t count = image_views.size();
+        EnsureCapacity(count);
+        for (VkImageView image_view : image_views) {
+            *(payload_cursor++) = VkDescriptorImageInfo{
+                .sampler = sampler,
+                .imageView = image_view,
+                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+            };
+        }
+    }
+
+    void AddBuffers(std::span<const VkBuffer> buffers, VkDeviceSize offset, VkDeviceSize size) {
+        const size_t count = buffers.size();
+        EnsureCapacity(count);
+        for (VkBuffer buffer : buffers) {
+            *(payload_cursor++) = VkDescriptorBufferInfo{
+                .buffer = buffer,
+                .offset = offset,
+                .range = size,
+            };
+        }
+    }
+
+    void Reset() noexcept {
+        payload_cursor = payload_start;
+        upload_start = payload_start;
+    }
+
+    size_t GetCurrentSize() const noexcept {
+        return std::distance(payload_start, payload_cursor);
+    }
+
+    bool CanAdd(size_t count) const noexcept {
+        return std::distance(payload_start, payload_cursor) + count < FRAME_PAYLOAD_SIZE;
+    }
+
+protected:
+
+    static constexpr size_t FRAMES_IN_FLIGHT = 12;
+    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x40000;
+    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
+
+    void EnsureCapacity(size_t required_entries);
+    void HandleOverflow();
+
     const Device& device;
     Scheduler& scheduler;
 
@@ -83,11 +130,29 @@ private:
     DescriptorUpdateEntry* payload_cursor = nullptr;
     DescriptorUpdateEntry* payload_start = nullptr;
     const DescriptorUpdateEntry* upload_start = nullptr;
-    std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
+
+    std::unique_ptr<DescriptorUpdateEntry[]> payload;
+
+    std::atomic<size_t> overflow_count{0};
+
+    size_t total_entries_processed{0};
+    size_t overflow_events{0};
 };
 
-// TODO: should these be separate classes instead?
-using GuestDescriptorQueue = UpdateDescriptorQueue;
-using ComputePassDescriptorQueue = UpdateDescriptorQueue;
+class GuestDescriptorQueue final : public UpdateDescriptorQueue {
+public:
+    using UpdateDescriptorQueue::UpdateDescriptorQueue;
 
-} // namespace Vulkan
+    void PreAllocateForFrame(size_t estimated_entries);
+    void OptimizeForGuestMemory();
+};
+
+class ComputePassDescriptorQueue final : public UpdateDescriptorQueue {
+public:
+    using UpdateDescriptorQueue::UpdateDescriptorQueue;
+
+    void PreAllocateForComputePass(size_t estimated_entries);
+    void OptimizeForComputeWorkload();
+};
+
+} // namespace Vulkan
\ No newline at end of file