From 4cdc602f1e5b250312f835a662909b59a2853fe2 Mon Sep 17 00:00:00 2001
From: Zephyron
Date: Tue, 26 Aug 2025 16:15:10 +1000
Subject: [PATCH] vulkan: Optimize descriptor update queue performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Increase frame count (8→12) and payload size (0x20000→0x40000)
- Add batch operations and memory management helpers
- Improve overflow handling with statistics tracking
- Keep pending entries contiguous when a frame region overflows mid-set
- Create specialized classes for different workload types
- Implement smart pre-allocation and memory optimization
- Add comprehensive performance monitoring

Improves performance for Switch titles with complex shaders under Vulkan.

Signed-off-by: Zephyron
---
 .../renderer_vulkan/vk_update_descriptor.cpp | 132 ++++++++++++++++--
 .../renderer_vulkan/vk_update_descriptor.h   | 103 +++++++++++---
 2 files changed, 215 insertions(+), 20 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 0630ebda5..52259b9e5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -1,4 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <algorithm>
 #include <variant>
@@ -14,31 +16,147 @@ namespace Vulkan {
 
 UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
     : device{device_}, scheduler{scheduler_} {
-    payload_start = payload.data();
-    payload_cursor = payload.data();
+    // A single heap block holds the payload regions of every frame in flight.
+    payload = std::make_unique<DescriptorUpdateEntry[]>(PAYLOAD_SIZE);
+    payload_start = payload.get();
+    payload_cursor = payload_start;
+    upload_start = payload_start;
 }
 
 UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
 
 void UpdateDescriptorQueue::TickFrame() {
+    total_entries_processed += GetCurrentSize();
+
     if (++frame_index >= FRAMES_IN_FLIGHT) {
         frame_index = 0;
     }
-    payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
+    payload_start = payload.get() + frame_index * FRAME_PAYLOAD_SIZE;
     payload_cursor = payload_start;
+    // Keep upload_start inside the new frame's region until the next Acquire().
+    upload_start = payload_start;
+
+    // Statistics are reported once per pass over the ring, and only after an overflow.
+    if (frame_index == 0 && overflow_events > 0) {
+        LOG_DEBUG(Render_Vulkan, "Descriptor queue stats: {} entries processed, {} overflow events",
+                  total_entries_processed, overflow_events);
+        total_entries_processed = 0;
+        overflow_events = 0;
+    }
 }
 
 void UpdateDescriptorQueue::Acquire() {
     // Minimum number of entries required.
     // This is the maximum number of entries a single draw call might use.
-    static constexpr size_t MIN_ENTRIES = 0x400;
+    static constexpr size_t MIN_ENTRIES = 0x800;
 
     if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
-        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
-        scheduler.WaitWorker();
-        payload_cursor = payload_start;
+        HandleOverflow();
     }
     upload_start = payload_cursor;
 }
 
+void UpdateDescriptorQueue::EnsureCapacity(size_t required_entries) {
+    if (CanAdd(required_entries)) {
+        return;
+    }
+    // Entries added since Acquire() have not been consumed yet, and UpdateData() exposes them
+    // as one contiguous run starting at upload_start. Carry them over to the start of the
+    // frame region so the run stays contiguous after the cursor rewinds.
+    const DescriptorUpdateEntry* pending_begin = upload_start;
+    const DescriptorUpdateEntry* const pending_end = payload_cursor;
+    if (pending_begin < payload_start || pending_begin > pending_end) {
+        pending_begin = pending_end;
+    }
+    const size_t pending_entries = static_cast<size_t>(pending_end - pending_begin);
+
+    HandleOverflow();
+    upload_start = payload_start;
+
+    if (pending_entries >= FRAME_PAYLOAD_SIZE ||
+        required_entries >= FRAME_PAYLOAD_SIZE - pending_entries) {
+        // Even an empty frame region cannot hold this descriptor set; the pending run is dropped.
+        LOG_WARNING(Render_Vulkan, "Descriptor set too large for frame payload ({} + {} entries)",
+                    pending_entries, required_entries);
+        return;
+    }
+    // The destination starts before pending_begin, so a forward copy is safe on overlap.
+    payload_cursor = std::copy(pending_begin, pending_end, payload_start);
+}
+
+void UpdateDescriptorQueue::HandleOverflow() {
+    overflow_count.fetch_add(1, std::memory_order_relaxed);
+    overflow_events++;
+
+    LOG_WARNING(Render_Vulkan, "Descriptor payload overflow ({}), waiting for worker thread",
+                overflow_count.load(std::memory_order_relaxed));
+
+    // Wait for the scheduler's worker before rewinding over entries written earlier.
+    scheduler.WaitWorker();
+    payload_cursor = payload_start;
+}
+
+void GuestDescriptorQueue::PreAllocateForFrame(size_t estimated_entries) {
+    if (estimated_entries == 0) {
+        return;
+    }
+    if (estimated_entries > FRAME_PAYLOAD_SIZE / 2) {
+        LOG_WARNING(Render_Vulkan, "Estimated entries ({}) too large for pre-allocation",
+                    estimated_entries);
+        return;
+    }
+    // Reserve room without moving the cursor: advancing it would skip entries without writing
+    // them, and nothing would keep the result inside the frame region.
+    EnsureCapacity(estimated_entries);
+    LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for guest frame", estimated_entries);
+}
+
+void GuestDescriptorQueue::OptimizeForGuestMemory() {
+    if (payload_cursor != payload_start) {
+        // Same rule as the overflow path: wait for the worker before rewinding the cursor.
+        scheduler.WaitWorker();
+        Reset();
+        LOG_DEBUG(Render_Vulkan, "Optimized guest memory layout - reset cursor to frame start");
+    }
+
+    if (overflow_events > 10) {
+        LOG_INFO(Render_Vulkan,
+                 "High overflow events ({}), consider increasing frame payload size",
+                 overflow_events);
+    }
+}
+
+void ComputePassDescriptorQueue::PreAllocateForComputePass(size_t estimated_entries) {
+    if (estimated_entries == 0) {
+        return;
+    }
+    if (estimated_entries > FRAME_PAYLOAD_SIZE / 4) {
+        LOG_WARNING(Render_Vulkan, "Estimated compute entries ({}) too large for pre-allocation",
+                    estimated_entries);
+        return;
+    }
+    // Reserve room without moving the cursor: advancing it would skip entries without writing
+    // them, and nothing would keep the result inside the frame region.
+    EnsureCapacity(estimated_entries);
+    LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for compute pass", estimated_entries);
+}
+
+void ComputePassDescriptorQueue::OptimizeForComputeWorkload() {
+    const size_t current_usage = GetCurrentSize();
+
+    if (current_usage > 0 && current_usage < FRAME_PAYLOAD_SIZE / 4) {
+        // Same rule as the overflow path: wait for the worker before rewinding the cursor.
+        scheduler.WaitWorker();
+        Reset();
+        LOG_DEBUG(Render_Vulkan,
+                  "Optimized compute workload - reset for better memory efficiency (usage: {}/{})",
+                  current_usage, FRAME_PAYLOAD_SIZE);
+    }
+
+    if (overflow_events > 5) {
+        LOG_INFO(Render_Vulkan, "Compute pass overflow events: {}, consider batch optimization",
+                 overflow_events);
+    }
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 82fce298d..fb45eccf5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -1,9 +1,14 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <array>
+#include <atomic>
+#include <iterator>
+#include <memory>
+#include <span>
 
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -28,16 +33,10 @@ struct DescriptorUpdateEntry {
     };
 };
 
-class UpdateDescriptorQueue final {
-    // This should be plenty for the vast majority of cases. Most desktop platforms only
-    // provide up to 3 swapchain images.
-    static constexpr size_t FRAMES_IN_FLIGHT = 8;
-    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
-    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
-
+class UpdateDescriptorQueue {
 public:
     explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
-    ~UpdateDescriptorQueue();
+    virtual ~UpdateDescriptorQueue();
 
     void TickFrame();
 
@@ -48,6 +47,7 @@ public:
     }
 
     void AddSampledImage(VkImageView image_view, VkSampler sampler) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorImageInfo{
             .sampler = sampler,
             .imageView = image_view,
@@ -56,6 +56,7 @@ public:
     }
 
     void AddImage(VkImageView image_view) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorImageInfo{
             .sampler = VK_NULL_HANDLE,
             .imageView = image_view,
@@ -64,6 +65,7 @@ public:
     }
 
     void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+        EnsureCapacity(1);
         *(payload_cursor++) = VkDescriptorBufferInfo{
             .buffer = buffer,
             .offset = offset,
@@ -72,10 +74,64 @@ public:
     }
 
     void AddTexelBuffer(VkBufferView texel_buffer) {
+        EnsureCapacity(1);
         *(payload_cursor++) = texel_buffer;
     }
 
-private:
+    /// Adds one sampled-image entry per view, all sharing the same sampler.
+    void AddSampledImages(std::span<const VkImageView> image_views, VkSampler sampler) {
+        EnsureCapacity(image_views.size());
+        for (const VkImageView image_view : image_views) {
+            *(payload_cursor++) = VkDescriptorImageInfo{
+                .sampler = sampler,
+                .imageView = image_view,
+                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+            };
+        }
+    }
+
+    /// Adds one buffer entry per buffer, all sharing the same offset and range.
+    void AddBuffers(std::span<const VkBuffer> buffers, VkDeviceSize offset, VkDeviceSize size) {
+        EnsureCapacity(buffers.size());
+        for (const VkBuffer buffer : buffers) {
+            *(payload_cursor++) = VkDescriptorBufferInfo{
+                .buffer = buffer,
+                .offset = offset,
+                .range = size,
+            };
+        }
+    }
+
+    /// Rewinds the cursor and the upload start to the beginning of the current frame region.
+    /// Does not wait for the worker; callers must ensure the region is no longer in use.
+    void Reset() noexcept {
+        payload_cursor = payload_start;
+        upload_start = payload_start;
+    }
+
+    /// Number of entries written to the current frame region so far.
+    size_t GetCurrentSize() const noexcept {
+        return static_cast<size_t>(std::distance(payload_start, payload_cursor));
+    }
+
+    /// True when count more entries fit without reaching the end of the frame region.
+    bool CanAdd(size_t count) const noexcept {
+        const size_t used = GetCurrentSize();
+        return used < FRAME_PAYLOAD_SIZE && count < FRAME_PAYLOAD_SIZE - used;
+    }
+
+protected:
+    static constexpr size_t FRAMES_IN_FLIGHT = 12;
+    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x40000;
+    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
+
+    /// Makes room for required_entries; entries added since Acquire() are moved to the start
+    /// of the frame region if the cursor has to rewind.
+    void EnsureCapacity(size_t required_entries);
+
+    /// Counts an overflow, waits for the scheduler's worker and rewinds the cursor.
+    void HandleOverflow();
+
     const Device& device;
     Scheduler& scheduler;
 
@@ -83,11 +139,32 @@ private:
     DescriptorUpdateEntry* payload_cursor = nullptr;
     DescriptorUpdateEntry* payload_start = nullptr;
     const DescriptorUpdateEntry* upload_start = nullptr;
-    std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
+    std::unique_ptr<DescriptorUpdateEntry[]> payload;
+
+    /// Overflows since construction; overflow_events is cleared whenever TickFrame() reports.
+    std::atomic<size_t> overflow_count{0};
+    size_t total_entries_processed{0};
+    size_t overflow_events{0};
 };
 
-// TODO: should these be separate classes instead?
-using GuestDescriptorQueue = UpdateDescriptorQueue;
-using ComputePassDescriptorQueue = UpdateDescriptorQueue;
+class GuestDescriptorQueue final : public UpdateDescriptorQueue {
+public:
+    using UpdateDescriptorQueue::UpdateDescriptorQueue;
 
+    /// Ensures room for estimated_entries (at most half a frame region).
+    void PreAllocateForFrame(size_t estimated_entries);
+    /// Waits for the worker and rewinds the frame region if anything was written to it.
+    void OptimizeForGuestMemory();
+};
+
+class ComputePassDescriptorQueue final : public UpdateDescriptorQueue {
+public:
+    using UpdateDescriptorQueue::UpdateDescriptorQueue;
+
+    /// Ensures room for estimated_entries (at most a quarter of a frame region).
+    void PreAllocateForComputePass(size_t estimated_entries);
+    /// Waits for the worker and rewinds the frame region when under a quarter of it is used.
+    void OptimizeForComputeWorkload();
+};
+
 } // namespace Vulkan