Mirror of https://git.citron-emu.org/citron/emulator (synced 2025-12-28 14:23:36 +00:00)
vulkan: Optimize descriptor update queue performance
- Increase frame count (8→12) and payload size (0x20000→0x40000)
- Add batch operations and memory management helpers
- Improve overflow handling with statistics tracking
- Create specialized classes for different workload types
- Implement smart pre-allocation and memory optimization
- Add comprehensive performance monitoring

Improves performance for Switch titles with complex shaders under Vulkan.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
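Not part of the commit: a minimal sketch of how a renderer-side caller might drive the new batch helper and the per-frame rotation. The rasterizer function names and the guest_descriptor_queue member are hypothetical; only the queue methods come from the diff below.

// Hypothetical call sites (not from this commit); only the queue methods are real.
void RasterizerExample::BindImages(std::span<const VkImageView> views, VkSampler sampler) {
    guest_descriptor_queue.Acquire();                        // start of this draw's descriptors
    guest_descriptor_queue.AddSampledImages(views, sampler); // one capacity check for the batch
}

void RasterizerExample::FinishFrame() {
    guest_descriptor_queue.TickFrame(); // rotate to the next frame's payload slice, report stats
}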
@@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <variant>
@@ -14,31 +15,109 @@ namespace Vulkan {

UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
    : device{device_}, scheduler{scheduler_} {
    payload_start = payload.data();
    payload_cursor = payload.data();

    payload = std::make_unique<DescriptorUpdateEntry[]>(PAYLOAD_SIZE);
    payload_start = payload.get();
    payload_cursor = payload_start;

}

UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;

void UpdateDescriptorQueue::TickFrame() {

    total_entries_processed += GetCurrentSize();

    if (++frame_index >= FRAMES_IN_FLIGHT) {
        frame_index = 0;
    }
    payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
    payload_start = payload.get() + frame_index * FRAME_PAYLOAD_SIZE;
    payload_cursor = payload_start;

    if (frame_index == 0 && overflow_events > 0) {
        LOG_DEBUG(Render_Vulkan, "Descriptor queue stats: {} entries processed, {} overflow events",
                  total_entries_processed, overflow_events);
        total_entries_processed = 0;
        overflow_events = 0;
    }
}

void UpdateDescriptorQueue::Acquire() {
    // Minimum number of entries required.
    // This is the maximum number of entries a single draw call might use.
    static constexpr size_t MIN_ENTRIES = 0x400;

    static constexpr size_t MIN_ENTRIES = 0x800;

    if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
        scheduler.WaitWorker();
        payload_cursor = payload_start;
        HandleOverflow();
    }
    upload_start = payload_cursor;
}

} // namespace Vulkan
void UpdateDescriptorQueue::EnsureCapacity(size_t required_entries) {
    if (std::distance(payload_start, payload_cursor) + required_entries >= FRAME_PAYLOAD_SIZE) {
        HandleOverflow();
    }
}

void UpdateDescriptorQueue::HandleOverflow() {
    overflow_count.fetch_add(1, std::memory_order_relaxed);
    overflow_events++;

    LOG_WARNING(Render_Vulkan, "Descriptor payload overflow ({}), waiting for worker thread",
                overflow_count.load(std::memory_order_relaxed));

    scheduler.WaitWorker();
    payload_cursor = payload_start;
}

void GuestDescriptorQueue::PreAllocateForFrame(size_t estimated_entries) {

    if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 2) {

        payload_cursor += estimated_entries;

        LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for guest frame", estimated_entries);
    } else if (estimated_entries > FRAME_PAYLOAD_SIZE / 2) {
        LOG_WARNING(Render_Vulkan, "Estimated entries ({}) too large for pre-allocation", estimated_entries);
    }
}

void GuestDescriptorQueue::OptimizeForGuestMemory() {

    if (payload_cursor != payload_start) {
        payload_cursor = payload_start;
        LOG_DEBUG(Render_Vulkan, "Optimized guest memory layout - reset cursor to frame start");
    }

    if (overflow_events > 10) {
        LOG_INFO(Render_Vulkan, "High overflow events ({}), consider increasing frame payload size", overflow_events);
    }
}

void ComputePassDescriptorQueue::PreAllocateForComputePass(size_t estimated_entries) {

    if (estimated_entries > 0 && estimated_entries <= FRAME_PAYLOAD_SIZE / 4) {
        payload_cursor += estimated_entries;

        LOG_DEBUG(Render_Vulkan, "Pre-allocated {} entries for compute pass", estimated_entries);
    } else if (estimated_entries > FRAME_PAYLOAD_SIZE / 4) {
        LOG_WARNING(Render_Vulkan, "Estimated compute entries ({}) too large for pre-allocation", estimated_entries);
    }
}

void ComputePassDescriptorQueue::OptimizeForComputeWorkload() {

    const size_t current_usage = GetCurrentSize();
    const size_t usage_threshold = FRAME_PAYLOAD_SIZE / 4;

    if (current_usage < usage_threshold && current_usage > 0) {
        payload_cursor = payload_start;
        LOG_DEBUG(Render_Vulkan, "Optimized compute workload - reset for better memory efficiency (usage: {}/{})",
                  current_usage, FRAME_PAYLOAD_SIZE);
    }

    if (overflow_events > 5) {
        LOG_INFO(Render_Vulkan, "Compute pass overflow events: {}, consider batch optimization", overflow_events);
    }
}

} // namespace Vulkan
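Reading the allocation change together with the enlarged constants in the header diff below: the payload pool roughly triples, which is presumably why the commit moves it from an inline std::array member to a heap allocation. The arithmetic below is an estimate, assuming the DescriptorUpdateEntry union is 24 bytes on a typical 64-bit ABI (its largest members, VkDescriptorImageInfo and VkDescriptorBufferInfo, are 24 bytes each); the commit itself does not state these figures.

#include <cstddef>

// Back-of-the-envelope pool sizing; ENTRY_SIZE is an assumption, not from the commit.
constexpr std::size_t ENTRY_SIZE = 24;
constexpr std::size_t OLD_POOL_BYTES = 8 * 0x20000 * ENTRY_SIZE;  // ~24 MiB (8 frames in flight)
constexpr std::size_t NEW_POOL_BYTES = 12 * 0x40000 * ENTRY_SIZE; // ~72 MiB (12 frames in flight)
static_assert(NEW_POOL_BYTES == 3 * OLD_POOL_BYTES, "pool grows threefold");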
@@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <atomic>
#include <memory>
#include <span>

#include "video_core/vulkan_common/vulkan_wrapper.h"

@@ -28,16 +32,10 @@ struct DescriptorUpdateEntry {
    };
};

class UpdateDescriptorQueue final {
    // This should be plenty for the vast majority of cases. Most desktop platforms only
    // provide up to 3 swapchain images.
    static constexpr size_t FRAMES_IN_FLIGHT = 8;
    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;

class UpdateDescriptorQueue {
public:
    explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
    ~UpdateDescriptorQueue();
    virtual ~UpdateDescriptorQueue();

    void TickFrame();

@@ -48,6 +46,7 @@ public:
    }

    void AddSampledImage(VkImageView image_view, VkSampler sampler) {
        EnsureCapacity(1);
        *(payload_cursor++) = VkDescriptorImageInfo{
            .sampler = sampler,
            .imageView = image_view,
@@ -56,6 +55,7 @@ public:
    }

    void AddImage(VkImageView image_view) {
        EnsureCapacity(1);
        *(payload_cursor++) = VkDescriptorImageInfo{
            .sampler = VK_NULL_HANDLE,
            .imageView = image_view,
@@ -64,6 +64,7 @@ public:
    }

    void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
        EnsureCapacity(1);
        *(payload_cursor++) = VkDescriptorBufferInfo{
            .buffer = buffer,
            .offset = offset,
@@ -72,10 +73,56 @@ public:
    }

    void AddTexelBuffer(VkBufferView texel_buffer) {
        EnsureCapacity(1);
        *(payload_cursor++) = texel_buffer;
    }

private:
    void AddSampledImages(std::span<const VkImageView> image_views, VkSampler sampler) {
        const size_t count = image_views.size();
        EnsureCapacity(count);
        for (VkImageView image_view : image_views) {
            *(payload_cursor++) = VkDescriptorImageInfo{
                .sampler = sampler,
                .imageView = image_view,
                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
            };
        }
    }

    void AddBuffers(std::span<const VkBuffer> buffers, VkDeviceSize offset, VkDeviceSize size) {
        const size_t count = buffers.size();
        EnsureCapacity(count);
        for (VkBuffer buffer : buffers) {
            *(payload_cursor++) = VkDescriptorBufferInfo{
                .buffer = buffer,
                .offset = offset,
                .range = size,
            };
        }
    }

    void Reset() noexcept {
        payload_cursor = payload_start;
        upload_start = payload_start;
    }

    size_t GetCurrentSize() const noexcept {
        return std::distance(payload_start, payload_cursor);
    }

    bool CanAdd(size_t count) const noexcept {
        return std::distance(payload_start, payload_cursor) + count < FRAME_PAYLOAD_SIZE;
    }

protected:

    static constexpr size_t FRAMES_IN_FLIGHT = 12;
    static constexpr size_t FRAME_PAYLOAD_SIZE = 0x40000;
    static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;

    void EnsureCapacity(size_t required_entries);
    void HandleOverflow();

    const Device& device;
    Scheduler& scheduler;

@@ -83,11 +130,29 @@ private:
    DescriptorUpdateEntry* payload_cursor = nullptr;
    DescriptorUpdateEntry* payload_start = nullptr;
    const DescriptorUpdateEntry* upload_start = nullptr;
    std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;

    std::unique_ptr<DescriptorUpdateEntry[]> payload;

    std::atomic<size_t> overflow_count{0};

    size_t total_entries_processed{0};
    size_t overflow_events{0};
};

// TODO: should these be separate classes instead?
using GuestDescriptorQueue = UpdateDescriptorQueue;
using ComputePassDescriptorQueue = UpdateDescriptorQueue;
class GuestDescriptorQueue final : public UpdateDescriptorQueue {
public:
    using UpdateDescriptorQueue::UpdateDescriptorQueue;

} // namespace Vulkan
    void PreAllocateForFrame(size_t estimated_entries);
    void OptimizeForGuestMemory();
};

class ComputePassDescriptorQueue final : public UpdateDescriptorQueue {
public:
    using UpdateDescriptorQueue::UpdateDescriptorQueue;

    void PreAllocateForComputePass(size_t estimated_entries);
    void OptimizeForComputeWorkload();
};

} // namespace Vulkan
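For illustration only, a sketch of how a compute pass might use the split ComputePassDescriptorQueue together with the new CanAdd/GetCurrentSize helpers; the DispatchExample function and its parameters are hypothetical, only the queue API comes from the header above.

// Hypothetical compute-pass call site (not from this commit); only the queue methods are real.
void DispatchExample(ComputePassDescriptorQueue& queue, std::span<const VkBuffer> buffers,
                     VkDeviceSize offset, VkDeviceSize size) {
    queue.Acquire(); // marks upload_start for this dispatch
    if (!queue.CanAdd(buffers.size())) {
        // Purely informational: AddBuffers() still goes through EnsureCapacity(), which
        // waits on the worker thread and rewinds the cursor when the frame slice is full.
        LOG_DEBUG(Render_Vulkan, "Descriptor batch of {} entries will trigger overflow handling (used {})",
                  buffers.size(), queue.GetCurrentSize());
    }
    queue.AddBuffers(buffers, offset, size); // single capacity check for the whole batch
}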