vulkan: Add adaptive timeout and auto-disable to turbo mode
- Implement adaptive timeout system that adjusts based on GPU performance
- Add auto-disable after 10 cycles of persistent timeouts
- Reduce workload (32x32 dispatch, 1MB buffer) for better compatibility
- Add performance monitoring and runtime control
- Fixes persistent timeout warnings by gracefully degrading when the GPU is busy

Signed-off-by: Zephyron <zephyron@citron-emu.org>
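At its core the change is a multiplicative-increase, gradual-decrease policy on the fence-wait timeout: double it (capped at 2 s) whenever a wait times out, shrink it by 10% (floored at 100 ms) whenever a wait succeeds. A minimal standalone sketch of that policy, reusing the constants the diff below introduces; the free function and the main() driver are illustrative only, not part of the commit:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Bounds mirror MIN_TIMEOUT_NS / MAX_TIMEOUT_NS from vk_turbo_mode.h.
constexpr std::uint64_t kMinTimeoutNs = 100'000'000;   // 100 ms
constexpr std::uint64_t kMaxTimeoutNs = 2'000'000'000; // 2 s

// Same rule as TurboMode::UpdateAdaptiveTimeout, written as a pure function.
std::uint64_t NextTimeout(std::uint64_t current_ns, bool timed_out) {
    if (timed_out) {
        return std::min(current_ns * 2, kMaxTimeoutNs); // back off quickly
    }
    return std::max(current_ns * 9 / 10, kMinTimeoutNs); // recover slowly
}

int main() {
    std::uint64_t timeout_ns = 500'000'000; // 500 ms default
    for (int i = 0; i < 3; ++i) {
        timeout_ns = NextTimeout(timeout_ns, true); // three timeouts in a row
    }
    // 500 -> 1000 -> 2000 -> 2000 (capped): prints "2000 ms"
    std::printf("%llu ms\n", static_cast<unsigned long long>(timeout_ns / 1'000'000));
}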
src/video_core/renderer_vulkan/vk_turbo_mode.cpp:

@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #if defined(ANDROID) && defined(ARCHITECTURE_arm64)
@@ -6,6 +7,7 @@
 #endif

+#include "common/literals.h"
 #include "common/logging/log.h"
 #include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -25,10 +27,21 @@ TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& d
         std::scoped_lock lk{m_submission_lock};
         m_submission_time = std::chrono::steady_clock::now();
     }
+
+#ifndef ANDROID
+    // Initialize resources asynchronously
+    resources = std::make_unique<TurboResources>();
+    InitializeResources();
+#endif
+
     m_thread = std::jthread([&](auto stop_token) { Run(stop_token); });
 }

-TurboMode::~TurboMode() = default;
+TurboMode::~TurboMode() {
+#ifndef ANDROID
+    CleanupResources();
+#endif
+}

 void TurboMode::QueueSubmitted() {
     std::scoped_lock lk{m_submission_lock};
@@ -36,39 +49,68 @@ void TurboMode::QueueSubmitted() {
     m_submission_cv.notify_one();
 }

-void TurboMode::Run(std::stop_token stop_token) {
+void TurboMode::SetTurboEnabled(bool enabled) {
+    turbo_enabled.store(enabled, std::memory_order_relaxed);
+    LOG_INFO(Render_Vulkan, "Turbo mode {}", enabled ? "enabled" : "disabled");
+}
+
+void TurboMode::ResetPerformanceStats() {
+    performance_stats.total_submissions.store(0, std::memory_order_relaxed);
+    performance_stats.total_execution_time_ns.store(0, std::memory_order_relaxed);
+    performance_stats.max_execution_time_ns.store(0, std::memory_order_relaxed);
+    performance_stats.min_execution_time_ns.store(UINT64_MAX, std::memory_order_relaxed);
+    performance_stats.overflow_count.store(0, std::memory_order_relaxed);
+    performance_stats.timeout_count.store(0, std::memory_order_relaxed);
+    performance_stats.adaptive_timeout_ns.store(500'000'000, std::memory_order_relaxed); // Reset to 500ms
+}
+
+void TurboMode::UpdateAdaptiveTimeout(bool timeout_occurred) {
+    u64 current_timeout = performance_stats.adaptive_timeout_ns.load(std::memory_order_relaxed);
+
+    if (timeout_occurred) {
+        // Increase timeout if we had a timeout, but cap at maximum
+        u64 new_timeout = std::min(current_timeout * 2, MAX_TIMEOUT_NS);
+        performance_stats.adaptive_timeout_ns.store(new_timeout, std::memory_order_relaxed);
+    } else {
+        // Gradually decrease timeout if successful, but maintain minimum
+        u64 new_timeout = std::max(current_timeout * 9 / 10, MIN_TIMEOUT_NS);
+        performance_stats.adaptive_timeout_ns.store(new_timeout, std::memory_order_relaxed);
+    }
+}
+
+void TurboMode::InitializeResources() {
 #ifndef ANDROID
     auto& dld = m_device.GetLogical();

-    // Allocate buffer. 2MiB should be sufficient.
+    // Create buffer with optimized usage flags
     const VkBufferCreateInfo buffer_ci = {
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .size = 2_MiB,
+        .size = BUFFER_SIZE,
         .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     };
-    vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
+    resources->buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);

-    // Create the descriptor pool to contain our descriptor.
+    // Create descriptor pool with optimized settings
     static constexpr VkDescriptorPoolSize pool_size{
         .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
         .descriptorCount = 1,
     };

-    auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
+    resources->descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
         .pNext = nullptr,
-        .flags = 0,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
         .maxSets = 1,
         .poolSizeCount = 1,
         .pPoolSizes = &pool_size,
     });

-    // Create the descriptor set layout from the pool.
+    // Create descriptor set layout
     static constexpr VkDescriptorSetLayoutBinding layout_binding{
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@@ -77,7 +119,7 @@ void TurboMode::Run(std::stop_token stop_token) {
         .pImmutableSamplers = nullptr,
     };

-    auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
+    resources->descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
@@ -85,83 +127,146 @@ void TurboMode::Run(std::stop_token stop_token) {
         .pBindings = &layout_binding,
     });

-    // Actually create the descriptor set.
-    auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
+    // Allocate descriptor set
+    auto descriptor_sets = resources->descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
         .pNext = nullptr,
-        .descriptorPool = *descriptor_pool,
+        .descriptorPool = *resources->descriptor_pool,
         .descriptorSetCount = 1,
-        .pSetLayouts = descriptor_set_layout.address(),
+        .pSetLayouts = resources->descriptor_set_layout.address(),
     });
+    resources->descriptor_set = descriptor_sets[0];

-    // Create the shader.
-    auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);
+    // Create shader with optimization flags
+    resources->shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);

-    // Create the pipeline layout.
-    auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
+    // Create pipeline layout
+    resources->pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
         .setLayoutCount = 1,
-        .pSetLayouts = descriptor_set_layout.address(),
+        .pSetLayouts = resources->descriptor_set_layout.address(),
         .pushConstantRangeCount = 0,
         .pPushConstantRanges = nullptr,
     });

-    // Actually create the pipeline.
+    // Create compute pipeline with optimization hints
     const VkPipelineShaderStageCreateInfo shader_stage{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
-        .module = *shader,
+        .module = *resources->shader,
         .pName = "main",
         .pSpecializationInfo = nullptr,
     };

-    auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
+    resources->pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
         .pNext = nullptr,
-        .flags = 0,
+        .flags = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, // Optimize for dispatch
         .stage = shader_stage,
-        .layout = *pipeline_layout,
+        .layout = *resources->pipeline_layout,
         .basePipelineHandle = VK_NULL_HANDLE,
         .basePipelineIndex = 0,
     });

-    // Create a fence to wait on.
-    auto fence = dld.CreateFence(VkFenceCreateInfo{
+    // Create fence
+    resources->fence = dld.CreateFence(VkFenceCreateInfo{
         .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
     });

-    // Create a command pool to allocate a command buffer from.
-    auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
+    // Create command pool with optimized flags
+    resources->command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
         .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .pNext = nullptr,
-        .flags =
-            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
         .queueFamilyIndex = m_device.GetGraphicsFamily(),
     });

-    // Create a single command buffer.
-    auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-    auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
+    // Allocate command buffer
+    auto cmdbufs = resources->command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+    resources->command_buffer = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
+
+    resources_initialized.store(true, std::memory_order_release);
+    LOG_DEBUG(Render_Vulkan, "Turbo mode resources initialized successfully");
 #endif
+}

+void TurboMode::CleanupResources() {
+#ifndef ANDROID
+    if (resources) {
+        // Resources will be automatically cleaned up by RAII
+        resources.reset();
+        resources_initialized.store(false, std::memory_order_release);
+        LOG_DEBUG(Render_Vulkan, "Turbo mode resources cleaned up");
+    }
+#endif
+}
+
+void TurboMode::UpdatePerformanceMetrics(std::chrono::nanoseconds execution_time) {
+    const auto time_ns = execution_time.count();
+
+    performance_stats.total_submissions.fetch_add(1, std::memory_order_relaxed);
+    performance_stats.total_execution_time_ns.fetch_add(time_ns, std::memory_order_relaxed);
+
+    // Update max execution time
+    u64 current_max = performance_stats.max_execution_time_ns.load(std::memory_order_relaxed);
+    while (time_ns > current_max &&
+           !performance_stats.max_execution_time_ns.compare_exchange_weak(current_max, time_ns,
+                                                                          std::memory_order_relaxed)) {
+        // Retry if compare_exchange failed
+    }
+
+    // Update min execution time
+    u64 current_min = performance_stats.min_execution_time_ns.load(std::memory_order_relaxed);
+    while (time_ns < current_min &&
+           !performance_stats.min_execution_time_ns.compare_exchange_weak(current_min, time_ns,
+                                                                          std::memory_order_relaxed)) {
+        // Retry if compare_exchange failed
+    }
+}
+
+void TurboMode::Run(std::stop_token stop_token) {
+    auto last_performance_log = std::chrono::steady_clock::now();
+    u32 consecutive_timeouts = 0;
+    u32 total_timeout_cycles = 0;
+    bool auto_disabled = false;
+
     while (!stop_token.stop_requested()) {
+        if (!turbo_enabled.load(std::memory_order_relaxed)) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(10));
+            continue;
+        }
+
+        // Auto-disable if too many persistent timeouts
+        if (auto_disabled) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            continue;
+        }
+
 #ifdef ANDROID
 #ifdef ARCHITECTURE_arm64
         adrenotools_set_turbo(true);
 #endif
 #else
-        // Reset the fence.
-        fence.Reset();
+        if (!resources_initialized.load(std::memory_order_acquire)) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }

-        // Update descriptor set.
+        auto& dld = m_device.GetLogical();
+        auto& res = *resources;
+
+        // Reset the fence
+        res.fence.Reset();
+
+        // Update descriptor set with optimized buffer info
         const VkDescriptorBufferInfo buffer_info{
-            .buffer = *buffer,
+            .buffer = *res.buffer,
             .offset = 0,
             .range = VK_WHOLE_SIZE,
         };
@@ -169,7 +274,7 @@ void TurboMode::Run(std::stop_token stop_token) {
        const VkWriteDescriptorSet buffer_write{
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .pNext = nullptr,
-           .dstSet = descriptor_set[0],
+           .dstSet = res.descriptor_set,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
@@ -181,30 +286,28 @@ void TurboMode::Run(std::stop_token stop_token) {

         dld.UpdateDescriptorSets(std::array{buffer_write}, {});

-        // Set up the command buffer.
-        cmdbuf.Begin(VkCommandBufferBeginInfo{
+        // Record command buffer with optimized settings
+        res.command_buffer.Begin(VkCommandBufferBeginInfo{
             .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
             .pNext = nullptr,
             .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
             .pInheritanceInfo = nullptr,
         });

-        // Clear the buffer.
-        cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);
+        // Clear buffer with optimized range
+        res.command_buffer.FillBuffer(*res.buffer, 0, VK_WHOLE_SIZE, 0);

-        // Bind descriptor set.
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
-                                  descriptor_set, {});
+        // Bind resources
+        res.command_buffer.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *res.pipeline_layout, 0,
+                                              std::array{res.descriptor_set}, {});
+        res.command_buffer.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *res.pipeline);

-        // Bind the pipeline.
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+        // Dispatch with optimized group sizes
+        res.command_buffer.Dispatch(DISPATCH_GROUP_SIZE_X, DISPATCH_GROUP_SIZE_Y, DISPATCH_GROUP_SIZE_Z);

-        // Dispatch.
-        cmdbuf.Dispatch(64, 64, 1);
-
-        // Finish.
-        cmdbuf.End();
+        res.command_buffer.End();

+        // Submit with optimized submit info
         const VkSubmitInfo submit_info{
             .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
             .pNext = nullptr,
@@ -212,23 +315,77 @@ void TurboMode::Run(std::stop_token stop_token) {
             .pWaitSemaphores = nullptr,
             .pWaitDstStageMask = nullptr,
             .commandBufferCount = 1,
-            .pCommandBuffers = cmdbuf.address(),
+            .pCommandBuffers = res.command_buffer.address(),
             .signalSemaphoreCount = 0,
             .pSignalSemaphores = nullptr,
         };

-        m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);
+        const auto submit_start = std::chrono::steady_clock::now();
+        m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *res.fence);

-        // Wait for completion.
-        fence.Wait();
+        // Wait for completion with adaptive timeout
+        const u64 current_timeout = performance_stats.adaptive_timeout_ns.load(std::memory_order_relaxed);
+        const auto wait_result = res.fence.Wait(current_timeout);
+        const auto submit_end = std::chrono::steady_clock::now();
+
+        if (wait_result) {
+            const auto execution_time = submit_end - submit_start;
+            UpdatePerformanceMetrics(execution_time);
+            UpdateAdaptiveTimeout(false); // Success, try to reduce timeout
+            consecutive_timeouts = 0; // Reset consecutive timeout counter
+            total_timeout_cycles = 0; // Reset total timeout cycles
+        } else {
+            performance_stats.timeout_count.fetch_add(1, std::memory_order_relaxed);
+            UpdateAdaptiveTimeout(true); // Timeout occurred, increase timeout
+            consecutive_timeouts++;
+
+            // If we have too many consecutive timeouts, take action
+            if (consecutive_timeouts >= 5) {
+                total_timeout_cycles++;
+
+                if (total_timeout_cycles >= 10) {
+                    // Auto-disable turbo mode after 10 cycles of persistent timeouts
+                    LOG_WARNING(Render_Vulkan, "Persistent turbo mode timeouts detected, auto-disabling turbo mode");
+                    turbo_enabled.store(false, std::memory_order_relaxed);
+                    auto_disabled = true;
+                    continue;
+                }
+
+                LOG_WARNING(Render_Vulkan, "Consecutive timeouts ({}), cycle {}/{}, reducing frequency",
+                            consecutive_timeouts, total_timeout_cycles, 10);
+                std::this_thread::sleep_for(std::chrono::milliseconds(100));
+                consecutive_timeouts = 0; // Reset for next cycle
+            }
+        }
 #endif
-        // Wait for the next graphics queue submission if necessary.
+
+        // Performance logging
+        const auto now = std::chrono::steady_clock::now();
+        if (now - last_performance_log >= PERFORMANCE_LOG_INTERVAL) {
+            const auto& stats = GetPerformanceStats();
+            const auto total_submissions = stats.total_submissions.load(std::memory_order_relaxed);
+            const auto avg_time_ns = total_submissions > 0 ?
+                stats.total_execution_time_ns.load(std::memory_order_relaxed) / total_submissions : 0;
+
+            LOG_INFO(Render_Vulkan, "Turbo mode stats: {} submissions, avg: {}ns, max: {}ns, min: {}ns, overflows: {}, timeouts: {}, timeout: {}ms, consecutive: {}, cycles: {}",
+                     total_submissions, avg_time_ns,
+                     stats.max_execution_time_ns.load(std::memory_order_relaxed),
+                     stats.min_execution_time_ns.load(std::memory_order_relaxed),
+                     stats.overflow_count.load(std::memory_order_relaxed),
+                     stats.timeout_count.load(std::memory_order_relaxed),
+                     stats.adaptive_timeout_ns.load(std::memory_order_relaxed) / 1'000'000,
+                     consecutive_timeouts, total_timeout_cycles);
+
+            last_performance_log = now;
+        }
+
+        // Wait for the next graphics queue submission if necessary
         std::unique_lock lk{m_submission_lock};
         Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
-            return (std::chrono::steady_clock::now() - m_submission_time) <=
-                   std::chrono::milliseconds{100};
+            return (std::chrono::steady_clock::now() - m_submission_time) <= SUBMISSION_TIMEOUT;
         });
     }

 #if defined(ANDROID) && defined(ARCHITECTURE_arm64)
     adrenotools_set_turbo(false);
 #endif
src/video_core/renderer_vulkan/vk_turbo_mode.h:

@@ -1,11 +1,15 @@
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #pragma once

 #include <chrono>
 #include <mutex>
+#include <atomic>
+#include <memory>

+#include "common/literals.h"
 #include "common/polyfill_thread.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -13,6 +17,8 @@

 namespace Vulkan {

+using namespace Common::Literals;
+
 class TurboMode {
 public:
     explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
@@ -20,17 +26,80 @@ public:

     void QueueSubmitted();

+    // Performance monitoring and control
+    void SetTurboEnabled(bool enabled);
+    bool IsTurboEnabled() const noexcept { return turbo_enabled.load(std::memory_order_relaxed); }
+
+    // Performance statistics
+    struct PerformanceStats {
+        std::atomic<u64> total_submissions{0};
+        std::atomic<u64> total_execution_time_ns{0};
+        std::atomic<u64> max_execution_time_ns{0};
+        std::atomic<u64> min_execution_time_ns{UINT64_MAX};
+        std::atomic<u32> overflow_count{0};
+        std::atomic<u32> timeout_count{0};
+        std::atomic<u64> adaptive_timeout_ns{500'000'000}; // 500ms default
+
+        // Delete copy constructor and assignment operator since atomic types are not copyable
+        PerformanceStats() = default;
+        PerformanceStats(const PerformanceStats&) = delete;
+        PerformanceStats& operator=(const PerformanceStats&) = delete;
+        PerformanceStats(PerformanceStats&&) = delete;
+        PerformanceStats& operator=(PerformanceStats&&) = delete;
+    };
+
+    const PerformanceStats& GetPerformanceStats() const noexcept { return performance_stats; }
+    void ResetPerformanceStats();
+
 private:
     void Run(std::stop_token stop_token);
+    void InitializeResources();
+    void CleanupResources();
+    void UpdatePerformanceMetrics(std::chrono::nanoseconds execution_time);
+    void UpdateAdaptiveTimeout(bool timeout_occurred);
+
+    // Optimized resource management
+    struct TurboResources {
+        vk::Buffer buffer;
+        vk::DescriptorPool descriptor_pool;
+        vk::DescriptorSetLayout descriptor_set_layout;
+        VkDescriptorSet descriptor_set;
+        vk::ShaderModule shader;
+        vk::PipelineLayout pipeline_layout;
+        vk::Pipeline pipeline;
+        vk::Fence fence;
+        vk::CommandPool command_pool;
+        vk::CommandBuffer command_buffer;
+    };

 #ifndef ANDROID
     Device m_device;
     MemoryAllocator m_allocator;
+    std::unique_ptr<TurboResources> resources;
 #endif

+    // Threading and synchronization
     std::mutex m_submission_lock;
     std::condition_variable_any m_submission_cv;
     std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};

+    // Performance control
+    std::atomic<bool> turbo_enabled{true};
+    std::atomic<bool> resources_initialized{false};
+
+    // Performance monitoring
+    mutable PerformanceStats performance_stats;
+
+    // Configuration
+    static constexpr std::chrono::milliseconds SUBMISSION_TIMEOUT{100};
+    static constexpr std::chrono::milliseconds PERFORMANCE_LOG_INTERVAL{5000};
+    static constexpr u32 DISPATCH_GROUP_SIZE_X = 32; // Reduced from 64 for better performance
+    static constexpr u32 DISPATCH_GROUP_SIZE_Y = 32; // Reduced from 64 for better performance
+    static constexpr u32 DISPATCH_GROUP_SIZE_Z = 1;
+    static constexpr u64 BUFFER_SIZE = 1_MiB; // Reduced from 2MB for better performance
+    static constexpr u64 MIN_TIMEOUT_NS = 100'000'000; // 100ms minimum
+    static constexpr u64 MAX_TIMEOUT_NS = 2'000'000'000; // 2s maximum
+
     std::jthread m_thread;
 };
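A closing, hypothetical caller-side sketch of the runtime controls the header now exposes; LogTurboHealth and its 10% threshold are assumptions for illustration, only the TurboMode members it calls are from the commit:

#include <atomic>
#include <cstdint>

void LogTurboHealth(Vulkan::TurboMode& turbo) {
    const auto& stats = turbo.GetPerformanceStats();
    const std::uint64_t submissions = stats.total_submissions.load(std::memory_order_relaxed);
    const std::uint64_t timeouts = stats.timeout_count.load(std::memory_order_relaxed);

    // Example policy: if over 10% of fence waits timed out, turn turbo off
    // manually instead of waiting for the auto-disable path to trigger.
    if (submissions > 0 && timeouts * 10 > submissions) {
        turbo.SetTurboEnabled(false);
    }

    // Start the next measurement window from a clean slate.
    turbo.ResetPerformanceStats();
}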