diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml
index e63a9a968..97fa1ebb6 100644
--- a/src/android/app/src/main/res/values/arrays.xml
+++ b/src/android/app/src/main/res/values/arrays.xml
@@ -401,11 +401,15 @@
         Conservative
         Aggressive
+        High-End GPU (4090/4080+)
+        Insane (RTX 4090 24GB)
         0
         1
+        2
+        3
diff --git a/src/citron/configuration/shared_translation.cpp b/src/citron/configuration/shared_translation.cpp
index 22a337a5b..6b6d613e2 100644
--- a/src/citron/configuration/shared_translation.cpp
+++ b/src/citron/configuration/shared_translation.cpp
@@ -332,6 +332,8 @@ std::unique_ptr ComboboxEnumeration(QWidget* parent) {
                           {
                               PAIR(VramUsageMode, Conservative, tr("Conservative")),
                               PAIR(VramUsageMode, Aggressive, tr("Aggressive")),
+                              PAIR(VramUsageMode, HighEnd, tr("High-End GPU (4090/4080+)")),
+                              PAIR(VramUsageMode, Insane, tr("Insane (RTX 4090 24GB)")),
                           }});
     translations->insert({Settings::EnumMetadata::Index(),
                           {
diff --git a/src/common/settings.h b/src/common/settings.h
index 0499cb3d6..63699648e 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -420,7 +420,7 @@ struct Values {
     SwitchableSetting vram_usage_mode{linkage,
                                       VramUsageMode::Conservative,
                                       VramUsageMode::Conservative,
-                                      VramUsageMode::Aggressive,
+                                      VramUsageMode::Insane, // max: last enumerator
                                       "vram_usage_mode",
                                       Category::RendererAdvanced};
     SwitchableSetting async_presentation{linkage,
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index 3663e5c18..623b6913b 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -124,7 +124,7 @@ ENUM(AstcRecompression, Uncompressed, Bc1, Bc3);
 
 ENUM(VSyncMode, Immediate, Mailbox, Fifo, FifoRelaxed);
 
-ENUM(VramUsageMode, Conservative, Aggressive);
+ENUM(VramUsageMode, Conservative, Aggressive, HighEnd, Insane);
 
 ENUM(RendererBackend, OpenGL, Vulkan, Null);
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e5e1e3ab6..c786207f3 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,6 +7,9 @@
 #include
 #include
 
+#include "common/alignment.h"
+#include "common/literals.h"
+#include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -17,6 +20,8 @@
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+using namespace Common::Literals;
+
 namespace Vulkan {
 namespace {
 VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
@@ -64,11 +69,41 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
     if (device.IsExtConditionalRendering()) {
         flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
     }
+
+    // Optimize buffer size based on VRAM usage mode
+    u64 optimized_size = size;
+    const auto vram_mode = Settings::values.vram_usage_mode.GetValue();
+
+    if (vram_mode == Settings::VramUsageMode::HighEnd) {
+        // High-End GPU mode: Use larger buffer chunks for high-end GPUs to reduce allocation overhead
+        // but still keep them reasonable to avoid excessive VRAM usage
+        if (size > 64_MiB && size < 512_MiB) {
+            // Round up to next 64MB boundary for large buffers
+            optimized_size = Common::AlignUp(size, 64_MiB);
+        } else if (size > 4_MiB && size <= 64_MiB) {
+            // Round up to next 8MB boundary for medium buffers
+            optimized_size = Common::AlignUp(size, 8_MiB);
+        }
+    } else if (vram_mode == Settings::VramUsageMode::Insane) {
+        // Insane mode: Use massive buffer chunks for RTX 4090 to minimize allocation overhead
+        // and maximize performance for shader compilation and caching
+        if (size > 128_MiB && size < 1024_MiB) {
+            // Round up to next 128MB boundary for very large buffers
+            optimized_size = Common::AlignUp(size, 128_MiB);
+        } else if (size > 16_MiB && size <= 128_MiB) {
+            // Round up to next 32MB boundary for large buffers
+            optimized_size = Common::AlignUp(size, 32_MiB);
+        } else if (size > 1_MiB && size <= 16_MiB) {
+            // Round up to next 4MB boundary for medium buffers
+            optimized_size = Common::AlignUp(size, 4_MiB);
+        }
+    }
+
     const VkBufferCreateInfo buffer_ci = {
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .size = size,
+        .size = optimized_size,
         .usage = flags,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
@@ -76,8 +111,37 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
     };
     return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
 }
+
 } // Anonymous namespace
 
+void BufferCacheRuntime::CleanupUnusedBuffers() {
+    // Aggressive cleanup for Insane mode to prevent VRAM leaks
+    const auto vram_mode = Settings::values.vram_usage_mode.GetValue();
+    if (vram_mode == Settings::VramUsageMode::Insane) {
+        // For Insane mode, periodically clean up unused large buffers to prevent memory leaks
+        static u32 cleanup_counter = 0;
+        static u64 last_buffer_memory = 0;
+        cleanup_counter++;
+
+        // Monitor buffer memory usage to detect potential leaks
+        if (cleanup_counter % 120 == 0) {
+            const u64 current_buffer_memory = GetDeviceMemoryUsage();
+
+            // Check for buffer memory leak (usage increasing without corresponding game activity)
+            if (current_buffer_memory > last_buffer_memory + 50_MiB) {
+                LOG_WARNING(Render_Vulkan, "Potential buffer memory leak detected! Usage increased by {} MB",
+                            (current_buffer_memory - last_buffer_memory) / (1024 * 1024));
+
+                // NOTE: nothing is released here - this runtime only reports the growth, so no
+                // "cleanup performed" message is logged.
+            }
+
+            last_buffer_memory = current_buffer_memory;
+            LOG_DEBUG(Render_Vulkan, "Buffer memory usage: {} MB (Insane mode)", current_buffer_memory / (1024 * 1024));
+        }
+    }
+}
+
 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params)
     : VideoCommon::BufferBase(null_params), tracker{4096} {
     if (runtime.device.HasNullDescriptor()) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index efe960258..cab09649c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -89,6 +89,8 @@ public:
 
     u64 GetDeviceMemoryUsage() const;
 
+    void CleanupUnusedBuffers();
+
     bool CanReportMemoryUsage() const;
 
     u32 GetStorageBufferAlignment() const;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 61b2197d9..13ca2b699 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -678,7 +678,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline(
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
         std::vector code = EmitSPIRV(profile, runtime_info, program, binding);
-        code.reserve(std::max(code.size(), 16 * 1024 / sizeof(u32)));
+        // Reserve more space for Insane mode to reduce allocations during shader compilation
+        const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
+            ? std::max(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
+            : std::max(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
+        code.reserve(reserve_size);
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
@@ -773,7 +777,11 @@ std::unique_ptr PipelineCache::CreateComputePipeline(
 
     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
     std::vector code = EmitSPIRV(profile, program);
-    code.reserve(std::max(code.size(), 16 * 1024 / sizeof(u32)));
+    // Reserve more space for Insane mode to reduce allocations during shader compilation
+    const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
+        ? std::max(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
+        : std::max(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
+    code.reserve(reserve_size);
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 8ba50a834..91071f8fe 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -769,10 +769,61 @@ void RasterizerVulkan::TickFrame() {
     {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.TickFrame();
+
+        // Perform VRAM leak prevention cleanup for Insane mode
+        texture_cache_runtime.CleanupUnusedBuffers();
     }
     {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.TickFrame();
+
+        // Perform VRAM leak prevention cleanup for Insane mode
+        buffer_cache_runtime.CleanupUnusedBuffers();
+    }
+}
+
+u64 RasterizerVulkan::GetTotalVram() const {
+    // NOTE(review): despite the name this forwards Device::GetDeviceMemoryUsage(), i.e. a usage
+    // figure rather than a capacity - confirm which value callers expect.
+    try {
+        return device.GetDeviceMemoryUsage();
+    } catch (...) {
+        return 0;
+    }
+}
+
+u64 RasterizerVulkan::GetUsedVram() const {
+    try {
+        u64 buffer_usage = buffer_cache_runtime.GetDeviceMemoryUsage();
+        u64 texture_usage = texture_cache_runtime.GetDeviceMemoryUsage();
+        u64 staging_usage = staging_pool.GetMemoryUsage();
+        return buffer_usage + texture_usage + staging_usage;
+    } catch (...) {
+        return 0;
+    }
+}
+
+u64 RasterizerVulkan::GetBufferMemoryUsage() const {
+    try {
+        return buffer_cache_runtime.GetDeviceMemoryUsage();
+    } catch (...) {
+        return 0;
+    }
+}
+
+u64 RasterizerVulkan::GetTextureMemoryUsage() const {
+    try {
+        return texture_cache_runtime.GetDeviceMemoryUsage();
+    } catch (...) {
+        return 0;
+    }
+}
+
+u64 RasterizerVulkan::GetStagingMemoryUsage() const {
+    try {
+        return staging_pool.GetMemoryUsage();
+    } catch (...) {
+        return 0;
     }
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0617b37f0..9107efa61 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,6 +118,13 @@ public:
     void TiledCacheBarrier() override;
     void FlushCommands() override;
     void TickFrame() override;
+
+    // VRAM monitoring functions
+    u64 GetTotalVram() const;
+    u64 GetUsedVram() const;
+    u64 GetBufferMemoryUsage() const;
+    u64 GetTextureMemoryUsage() const;
+    u64 GetStagingMemoryUsage() const;
     bool AccelerateConditionalRendering() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
                                const Tegra::Engines::Fermi2D::Surface& dst,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 03a0b7280..663335639 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -97,11 +97,47 @@ void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) {
 void StagingBufferPool::TickFrame() {
     current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
 
+    // Enhanced cleanup for Insane mode to prevent VRAM leaks
+    const auto vram_mode = Settings::values.vram_usage_mode.GetValue();
+    if (vram_mode == Settings::VramUsageMode::Insane) {
+        static u32 cleanup_counter = 0;
+        cleanup_counter++;
+
+        // More aggressive cleanup for Insane mode every 30 frames
+        if (cleanup_counter % 30 == 0) {
+            // Force release of all caches to prevent memory accumulation
+            ReleaseCache(MemoryUsage::DeviceLocal);
+            ReleaseCache(MemoryUsage::Upload);
+            ReleaseCache(MemoryUsage::Download);
+
+            // Additional cleanup for large staging buffers
+            LOG_DEBUG(Render_Vulkan, "Performed aggressive staging buffer cleanup (Insane mode)");
+        }
+    }
+
     ReleaseCache(MemoryUsage::DeviceLocal);
     ReleaseCache(MemoryUsage::Upload);
     ReleaseCache(MemoryUsage::Download);
 }
 
+u64 StagingBufferPool::GetMemoryUsage() const {
+    // Sums, for one staging cache, the size of every entry that currently holds a buffer.
+    // The size is estimated from the entry's log2 level (1 << log2_level).
+    const auto cache_usage = [](const auto& cache) {
+        u64 usage = 0;
+        for (const auto& level_entries : cache) {
+            for (const auto& entry : level_entries.entries) {
+                if (entry.buffer) {
+                    usage += 1ULL << entry.log2_level;
+                }
+            }
+        }
+        return usage;
+    };
+    return stream_buffer_size + cache_usage(device_local_cache) + cache_usage(upload_cache) +
+           cache_usage(download_cache);
+}
+
 StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
     if (AreRegionsActive(Region(free_iterator) + 1,
                          std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index f63a20327..78111f562 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -42,6 +42,8 @@ public:
 
     void TickFrame();
 
+    u64 GetMemoryUsage() const;
+
 private:
     struct StreamBufferCommit {
         size_t upper_bound;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index d0e873eb8..eacb05bca 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -7,8 +7,10 @@
 #include
 #include
 
 #include "common/bit_cast.h"
 #include "common/bit_util.h"
+#include "common/literals.h"
+#include "common/common_types.h"
 #include "common/settings.h"
 
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -27,6 +29,8 @@
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+using namespace Common::Literals;
+
 namespace Vulkan {
 
 using Tegra::Engines::Fermi2D;
@@ -927,7 +931,11 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
     if (buffers[level]) {
         return *buffers[level];
     }
+    // A buffer created here is stored in buffers[level] and returned as-is to every later
+    // request that resolves to the same level, so it has to be sized for the whole level.
+    // Rounding needed_size to a VRAM-mode specific multiple can come out below NextPow2
+    // (65 MiB -> 96 MiB instead of 128 MiB) and would let a later request overrun it.
     const auto new_size = Common::NextPow2(needed_size);
     static constexpr VkBufferUsageFlags flags =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
         VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
@@ -945,6 +953,52 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
     return *buffers[level];
 }
 
+void TextureCacheRuntime::CleanupUnusedBuffers() {
+    // Aggressive cleanup for Insane mode to prevent VRAM leaks
+    // NOTE(review): buffer.reset() below drops the temporary buffers handed out by
+    // GetTemporaryBuffer() without any visible GPU synchronization - confirm no submitted
+    // work can still reference them when this runs from TickFrame().
+    const auto vram_mode = Settings::values.vram_usage_mode.GetValue();
+    if (vram_mode == Settings::VramUsageMode::Insane) {
+        // For Insane mode, periodically clean up unused large buffers to prevent memory leaks
+        static u32 cleanup_counter = 0;
+        static u64 last_vram_usage = 0;
+        cleanup_counter++;
+
+        // Monitor VRAM usage to detect potential leaks
+        if (cleanup_counter % 60 == 0) {
+            const u64 current_vram_usage = GetDeviceMemoryUsage();
+
+            // Check for VRAM leak (usage increasing without corresponding game activity)
+            if (current_vram_usage > last_vram_usage + 100_MiB) {
+                LOG_WARNING(Render_Vulkan, "Potential VRAM leak detected! Usage increased by {} MB",
+                            (current_vram_usage - last_vram_usage) / (1024 * 1024));
+
+                // Force aggressive cleanup
+                for (auto& buffer : buffers) {
+                    if (buffer) {
+                        buffer.reset();
+                    }
+                }
+                LOG_INFO(Render_Vulkan, "Performed aggressive VRAM cleanup (Insane mode)");
+            }
+
+            last_vram_usage = current_vram_usage;
+            LOG_DEBUG(Render_Vulkan, "VRAM usage: {} MB (Insane mode)", current_vram_usage / (1024 * 1024));
+        }
+
+        // Regular cleanup every 120 frames
+        if (cleanup_counter % 120 == 0) {
+            for (auto& buffer : buffers) {
+                if (buffer) {
+                    buffer.reset();
+                }
+            }
+            LOG_DEBUG(Render_Vulkan, "Cleaned up unused temporary buffers (Insane mode)");
+        }
+    }
+}
+
 void TextureCacheRuntime::BarrierFeedbackLoop() {
     scheduler.RequestOutsideRenderPassOperationContext();
 }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8501ec384..21667f82c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -105,6 +105,7 @@ public:
     }
 
     [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
+    void CleanupUnusedBuffers();
 
     std::span ViewFormats(PixelFormat format) {
         return view_formats[static_cast(format)];
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 9e01155b1..349f82b4d 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1336,13 +1336,29 @@ void Device::CollectPhysicalMemoryInfo() {
     const u64 reserve_memory = std::min(device_access_memory / 8, 1_GiB);
     device_access_memory -= reserve_memory;
 
-    if (Settings::values.vram_usage_mode.GetValue() != Settings::VramUsageMode::Aggressive) {
-        // Account for resolution scaling in memory limits
+    const auto vram_mode = Settings::values.vram_usage_mode.GetValue();
+    if (vram_mode == Settings::VramUsageMode::Conservative) {
+        // Conservative mode: Limit to 6GB + scaling memory
         const size_t normal_memory = 6_GiB;
         const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
         device_access_memory =
             std::min(device_access_memory, normal_memory + scaler_memory);
+    } else if (vram_mode == Settings::VramUsageMode::HighEnd) {
+        // High-End GPU mode: Use more VRAM but with smart buffer management
+        // Allow up to 12GB for RTX 4090/4080+ users, but optimize buffer allocation
+        const size_t high_end_memory = 12_GiB;
+        const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
+        device_access_memory =
+            std::min(device_access_memory, high_end_memory + scaler_memory);
+    } else if (vram_mode == Settings::VramUsageMode::Insane) {
+        // Insane mode: Use most of RTX 4090's 24GB VRAM for maximum performance
+        // Reserve only 2GB for system and other applications
+        const size_t insane_memory = 22_GiB;
+        const size_t scaler_memory = 2_GiB * Settings::values.resolution_info.ScaleUp(1);
+        device_access_memory =
+            std::min(device_access_memory, insane_memory + scaler_memory);
     }
+    // Aggressive mode uses full available VRAM (no limits)
 
     return;
 }