From 7db12d7e808b23f57442cc0a41aa24b06d36c1ff Mon Sep 17 00:00:00 2001 From: Zephyron Date: Sat, 13 Dec 2025 09:49:04 +1000 Subject: [PATCH] video_core: MCI boot fixes and DMA multisized components support Add workarounds for Marvel Cosmic Invasion boot issues: - Skip first 2 compute dispatches (xbzk@eden-emu.dev) - Clamp staging buffers to 2GB to prevent Vulkan failures (xbzk@eden-emu.dev) - Validate staging buffer sizes before uploads (xbzk@eden-emu.dev) Also improve DMA engine to support multisized components (1-4 bytes) instead of hardcoded 4-byte components. Co-authored-by: xbzk Signed-off-by: Zephyron --- src/citron/util/title_ids.h | 3 +- src/video_core/buffer_cache/buffer_cache.h | 11 +++++ src/video_core/engines/maxwell_dma.cpp | 48 +++++++++++++++---- .../renderer_vulkan/vk_rasterizer.cpp | 12 +++++ .../vk_staging_buffer_pool.cpp | 12 ++++- .../renderer_vulkan/vk_staging_buffer_pool.h | 6 +++ 6 files changed, 79 insertions(+), 13 deletions(-) diff --git a/src/citron/util/title_ids.h b/src/citron/util/title_ids.h index 049fc3081..7098ffc89 100644 --- a/src/citron/util/title_ids.h +++ b/src/citron/util/title_ids.h @@ -14,9 +14,8 @@ private: public: static constexpr u64 FinalFantasyTactics = 0x010038B015560000ULL; - // Base title ID for Little Nightmares 3 (covers both 0x010066101A55A800 and 0x010066101A55A000) - // The base title ID is obtained by masking with 0xFFFFFFFFFFFFE000 static constexpr u64 LittleNightmares3Base = 0x010066101A55A000ULL; + static constexpr u64 MarvelCosmicInvasion = 0x010059D020C26000ULL; }; } // namespace UICommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0c230f1fc..9302d6ace 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -9,6 +9,7 @@ #include #include "common/range_sets.inc" +#include "citron/util/title_ids.h" #include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/guest_memory.h" #include "video_core/host1x/gpu_device_memory_manager.h" @@ -1512,6 +1513,16 @@ void BufferCache

::MappedUploadMemory([[maybe_unused]] Buffer& buffer, if constexpr (USE_MEMORY_MAPS) { auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); const std::span staging_pointer = upload_staging.mapped_span; + + // Validate staging buffer size to prevent buffer overruns + // This can happen if the requested size exceeds driver limits (e.g., 2GB) + // Only apply this workaround for Marvel Cosmic Invasion + if (program_id == UICommon::TitleID::MarvelCosmicInvasion && + staging_pointer.size() < total_size_bytes) { + // Staging buffer is too small, skip this upload to avoid corruption + return; + } + for (BufferCopy& copy : copies) { u8* const src_pointer = staging_pointer.data() + copy.src_offset; const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2ebd21fc5..a62089528 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "common/algorithm.h" @@ -104,19 +105,46 @@ void MaxwellDMA::Launch() { } } } else { - // TODO: allow multisized components. auto& accelerate = rasterizer->AccessAccelerateDMA(); const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { - ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, - regs.remap_const.remap_consta_value); - read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); - std::span span(reinterpret_cast(read_buffer.data()), regs.line_length_in); - std::ranges::fill(span, regs.remap_const.remap_consta_value); - memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast(read_buffer.data()), - regs.line_length_in * sizeof(u32)); + // Support multisized components (1-4 bytes per component) + // component_size_minus_one: 0=1 byte, 1=2 bytes, 2=3 bytes, 3=4 bytes + const u32 component_size = regs.remap_const.component_size_minus_one + 1; + const u32 num_dst_components = regs.remap_const.num_dst_components_minus_one + 1; + const u32 bytes_per_element = num_dst_components * component_size; + const u32 total_size = regs.line_length_in * bytes_per_element; + + // Use accelerated buffer clear if available and matches the simple case + // (4-byte components, single component per element) + if (component_size == sizeof(u32) && num_dst_components == 1) { + accelerate.BufferClear(regs.offset_out, regs.line_length_in, + regs.remap_const.remap_consta_value); + } + + // Prepare buffer with properly sized components + // Each element contains num_dst_components, each of component_size bytes + // The constant value is decomposed into bytes and written to each component + read_buffer.resize_destructive(total_size); + u8* const buffer_ptr = read_buffer.data(); + const u32 constant_value = regs.remap_const.remap_consta_value; + + // Fill buffer: for each element, write num_dst_components of component_size bytes + // Each component gets the same constant value, decomposed according to component_size + for (u32 element = 0; element < regs.line_length_in; ++element) { + u8* element_ptr = buffer_ptr + (element * bytes_per_element); + + // Write each component with the constant value + for (u32 comp = 0; comp < num_dst_components; ++comp) { + u8* component_ptr = element_ptr + (comp * component_size); + // Extract bytes from constant value in little-endian order + for (u32 byte = 0; byte < component_size; ++byte) { + component_ptr[byte] = static_cast((constant_value >> (byte * 8)) & 0xFF); + } + } + } + + memory_manager.WriteBlockUnsafe(regs.offset_out, buffer_ptr, total_size); } else { memory_manager.FlushCaching(); const auto convert_linear_2_blocklinear_addr = [](u64 address) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 31cfacef0..2e18a1d0c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -37,6 +38,7 @@ #include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "citron/util/title_ids.h" namespace Vulkan { @@ -487,6 +489,15 @@ void RasterizerVulkan::Clear(u32 layer_count) { } void RasterizerVulkan::DispatchCompute() { + // Skip first 2 dispatches for Marvel Cosmic Invasion to fix boot issues + if (program_id == UICommon::TitleID::MarvelCosmicInvasion) { + static u32 dispatch_count = 0; + if (dispatch_count < 2) { + dispatch_count++; + return; + } + } + FlushWork(); gpu_memory->FlushCaching(); @@ -1604,6 +1615,7 @@ void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { const s32 channel_id = channel.bind_id; + staging_pool.SetProgramId(channel.program_id); BindToChannel(channel_id); { std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 663335639..7e6d8d17d 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -16,6 +17,7 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "citron/util/title_ids.h" namespace Vulkan { namespace { @@ -235,7 +237,15 @@ std::optional StagingBufferPool::TryGetReservedBuffer(size_t s StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred) { - const u32 log2 = Common::Log2Ceil64(size); + u32 log2 = Common::Log2Ceil64(size); + + // Only apply this workaround for Marvel Cosmic Invasion + if (program_id == UICommon::TitleID::MarvelCosmicInvasion) { + static constexpr u32 MAX_STAGING_BUFFER_LOG2 = 31U; + // Calculate log2 of requested size, but clamp to maximum to prevent overflow + // This ensures we still round up to the next power of 2, but cap at 2GB + log2 = std::min(log2, MAX_STAGING_BUFFER_LOG2); + } VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 78111f562..52b623ae1 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -44,6 +45,10 @@ public: u64 GetMemoryUsage() const; + void SetProgramId(u64 program_id_) { + program_id = program_id_; + } + private: struct StreamBufferCommit { size_t upper_bound; @@ -121,6 +126,7 @@ private: size_t current_delete_level = 0; u64 buffer_index = 0; u64 unique_ids{}; + u64 program_id{}; }; } // namespace Vulkan