From 989049df079d6785da6f3714a178e0c79efaa60e Mon Sep 17 00:00:00 2001 From: Collecting Date: Sun, 28 Dec 2025 19:09:08 +0000 Subject: [PATCH] video_core: Optimize CDmaPusher command execution and logging Force inlining of the ExecuteCommand function to reduce CPU overhead in the GPU command processing hot path. Additionally, silence debug logging metadata within the function to prevent string-processing logic from blocking compiler optimizations. Includes safeguards for multi-compiler and cross-platform compatibility. Signed-off-by: Collecting --- src/video_core/cdma_pusher.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 28a2d2090..f76dbb207 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Ryujinx Team and Contributors +// SPDX-FileCopyrightText: 2025 citron Emulator Project // SPDX-License-Identifier: MIT #include @@ -67,12 +68,20 @@ void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) { } } +#if defined(__clang__) || defined(__GNUC__) +__attribute__((always_inline)) +#elif defined(_MSC_VER) +__forceinline +#endif void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { switch (current_class) { case ChClassId::NvDec: ThiStateWrite(nvdec_thi_state, offset, data); switch (static_cast(offset)) { case ThiMethod::IncSyncpt: { + #ifdef __clang__ + if constexpr (false) // Extreme Squeeze: Skip log metadata prep in hot path + #endif LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); @@ -85,6 +94,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { break; } case ThiMethod::SetMethod1: + #ifdef __clang__ + if constexpr (false) + #endif LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast(nvdec_thi_state.method_0)); 
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); @@ -97,6 +109,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { ThiStateWrite(vic_thi_state, static_cast(state_offset), {data}); switch (static_cast(state_offset)) { case ThiMethod::IncSyncpt: { + #ifdef __clang__ + if constexpr (false) + #endif LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); @@ -109,6 +124,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { break; } case ThiMethod::SetMethod1: + #ifdef __clang__ + if constexpr (false) + #endif LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", static_cast(vic_thi_state.method_0), data); vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), @@ -120,6 +138,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { break; case ChClassId::Control: // This device is mainly for syncpoint synchronization + #ifdef __clang__ + if constexpr (false) + #endif LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); host1x_processor->ProcessMethod(static_cast(offset), data); break;