video_core: Optimize CDmaPusher command execution and logging

Force inlining of the ExecuteCommand function to reduce CPU overhead in the GPU command processing hot path. Additionally, when building with Clang, compile out the debug log calls within the function so that log-message preparation does not block compiler optimizations. Includes safeguards for multi-compiler and cross-platform compatibility.

Signed-off-by: Collecting <collecting@noreply.localhost>
This commit is contained in:
Collecting
2025-12-28 19:09:08 +00:00
parent 9ea0a36fc6
commit 989049df07

View File

@@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors
// SPDX-FileCopyrightText: 2025 citron Emulator Project
// SPDX-License-Identifier: MIT
#include <bit>
@@ -67,12 +68,20 @@ void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
}
}
#if defined(__clang__) || defined(__GNUC__)
__attribute__((always_inline))
#elif defined(_MSC_VER)
__forceinline
#endif
void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
switch (current_class) {
case ChClassId::NvDec:
ThiStateWrite(nvdec_thi_state, offset, data);
switch (static_cast<ThiMethod>(offset)) {
case ThiMethod::IncSyncpt: {
#ifdef __clang__
if constexpr (false) // Extreme Squeeze: Skip log metadata prep in hot path
#endif
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
@@ -85,6 +94,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
break;
}
case ThiMethod::SetMethod1:
#ifdef __clang__
if constexpr (false)
#endif
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
@@ -97,6 +109,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
switch (static_cast<ThiMethod>(state_offset)) {
case ThiMethod::IncSyncpt: {
#ifdef __clang__
if constexpr (false)
#endif
LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
@@ -109,6 +124,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
break;
}
case ThiMethod::SetMethod1:
#ifdef __clang__
if constexpr (false)
#endif
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
@@ -120,6 +138,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
break;
case ChClassId::Control:
// This device is mainly for syncpoint synchronization
#ifdef __clang__
if constexpr (false)
#endif
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
break;