From 9bb2b769a491e35b34112dff3a06115c1c24e853 Mon Sep 17 00:00:00 2001 From: Zephyron Date: Sun, 11 May 2025 16:26:59 +1000 Subject: [PATCH 1/2] video_core: Use safe memory reads for KeplerCompute inline methods Changes: - Remove special case for KeplerCompute inline methods that used unsafe reads in high accuracy mode - Add special case to use safe reads for KeplerCompute inline methods even in normal accuracy mode Signed-off-by: Zephyron --- src/video_core/dma_pusher.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index fb2060ca4..dae02b12f 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -103,11 +103,12 @@ bool DmaPusher::Step() { unsafe_process(); return true; } - if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && - dma_state.method == ComputeInline) { - unsafe_process(); - return true; - } + safe_process(); + return true; + } + // Even in normal accuracy, use safe reads for KeplerCompute inline methods + if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && + dma_state.method == ComputeInline) { safe_process(); return true; } From 2e6bcc9ea64dad5b26dd4d038b61e1bf4eb73ae3 Mon Sep 17 00:00:00 2001 From: Zephyron Date: Sun, 11 May 2025 16:28:25 +1000 Subject: [PATCH 2/2] video_core, shader_recompiler: Improve Princess Peach: Showtime! support and performance 1. Add geometry shader support for Princess Peach: Showtime!: - Implement proper EmitInvocationInfo handling for geometry shaders - Support input topology vertex counting in all shader backends (GLASM, GLSL, SPIRV) 2. 
Performance optimizations: - Replace InputTopologyVertices switch statement with a constexpr lookup table - Pre-calculate vertex counts and shifts to reduce register pressure - Eliminate redundant calculations in shader backends Signed-off-by: Zephyron --- .../backend/glasm/emit_glasm_context_get_set.cpp | 7 +++++++ .../backend/glsl/emit_glsl_context_get_set.cpp | 6 ++++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 5 +++++ src/shader_recompiler/runtime_info.h | 16 ++++++++++++++++ 4 files changed, 34 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index c7d7d5fef..061c2ea58 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glasm/glasm_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/shader_info.h" namespace Shader::Backend::GLASM { @@ -406,6 +407,12 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { case Stage::TessellationEval: ctx.Add("SHL.U {}.x,primitive.vertexcount,16;", inst); break; + case Stage::Geometry: { + // Pre-calculate the vertex count for better performance + const u32 vertex_count = InputTopologyVertices::vertices(ctx.runtime_info.input_topology) << 16; + ctx.Add("MOV.S {}.x,{};", inst, vertex_count); + break; + } default: LOG_WARNING(Shader, "(STUBBED) called"); ctx.Add("MOV.S {}.x,0x00ff0000;", inst); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 2e369ed72..63ccb2a1a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -426,6 +426,12 @@ 
void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { case Stage::TessellationEval: ctx.AddU32("{}=uint(gl_PatchVerticesIn)<<16;", inst); break; + case Stage::Geometry: { + // Pre-calculate the vertex count for better performance + const u32 vertex_count = InputTopologyVertices::vertices(ctx.runtime_info.input_topology); + ctx.AddU32("{}={}u;", inst, vertex_count << 16); + break; + } default: LOG_WARNING(Shader, "(STUBBED) called"); ctx.AddU32("{}=uint(0x00ff0000);", inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index feca5105f..36274ccfa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -549,6 +549,11 @@ Id EmitInvocationInfo(EmitContext& ctx) { case Stage::TessellationEval: return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in), ctx.Const(16u)); + case Stage::Geometry: { + // Pre-calculate the shifted value for better performance + const u32 shifted_value = InputTopologyVertices::vertices(ctx.runtime_info.input_topology) << 16; + return ctx.Const(shifted_value); + } default: LOG_WARNING(Shader, "(STUBBED) called"); return ctx.Const(0x00ff0000u); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 619c0b138..bc764e7af 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -30,6 +30,22 @@ enum class InputTopology { TrianglesAdjacency, }; +namespace InputTopologyVertices { + // Lookup table for vertex counts - faster than switch statement + inline constexpr std::array<u32, 5> vertex_counts = { + 1, // Points + 2, // Lines + 4, // LinesAdjacency + 3, // Triangles + 6, // TrianglesAdjacency + }; + + // Force compile-time evaluation when possible + inline constexpr u32 vertices(InputTopology input_topology) { + return 
vertex_counts[static_cast<std::size_t>(input_topology)]; + } +} + enum class CompareFunction { Never, Less,