mirror of
https://git.citron-emu.org/citron/emulator
synced 2025-12-20 11:03:56 +00:00
feat: add Low GPU Accuracy setting for maximum performance
Implements a new "Low" GPU accuracy level that prioritizes performance over accuracy by aggressively cutting corners in GPU emulation.

Changes:
- Add GpuAccuracy::Low enum value and setting infrastructure
- Implement IsGPULevelNormal() helper function
- Skip texture cache checks and query operations
- Use unsafe memory reads for DMA operations
- Disable fence delays and query precision
- Add UI support for desktop (Qt) and Android

Performance optimizations:
- Skip texture cache coherency checks (vk/gl_rasterizer.cpp)
- Non-blocking query synchronization (query_cache.h)
- Unsafe memory operations (dma_pusher.cpp)
- No macro parameter refresh (maxwell_3d.cpp)
- Immediate fence signaling (fence_manager.h)
- Non-precise Vulkan queries (vk_query_cache.cpp)

Ideal for lower-end hardware and users prioritizing FPS over accuracy. Works on both desktop and Android platforms.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
@@ -74,6 +74,7 @@
|
||||
</integer-array>
|
||||
|
||||
<string-array name="rendererAccuracyNames">
|
||||
<item>@string/renderer_accuracy_low</item>
|
||||
<item>@string/renderer_accuracy_normal</item>
|
||||
<item>@string/renderer_accuracy_high</item>
|
||||
<item>@string/renderer_accuracy_extreme</item>
|
||||
@@ -83,6 +84,7 @@
|
||||
<item>0</item>
|
||||
<item>1</item>
|
||||
<item>2</item>
|
||||
<item>3</item>
|
||||
</integer-array>
|
||||
|
||||
<string-array name="rendererResolutionNames">
|
||||
|
||||
@@ -610,6 +610,7 @@
|
||||
<string name="renderer_none">None</string>
|
||||
|
||||
<!-- Renderer Accuracy -->
|
||||
<string name="renderer_accuracy_low">Low</string>
|
||||
<string name="renderer_accuracy_normal">Normal</string>
|
||||
<string name="renderer_accuracy_high">High</string>
|
||||
<string name="renderer_accuracy_extreme">Extreme (Slow)</string>
|
||||
|
||||
@@ -352,6 +352,7 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
|
||||
}});
|
||||
translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
|
||||
{
|
||||
PAIR(GpuAccuracy, Low, tr("Low")),
|
||||
PAIR(GpuAccuracy, Normal, tr("Normal")),
|
||||
PAIR(GpuAccuracy, High, tr("High")),
|
||||
PAIR(GpuAccuracy, Extreme, tr("Extreme")),
|
||||
|
||||
@@ -51,6 +51,7 @@ static const std::map<Settings::ConsoleMode, QString> use_docked_mode_texts_map
|
||||
};
|
||||
|
||||
static const std::map<Settings::GpuAccuracy, QString> gpu_accuracy_texts_map = {
|
||||
{Settings::GpuAccuracy::Low, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Low"))},
|
||||
{Settings::GpuAccuracy::Normal, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Normal"))},
|
||||
{Settings::GpuAccuracy::High, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "High"))},
|
||||
{Settings::GpuAccuracy::Extreme, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Extreme"))},
|
||||
|
||||
@@ -3960,14 +3960,21 @@ void GMainWindow::OnToggleDockedMode() {
|
||||
|
||||
void GMainWindow::OnToggleGpuAccuracy() {
|
||||
switch (Settings::values.gpu_accuracy.GetValue()) {
|
||||
case Settings::GpuAccuracy::High: {
|
||||
case Settings::GpuAccuracy::Low: {
|
||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Normal);
|
||||
break;
|
||||
}
|
||||
case Settings::GpuAccuracy::Normal:
|
||||
case Settings::GpuAccuracy::Normal: {
|
||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::High);
|
||||
break;
|
||||
}
|
||||
case Settings::GpuAccuracy::High: {
|
||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Low);
|
||||
break;
|
||||
}
|
||||
case Settings::GpuAccuracy::Extreme:
|
||||
default: {
|
||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::High);
|
||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Normal);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,6 +150,12 @@ bool IsGPULevelHigh() {
|
||||
values.current_gpu_accuracy == GpuAccuracy::High;
|
||||
}
|
||||
|
||||
// Returns true when the current GPU accuracy level is Normal, High, or Extreme.
// Despite the name, this is an "at least Normal" check rather than an exact match
// against GpuAccuracy::Normal: of the four GpuAccuracy enumerators (Low, Normal,
// High, Extreme) only Low yields false, which is why callers test
// !IsGPULevelNormal() to select the Low-accuracy shortcuts.
bool IsGPULevelNormal() {
|
||||
return values.current_gpu_accuracy == GpuAccuracy::Extreme ||
|
||||
values.current_gpu_accuracy == GpuAccuracy::High ||
|
||||
values.current_gpu_accuracy == GpuAccuracy::Normal;
|
||||
}
|
||||
|
||||
bool IsFastmemEnabled() {
|
||||
if (values.cpu_debug_mode) {
|
||||
return static_cast<bool>(values.cpuopt_fastmem);
|
||||
|
||||
@@ -393,7 +393,7 @@ struct Values {
|
||||
#else
|
||||
GpuAccuracy::High,
|
||||
#endif
|
||||
GpuAccuracy::Normal,
|
||||
GpuAccuracy::Low,
|
||||
GpuAccuracy::Extreme,
|
||||
"gpu_accuracy",
|
||||
Category::RendererAdvanced,
|
||||
@@ -661,6 +661,7 @@ extern Values values;
|
||||
void UpdateGPUAccuracy();
|
||||
bool IsGPULevelExtreme();
|
||||
bool IsGPULevelHigh();
|
||||
bool IsGPULevelNormal();
|
||||
|
||||
bool IsFastmemEnabled();
|
||||
void SetNceEnabled(bool is_64bit);
|
||||
|
||||
@@ -130,7 +130,7 @@ ENUM(RendererBackend, OpenGL, Vulkan, Null);
|
||||
|
||||
ENUM(ShaderBackend, Glsl, Glasm, SpirV);
|
||||
|
||||
ENUM(GpuAccuracy, Normal, High, Extreme);
|
||||
ENUM(GpuAccuracy, Low, Normal, High, Extreme);
|
||||
|
||||
ENUM(CpuBackend, Dynarmic, Nce);
|
||||
|
||||
|
||||
@@ -65,6 +65,8 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
|
||||
|
||||
static const char* TranslateGPUAccuracyLevel(Settings::GpuAccuracy backend) {
|
||||
switch (backend) {
|
||||
case Settings::GpuAccuracy::Low:
|
||||
return "Low";
|
||||
case Settings::GpuAccuracy::Normal:
|
||||
return "Normal";
|
||||
case Settings::GpuAccuracy::High:
|
||||
|
||||
@@ -98,7 +98,8 @@ bool DmaPusher::Step() {
|
||||
&command_headers);
|
||||
ProcessCommands(headers);
|
||||
};
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
if (Settings::IsGPULevelNormal()) {
|
||||
// Normal/High/Extreme: Use safe reads for most operations
|
||||
if (dma_state.method >= MacroRegistersStart) {
|
||||
unsafe_process();
|
||||
return true;
|
||||
@@ -106,6 +107,10 @@ bool DmaPusher::Step() {
|
||||
safe_process();
|
||||
return true;
|
||||
}
|
||||
// Low accuracy: Use unsafe reads for maximum performance everywhere
|
||||
unsafe_process();
|
||||
return true;
|
||||
// Note: The code below is unreachable at every accuracy level, not only Low (the unconditional return above precedes it); kept for reference
|
||||
// Even in normal accuracy, use safe reads for KeplerCompute inline methods
|
||||
if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute &&
|
||||
dma_state.method == ComputeInline) {
|
||||
|
||||
@@ -221,7 +221,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||
}
|
||||
|
||||
void Maxwell3D::RefreshParametersImpl() {
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
if (!Settings::IsGPULevelNormal()) {
|
||||
// Skip parameter refresh for Low accuracy - ultimate performance
|
||||
return;
|
||||
}
|
||||
size_t current_index = 0;
|
||||
|
||||
@@ -72,7 +72,7 @@ public:
|
||||
}
|
||||
|
||||
void SignalFence(std::function<void()>&& func) {
|
||||
bool delay_fence = Settings::IsGPULevelHigh();
|
||||
bool delay_fence = Settings::IsGPULevelNormal();
|
||||
if constexpr (!can_async_check) {
|
||||
TryReleasePendingFences<false>();
|
||||
}
|
||||
|
||||
@@ -257,7 +257,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||
};
|
||||
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
|
||||
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
|
||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
||||
bool is_synced = !Settings::IsGPULevelNormal() && is_fence;
|
||||
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||
pointer, pointer_timestamp] {
|
||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||
@@ -287,7 +287,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||
if (is_fence) {
|
||||
impl->rasterizer.SignalFence(std::move(operation));
|
||||
} else {
|
||||
if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) {
|
||||
if (!Settings::IsGPULevelNormal() && counter_type == QueryType::Payload) {
|
||||
// Low accuracy: Immediately write payload for ultimate performance
|
||||
if (has_timestamp) {
|
||||
u64 timestamp = impl->gpu.GetTicks();
|
||||
u64 value = static_cast<u64>(payload);
|
||||
|
||||
@@ -545,7 +545,8 @@ bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheT
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
if (!Settings::IsGPULevelNormal()) {
|
||||
// Skip texture cache checks for Low accuracy - ultimate performance
|
||||
return false;
|
||||
}
|
||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||
@@ -740,7 +741,8 @@ bool RasterizerOpenGL::AccelerateConditionalRendering() {
|
||||
// Reimplement Host conditional rendering.
|
||||
return false;
|
||||
}
|
||||
// Medium / Low Hack: stub any checks on queries written into the buffer cache.
|
||||
// Normal / Low Hack: stub any checks on queries written into the buffer cache.
|
||||
// Low accuracy: Always stub for maximum performance
|
||||
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
|
||||
Maxwell::ReportSemaphore::Compare cmp;
|
||||
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
|
||||
|
||||
@@ -153,7 +153,7 @@ public:
|
||||
ReserveHostQuery();
|
||||
scheduler.Record([query_pool = current_query_pool,
|
||||
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
||||
const bool use_precise = Settings::IsGPULevelHigh();
|
||||
const bool use_precise = Settings::IsGPULevelNormal();
|
||||
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
|
||||
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
|
||||
});
|
||||
@@ -1415,8 +1415,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool is_gpu_high = Settings::IsGPULevelHigh();
|
||||
const bool is_gpu_high = Settings::IsGPULevelNormal();
|
||||
if (!is_gpu_high && impl->device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
||||
// Low accuracy: stub conditional rendering on Intel for performance
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -565,7 +565,8 @@ bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheT
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
if (!Settings::IsGPULevelNormal()) {
|
||||
// Skip texture cache checks for Low accuracy - ultimate performance
|
||||
return false;
|
||||
}
|
||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||
|
||||
@@ -260,11 +260,14 @@ void Scheduler::AllocateNewContext() {
|
||||
if (query_cache) {
|
||||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
// This is problematic on Android, disable on GPU Normal and Low.
|
||||
query_cache->NotifySegment(true);
|
||||
}
|
||||
#else
|
||||
if (Settings::IsGPULevelNormal()) {
|
||||
// Skip query cache operations for Low accuracy
|
||||
query_cache->NotifySegment(true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -278,13 +281,16 @@ void Scheduler::InvalidateState() {
|
||||
// Ends the scheduler's pending operations: calls query_cache->NotifySegment(false)
// when Settings::IsGPULevelNormal() is true (i.e. skipped on Low accuracy), then
// calls EndRenderPass().
// The #if ANDROID / #else section currently has no effect: DisableStreams() is
// commented out in both branches, so the ANDROID `if` body contains only comments.
// NOTE(review): query_cache is dereferenced here without the `if (query_cache)`
// guard used in AllocateNewContext() — confirm it cannot be null at this point.
void Scheduler::EndPendingOperations() {
|
||||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
// This is problematic on Android, disable on GPU Normal and Low.
|
||||
// query_cache->DisableStreams();
|
||||
}
|
||||
#else
|
||||
// query_cache->DisableStreams();
|
||||
#endif
|
||||
if (Settings::IsGPULevelNormal()) {
|
||||
// Skip query cache operations for Low accuracy
|
||||
query_cache->NotifySegment(false);
|
||||
}
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user