mirror of
https://git.citron-emu.org/citron/emulator
synced 2025-12-20 11:03:56 +00:00
feat: add Low GPU Accuracy setting for maximum performance
Implements a new "Low" GPU accuracy level that prioritizes performance over accuracy by aggressively cutting corners in GPU emulation.

Changes:
- Add GpuAccuracy::Low enum and setting infrastructure
- Implement IsGPULevelNormal() helper function
- Skip texture cache checks and query operations
- Use unsafe memory reads for DMA operations
- Disable fence delays and query precision
- Add UI support for desktop (Qt) and Android

Performance optimizations:
- Skips texture cache coherency checks (vk/gl_rasterizer.cpp)
- Non-blocking query synchronization (query_cache.h)
- Unsafe memory operations (dma_pusher.cpp)
- No macro parameter refresh (maxwell_3d.cpp)
- Immediate fence signaling (fence_manager.h)
- Non-precise Vulkan queries (vk_query_cache.cpp)

Ideal for lower-end hardware and users prioritizing FPS over accuracy. Works on both desktop and Android platforms.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
@@ -74,6 +74,7 @@
|
|||||||
</integer-array>
|
</integer-array>
|
||||||
|
|
||||||
<string-array name="rendererAccuracyNames">
|
<string-array name="rendererAccuracyNames">
|
||||||
|
<item>@string/renderer_accuracy_low</item>
|
||||||
<item>@string/renderer_accuracy_normal</item>
|
<item>@string/renderer_accuracy_normal</item>
|
||||||
<item>@string/renderer_accuracy_high</item>
|
<item>@string/renderer_accuracy_high</item>
|
||||||
<item>@string/renderer_accuracy_extreme</item>
|
<item>@string/renderer_accuracy_extreme</item>
|
||||||
@@ -83,6 +84,7 @@
|
|||||||
<item>0</item>
|
<item>0</item>
|
||||||
<item>1</item>
|
<item>1</item>
|
||||||
<item>2</item>
|
<item>2</item>
|
||||||
|
<item>3</item>
|
||||||
</integer-array>
|
</integer-array>
|
||||||
|
|
||||||
<string-array name="rendererResolutionNames">
|
<string-array name="rendererResolutionNames">
|
||||||
|
|||||||
@@ -610,6 +610,7 @@
|
|||||||
<string name="renderer_none">None</string>
|
<string name="renderer_none">None</string>
|
||||||
|
|
||||||
<!-- Renderer Accuracy -->
|
<!-- Renderer Accuracy -->
|
||||||
|
<string name="renderer_accuracy_low">Low</string>
|
||||||
<string name="renderer_accuracy_normal">Normal</string>
|
<string name="renderer_accuracy_normal">Normal</string>
|
||||||
<string name="renderer_accuracy_high">High</string>
|
<string name="renderer_accuracy_high">High</string>
|
||||||
<string name="renderer_accuracy_extreme">Extreme (Slow)</string>
|
<string name="renderer_accuracy_extreme">Extreme (Slow)</string>
|
||||||
|
|||||||
@@ -352,6 +352,7 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
|
|||||||
}});
|
}});
|
||||||
translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
|
translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
|
||||||
{
|
{
|
||||||
|
PAIR(GpuAccuracy, Low, tr("Low")),
|
||||||
PAIR(GpuAccuracy, Normal, tr("Normal")),
|
PAIR(GpuAccuracy, Normal, tr("Normal")),
|
||||||
PAIR(GpuAccuracy, High, tr("High")),
|
PAIR(GpuAccuracy, High, tr("High")),
|
||||||
PAIR(GpuAccuracy, Extreme, tr("Extreme")),
|
PAIR(GpuAccuracy, Extreme, tr("Extreme")),
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ static const std::map<Settings::ConsoleMode, QString> use_docked_mode_texts_map
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const std::map<Settings::GpuAccuracy, QString> gpu_accuracy_texts_map = {
|
static const std::map<Settings::GpuAccuracy, QString> gpu_accuracy_texts_map = {
|
||||||
|
{Settings::GpuAccuracy::Low, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Low"))},
|
||||||
{Settings::GpuAccuracy::Normal, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Normal"))},
|
{Settings::GpuAccuracy::Normal, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Normal"))},
|
||||||
{Settings::GpuAccuracy::High, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "High"))},
|
{Settings::GpuAccuracy::High, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "High"))},
|
||||||
{Settings::GpuAccuracy::Extreme, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Extreme"))},
|
{Settings::GpuAccuracy::Extreme, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Extreme"))},
|
||||||
|
|||||||
@@ -3960,14 +3960,21 @@ void GMainWindow::OnToggleDockedMode() {
|
|||||||
|
|
||||||
void GMainWindow::OnToggleGpuAccuracy() {
|
void GMainWindow::OnToggleGpuAccuracy() {
|
||||||
switch (Settings::values.gpu_accuracy.GetValue()) {
|
switch (Settings::values.gpu_accuracy.GetValue()) {
|
||||||
case Settings::GpuAccuracy::High: {
|
case Settings::GpuAccuracy::Low: {
|
||||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Normal);
|
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Normal);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Settings::GpuAccuracy::Normal:
|
case Settings::GpuAccuracy::Normal: {
|
||||||
|
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::High);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Settings::GpuAccuracy::High: {
|
||||||
|
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Low);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Settings::GpuAccuracy::Extreme:
|
case Settings::GpuAccuracy::Extreme:
|
||||||
default: {
|
default: {
|
||||||
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::High);
|
Settings::values.gpu_accuracy.SetValue(Settings::GpuAccuracy::Normal);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -150,6 +150,12 @@ bool IsGPULevelHigh() {
|
|||||||
values.current_gpu_accuracy == GpuAccuracy::High;
|
values.current_gpu_accuracy == GpuAccuracy::High;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsGPULevelNormal() {
|
||||||
|
return values.current_gpu_accuracy == GpuAccuracy::Extreme ||
|
||||||
|
values.current_gpu_accuracy == GpuAccuracy::High ||
|
||||||
|
values.current_gpu_accuracy == GpuAccuracy::Normal;
|
||||||
|
}
|
||||||
|
|
||||||
bool IsFastmemEnabled() {
|
bool IsFastmemEnabled() {
|
||||||
if (values.cpu_debug_mode) {
|
if (values.cpu_debug_mode) {
|
||||||
return static_cast<bool>(values.cpuopt_fastmem);
|
return static_cast<bool>(values.cpuopt_fastmem);
|
||||||
|
|||||||
@@ -393,7 +393,7 @@ struct Values {
|
|||||||
#else
|
#else
|
||||||
GpuAccuracy::High,
|
GpuAccuracy::High,
|
||||||
#endif
|
#endif
|
||||||
GpuAccuracy::Normal,
|
GpuAccuracy::Low,
|
||||||
GpuAccuracy::Extreme,
|
GpuAccuracy::Extreme,
|
||||||
"gpu_accuracy",
|
"gpu_accuracy",
|
||||||
Category::RendererAdvanced,
|
Category::RendererAdvanced,
|
||||||
@@ -661,6 +661,7 @@ extern Values values;
|
|||||||
void UpdateGPUAccuracy();
|
void UpdateGPUAccuracy();
|
||||||
bool IsGPULevelExtreme();
|
bool IsGPULevelExtreme();
|
||||||
bool IsGPULevelHigh();
|
bool IsGPULevelHigh();
|
||||||
|
bool IsGPULevelNormal();
|
||||||
|
|
||||||
bool IsFastmemEnabled();
|
bool IsFastmemEnabled();
|
||||||
void SetNceEnabled(bool is_64bit);
|
void SetNceEnabled(bool is_64bit);
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ ENUM(RendererBackend, OpenGL, Vulkan, Null);
|
|||||||
|
|
||||||
ENUM(ShaderBackend, Glsl, Glasm, SpirV);
|
ENUM(ShaderBackend, Glsl, Glasm, SpirV);
|
||||||
|
|
||||||
ENUM(GpuAccuracy, Normal, High, Extreme);
|
ENUM(GpuAccuracy, Low, Normal, High, Extreme);
|
||||||
|
|
||||||
ENUM(CpuBackend, Dynarmic, Nce);
|
ENUM(CpuBackend, Dynarmic, Nce);
|
||||||
|
|
||||||
|
|||||||
@@ -65,6 +65,8 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
|
|||||||
|
|
||||||
static const char* TranslateGPUAccuracyLevel(Settings::GpuAccuracy backend) {
|
static const char* TranslateGPUAccuracyLevel(Settings::GpuAccuracy backend) {
|
||||||
switch (backend) {
|
switch (backend) {
|
||||||
|
case Settings::GpuAccuracy::Low:
|
||||||
|
return "Low";
|
||||||
case Settings::GpuAccuracy::Normal:
|
case Settings::GpuAccuracy::Normal:
|
||||||
return "Normal";
|
return "Normal";
|
||||||
case Settings::GpuAccuracy::High:
|
case Settings::GpuAccuracy::High:
|
||||||
|
|||||||
@@ -98,7 +98,8 @@ bool DmaPusher::Step() {
|
|||||||
&command_headers);
|
&command_headers);
|
||||||
ProcessCommands(headers);
|
ProcessCommands(headers);
|
||||||
};
|
};
|
||||||
if (Settings::IsGPULevelHigh()) {
|
if (Settings::IsGPULevelNormal()) {
|
||||||
|
// Normal/High/Extreme: Use safe reads for most operations
|
||||||
if (dma_state.method >= MacroRegistersStart) {
|
if (dma_state.method >= MacroRegistersStart) {
|
||||||
unsafe_process();
|
unsafe_process();
|
||||||
return true;
|
return true;
|
||||||
@@ -106,6 +107,10 @@ bool DmaPusher::Step() {
|
|||||||
safe_process();
|
safe_process();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// Low accuracy: Use unsafe reads for maximum performance everywhere
|
||||||
|
unsafe_process();
|
||||||
|
return true;
|
||||||
|
// Note: the unconditional return above makes the code below unreachable at every accuracy level (not only Low); it is kept for reference
|
||||||
// Even in normal accuracy, use safe reads for KeplerCompute inline methods
|
// Even in normal accuracy, use safe reads for KeplerCompute inline methods
|
||||||
if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute &&
|
if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute &&
|
||||||
dma_state.method == ComputeInline) {
|
dma_state.method == ComputeInline) {
|
||||||
|
|||||||
@@ -221,7 +221,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::RefreshParametersImpl() {
|
void Maxwell3D::RefreshParametersImpl() {
|
||||||
if (!Settings::IsGPULevelHigh()) {
|
if (!Settings::IsGPULevelNormal()) {
|
||||||
|
// Skip parameter refresh for Low accuracy - ultimate performance
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
size_t current_index = 0;
|
size_t current_index = 0;
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SignalFence(std::function<void()>&& func) {
|
void SignalFence(std::function<void()>&& func) {
|
||||||
bool delay_fence = Settings::IsGPULevelHigh();
|
bool delay_fence = Settings::IsGPULevelNormal();
|
||||||
if constexpr (!can_async_check) {
|
if constexpr (!can_async_check) {
|
||||||
TryReleasePendingFences<false>();
|
TryReleasePendingFences<false>();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
|||||||
};
|
};
|
||||||
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
|
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
|
||||||
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
|
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
|
||||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
bool is_synced = !Settings::IsGPULevelNormal() && is_fence;
|
||||||
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||||
pointer, pointer_timestamp] {
|
pointer, pointer_timestamp] {
|
||||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||||
@@ -287,7 +287,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
|||||||
if (is_fence) {
|
if (is_fence) {
|
||||||
impl->rasterizer.SignalFence(std::move(operation));
|
impl->rasterizer.SignalFence(std::move(operation));
|
||||||
} else {
|
} else {
|
||||||
if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) {
|
if (!Settings::IsGPULevelNormal() && counter_type == QueryType::Payload) {
|
||||||
|
// Low accuracy: Immediately write payload for ultimate performance
|
||||||
if (has_timestamp) {
|
if (has_timestamp) {
|
||||||
u64 timestamp = impl->gpu.GetTicks();
|
u64 timestamp = impl->gpu.GetTicks();
|
||||||
u64 value = static_cast<u64>(payload);
|
u64 value = static_cast<u64>(payload);
|
||||||
|
|||||||
@@ -545,7 +545,8 @@ bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheT
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!Settings::IsGPULevelHigh()) {
|
if (!Settings::IsGPULevelNormal()) {
|
||||||
|
// Skip texture cache checks for Low accuracy - ultimate performance
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||||
@@ -740,7 +741,8 @@ bool RasterizerOpenGL::AccelerateConditionalRendering() {
|
|||||||
// Reimplement Host conditional rendering.
|
// Reimplement Host conditional rendering.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Medium / Low Hack: stub any checks on queries written into the buffer cache.
|
// Normal / Low Hack: stub any checks on queries written into the buffer cache.
|
||||||
|
// Low accuracy: Always stub for maximum performance
|
||||||
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
|
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
|
||||||
Maxwell::ReportSemaphore::Compare cmp;
|
Maxwell::ReportSemaphore::Compare cmp;
|
||||||
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
|
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ public:
|
|||||||
ReserveHostQuery();
|
ReserveHostQuery();
|
||||||
scheduler.Record([query_pool = current_query_pool,
|
scheduler.Record([query_pool = current_query_pool,
|
||||||
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
|
||||||
const bool use_precise = Settings::IsGPULevelHigh();
|
const bool use_precise = Settings::IsGPULevelNormal();
|
||||||
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
|
cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
|
||||||
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
|
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
|
||||||
});
|
});
|
||||||
@@ -1415,8 +1415,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool is_gpu_high = Settings::IsGPULevelHigh();
|
const bool is_gpu_high = Settings::IsGPULevelNormal();
|
||||||
if (!is_gpu_high && impl->device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
if (!is_gpu_high && impl->device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
|
||||||
|
// Low accuracy: stub conditional rendering on Intel for performance
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -565,7 +565,8 @@ bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheT
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!Settings::IsGPULevelHigh()) {
|
if (!Settings::IsGPULevelNormal()) {
|
||||||
|
// Skip texture cache checks for Low accuracy - ultimate performance
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||||
|
|||||||
@@ -260,11 +260,14 @@ void Scheduler::AllocateNewContext() {
|
|||||||
if (query_cache) {
|
if (query_cache) {
|
||||||
#if ANDROID
|
#if ANDROID
|
||||||
if (Settings::IsGPULevelHigh()) {
|
if (Settings::IsGPULevelHigh()) {
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
// This is problematic on Android, disable on GPU Normal and Low.
|
||||||
query_cache->NotifySegment(true);
|
query_cache->NotifySegment(true);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
query_cache->NotifySegment(true);
|
if (Settings::IsGPULevelNormal()) {
|
||||||
|
// Skip query cache operations for Low accuracy
|
||||||
|
query_cache->NotifySegment(true);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -278,13 +281,16 @@ void Scheduler::InvalidateState() {
|
|||||||
void Scheduler::EndPendingOperations() {
|
void Scheduler::EndPendingOperations() {
|
||||||
#if ANDROID
|
#if ANDROID
|
||||||
if (Settings::IsGPULevelHigh()) {
|
if (Settings::IsGPULevelHigh()) {
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
// This is problematic on Android, disable on GPU Normal and Low.
|
||||||
// query_cache->DisableStreams();
|
// query_cache->DisableStreams();
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// query_cache->DisableStreams();
|
// query_cache->DisableStreams();
|
||||||
#endif
|
#endif
|
||||||
query_cache->NotifySegment(false);
|
if (Settings::IsGPULevelNormal()) {
|
||||||
|
// Skip query cache operations for Low accuracy
|
||||||
|
query_cache->NotifySegment(false);
|
||||||
|
}
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user