mirror of https://git.citron-emu.org/citron/emulator (synced 2025-12-19 10:43:33 +00:00)
fix(vulkan): support Android drivers without 8/16-bit storage features

Makes 8/16-bit storage buffer features optional and adds shader emulation when
unavailable. Fixes Pokemon Legends: Z-A crashing on stock Qualcomm Adreno drivers.

- Move storage8/16 from mandatory to recommended features
- Add SPIR-V capability guards for workgroup memory 8/16-bit access
- Implement int64 conversion lowering for compatibility
- Separate cache version (v13) for emulated vs native storage
- Update cache magic from 'yuzu' to 'citron'

Signed-off-by: Zephyron <zephyron@citron-emu.org>
@@ -28,7 +28,7 @@ std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count)
 } // Anonymous namespace
 
 Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -39,7 +39,7 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -50,7 +50,7 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -60,7 +60,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -110,7 +110,7 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
 }
 
 void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
@@ -120,7 +120,7 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
 }
 
 void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
     } else {
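
On the elided else branches, these emitters fall back to plain 32-bit shared memory, which conceptually reduces sub-word access to word-aligned shift-and-mask operations. A standalone C++ sketch of that idea (hypothetical names, not the emulator's SPIR-V emitter):

#include <cstdint>

// Emulated byte load: fetch the containing 32-bit word, then shift and mask.
uint32_t LoadSharedU8Emulated(const uint32_t* words, uint32_t offset) {
    const uint32_t word = words[offset / 4]; // containing 32-bit word
    const uint32_t shift = (offset % 4) * 8; // bit position of the byte
    return (word >> shift) & 0xFFu;          // zero-extended result
}

// Emulated byte store: read-modify-write of the containing word. In real GPU
// shared memory this RMW needs atomics to stay race-free across invocations.
void WriteSharedU8Emulated(uint32_t* words, uint32_t offset, uint32_t value) {
    const uint32_t shift = (offset % 4) * 8;
    uint32_t& word = words[offset / 4];
    word = (word & ~(0xFFu << shift)) | ((value & 0xFFu) << shift);
}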
@@ -630,11 +630,12 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     if (profile.support_explicit_workgroup_layout) {
         AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
         AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
-        if (program.info.uses_int8) {
+        // Only add 8/16-bit workgroup capabilities if the device actually supports them
+        if (program.info.uses_int8 && profile.support_int8) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
             std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
         }
-        if (program.info.uses_int16) {
+        if (program.info.uses_int16 && profile.support_int16) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
             std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
         }
@@ -181,11 +181,118 @@ void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
     inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
 }
 
+void ConvertF16U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F16, high bits ignored
+    const IR::F16 result = ir.ConvertUToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F32, high bits ignored
+    const IR::F32 result = ir.ConvertUToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F64, high bits ignored
+    const IR::F64 result = ir.ConvertUToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF16S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F16 as signed, high bits ignored
+    const IR::F16 result = ir.ConvertSToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F32 as signed, high bits ignored
+    const IR::F32 result = ir.ConvertSToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F64 as signed, high bits ignored
+    const IR::F64 result = ir.ConvertSToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU64U32To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::U32 value{inst.Arg(0)};
+    // U32 to U64: zero-extend to U32x2
+    const IR::Value result = ir.CompositeConstruct(value, ir.Imm32(0));
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // U64 to U32: take low 32-bits
+    inst.ReplaceUsesWith(value_pair.first);
+}
+
+void ConvertS64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to S64: convert to S32 and sign-extend
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToS(32, value);
+    const IR::U32 high = ir.ShiftRightArithmetic(low, ir.Imm32(31)); // Sign extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}
+
+void ConvertU64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to U64: convert to U32 and zero-extend
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToU(32, value);
+    const IR::U32 high = ir.Imm32(0); // Zero extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}
+
 void Lower(IR::Block& block, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::PackUint2x32:
     case IR::Opcode::UnpackUint2x32:
         return inst.ReplaceOpcode(IR::Opcode::Identity);
+    // Conversion operations
+    case IR::Opcode::ConvertF16U64:
+        return ConvertF16U64To32(block, inst);
+    case IR::Opcode::ConvertF32U64:
+        return ConvertF32U64To32(block, inst);
+    case IR::Opcode::ConvertF64U64:
+        return ConvertF64U64To32(block, inst);
+    case IR::Opcode::ConvertF16S64:
+        return ConvertF16S64To32(block, inst);
+    case IR::Opcode::ConvertF32S64:
+        return ConvertF32S64To32(block, inst);
+    case IR::Opcode::ConvertF64S64:
+        return ConvertF64S64To32(block, inst);
+    case IR::Opcode::ConvertU64U32:
+        return ConvertU64U32To32(block, inst);
+    case IR::Opcode::ConvertU32U64:
+        return ConvertU32U64To32(block, inst);
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertS64F32:
+    case IR::Opcode::ConvertS64F64:
+        return ConvertS64FTo32(block, inst);
+    case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertU64F32:
+    case IR::Opcode::ConvertU64F64:
+        return ConvertU64FTo32(block, inst);
+    // Arithmetic operations
     case IR::Opcode::IAdd64:
         return IAdd64To32(block, inst);
     case IR::Opcode::ISub64:
@@ -198,6 +305,7 @@ void Lower(IR::Block& block, IR::Inst& inst) {
         return ShiftRightLogical64To32(block, inst);
     case IR::Opcode::ShiftRightArithmetic64:
         return ShiftRightArithmetic64To32(block, inst);
+    // Atomic operations
    case IR::Opcode::SharedAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
    case IR::Opcode::GlobalAtomicIAdd64:
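
The lowered conversions materialize a 64-bit value as a (low, high) pair and derive the high word from the low word's sign bit, the ShiftRightArithmetic-by-31 trick in ConvertS64FTo32 above. A standalone C++ check of that arithmetic (plain host code, not emulator code):

#include <cstdint>
#include <cstdio>

int main() {
    const float input = -2.5f;
    const int32_t low = static_cast<int32_t>(input); // truncates toward zero: -2
    const int32_t high = low >> 31; // arithmetic shift: -1 if negative, else 0
    const int64_t packed = (static_cast<int64_t>(high) << 32) |
                           static_cast<uint32_t>(low);
    // Prints low=0xFFFFFFFE high=0xFFFFFFFF value=-2
    std::printf("low=0x%08X high=0x%08X value=%lld\n",
                static_cast<uint32_t>(low), static_cast<uint32_t>(high),
                static_cast<long long>(packed));
    return 0;
}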
@@ -54,8 +54,10 @@ using VideoCommon::FileEnvironment;
 using VideoCommon::GenericEnvironment;
 using VideoCommon::GraphicsEnvironment;
 
+// Cache version is bumped when emulating 8/16-bit storage to avoid loading incompatible shaders
 constexpr u32 CACHE_VERSION = 11;
-constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
+constexpr u32 CACHE_VERSION_EMULATED_STORAGE = 13; // Bumped for int64 lowering fix
+constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'c', 'i', 't', 'r', 'o', 'n', 'v', 'k'};
 
 template <typename Container>
 auto MakeSpan(Container& container) {
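
The magic/version pair is what keeps stale caches from being deserialized. A minimal sketch of the header check this implies (assumed layout of an 8-byte magic followed by a u32 version; the emulator's actual serializer may differ):

#include <array>
#include <cstdint>
#include <fstream>

constexpr std::array<char, 8> kMagic{'c', 'i', 't', 'r', 'o', 'n', 'v', 'k'};

bool IsCacheUsable(std::ifstream& file, std::uint32_t expected_version) {
    std::array<char, 8> magic{};
    std::uint32_t version{};
    file.read(magic.data(), magic.size());
    file.read(reinterpret_cast<char*>(&version), sizeof(version));
    // Any mismatch means the blob came from a different build or a different
    // storage-emulation mode; discard it instead of loading incompatible shaders.
    return file && magic == kMagic && version == expected_version;
}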
@@ -321,8 +323,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .supported_spirv = device.SupportedSpirvVersion(),
         .unified_descriptor_binding = true,
         .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
-        .support_int8 = device.IsInt8Supported(),
-        .support_int16 = device.IsShaderInt16Supported(),
+        // Force int8/int16 emulation when storage buffer support is missing
+        // Even if shader int8/int16 is supported, we need storage buffer support for buffer operations
+        .support_int8 = device.Is8BitStorageSupported(),
+        .support_int16 = device.Is16BitStorageSupported(),
+        // Int64 support is independent of 8/16-bit storage - only check native capability
         .support_int64 = device.IsShaderInt64Supported(),
         .support_vertex_instance_id = false,
         .support_float_controls = device.IsKhrShaderFloatControlsSupported(),
@@ -383,7 +388,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
     host_info = Shader::HostTranslateInfo{
         .support_float64 = device.IsFloat64Supported(),
         .support_float16 = device.IsFloat16Supported(),
-        .support_int64 = device.IsShaderInt64Supported(),
+        // Disable int64 support when emulating storage to ensure proper lowering
+        .support_int64 = device.IsShaderInt64Supported() &&
+                         device.Is8BitStorageSupported() &&
+                         device.Is16BitStorageSupported(),
         .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
                                 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
                                 driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
@@ -421,8 +429,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
 
 PipelineCache::~PipelineCache() {
     if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }
 }
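
The same version-selection ternary recurs at every load and serialize site below. A hypothetical helper (not part of the commit, which inlines the expression at each call site) would express the shared logic once:

// Hypothetical consolidation of the repeated cache-version selection.
u32 EffectiveCacheVersion(const Device& device) {
    const bool emulated_storage = !device.Is8BitStorageSupported() ||
                                  !device.Is16BitStorageSupported();
    return emulated_storage ? CACHE_VERSION_EMULATED_STORAGE : CACHE_VERSION;
}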
@@ -482,8 +493,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
 
     if (use_vulkan_pipeline_cache) {
         vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
+        // Use different cache version when emulating 8/16-bit storage to avoid loading invalid shaders
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         vulkan_pipeline_cache =
-            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
+            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, cache_ver);
     }
 
     struct {
@@ -556,7 +571,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
         ++state.total;
         ++state.total_graphics;
     }};
-    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+    // Use different cache version when emulating 8/16-bit storage
+    const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                              ? CACHE_VERSION_EMULATED_STORAGE
+                              : CACHE_VERSION;
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, cache_ver, load_compute,
                                load_graphics);
 
     LOG_INFO(Render_Vulkan, "Total Pipeline Count: {}", state.total);
@@ -579,8 +598,9 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
     workers.WaitForRequests(stop_loading);
 
     if (use_vulkan_pipeline_cache) {
+        // Reuse cache_ver from above (already declared at line 571)
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }
 
     if (state.statistics) {
@@ -622,6 +642,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     bool build_in_parallel) try {
     auto hash = key.Hash();
     LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
+    LOG_INFO(Render_Vulkan, "Creating graphics pipeline with {} stages", envs.size());
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     const bool uses_vertex_a{key.unique_hashes[0] != 0};
@@ -685,14 +706,20 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
 
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
-        std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
-        // Reserve more space for Insane mode to reduce allocations during shader compilation
-        const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
-            ? std::max<size_t>(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
-            : std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
-        code.reserve(reserve_size);
-        device.SaveShader(code);
-        modules[stage_index] = BuildShader(device, code);
+        try {
+            std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
+            // Reserve more space for Insane mode to reduce allocations during shader compilation
+            const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
+                ? std::max<size_t>(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
+                : std::max<size_t>(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
+            code.reserve(reserve_size);
+            device.SaveShader(code);
+            modules[stage_index] = BuildShader(device, code);
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Failed to compile shader stage {} for pipeline 0x{:016x}: {}",
+                      index, hash, e.what());
+            throw;
+        }
         if (device.HasDebuggingToolAttached()) {
             const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
@@ -741,7 +768,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
                 env_ptrs.push_back(&envs[index]);
             }
         }
-        SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
+        SerializePipeline(key, env_ptrs, pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
@@ -759,8 +789,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
         return pipeline;
     }
     serialization_thread.QueueWork([this, key, env_ = std::move(env)] {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env_},
-                          pipeline_cache_filename, CACHE_VERSION);
+                          pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
@@ -499,7 +499,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     if (is_qualcomm) {
         must_emulate_scaled_formats = true;
 
-        LOG_INFO(Render_Vulkan,
+        LOG_INFO(Render_Vulkan,
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
@@ -509,7 +509,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for push descriptors");
         enable_push_descriptor_fallback = true;
         RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
-
+
         // Adreno driver compatibility for advanced shader features
         LOG_INFO(Render_Vulkan,
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for advanced shader features");
@@ -521,6 +521,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
 
+        // Check for 16-bit and 8-bit storage support
+        if (!features.bit16_storage.uniformAndStorageBuffer16BitAccess ||
+            !features.bit16_storage.storageBuffer16BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 16-bit storage support - some games may have compatibility issues");
+        }
+        if (!features.bit8_storage.uniformAndStorageBuffer8BitAccess ||
+            !features.bit8_storage.storageBuffer8BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 8-bit storage support - some games may have compatibility issues");
+        }
+
         // Log detection of modern Adreno GPUs
         if (is_adreno8xx) {
             LOG_INFO(Render_Vulkan, "Detected Adreno 8xx series GPU (Snapdragon Elite) - using optimized driver settings");
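
The features.bit8_storage/features.bit16_storage bits consulted here come from the standard Vulkan feature-query chain. A sketch of that query using core Vulkan 1.2 (illustrative wrapper; the emulator performs the equivalent inside its Device setup):

#include <vulkan/vulkan.h>

// Chains the 8/16-bit storage feature structs into vkGetPhysicalDeviceFeatures2
// and combines the bits the same way Is8BitStorageSupported and
// Is16BitStorageSupported do in the header diff further below.
bool QueryStorageSupport(VkPhysicalDevice physical) {
    VkPhysicalDevice8BitStorageFeatures bit8{
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES};
    VkPhysicalDevice16BitStorageFeatures bit16{
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES};
    bit16.pNext = &bit8;
    VkPhysicalDeviceFeatures2 features2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
    features2.pNext = &bit16;
    vkGetPhysicalDeviceFeatures2(physical, &features2);
    return bit8.storageBuffer8BitAccess && bit8.uniformAndStorageBuffer8BitAccess &&
           bit16.storageBuffer16BitAccess && bit16.uniformAndStorageBuffer16BitAccess;
}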
@@ -555,14 +567,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     if (is_arm) {
         must_emulate_scaled_formats = true;
 
-        LOG_INFO(Render_Vulkan,
+        LOG_INFO(Render_Vulkan,
                  "ARM Mali drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
 
         // Mali driver compatibility for advanced shader features
-        LOG_INFO(Render_Vulkan,
+        LOG_INFO(Render_Vulkan,
                  "ARM Mali drivers detected - enabling compatibility layer for advanced shader features");
         enable_shader_int64_fallback = true;
         RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
@@ -570,31 +582,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderBufferInt64Atomics = false;
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
-
+
         LOG_INFO(Render_Vulkan, "ARM Mali drivers detected - enabling compatibility layer for custom border colors");
         enable_custom_border_color_fallback = true;
         RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
                                VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
     }
 
     // Samsung Xclipse driver compatibility layer
     const bool is_xclipse = driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY;
     if (is_xclipse) {
         must_emulate_scaled_formats = true;
-
-        LOG_INFO(Render_Vulkan,
+
+        LOG_INFO(Render_Vulkan,
                  "Samsung Xclipse drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
 
         // Xclipse driver compatibility (AMD RDNA2-based with Samsung driver quirks)
-        LOG_INFO(Render_Vulkan,
+        LOG_INFO(Render_Vulkan,
                  "Samsung Xclipse drivers detected - enabling comprehensive compatibility layer");
 
         // Compatibility layer for shader float controls (causes compilation issues)
         RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
 
         // Compatibility layer for 64-bit integer operations
         enable_shader_int64_fallback = true;
         RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
@@ -602,12 +614,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderBufferInt64Atomics = false;
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
-
+
         // Compatibility layer for custom border colors
         enable_custom_border_color_fallback = true;
         RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
                                VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
-
+
         // Compatibility layer for push descriptors
         enable_push_descriptor_fallback = true;
         RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -1330,12 +1342,18 @@ void Device::RemoveUnsuitableExtensions() {
     }
 
     // VK_KHR_workgroup_memory_explicit_layout
+    // This extension requires 8-bit and 16-bit storage support, so disable it on stock drivers
     extensions.workgroup_memory_explicit_layout =
         features.features.shaderInt16 &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
-        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
+        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout &&
+        // Also require storage buffer 8/16-bit support since the extension needs them
+        features.bit8_storage.storageBuffer8BitAccess &&
+        features.bit8_storage.uniformAndStorageBuffer8BitAccess &&
+        features.bit16_storage.storageBuffer16BitAccess &&
+        features.bit16_storage.uniformAndStorageBuffer16BitAccess;
     RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
                                        features.workgroup_memory_explicit_layout,
                                        VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
@@ -115,10 +115,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
 
 // Define features which must be supported.
 #define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \
-    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
-    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
-    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
-    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
     FEATURE_NAME(features, depthBiasClamp) \
     FEATURE_NAME(features, depthClamp) \
     FEATURE_NAME(features, drawIndirectFirstInstance) \
@@ -151,6 +147,10 @@ VK_DEFINE_HANDLE(VmaAllocator)
 
 // Define features where the absence of the feature may result in a degraded experience.
 #define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
+    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
+    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
+    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
+    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
     FEATURE_NAME(custom_border_color, customBorderColors) \
     FEATURE_NAME(depth_bias_control, depthBiasControl) \
     FEATURE_NAME(depth_bias_control, leastRepresentableValueForceUnormRepresentation) \
@@ -341,6 +341,18 @@ public:
         return features.shader_float16_int8.shaderInt8;
     }
 
+    /// Returns true if 16-bit storage buffer access is supported.
+    bool Is16BitStorageSupported() const {
+        return features.bit16_storage.storageBuffer16BitAccess &&
+               features.bit16_storage.uniformAndStorageBuffer16BitAccess;
+    }
+
+    /// Returns true if 8-bit storage buffer access is supported.
+    bool Is8BitStorageSupported() const {
+        return features.bit8_storage.storageBuffer8BitAccess &&
+               features.bit8_storage.uniformAndStorageBuffer8BitAccess;
+    }
+
     /// Returns true if the device supports binding multisample images as storage images.
     bool IsStorageImageMultisampleSupported() const {
         return features.features.shaderStorageImageMultisample;