fix(vulkan): support Android drivers without 8/16-bit storage features

Makes 8/16-bit storage buffer features optional and adds shader emulation
when unavailable. Fixes Pokemon Legends: Z-A crashing on stock Qualcomm
Adreno drivers.

- Move storage8/16 from mandatory to recommended features
- Add SPIR-V capability guards for workgroup memory 8/16-bit access
- Implement int64 conversion lowering for compatibility
- Separate cache version (v13) for emulated vs native storage
- Update cache magic from 'yuzu' to 'citron'

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Zephyron committed on 2025-10-29 20:21:50 +10:00
commit 275689a7c9, parent ab09ef55ec
6 changed files with 218 additions and 46 deletions


@@ -28,7 +28,7 @@ std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count)
 } // Anonymous namespace

 Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -39,7 +39,7 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
 }

 Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -50,7 +50,7 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
 }

 Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -60,7 +60,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
 }

 Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -110,7 +110,7 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
 }

 void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
@@ -120,7 +120,7 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
 }

 void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
     } else {
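
With the support_int8/support_int16 guards above, a driver that exposes SPV_KHR_workgroup_memory_explicit_layout but lacks the 8/16-bit capabilities now takes the same else-branch as drivers without the extension, which emulates narrow accesses on top of 32-bit shared memory. A rough sketch of that technique in plain C++ (hypothetical helper names, not the recompiler's actual fallback, which emits the equivalent SPIR-V and would need atomics for concurrent writes):

    #include <cstdint>

    // Read one byte out of shared memory that is only addressable as 32-bit words.
    uint32_t LoadSharedU8Emulated(const uint32_t* shared_words, uint32_t byte_offset) {
        const uint32_t word = shared_words[byte_offset / 4]; // containing 32-bit word
        const uint32_t shift = (byte_offset % 4) * 8;        // bit position of the byte
        return (word >> shift) & 0xFFu;                      // zero-extended result
    }

    // Write one byte by masking it into the containing word (real shader code would use
    // an atomic read-modify-write here to stay race-free between invocations).
    void WriteSharedU8Emulated(uint32_t* shared_words, uint32_t byte_offset, uint32_t value) {
        const uint32_t shift = (byte_offset % 4) * 8;
        uint32_t& word = shared_words[byte_offset / 4];
        word = (word & ~(0xFFu << shift)) | ((value & 0xFFu) << shift);
    }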


@@ -630,11 +630,12 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     if (profile.support_explicit_workgroup_layout) {
         AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
         AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
-        if (program.info.uses_int8) {
+        // Only add 8/16-bit workgroup capabilities if the device actually supports them
+        if (program.info.uses_int8 && profile.support_int8) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
             std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
         }
-        if (program.info.uses_int16) {
+        if (program.info.uses_int16 && profile.support_int16) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
             std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
         }


@@ -181,11 +181,118 @@ void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
     inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
 }
+
+void ConvertF16U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F16, high bits ignored
+    const IR::F16 result = ir.ConvertUToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F32, high bits ignored
+    const IR::F32 result = ir.ConvertUToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F64, high bits ignored
+    const IR::F64 result = ir.ConvertUToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF16S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F16 as signed, high bits ignored
+    const IR::F16 result = ir.ConvertSToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F32 as signed, high bits ignored
+    const IR::F32 result = ir.ConvertSToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert low 32-bits to F64 as signed, high bits ignored
+    const IR::F64 result = ir.ConvertSToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU64U32To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::U32 value{inst.Arg(0)};
+    // U32 to U64: zero-extend to U32x2
+    const IR::Value result = ir.CompositeConstruct(value, ir.Imm32(0));
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // U64 to U32: take low 32-bits
+    inst.ReplaceUsesWith(value_pair.first);
+}
+
+void ConvertS64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to S64: convert to S32 and sign-extend
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToS(32, value);
+    const IR::U32 high = ir.ShiftRightArithmetic(low, ir.Imm32(31)); // Sign extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}
+
+void ConvertU64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to U64: convert to U32 and zero-extend
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToU(32, value);
+    const IR::U32 high = ir.Imm32(0); // Zero extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}

 void Lower(IR::Block& block, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::PackUint2x32:
     case IR::Opcode::UnpackUint2x32:
         return inst.ReplaceOpcode(IR::Opcode::Identity);
+    // Conversion operations
+    case IR::Opcode::ConvertF16U64:
+        return ConvertF16U64To32(block, inst);
+    case IR::Opcode::ConvertF32U64:
+        return ConvertF32U64To32(block, inst);
+    case IR::Opcode::ConvertF64U64:
+        return ConvertF64U64To32(block, inst);
+    case IR::Opcode::ConvertF16S64:
+        return ConvertF16S64To32(block, inst);
+    case IR::Opcode::ConvertF32S64:
+        return ConvertF32S64To32(block, inst);
+    case IR::Opcode::ConvertF64S64:
+        return ConvertF64S64To32(block, inst);
+    case IR::Opcode::ConvertU64U32:
+        return ConvertU64U32To32(block, inst);
+    case IR::Opcode::ConvertU32U64:
+        return ConvertU32U64To32(block, inst);
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertS64F32:
+    case IR::Opcode::ConvertS64F64:
+        return ConvertS64FTo32(block, inst);
+    case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertU64F32:
+    case IR::Opcode::ConvertU64F64:
+        return ConvertU64FTo32(block, inst);
+    // Arithmetic operations
     case IR::Opcode::IAdd64:
         return IAdd64To32(block, inst);
     case IR::Opcode::ISub64:
@@ -198,6 +305,7 @@ void Lower(IR::Block& block, IR::Inst& inst) {
         return ShiftRightLogical64To32(block, inst);
     case IR::Opcode::ShiftRightArithmetic64:
         return ShiftRightArithmetic64To32(block, inst);
+    // Atomic operations
     case IR::Opcode::SharedAtomicExchange64:
         return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
     case IR::Opcode::GlobalAtomicIAdd64:
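
The new lowering helpers above treat a 64-bit integer as a pair of 32-bit words and, per the inline comments, let only the low word participate in float conversions. A minimal standalone sketch of the equivalent scalar arithmetic (the pass itself emits IR; these function names are illustrative only):

    #include <cstdint>
    #include <utility>

    using U32Pair = std::pair<uint32_t, uint32_t>; // {low, high} stand-in for a lowered u64

    // Float -> "s64": convert to s32, then produce the high word by sign extension,
    // mirroring ConvertS64FTo32 (values outside the s32 range are not represented).
    U32Pair FloatToS64Pair(float value) {
        const int32_t low = static_cast<int32_t>(value);
        const int32_t high = low >> 31; // arithmetic shift replicates the sign bit
        return {static_cast<uint32_t>(low), static_cast<uint32_t>(high)};
    }

    // "u64" -> float: only the low word is converted, as in ConvertF32U64To32.
    float U64PairToFloat(const U32Pair& value) {
        return static_cast<float>(value.first);
    }

    // u32 -> "u64": zero-extend into a pair, as in ConvertU64U32To32.
    U32Pair ZeroExtendU32(uint32_t value) {
        return {value, 0u};
    }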


@@ -54,8 +54,10 @@ using VideoCommon::FileEnvironment;
 using VideoCommon::GenericEnvironment;
 using VideoCommon::GraphicsEnvironment;

+// Cache version is bumped when emulating 8/16-bit storage to avoid loading incompatible shaders
 constexpr u32 CACHE_VERSION = 11;
-constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
+constexpr u32 CACHE_VERSION_EMULATED_STORAGE = 13; // Bumped for int64 lowering fix
+constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'c', 'i', 't', 'r', 'o', 'n', 'v', 'k'};

 template <typename Container>
 auto MakeSpan(Container& container) {
@@ -321,8 +323,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .supported_spirv = device.SupportedSpirvVersion(),
         .unified_descriptor_binding = true,
         .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
-        .support_int8 = device.IsInt8Supported(),
-        .support_int16 = device.IsShaderInt16Supported(),
+        // Force int8/int16 emulation when storage buffer support is missing
+        // Even if shader int8/int16 is supported, we need storage buffer support for buffer operations
+        .support_int8 = device.Is8BitStorageSupported(),
+        .support_int16 = device.Is16BitStorageSupported(),
+        // Int64 support is independent of 8/16-bit storage - only check native capability
         .support_int64 = device.IsShaderInt64Supported(),
         .support_vertex_instance_id = false,
         .support_float_controls = device.IsKhrShaderFloatControlsSupported(),
@@ -383,7 +388,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
     host_info = Shader::HostTranslateInfo{
         .support_float64 = device.IsFloat64Supported(),
         .support_float16 = device.IsFloat16Supported(),
-        .support_int64 = device.IsShaderInt64Supported(),
+        // Disable int64 support when emulating storage to ensure proper lowering
+        .support_int64 = device.IsShaderInt64Supported() &&
+                         device.Is8BitStorageSupported() &&
+                         device.Is16BitStorageSupported(),
         .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
                                 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
                                 driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
@@ -421,8 +429,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
 PipelineCache::~PipelineCache() {
     if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }
 }
@@ -482,8 +493,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
     if (use_vulkan_pipeline_cache) {
         vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
+        // Use different cache version when emulating 8/16-bit storage to avoid loading invalid shaders
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         vulkan_pipeline_cache =
-            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
+            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, cache_ver);
     }

     struct {
@@ -556,7 +571,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
         ++state.total;
         ++state.total_graphics;
     }};
-    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+    // Use different cache version when emulating 8/16-bit storage
+    const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                              ? CACHE_VERSION_EMULATED_STORAGE
+                              : CACHE_VERSION;
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, cache_ver, load_compute,
                                load_graphics);

     LOG_INFO(Render_Vulkan, "Total Pipeline Count: {}", state.total);
@@ -579,8 +598,9 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
     workers.WaitForRequests(stop_loading);

     if (use_vulkan_pipeline_cache) {
+        // Reuse cache_ver from above (already declared at line 571)
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }

     if (state.statistics) {
@@ -622,6 +642,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     bool build_in_parallel) try {
     auto hash = key.Hash();
     LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
+    LOG_INFO(Render_Vulkan, "Creating graphics pipeline with {} stages", envs.size());
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     const bool uses_vertex_a{key.unique_hashes[0] != 0};
@@ -685,6 +706,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
+        try {
         std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
         // Reserve more space for Insane mode to reduce allocations during shader compilation
         const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
@@ -693,6 +715,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         code.reserve(reserve_size);
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Failed to compile shader stage {} for pipeline 0x{:016x}: {}",
+                      index, hash, e.what());
+            throw;
+        }
         if (device.HasDebuggingToolAttached()) {
             const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
@@ -741,7 +768,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
                 env_ptrs.push_back(&envs[index]);
             }
         }
-        SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
+        SerializePipeline(key, env_ptrs, pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
@@ -759,8 +789,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
         return pipeline;
     }
     serialization_thread.QueueWork([this, key, env_ = std::move(env)] {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env_},
-                          pipeline_cache_filename, CACHE_VERSION);
+                          pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
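
The same version-selection ternary is now repeated at every load and serialize site. A hypothetical helper that would express the rule in one place (EffectiveCacheVersion is not part of the change; it only uses names that appear in the diff):

    // Native and emulated-storage caches must never share a version, otherwise shaders
    // compiled for one path could be loaded by the other.
    u32 EffectiveCacheVersion(const Device& device) {
        const bool emulated_storage =
            !device.Is8BitStorageSupported() || !device.Is16BitStorageSupported();
        return emulated_storage ? CACHE_VERSION_EMULATED_STORAGE : CACHE_VERSION;
    }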


@@ -521,6 +521,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;

+        // Check for 16-bit and 8-bit storage support
+        if (!features.bit16_storage.uniformAndStorageBuffer16BitAccess ||
+            !features.bit16_storage.storageBuffer16BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 16-bit storage support - some games may have compatibility issues");
+        }
+
+        if (!features.bit8_storage.uniformAndStorageBuffer8BitAccess ||
+            !features.bit8_storage.storageBuffer8BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 8-bit storage support - some games may have compatibility issues");
+        }
         // Log detection of modern Adreno GPUs
         if (is_adreno8xx) {
             LOG_INFO(Render_Vulkan, "Detected Adreno 8xx series GPU (Snapdragon Elite) - using optimized driver settings");
@@ -1330,12 +1342,18 @@ void Device::RemoveUnsuitableExtensions() {
     }

     // VK_KHR_workgroup_memory_explicit_layout
+    // This extension requires 8-bit and 16-bit storage support, so disable it on stock drivers
     extensions.workgroup_memory_explicit_layout =
         features.features.shaderInt16 &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
-        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
+        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout &&
+        // Also require storage buffer 8/16-bit support since the extension needs them
+        features.bit8_storage.storageBuffer8BitAccess &&
+        features.bit8_storage.uniformAndStorageBuffer8BitAccess &&
+        features.bit16_storage.storageBuffer16BitAccess &&
+        features.bit16_storage.uniformAndStorageBuffer16BitAccess;
     RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
                                        features.workgroup_memory_explicit_layout,
                                        VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
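
For reference, the bit8_storage/bit16_storage structs checked here come from the standard Vulkan feature query. A minimal sketch of that query, assuming a valid VkPhysicalDevice handle and Vulkan 1.2 headers (the emulator fills its own feature chain the same way):

    #include <vulkan/vulkan.h>

    bool QueryStorageSupport(VkPhysicalDevice physical_device) {
        VkPhysicalDevice8BitStorageFeatures bit8{};
        bit8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;

        VkPhysicalDevice16BitStorageFeatures bit16{};
        bit16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
        bit16.pNext = &bit8;

        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &bit16;
        vkGetPhysicalDeviceFeatures2(physical_device, &features2);

        // Same pairs that the new Is8BitStorageSupported/Is16BitStorageSupported getters test.
        const bool has_8bit = bit8.storageBuffer8BitAccess && bit8.uniformAndStorageBuffer8BitAccess;
        const bool has_16bit = bit16.storageBuffer16BitAccess && bit16.uniformAndStorageBuffer16BitAccess;
        return has_8bit && has_16bit;
    }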


@@ -115,10 +115,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
 // Define features which must be supported.
 #define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \
-    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
-    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
-    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
-    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
     FEATURE_NAME(features, depthBiasClamp) \
     FEATURE_NAME(features, depthClamp) \
     FEATURE_NAME(features, drawIndirectFirstInstance) \
@@ -151,6 +147,10 @@ VK_DEFINE_HANDLE(VmaAllocator)
 // Define features where the absence of the feature may result in a degraded experience.
 #define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
+    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
+    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
+    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
+    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
     FEATURE_NAME(custom_border_color, customBorderColors) \
     FEATURE_NAME(depth_bias_control, depthBiasControl) \
     FEATURE_NAME(depth_bias_control, leastRepresentableValueForceUnormRepresentation) \
@@ -341,6 +341,18 @@ public:
         return features.shader_float16_int8.shaderInt8;
     }

+    /// Returns true if 16-bit storage buffer access is supported.
+    bool Is16BitStorageSupported() const {
+        return features.bit16_storage.storageBuffer16BitAccess &&
+               features.bit16_storage.uniformAndStorageBuffer16BitAccess;
+    }
+
+    /// Returns true if 8-bit storage buffer access is supported.
+    bool Is8BitStorageSupported() const {
+        return features.bit8_storage.storageBuffer8BitAccess &&
+               features.bit8_storage.uniformAndStorageBuffer8BitAccess;
+    }
+
     /// Returns true if the device supports binding multisample images as storage images.
     bool IsStorageImageMultisampleSupported() const {
         return features.features.shaderStorageImageMultisample;
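
Moving the storage bits from FOR_EACH_VK_MANDATORY_FEATURE to FOR_EACH_VK_RECOMMENDED_FEATURE changes only how their absence is treated during device selection: a missing mandatory feature disqualifies the device, a missing recommended one is merely reported. A self-contained illustration of that X-macro pattern (names, features, and checking logic here are simplified stand-ins, not the emulator's actual code):

    #include <cstdio>

    // Toy feature set; storageBuffer8BitAccess is false to mimic a stock Adreno driver.
    struct DemoFeatures {
        bool depthBiasClamp = true;
        bool storageBuffer8BitAccess = false;
    };

    #define FOR_EACH_DEMO_MANDATORY(X) X(depthBiasClamp)
    #define FOR_EACH_DEMO_RECOMMENDED(X) X(storageBuffer8BitAccess)

    bool IsDeviceSuitable(const DemoFeatures& f) {
        bool suitable = true;
    #define DEMO_MANDATORY(field)                                                  \
        if (!f.field) {                                                            \
            std::printf("missing required feature: %s\n", #field);                 \
            suitable = false;                                                      \
        }
    #define DEMO_RECOMMENDED(field)                                                \
        if (!f.field) {                                                            \
            std::printf("missing recommended feature: %s (continuing)\n", #field); \
        }
        FOR_EACH_DEMO_MANDATORY(DEMO_MANDATORY)
        FOR_EACH_DEMO_RECOMMENDED(DEMO_RECOMMENDED)
    #undef DEMO_MANDATORY
    #undef DEMO_RECOMMENDED
        return suitable;
    }

    int main() {
        return IsDeviceSuitable(DemoFeatures{}) ? 0 : 1;
    }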