diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index df05dad74..682d74845 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -28,7 +28,7 @@ std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count)
 } // Anonymous namespace
 
 Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -39,7 +39,7 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -50,7 +50,7 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -60,7 +60,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -110,7 +110,7 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
 }
 
 void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int8) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
@@ -120,7 +120,7 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
 }
 
 void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    if (ctx.profile.support_explicit_workgroup_layout && ctx.profile.support_int16) {
        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
    } else {
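When the new `support_int8`/`support_int16` checks fail, these functions fall through to the existing else-branches, which service the access out of 32-bit shared words instead. A minimal standalone sketch of the arithmetic that emulated path boils down to (function names are hypothetical, not the emitter's API):

```cpp
#include <cstdint>

// Emulated 8-bit load: locate the containing 32-bit word, then shift and mask.
uint32_t LoadSharedU8Emulated(const uint32_t* shared_words, uint32_t offset) {
    const uint32_t word = shared_words[offset / 4]; // containing 32-bit word
    const uint32_t shift = (offset % 4) * 8;        // byte position within the word
    return (word >> shift) & 0xff;                  // zero-extended result
}

// Signed variant: same extraction, then sign-extend the byte.
int32_t LoadSharedS8Emulated(const uint32_t* shared_words, uint32_t offset) {
    return static_cast<int8_t>(LoadSharedU8Emulated(shared_words, offset));
}
```

Stores follow the same pattern in reverse (a read-modify-write of the containing word), which is why the native path is preferred whenever the capability is actually available.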
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 13f6a97d2..8cba8d0f0 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -630,11 +630,12 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     if (profile.support_explicit_workgroup_layout) {
         AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
         AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
-        if (program.info.uses_int8) {
+        // Only add 8/16-bit workgroup capabilities if the device actually supports them
+        if (program.info.uses_int8 && profile.support_int8) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
             std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
         }
-        if (program.info.uses_int16) {
+        if (program.info.uses_int16 && profile.support_int16) {
            AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
            std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
        }
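These capability declarations must stay in agreement with the per-access checks in emit_spirv_shared_memory.cpp above: taking the native path for a width whose capability was never declared (or declaring a capability the device lacks) produces an invalid module. A sketch of the shared condition, with a minimal stand-in for the real Shader::Profile and hypothetical helper names:

```cpp
struct Profile {
    bool support_explicit_workgroup_layout;
    bool support_int8;
    bool support_int16;
}; // minimal stand-in for Shader::Profile

// Both the capability declaration and every 8-bit access site should agree on this.
bool UseNativeSharedU8(const Profile& p) {
    return p.support_explicit_workgroup_layout && p.support_int8;
}

// Likewise for 16-bit accesses.
bool UseNativeSharedU16(const Profile& p) {
    return p.support_explicit_workgroup_layout && p.support_int16;
}
```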
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
index cdb58f46b..fc4e752e9 100644
--- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -181,11 +181,118 @@ void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
     inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
 }
 
+void ConvertF16U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F16; the high bits are ignored
+    const IR::F16 result = ir.ConvertUToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F32; the high bits are ignored
+    const IR::F32 result = ir.ConvertUToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F64; the high bits are ignored
+    const IR::F64 result = ir.ConvertUToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF16S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F16 as signed; the high bits are ignored
+    const IR::F16 result = ir.ConvertSToF(16, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF32S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F32 as signed; the high bits are ignored
+    const IR::F32 result = ir.ConvertSToF(32, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertF64S64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // Convert the low 32 bits to F64 as signed; the high bits are ignored
+    const IR::F64 result = ir.ConvertSToF(64, 32, value_pair.first);
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU64U32To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::U32 value{inst.Arg(0)};
+    // U32 to U64: zero-extend into a U32x2 pair
+    const IR::Value result = ir.CompositeConstruct(value, ir.Imm32(0));
+    inst.ReplaceUsesWith(result);
+}
+
+void ConvertU32U64To32(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const auto value_pair = Unpack(ir, inst.Arg(0));
+    // U64 to U32: take the low 32 bits
+    inst.ReplaceUsesWith(value_pair.first);
+}
+
+void ConvertS64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to S64: convert to S32 and sign-extend into the high word
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToS(32, value);
+    const IR::U32 high = ir.ShiftRightArithmetic(low, ir.Imm32(31)); // Sign extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}
+
+void ConvertU64FTo32(IR::Block& block, IR::Inst& inst) {
+    // Float to U64: convert to U32 and zero-extend into the high word
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    const IR::F16F32F64 value{inst.Arg(0)};
+    const IR::U32 low = ir.ConvertFToU(32, value);
+    const IR::U32 high = ir.Imm32(0); // Zero extend
+    inst.ReplaceUsesWith(ir.CompositeConstruct(low, high));
+}
+
 void Lower(IR::Block& block, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::PackUint2x32:
     case IR::Opcode::UnpackUint2x32:
         return inst.ReplaceOpcode(IR::Opcode::Identity);
+    // Conversion operations
+    case IR::Opcode::ConvertF16U64:
+        return ConvertF16U64To32(block, inst);
+    case IR::Opcode::ConvertF32U64:
+        return ConvertF32U64To32(block, inst);
+    case IR::Opcode::ConvertF64U64:
+        return ConvertF64U64To32(block, inst);
+    case IR::Opcode::ConvertF16S64:
+        return ConvertF16S64To32(block, inst);
+    case IR::Opcode::ConvertF32S64:
+        return ConvertF32S64To32(block, inst);
+    case IR::Opcode::ConvertF64S64:
+        return ConvertF64S64To32(block, inst);
+    case IR::Opcode::ConvertU64U32:
+        return ConvertU64U32To32(block, inst);
+    case IR::Opcode::ConvertU32U64:
+        return ConvertU32U64To32(block, inst);
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertS64F32:
+    case IR::Opcode::ConvertS64F64:
+        return ConvertS64FTo32(block, inst);
+    case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertU64F32:
+    case IR::Opcode::ConvertU64F64:
+        return ConvertU64FTo32(block, inst);
+    // Arithmetic operations
     case IR::Opcode::IAdd64:
         return IAdd64To32(block, inst);
     case IR::Opcode::ISub64:
@@ -198,6 +305,7 @@ void Lower(IR::Block& block, IR::Inst& inst) {
         return ShiftRightLogical64To32(block, inst);
     case IR::Opcode::ShiftRightArithmetic64:
         return ShiftRightArithmetic64To32(block, inst);
+    // Atomic operations
     case IR::Opcode::SharedAtomicExchange64:
         return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
     case IR::Opcode::GlobalAtomicIAdd64:
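These lowerings become load-bearing because the pipeline cache (below) now reports `support_int64 = false` whenever 8/16-bit storage is emulated, which routes every 64-bit conversion through this pass. The pass models a 64-bit integer as a composite of two 32-bit words, so each conversion reduces to 32-bit operations. A plain C++ restatement of what two of the new helpers compute (hypothetical names; in-range inputs assumed):

```cpp
#include <cstdint>
#include <utility>

using U32Pair = std::pair<uint32_t, uint32_t>; // {low word, high word}

// Mirrors ConvertS64FTo32: float -> S64 becomes float -> S32 plus sign extension.
U32Pair FloatToS64Lowered(float f) {
    const int32_t low = static_cast<int32_t>(f);             // assumes f fits in an int32
    const uint32_t high = static_cast<uint32_t>(low >> 31);  // replicate the sign bit
    return {static_cast<uint32_t>(low), high};
}

// Mirrors ConvertU64U32To32: U32 -> U64 is a zero-extended pair.
U32Pair ZeroExtendU32(uint32_t value) {
    return {value, 0};
}
```

Note the asymmetry: float-to-64-bit conversions can synthesize a correct high word, but the 64-bit-to-float helpers read only the low word, so source values of 2^32 or more lose their high bits; the comments in the pass call this out explicitly.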
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index fad8c2b51..1102afd9d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -54,8 +54,10 @@
 using VideoCommon::FileEnvironment;
 using VideoCommon::GenericEnvironment;
 using VideoCommon::GraphicsEnvironment;
 
+// The cache version is bumped when emulating 8/16-bit storage to avoid loading incompatible shaders
 constexpr u32 CACHE_VERSION = 11;
-constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
+constexpr u32 CACHE_VERSION_EMULATED_STORAGE = 13; // Bumped for the int64 lowering fix
+constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'c', 'i', 't', 'r', 'o', 'n', 'v', 'k'};
 
 template <typename Container>
 auto MakeSpan(Container& container) {
@@ -321,8 +323,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .supported_spirv = device.SupportedSpirvVersion(),
         .unified_descriptor_binding = true,
         .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
-        .support_int8 = device.IsInt8Supported(),
-        .support_int16 = device.IsShaderInt16Supported(),
+        // Force int8/int16 emulation when storage buffer support is missing: even if the
+        // shader int8/int16 features are present, storage buffer access is required for buffer operations
+        .support_int8 = device.Is8BitStorageSupported(),
+        .support_int16 = device.Is16BitStorageSupported(),
+        // Int64 support is independent of 8/16-bit storage; only check the native capability
         .support_int64 = device.IsShaderInt64Supported(),
         .support_vertex_instance_id = false,
         .support_float_controls = device.IsKhrShaderFloatControlsSupported(),
@@ -383,7 +388,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
     host_info = Shader::HostTranslateInfo{
         .support_float64 = device.IsFloat64Supported(),
         .support_float16 = device.IsFloat16Supported(),
-        .support_int64 = device.IsShaderInt64Supported(),
+        // Disable int64 support when emulating storage to ensure proper lowering
+        .support_int64 = device.IsShaderInt64Supported() &&
+                         device.Is8BitStorageSupported() &&
+                         device.Is16BitStorageSupported(),
         .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
                                 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
                                 driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
@@ -421,8 +429,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
 
 PipelineCache::~PipelineCache() {
     if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }
 }
@@ -482,8 +493,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
     if (use_vulkan_pipeline_cache) {
         vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
+        // Use a different cache version when emulating 8/16-bit storage to avoid loading invalid shaders
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         vulkan_pipeline_cache =
-            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
+            LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, cache_ver);
     }
 
     struct {
@@ -556,7 +571,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
         ++state.total;
         ++state.total_graphics;
     }};
-    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+    // Use a different cache version when emulating 8/16-bit storage
+    const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                              ? CACHE_VERSION_EMULATED_STORAGE
+                              : CACHE_VERSION;
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, cache_ver, load_compute,
                                load_graphics);
 
     LOG_INFO(Render_Vulkan, "Total Pipeline Count: {}", state.total);
@@ -579,8 +598,9 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
     workers.WaitForRequests(stop_loading);
 
     if (use_vulkan_pipeline_cache) {
+        // Reuse the cache_ver computed above
         SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
-                                     CACHE_VERSION);
+                                     cache_ver);
     }
 
     if (state.statistics) {
@@ -622,6 +642,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     bool build_in_parallel) try {
     auto hash = key.Hash();
     LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
+    LOG_INFO(Render_Vulkan, "Creating graphics pipeline with {} stages", envs.size());
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     const bool uses_vertex_a{key.unique_hashes[0] != 0};
@@ -685,14 +706,20 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
-        std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
-        // Reserve more space for Insane mode to reduce allocations during shader compilation
-        const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
-            ? std::max(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
-            : std::max(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
-        code.reserve(reserve_size);
-        device.SaveShader(code);
-        modules[stage_index] = BuildShader(device, code);
+        try {
+            std::vector<u32> code = EmitSPIRV(profile, runtime_info, program, binding);
+            // Reserve more space for Insane mode to reduce allocations during shader compilation
+            const size_t reserve_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Insane
+                ? std::max(code.size(), 64 * 1024 / sizeof(u32)) // 64KB for Insane mode
+                : std::max(code.size(), 16 * 1024 / sizeof(u32)); // 16KB for other modes
+            code.reserve(reserve_size);
+            device.SaveShader(code);
+            modules[stage_index] = BuildShader(device, code);
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Failed to compile shader stage {} for pipeline 0x{:016x}: {}",
+                      index, hash, e.what());
+            throw;
+        }
         if (device.HasDebuggingToolAttached()) {
             const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
@@ -741,7 +768,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
                 env_ptrs.push_back(&envs[index]);
             }
         }
-        SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
+        SerializePipeline(key, env_ptrs, pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
@@ -759,8 +789,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
         return pipeline;
     }
     serialization_thread.QueueWork([this, key, env_ = std::move(env)] {
+        const u32 cache_ver = (!device.Is8BitStorageSupported() || !device.Is16BitStorageSupported())
+                                  ? CACHE_VERSION_EMULATED_STORAGE
+                                  : CACHE_VERSION;
         SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env_},
-                          pipeline_cache_filename, CACHE_VERSION);
+                          pipeline_cache_filename, cache_ver);
     });
     return pipeline;
 }
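The emulated-storage version check is spelled out as the same ternary at five sites in this file. A possible consolidation (hypothetical helper, not part of the patch; assumes the file's `Device` reference and the two constants above):

```cpp
// Selects the on-disk cache version: caches built with emulated 8/16-bit
// storage are not compatible with caches built against native support.
u32 EffectiveCacheVersion(const Device& device) {
    const bool emulated_storage =
        !device.Is8BitStorageSupported() || !device.Is16BitStorageSupported();
    return emulated_storage ? CACHE_VERSION_EMULATED_STORAGE : CACHE_VERSION;
}
```

Keeping the two versions distinct means a driver update that adds native storage support invalidates the emulated cache instead of feeding stale shaders to the new code paths.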
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 1de02cd72..c7f8421d5 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -499,7 +499,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     if (is_qualcomm) {
         must_emulate_scaled_formats = true;
 
-        LOG_INFO(Render_Vulkan, 
+        LOG_INFO(Render_Vulkan,
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
@@ -509,7 +509,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for push descriptors");
         enable_push_descriptor_fallback = true;
         RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
-        
+        // Adreno driver compatibility for advanced shader features
         LOG_INFO(Render_Vulkan,
                  "Qualcomm Adreno drivers detected - enabling compatibility layer for advanced shader features");
@@ -521,6 +521,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
 
+        // Check for 16-bit and 8-bit storage support
+        if (!features.bit16_storage.uniformAndStorageBuffer16BitAccess ||
+            !features.bit16_storage.storageBuffer16BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 16-bit storage support - some games may have compatibility issues");
+        }
+        if (!features.bit8_storage.uniformAndStorageBuffer8BitAccess ||
+            !features.bit8_storage.storageBuffer8BitAccess) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm Adreno driver missing 8-bit storage support - some games may have compatibility issues");
+        }
+
         // Log detection of modern Adreno GPUs
         if (is_adreno8xx) {
             LOG_INFO(Render_Vulkan, "Detected Adreno 8xx series GPU (Snapdragon Elite) - using optimized driver settings");
@@ -555,14 +567,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     if (is_arm) {
         must_emulate_scaled_formats = true;
 
-        LOG_INFO(Render_Vulkan, 
+        LOG_INFO(Render_Vulkan,
                  "ARM Mali drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
-        
+
         // Mali driver compatibility for advanced shader features
-        LOG_INFO(Render_Vulkan, 
+        LOG_INFO(Render_Vulkan,
                  "ARM Mali drivers detected - enabling compatibility layer for advanced shader features");
         enable_shader_int64_fallback = true;
         RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
@@ -570,31 +582,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderBufferInt64Atomics = false;
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
-        
+
         LOG_INFO(Render_Vulkan,
                  "ARM Mali drivers detected - enabling compatibility layer for custom border colors");
         enable_custom_border_color_fallback = true;
         RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
                                VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
     }
-    
+
     // Samsung Xclipse driver compatibility layer
     const bool is_xclipse = driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY;
     if (is_xclipse) {
         must_emulate_scaled_formats = true;
-        
-        LOG_INFO(Render_Vulkan, 
+
+        LOG_INFO(Render_Vulkan,
                  "Samsung Xclipse drivers detected - enabling compatibility layer for extended dynamic state");
         enable_extended_dynamic_state_fallback = true;
         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
-        
+
         // Xclipse driver compatibility (AMD RDNA2-based with Samsung driver quirks)
-        LOG_INFO(Render_Vulkan, 
+        LOG_INFO(Render_Vulkan,
                  "Samsung Xclipse drivers detected - enabling comprehensive compatibility layer");
-        
+
         // Compatibility layer for shader float controls (causes compilation issues)
         RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
-        
+
         // Compatibility layer for 64-bit integer operations
         enable_shader_int64_fallback = true;
         RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
@@ -602,12 +614,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         features.shader_atomic_int64.shaderBufferInt64Atomics = false;
         features.shader_atomic_int64.shaderSharedInt64Atomics = false;
         features.features.shaderInt64 = false;
-        
+
         // Compatibility layer for custom border colors
         enable_custom_border_color_fallback = true;
         RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
                                VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
-        
+
         // Compatibility layer for push descriptors
         enable_push_descriptor_fallback = true;
         RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -1330,12 +1342,18 @@ void Device::RemoveUnsuitableExtensions() {
     }
 
     // VK_KHR_workgroup_memory_explicit_layout
+    // This extension requires 8-bit and 16-bit storage support, so disable it on drivers that lack them
     extensions.workgroup_memory_explicit_layout =
         features.features.shaderInt16 &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
-        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
+        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout &&
+        // Also require 8/16-bit storage buffer support, since the extension depends on it
+        features.bit8_storage.storageBuffer8BitAccess &&
+        features.bit8_storage.uniformAndStorageBuffer8BitAccess &&
+        features.bit16_storage.storageBuffer16BitAccess &&
+        features.bit16_storage.uniformAndStorageBuffer16BitAccess;
     RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
                                        features.workgroup_memory_explicit_layout,
                                        VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
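Restating the tightened suitability rule as a standalone predicate makes the invariant easier to see: the extension is only kept when every capability it can surface is backed by the corresponding storage features. This helper is hypothetical (the patch keeps the expression inline), and `f` stands in for the device's queried feature structs:

```cpp
bool SuitableForExplicitWorkgroupLayout(const auto& f) {
    return f.features.shaderInt16 &&
           f.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
           f.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
           f.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
           f.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout &&
           // New in this patch: the 8/16-bit storage features must also be present
           f.bit8_storage.storageBuffer8BitAccess &&
           f.bit8_storage.uniformAndStorageBuffer8BitAccess &&
           f.bit16_storage.storageBuffer16BitAccess &&
           f.bit16_storage.uniformAndStorageBuffer16BitAccess;
}
```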
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 6d9edfeb9..47a87ba38 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -115,10 +115,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
 
 // Define features which must be supported.
 #define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME)                                              \
-    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess)                                        \
-    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess)                              \
-    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess)                                          \
-    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess)                                \
     FEATURE_NAME(features, depthBiasClamp)                                                       \
     FEATURE_NAME(features, depthClamp)                                                           \
     FEATURE_NAME(features, drawIndirectFirstInstance)                                            \
@@ -151,6 +147,10 @@ VK_DEFINE_HANDLE(VmaAllocator)
 
 // Define features where the absence of the feature may result in a degraded experience.
 #define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME)                                            \
+    FEATURE_NAME(bit16_storage, storageBuffer16BitAccess)                                        \
+    FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess)                              \
+    FEATURE_NAME(bit8_storage, storageBuffer8BitAccess)                                          \
+    FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess)                                \
     FEATURE_NAME(custom_border_color, customBorderColors)                                        \
     FEATURE_NAME(depth_bias_control, depthBiasControl)                                           \
     FEATURE_NAME(depth_bias_control, leastRepresentableValueForceUnormRepresentation)            \
@@ -341,6 +341,18 @@ public:
         return features.shader_float16_int8.shaderInt8;
     }
 
+    /// Returns true if 16-bit storage buffer access is supported.
+    bool Is16BitStorageSupported() const {
+        return features.bit16_storage.storageBuffer16BitAccess &&
+               features.bit16_storage.uniformAndStorageBuffer16BitAccess;
+    }
+
+    /// Returns true if 8-bit storage buffer access is supported.
+    bool Is8BitStorageSupported() const {
+        return features.bit8_storage.storageBuffer8BitAccess &&
+               features.bit8_storage.uniformAndStorageBuffer8BitAccess;
+    }
+
     /// Returns true if the device supports binding multisample images as storage images.
     bool IsStorageImageMultisampleSupported() const {
         return features.features.shaderStorageImageMultisample;
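Moving the storage features from the mandatory to the recommended list changes initialization behavior: device creation now succeeds without them, and callers consult the new predicates to select the emulated paths instead. A minimal illustration of the intended call pattern (the function is hypothetical; the getters and log macro are the real ones):

```cpp
// Logged once at device setup: missing storage support no longer aborts
// initialization, it just routes shaders through the 32-bit emulation.
void LogStorageSupport(const Device& device) {
    if (!device.Is8BitStorageSupported()) {
        LOG_WARNING(Render_Vulkan, "8-bit storage not supported, emulating with 32-bit access");
    }
    if (!device.Is16BitStorageSupported()) {
        LOG_WARNING(Render_Vulkan, "16-bit storage not supported, emulating with 32-bit access");
    }
}
```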