From aaa565203cbbfa8019e5e66ae85f84f65cf506e5 Mon Sep 17 00:00:00 2001
From: Zephyron
Date: Wed, 31 Dec 2025 16:12:36 +1000
Subject: [PATCH] feat(renderer): add BCn unswizzle shader and build improvements

- Add block_linear_unswizzle_3d_bcn.comp compute shader for BCn format support
- Update host shaders CMakeLists to include new shader

This adds a new compute shader for BCn format handling.

Signed-off-by: Zephyron
---
 src/video_core/host_shaders/CMakeLists.txt    |   1 +
 .../block_linear_unswizzle_3d_bcn.comp        | 174 ++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index fbc661065..a14010350 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -19,6 +19,7 @@ set(SHADER_FILES
     blit_color_float.frag
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
+    block_linear_unswizzle_3d_bcn.comp
     convert_abgr8_to_d24s8.frag
     convert_abgr8_to_d32f.frag
     convert_d32f_to_abgr8.frag
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
new file mode 100644
index 000000000..31879f291
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
@@ -0,0 +1,174 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// Block-linear to linear copy for 3D images addressed in 4x4 texel blocks.
+// Each invocation reads one block of (1 << bytes_per_block_log2) bytes from the
+// swizzled input buffer and stores it at its linear block index in the output.
+
+#version 430
+
+#ifdef VULKAN
+    #extension GL_EXT_shader_16bit_storage : require
+    #extension GL_EXT_shader_8bit_storage : require
+    #define HAS_EXTENDED_TYPES 1
+    #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
+    #define END_PUSH_CONSTANTS };
+    #define UNIFORM(n)
+    #define BINDING_SWIZZLE_BUFFER 0
+    #define BINDING_INPUT_BUFFER 1
+    #define BINDING_OUTPUT_BUFFER 2
+#else
+    #extension GL_NV_gpu_shader5 : enable
+    #ifdef GL_NV_gpu_shader5
+        #define HAS_EXTENDED_TYPES 1
+    #else
+        #define HAS_EXTENDED_TYPES 0
+    #endif
+    #define BEGIN_PUSH_CONSTANTS
+    #define END_PUSH_CONSTANTS
+    #define UNIFORM(n) layout(location = n) uniform
+    #define BINDING_SWIZZLE_BUFFER 0
+    #define BINDING_INPUT_BUFFER 1
+    // The output is a storage buffer like the swizzle table, so it needs its own
+    // binding point; sharing binding 0 would alias the two buffers.
+    #define BINDING_OUTPUT_BUFFER 2
+#endif
+
+// --- Push Constants / Uniforms ---
+// The Vulkan block is anonymous so that both back ends refer to the parameters
+// by the same unqualified names.
+#ifdef VULKAN
+layout(push_constant) uniform PushConstants {
+    uvec3 blocks_dim;           // Offset 0
+    uint bytes_per_block_log2;  // Offset 12
+
+    uvec3 origin;               // Offset 16
+    uint slice_size;            // Offset 28
+
+    uint block_size;            // Offset 32
+    uint x_shift;               // Offset 36
+    uint block_height;          // Offset 40
+    uint block_height_mask;     // Offset 44
+
+    uint block_depth;           // Offset 48
+    uint block_depth_mask;      // Offset 52
+    int _pad;                   // Offset 56
+
+    // A 3-component vector is 16-byte aligned in a push constant block, so this
+    // lands at 64, not 60. NOTE(review): confirm the host-side struct matches.
+    ivec3 destination;          // Offset 64
+};
+#else
+BEGIN_PUSH_CONSTANTS
+    UNIFORM(0) uvec3 origin;
+    UNIFORM(1) ivec3 destination;
+    UNIFORM(2) uint bytes_per_block_log2;
+    UNIFORM(3) uint slice_size;
+    UNIFORM(4) uint block_size;
+    UNIFORM(5) uint x_shift;
+    UNIFORM(6) uint block_height;
+    UNIFORM(7) uint block_height_mask;
+    UNIFORM(8) uint block_depth;
+    UNIFORM(9) uint block_depth_mask;
+    UNIFORM(10) uvec3 blocks_dim;
+END_PUSH_CONSTANTS
+#endif
+
+// --- Buffers ---
+layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
+    uint swizzle_table[];
+};
+
+// The input buffer is declared several times on the same binding so it can be
+// read at different element widths.
+#if HAS_EXTENDED_TYPES
+    layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
+    layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
+#endif
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
+
+layout(binding = BINDING_OUTPUT_BUFFER, std430) buffer OutputBuffer {
+    uint out_u32[];
+};
+
+// --- Constants ---
+layout(local_size_x = 32, local_size_y = 8, local_size_z = 1) in;
+
+const uint GOB_SIZE_X = 64;
+const uint GOB_SIZE_Y = 8;
+const uint GOB_SIZE_Z = 1;
+const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
+
+const uint GOB_SIZE_X_SHIFT = 6;
+const uint GOB_SIZE_Y_SHIFT = 3;
+const uint GOB_SIZE_Z_SHIFT = 0;
+const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1u, GOB_SIZE_Y - 1u);
+
+// --- Helpers ---
+// Returns the swizzle table entry for pos, wrapped to the 64x8 GOB footprint.
+uint SwizzleOffset(uvec2 pos) {
+    pos &= SWIZZLE_MASK;
+    return swizzle_table[pos.y * 64u + pos.x];
+}
+
+// Reads one block of (1 << bytes_per_block_log2) bytes starting at byte offset
+// `offset`; components beyond the block size are zero.
+uvec4 ReadTexel(uint offset) {
+    switch (bytes_per_block_log2) {
+#if HAS_EXTENDED_TYPES
+    case 0u: return uvec4(u8data[offset], 0u, 0u, 0u);
+    case 1u: return uvec4(u16data[offset / 2u], 0u, 0u, 0u);
+#else
+    case 0u: return uvec4(bitfieldExtract(u32data[offset / 4u], int((offset * 8u) & 24u), 8), 0u, 0u, 0u);
+    case 1u: return uvec4(bitfieldExtract(u32data[offset / 4u], int((offset * 8u) & 16u), 16), 0u, 0u, 0u);
+#endif
+    case 2u: return uvec4(u32data[offset / 4u], 0u, 0u, 0u);
+    case 3u: return uvec4(u64data[offset / 8u], 0u, 0u);
+    case 4u: return u128data[offset / 16u];
+    }
+    return uvec4(0u);
+}
+
+void main() {
+    uvec3 block_coord = gl_GlobalInvocationID;
+    if (any(greaterThanEqual(block_coord, blocks_dim))) {
+        return;
+    }
+
+    uint bytes_per_block = 1u << bytes_per_block_log2;
+    // Origin is in pixels, divide by 4 for block-space (e.g. BCn formats)
+    uvec3 pos;
+    pos.x = (block_coord.x + (origin.x >> 2u)) * bytes_per_block;
+    pos.y = block_coord.y + (origin.y >> 2u);
+    pos.z = block_coord.z + origin.z;
+
+    uint swizzle = SwizzleOffset(pos.xy);
+    uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
+    uint offset = 0u;
+    // Apply block-linear offsets
+    offset += (pos.z >> block_depth) * slice_size;
+    offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
+    offset += (block_y >> block_height) * block_size;
+    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
+    offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
+    offset += swizzle;
+
+    uvec4 texel = ReadTexel(offset);
+
+    // Calculate linear output index
+    uint block_index = block_coord.x +
+                       (block_coord.y * blocks_dim.x) +
+                       (block_coord.z * blocks_dim.x * blocks_dim.y);
+
+    // Store exactly as many 32-bit words as the block holds. Writing a fixed
+    // two words would spill into the next block's slot for 4-byte blocks, and
+    // blocks smaller than a word cannot be stored without racing neighbours.
+    uint words_per_block = bytes_per_block >> 2u;
+    uint out_idx = block_index * words_per_block;
+    for (uint word = 0u; word < words_per_block; ++word) {
+        out_u32[out_idx + word] = texel[word];
+    }
+}