Merge pull request 'video_core: Add ETC2 texture compression format support' (#80) from feature/etc2-texture-compression-support into main

Reviewed-on: https://git.citron-emu.org/Citron/Emulator/pulls/80
This commit is contained in:
Zephyron
2025-12-31 04:57:54 +00:00
10 changed files with 172 additions and 59 deletions

View File

@@ -231,6 +231,12 @@ struct FormatTuple {
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
{VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK}, // ETC2_RGB_UNORM
{VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK}, // ETC2_RGBA_UNORM
{VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK}, // ETC2_RGB_PTA_UNORM
{VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK}, // ETC2_RGB_SRGB
{VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK}, // ETC2_RGBA_SRGB
{VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK}, // ETC2_RGB_PTA_SRGB
// Depth formats
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
@@ -299,6 +305,15 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
}
// Fall back to an uncompressed A8B8G8R8 format on hardware without native ETC2 support (textureCompressionETC2 is an optional Vulkan device feature)
if (!device.IsOptimalEtc2Supported() && VideoCore::Surface::IsPixelFormatETC2(pixel_format)) {
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
if (is_srgb) {
tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
} else {
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
}
const bool attachable = (tuple.usage & Attachable) != 0;
const bool storage = (tuple.usage & Storage) != 0;

View File

@@ -36,7 +36,7 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
case Service::android::PixelFormat::Rgba8888:
case Service::android::PixelFormat::Rgbx8888:
return VK_FORMAT_R8G8B8A8_UNORM;
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case Service::android::PixelFormat::Rgb565:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
case Service::android::PixelFormat::Bgra8888:
@@ -44,7 +44,7 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
return VK_FORMAT_R8G8B8A8_UNORM;
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
}
@@ -284,43 +284,19 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);
// Calculate appropriate block height based on texture format and size
// This is critical for proper texture swizzling
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
u32 block_height_log2 = 4; // Default for most formats
// Adjust block height for specific formats that cause corruption
if (framebuffer.pixel_format == Service::android::PixelFormat::Rgb565) {
block_height_log2 = 3; // RGB565 needs smaller block height
} else if (framebuffer.width <= 256 && framebuffer.height <= 256) {
block_height_log2 = 3; // Smaller textures need smaller blocks
}
const u64 linear_size{GetSizeInBytes(framebuffer)};
const u64 tiled_size{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
if (host_ptr && tiled_size > 0 && linear_size > 0) {
// Validate texture data before unswizzling to prevent corruption
const u64 max_size = static_cast<u64>(framebuffer.stride) * framebuffer.height * 4; // Max possible size
if (tiled_size <= max_size && linear_size <= max_size) {
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
} else {
// Fallback: copy raw data without unswizzling if sizes are invalid
const u64 copy_size = std::min(linear_size, static_cast<u64>(mapped_span.size() - image_offset));
if (copy_size > 0) {
std::memcpy(mapped_span.data() + image_offset, host_ptr, copy_size);
}
}
if (host_ptr) {
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
}
// Validate framebuffer dimensions to prevent corruption
const u32 max_dimension = 8192; // Reasonable maximum for Switch games
const u32 safe_width = std::min(framebuffer.width, max_dimension);
const u32 safe_height = std::min(framebuffer.height, max_dimension);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
.bufferRowLength = 0,
@@ -335,22 +311,20 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
.imageOffset = {.x = 0, .y = 0, .z = 0},
.imageExtent =
{
.width = safe_width,
.height = safe_height,
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
};
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[index];
// Enhanced memory barriers to prevent texture corruption and flickering
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
@@ -362,34 +336,24 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
.layerCount = 1,
},
};
// Transition to transfer destination
VkImageMemoryBarrier read_barrier = base_barrier;
read_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
read_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
// Transition to shader read
VkImageMemoryBarrier write_barrier = base_barrier;
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
write_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
write_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// Ensure all previous operations complete before transfer
cmdbuf.PipelineBarrier(
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, {}, {}, {read_barrier});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
// Ensure transfer completes before shader access
cmdbuf.PipelineBarrier(
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, {}, {}, {write_barrier});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}

View File

@@ -878,6 +878,11 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
if (IsPixelFormatASTC(image_format) && !device.IsOptimalAstcSupported()) {
view_formats[index_a].push_back(VK_FORMAT_A8B8G8R8_UNORM_PACK32);
}
if (IsPixelFormatETC2(image_format) && !device.IsOptimalEtc2Supported()) {
const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_format);
view_formats[index_a].push_back(is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
: VK_FORMAT_A8B8G8R8_UNORM_PACK32);
}
for (size_t index_b = 0; index_b < VideoCore::Surface::MaxPixelFormat; index_b++) {
const auto view_format = static_cast<PixelFormat>(index_b);
if (VideoCore::Surface::IsViewCompatible(image_format, view_format, false, true)) {
@@ -1488,6 +1493,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
flags |= VideoCommon::ImageFlagBits::Converted;
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (IsPixelFormatETC2(info.format) && !runtime->device.IsOptimalEtc2Supported()) {
flags |= VideoCommon::ImageFlagBits::Converted;
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (runtime->device.HasDebuggingToolAttached()) {
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}

View File

@@ -6,6 +6,7 @@
#include <fstream>
#include <memory>
#include <optional>
#include <unordered_set>
#include <utility>
#include "common/assert.h"
@@ -274,7 +275,46 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
if (handle.first > tic_limit) {
// Common sentinel values that games use to indicate "no texture" or "unbound texture"
// 0xfffffff8 = -8 (signed), commonly used as a sentinel value
constexpr u32 COMMON_SENTINEL_VALUES[] = {0xfffffff8, 0xffffffff};
const bool is_sentinel = std::find(std::begin(COMMON_SENTINEL_VALUES),
std::end(COMMON_SENTINEL_VALUES), raw) !=
std::end(COMMON_SENTINEL_VALUES);
// Log each unique invalid handle only once to reduce spam
static std::unordered_set<u32> logged_handles;
const bool already_logged = logged_handles.contains(raw);
if (!already_logged) {
logged_handles.insert(raw);
if (is_sentinel) {
// Sentinel values are expected and not errors, use DEBUG level
LOG_DEBUG(HW_GPU,
"Texture handle sentinel value detected (likely unbound texture). "
"Raw handle: 0x{:08x}, via_header_index: {}",
raw, via_header_index);
} else {
// Unexpected invalid handles are warnings
LOG_WARNING(HW_GPU,
"Texture handle index {} exceeds TIC limit {}, clamping to valid range. "
"Raw handle: 0x{:08x}, via_header_index: {}",
handle.first, tic_limit, raw, via_header_index);
}
}
// Return a default TICEntry with a safe fallback format
Tegra::Texture::TICEntry entry{};
// Set to a known safe format (A8B8G8R8_UNORM) using Assign method
entry.format.Assign(Tegra::Texture::TextureFormat::A8B8G8R8);
entry.r_type.Assign(Tegra::Texture::ComponentType::UNORM);
entry.g_type.Assign(Tegra::Texture::ComponentType::UNORM);
entry.b_type.Assign(Tegra::Texture::ComponentType::UNORM);
entry.a_type.Assign(Tegra::Texture::ComponentType::UNORM);
entry.texture_type.Assign(Tegra::Texture::TextureType::Texture2D);
return entry;
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));

View File

@@ -295,6 +295,20 @@ bool IsPixelFormatBCn(PixelFormat format) {
}
}
/// Reports whether a pixel format belongs to the ETC2 family.
///
/// @param format Pixel format to classify.
/// @return true for the UNORM and SRGB variants of ETC2_RGB, ETC2_RGBA and ETC2_RGB_PTA;
///         false for every other format.
bool IsPixelFormatETC2(PixelFormat format) {
    // Linear (UNORM) ETC2 variants.
    const bool is_unorm_etc2 = format == PixelFormat::ETC2_RGB_UNORM ||
                               format == PixelFormat::ETC2_RGBA_UNORM ||
                               format == PixelFormat::ETC2_RGB_PTA_UNORM;
    // sRGB ETC2 variants.
    const bool is_srgb_etc2 = format == PixelFormat::ETC2_RGB_SRGB ||
                              format == PixelFormat::ETC2_RGBA_SRGB ||
                              format == PixelFormat::ETC2_RGB_PTA_SRGB;
    return is_unorm_etc2 || is_srgb_etc2;
}
bool IsPixelFormatSRGB(PixelFormat format) {
switch (format) {
case PixelFormat::A8B8G8R8_SRGB:
@@ -303,6 +317,9 @@ bool IsPixelFormatSRGB(PixelFormat format) {
case PixelFormat::BC2_SRGB:
case PixelFormat::BC3_SRGB:
case PixelFormat::BC7_SRGB:
case PixelFormat::ETC2_RGB_SRGB:
case PixelFormat::ETC2_RGBA_SRGB:
case PixelFormat::ETC2_RGB_PTA_SRGB:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:

View File

@@ -109,6 +109,12 @@ enum class PixelFormat {
ASTC_2D_6X5_UNORM,
ASTC_2D_6X5_SRGB,
E5B9G9R9_FLOAT,
ETC2_RGB_UNORM,
ETC2_RGBA_UNORM,
ETC2_RGB_PTA_UNORM,
ETC2_RGB_SRGB,
ETC2_RGBA_SRGB,
ETC2_RGB_PTA_SRGB,
MaxColorFormat,
@@ -250,6 +256,12 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
6, // ASTC_2D_6X5_UNORM
6, // ASTC_2D_6X5_SRGB
1, // E5B9G9R9_FLOAT
4, // ETC2_RGB_UNORM
4, // ETC2_RGBA_UNORM
4, // ETC2_RGB_PTA_UNORM
4, // ETC2_RGB_SRGB
4, // ETC2_RGBA_SRGB
4, // ETC2_RGB_PTA_SRGB
1, // D32_FLOAT
1, // D16_UNORM
1, // X8_D24_UNORM
@@ -360,6 +372,12 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
5, // ASTC_2D_6X5_UNORM
5, // ASTC_2D_6X5_SRGB
1, // E5B9G9R9_FLOAT
4, // ETC2_RGB_UNORM
4, // ETC2_RGBA_UNORM
4, // ETC2_RGB_PTA_UNORM
4, // ETC2_RGB_SRGB
4, // ETC2_RGBA_SRGB
4, // ETC2_RGB_PTA_SRGB
1, // D32_FLOAT
1, // D16_UNORM
1, // X8_D24_UNORM
@@ -470,6 +488,12 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // ASTC_2D_6X5_UNORM
128, // ASTC_2D_6X5_SRGB
32, // E5B9G9R9_FLOAT
64, // ETC2_RGB_UNORM
128, // ETC2_RGBA_UNORM
64, // ETC2_RGB_PTA_UNORM
64, // ETC2_RGB_SRGB
128, // ETC2_RGBA_SRGB
64, // ETC2_RGB_PTA_SRGB
32, // D32_FLOAT
16, // D16_UNORM
32, // X8_D24_UNORM
@@ -507,6 +531,8 @@ bool IsPixelFormatASTC(PixelFormat format);
bool IsPixelFormatBCn(PixelFormat format);
bool IsPixelFormatETC2(PixelFormat format);
bool IsPixelFormatSRGB(PixelFormat format);
bool IsPixelFormatInteger(PixelFormat format);

View File

@@ -19,6 +19,8 @@ constexpr auto UNORM = ComponentType::UNORM;
constexpr auto SINT = ComponentType::SINT;
constexpr auto UINT = ComponentType::UINT;
constexpr auto FLOAT = ComponentType::FLOAT;
constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
constexpr bool LINEAR = false;
constexpr bool SRGB = true;
@@ -197,6 +199,14 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::BC6H_SFLOAT;
case Hash(TextureFormat::BC6H_U16, FLOAT):
return PixelFormat::BC6H_UFLOAT;
case Hash(TextureFormat::ETC2_RGB, UNORM):
return PixelFormat::ETC2_RGB_UNORM;
case Hash(TextureFormat::ETC2_RGB_PTA, UNORM):
return PixelFormat::ETC2_RGB_PTA_UNORM;
case Hash(TextureFormat::ETC2_RGB_PTA, UNORM, SRGB):
return PixelFormat::ETC2_RGB_PTA_SRGB;
case Hash(TextureFormat::ETC2_RGBA, UNORM):
return PixelFormat::ETC2_RGBA_UNORM;
case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
return PixelFormat::ASTC_2D_4X4_UNORM;
case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
@@ -253,10 +263,22 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::ASTC_2D_6X5_UNORM;
case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
return PixelFormat::ASTC_2D_6X5_SRGB;
// Format 90 (0x5a): ETC2_RGB_SRGB with components {UINT, UNORM_FORCE_FP16, UNORM, UNORM}
// ETC2 compressed formats can have unusual component type combinations, but the format itself
// determines the actual compression scheme
case Hash(static_cast<TextureFormat>(0x5a), UINT, UNORM_FORCE_FP16, UNORM, UNORM, SRGB):
return PixelFormat::ETC2_RGB_SRGB;
// Format 99 (0x63): ETC2_RGBA_SRGB with components {0, SNORM_FORCE_FP16, SINT, SNORM_FORCE_FP16}
// Component 0 is a swizzle source (Zero), not a ComponentType, but we handle it by checking
// the hash with component 0 explicitly. The hash for component 0 will be 0 << 1 = 0.
case Hash(static_cast<TextureFormat>(0x63), static_cast<ComponentType>(0), SNORM_FORCE_FP16, SINT, SNORM_FORCE_FP16, SRGB):
return PixelFormat::ETC2_RGBA_SRGB;
}
UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
static_cast<int>(format), is_srgb, static_cast<int>(red),
static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
LOG_WARNING(HW_GPU,
"Unsupported texture format={} srgb={} components={{{} {} {} {}}}, falling back to "
"A8B8G8R8_UNORM",
static_cast<int>(format), is_srgb, static_cast<int>(red),
static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
return PixelFormat::A8B8G8R8_UNORM;
}

View File

@@ -207,6 +207,18 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_6X5_SRGB";
case PixelFormat::E5B9G9R9_FLOAT:
return "E5B9G9R9_FLOAT";
case PixelFormat::ETC2_RGB_UNORM:
return "ETC2_RGB_UNORM";
case PixelFormat::ETC2_RGBA_UNORM:
return "ETC2_RGBA_UNORM";
case PixelFormat::ETC2_RGB_PTA_UNORM:
return "ETC2_RGB_PTA_UNORM";
case PixelFormat::ETC2_RGB_SRGB:
return "ETC2_RGB_SRGB";
case PixelFormat::ETC2_RGBA_SRGB:
return "ETC2_RGBA_SRGB";
case PixelFormat::ETC2_RGB_PTA_SRGB:
return "ETC2_RGB_PTA_SRGB";
case PixelFormat::D32_FLOAT:
return "D32_FLOAT";
case PixelFormat::D16_UNORM:

View File

@@ -87,6 +87,9 @@ enum class TextureFormat : u32 {
ASTC_2D_8X5 = 0x55,
ASTC_2D_10X5 = 0x56,
ASTC_2D_10X6 = 0x57,
// Additional formats found in games
ETC2_RGB_SRGB = 0x5a, // Format 90: ETC2 RGB with SRGB
ETC2_RGBA_SRGB = 0x63, // Format 99: ETC2 RGBA with SRGB
};
enum class TextureType : u32 {

View File

@@ -320,6 +320,11 @@ public:
return features.features.textureCompressionBC;
}
/// Reports whether ETC2 texture compression is natively supported.
/// @return The `textureCompressionETC2` flag stored in `features.features`, as a bool.
bool IsOptimalEtc2Supported() const {
    const bool has_native_etc2 = features.features.textureCompressionETC2;
    return has_native_etc2;
}
/// Returns true if descriptor aliasing is natively supported.
bool IsDescriptorAliasingSupported() const {
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;