Skip to content
Snippets Groups Projects
Commit d4ae43f9 authored by FernandoS27's avatar FernandoS27
Browse files

Remove old Swizzle algorithms and use 3d Swizzle

parent 4d959c6b
No related branches found
No related tags found
No related merge requests found
......@@ -40,97 +40,56 @@ struct alignas(64) SwizzleTable {
// Lookup table used by the per-pixel swizzle paths. It is indexed as [y % 8][byte_x % 64] and the
// value read is added to a GOB base address to form the swizzled byte offset.
// NOTE(review): the template arguments presumably describe rows, entries per row and the byte step
// between entries - confirm against the SwizzleTable definition.
constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>();
// Coarser lookup table used by the 16-byte-chunk swizzle paths. It is indexed as
// [y % 8][(byte_x / 16) % 4], i.e. one entry per 16-byte chunk of a 64-byte GOB row.
constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
/**
 * Copies an image pixel by pixel between a linear buffer and a block-linear (swizzled) buffer.
 * The position of every pixel inside its 64x8-byte GOB is looked up in legacy_swizzle_table.
 *
 * @param width               Image width in pixels.
 * @param height              Image height in pixels.
 * @param bytes_per_pixel     Bytes copied for each pixel; also the pixel pitch on the swizzled side.
 * @param out_bytes_per_pixel Pixel pitch, in bytes, on the linear side.
 * @param swizzled_data       Block-linear buffer.
 * @param unswizzled_data     Linear buffer.
 * @param unswizzle           true copies swizzled -> linear, false copies linear -> swizzled.
 * @param block_height        Number of GOBs stacked vertically in one block.
 */
static void LegacySwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
                              u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
                              u32 block_height) {
    constexpr std::size_t gob_row_bytes = 64;
    constexpr std::size_t gob_rows = 8;
    constexpr std::size_t gob_bytes = gob_row_bytes * gob_rows;

    const std::size_t row_bytes = width * bytes_per_pixel;
    const std::size_t gobs_per_row = (row_bytes + gob_row_bytes - 1) / gob_row_bytes;
    const std::size_t block_rows = gob_rows * block_height;
    const std::size_t block_bytes = gob_bytes * block_height;

    for (std::size_t row = 0; row < height; ++row) {
        // Start of the GOB holding this row at x == 0: skip the complete rows of blocks above,
        // then the GOBs above this one inside the current block.
        const std::size_t row_base = (row / block_rows) * block_bytes * gobs_per_row +
                                     ((row % block_rows) / gob_rows) * gob_bytes;
        const auto& lut = legacy_swizzle_table[row % gob_rows];

        for (std::size_t col = 0; col < width; ++col) {
            const std::size_t byte_x = col * bytes_per_pixel;
            // Moving one GOB to the right skips a whole block worth of bytes.
            const std::size_t tiled_offset =
                row_base + (byte_x / gob_row_bytes) * block_bytes + lut[byte_x % gob_row_bytes];
            const std::size_t linear_offset = (col + row * width) * out_bytes_per_pixel;

            u8* const tiled_ptr = swizzled_data + tiled_offset;
            u8* const linear_ptr = unswizzled_data + linear_offset;
            if (unswizzle) {
                std::memcpy(linear_ptr, tiled_ptr, bytes_per_pixel);
            } else {
                std::memcpy(tiled_ptr, linear_ptr, bytes_per_pixel);
            }
        }
    }
}
/**
 * Copies an image between a linear buffer and a block-linear (swizzled) buffer in 16-byte chunks.
 * The position of every chunk inside its 64x8-byte GOB is looked up in fast_swizzle_table.
 *
 * Every copy moves exactly 16 bytes, so when a row's byte length is not a multiple of 16 the
 * last chunk of the row reaches past the end of that row.
 *
 * @param width               Image width in pixels.
 * @param height              Image height in pixels.
 * @param bytes_per_pixel     Pixel pitch, in bytes, on the swizzled side.
 * @param out_bytes_per_pixel Pixel pitch, in bytes, on the linear side.
 * @param swizzled_data       Block-linear buffer.
 * @param unswizzled_data     Linear buffer.
 * @param unswizzle           true copies swizzled -> linear, false copies linear -> swizzled.
 * @param block_height        Number of GOBs stacked vertically in one block.
 */
static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
                            u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
                            u32 block_height) {
    constexpr std::size_t gob_row_bytes = 64;
    constexpr std::size_t gob_rows = 8;
    constexpr std::size_t gob_bytes = gob_row_bytes * gob_rows;
    constexpr std::size_t chunk_bytes = 16;

    const std::size_t row_bytes{width * bytes_per_pixel};
    const std::size_t gobs_per_row = (row_bytes + gob_row_bytes - 1) / gob_row_bytes;
    const std::size_t block_rows = gob_rows * block_height;
    const std::size_t block_bytes = gob_bytes * block_height;

    for (std::size_t row = 0; row < height; ++row) {
        // Start of the GOB holding this row at x == 0: skip the complete rows of blocks above,
        // then the GOBs above this one inside the current block.
        const std::size_t row_base = (row / block_rows) * block_bytes * gobs_per_row +
                                     ((row % block_rows) / gob_rows) * gob_bytes;
        const std::size_t linear_row_base = row * width * out_bytes_per_pixel;
        const auto& lut = fast_swizzle_table[row % gob_rows];

        for (std::size_t byte_x = 0; byte_x < row_bytes; byte_x += chunk_bytes) {
            // Moving one GOB to the right skips a whole block worth of bytes.
            const std::size_t tiled_offset =
                row_base + (byte_x / gob_row_bytes) * block_bytes + lut[(byte_x / 16) % 4];
            // Rescale the byte column from the swizzled pixel pitch to the linear pixel pitch.
            const std::size_t linear_offset =
                byte_x * out_bytes_per_pixel / bytes_per_pixel + linear_row_base;

            u8* const tiled_ptr = swizzled_data + tiled_offset;
            u8* const linear_ptr = unswizzled_data + linear_offset;
            if (unswizzle) {
                std::memcpy(linear_ptr, tiled_ptr, chunk_bytes);
            } else {
                std::memcpy(tiled_ptr, linear_ptr, chunk_bytes);
            }
        }
    }
}
void Precise3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, const u32 x_start,
const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end,
const u32 z_end, const u32 tile_offset, const u32 xy_block_size,
const u32 layer_z, const u32 stride_x, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel) {
/**
* This function manages ALL the GOBs(Group of Bytes) Inside a single block.
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
std::array<u8*, 2> data_ptrs;
u32 z_adress = tile_offset;
const u32 gob_size = 64 * 8 * 1;
const u32 gob_size_x = 64;
const u32 gob_size_y = 8;
const u32 gob_size_z = 1;
const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
for (u32 z = z_start; z < z_end; z++) {
u32 y_adress = z_adress;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
const auto& table = legacy_swizzle_table[y % 8];
const auto& table = legacy_swizzle_table[y % gob_size_y];
for (u32 x = x_start; x < x_end; x++) {
const u32 swizzle_offset{y_adress + table[x * bytes_per_pixel % 64]};
const u32 swizzle_offset{y_adress + table[x * bytes_per_pixel % gob_size_x]};
const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
}
pixel_base += stride_x;
if ((y + 1) % 8 == 0)
if ((y + 1) % gob_size_y == 0)
y_adress += gob_size;
}
z_adress += xy_block_size;
}
}
void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 width,
u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u32 block_height, u32 block_depth) {
auto div_ceil = [](u32 x, u32 y) { return ((x + y - 1) / y); };
/**
* This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
* The body of this function takes care of splitting the swizzled texture into blocks,
* and managing the extents of it. Once all the parameters of a single block are obtained,
* the function calls '3DProcessBlock' to process that particular Block.
*
* Documentation for the memory layout and decoding can be found at:
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
const u32 width, const u32 height, const u32 depth,
const u32 bytes_per_pixel, const u32 out_bytes_per_pixel,
const u32 block_height, const u32 block_depth) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
const u32 gob_x_bytes = 64;
......@@ -157,33 +116,41 @@ void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzl
for (u32 xb = 0; xb < blocks_on_x; xb++) {
const u32 x_start = xb * block_x_elements;
const u32 x_end = std::min(width, x_start + block_x_elements);
Precise3DProcessGobs(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
z_start, x_end, y_end, z_end, tile_offset, xy_block_size, layer_z,
stride_x, bytes_per_pixel, out_bytes_per_pixel);
Precise3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
tile_offset += block_size;
}
}
}
}
void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, const u32 x_start,
const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end,
const u32 z_end, const u32 tile_offset, const u32 xy_block_size,
const u32 layer_z, const u32 stride_x, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel) {
/**
* This function manages ALL the GOBs (Groups of Bytes) inside a single block.
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
std::array<u8*, 2> data_ptrs;
u32 z_adress = tile_offset;
const u32 x_startb = x_start * bytes_per_pixel;
const u32 x_endb = x_end * bytes_per_pixel;
const u32 copy_size = 16;
const u32 gob_size = 64 * 8 * 1;
const u32 gob_size_x = 64;
const u32 gob_size_y = 8;
const u32 gob_size_z = 1;
const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
for (u32 z = z_start; z < z_end; z++) {
u32 y_adress = z_adress;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
const auto& table = fast_swizzle_table[y % 8];
const auto& table = fast_swizzle_table[y % gob_size_y];
for (u32 xb = x_startb; xb < x_endb; xb += copy_size) {
const u32 swizzle_offset{y_adress + table[(xb / 16) % 4]};
const u32 swizzle_offset{y_adress + table[(xb / copy_size) % 4]};
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
const u32 pixel_index{out_x + pixel_base};
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
......@@ -191,18 +158,27 @@ void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, c
std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
}
pixel_base += stride_x;
if ((y + 1) % 8 == 0)
if ((y + 1) % gob_size_y == 0)
y_adress += gob_size;
}
z_adress += xy_block_size;
}
}
void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 width,
u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u32 block_height, u32 block_depth) {
auto div_ceil = [](u32 x, u32 y) { return ((x + y - 1) / y); };
/**
* This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
* The body of this function takes care of splitting the swizzled texture into blocks,
* and managing the extents of it. Once all the parameters of a single block are obtained,
* the function calls '3DProcessBlock' to process that particular Block.
*
* Documentation for the memory layout and decoding can be found at:
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
const u32 width, const u32 height, const u32 depth,
const u32 bytes_per_pixel, const u32 out_bytes_per_pixel,
const u32 block_height, const u32 block_depth) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
const u32 gob_x_bytes = 64;
......@@ -229,9 +205,9 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
for (u32 xb = 0; xb < blocks_on_x; xb++) {
const u32 x_start = xb * block_x_elements;
const u32 x_end = std::min(width, x_start + block_x_elements);
Fast3DProcessGobs(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
z_start, x_end, y_end, z_end, tile_offset, xy_block_size, layer_z,
stride_x, bytes_per_pixel, out_bytes_per_pixel);
Fast3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
tile_offset += block_size;
}
}
......@@ -245,7 +221,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
bytes_per_pixel, out_bytes_per_pixel, block_height, 1U);
} else {
Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, 1U,
bytes_per_pixel, out_bytes_per_pixel, block_height, 1U);
bytes_per_pixel, out_bytes_per_pixel, block_height, 1U);
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment