Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Suyu
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
many-archive
Suyu
Commits
d4ae43f9
There was an error fetching the commit references. Please try again later.
Commit
d4ae43f9
authored
6 years ago
by
FernandoS27
Browse files
Options
Downloads
Patches
Plain Diff
Remove old Swizzle algorithms and use 3d Swizzle
parent
4d959c6b
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/video_core/textures/decoders.cpp
+69
-93
69 additions, 93 deletions
src/video_core/textures/decoders.cpp
with
69 additions
and
93 deletions
src/video_core/textures/decoders.cpp
+
69
−
93
View file @
d4ae43f9
...
@@ -40,97 +40,56 @@ struct alignas(64) SwizzleTable {
...
@@ -40,97 +40,56 @@ struct alignas(64) SwizzleTable {
constexpr
auto
legacy_swizzle_table
=
SwizzleTable
<
8
,
64
,
1
>
();
constexpr
auto
legacy_swizzle_table
=
SwizzleTable
<
8
,
64
,
1
>
();
constexpr
auto
fast_swizzle_table
=
SwizzleTable
<
8
,
4
,
16
>
();
constexpr
auto
fast_swizzle_table
=
SwizzleTable
<
8
,
4
,
16
>
();
static
void
LegacySwizzleData
(
u32
width
,
u32
height
,
u32
bytes_per_pixel
,
u32
out_bytes_per_pixel
,
/**
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
* This function manages ALL the GOBs (Group of Bytes) inside a single block.
u32
block_height
)
{
* Instead of going gob by gob, we map the coordinates inside a block and manage from
std
::
array
<
u8
*
,
2
>
data_ptrs
;
* those. Block_Width is assumed to be 1.
const
std
::
size_t
stride
=
width
*
bytes_per_pixel
;
*/
const
std
::
size_t
gobs_in_x
=
64
;
void
Precise3DProcessBlock
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
const
bool
unswizzle
,
const
std
::
size_t
gobs_in_y
=
8
;
const
u32
x_start
,
const
u32
y_start
,
const
u32
z_start
,
const
u32
x_end
,
const
std
::
size_t
gobs_size
=
gobs_in_x
*
gobs_in_y
;
const
u32
y_end
,
const
u32
z_end
,
const
u32
tile_offset
,
const
std
::
size_t
image_width_in_gobs
{(
stride
+
gobs_in_x
-
1
)
/
gobs_in_x
};
const
u32
xy_block_size
,
const
u32
layer_z
,
const
u32
stride_x
,
for
(
std
::
size_t
y
=
0
;
y
<
height
;
++
y
)
{
const
u32
bytes_per_pixel
,
const
u32
out_bytes_per_pixel
)
{
const
std
::
size_t
gob_y_address
=
(
y
/
(
gobs_in_y
*
block_height
))
*
gobs_size
*
block_height
*
image_width_in_gobs
+
(
y
%
(
gobs_in_y
*
block_height
)
/
gobs_in_y
)
*
gobs_size
;
const
auto
&
table
=
legacy_swizzle_table
[
y
%
gobs_in_y
];
for
(
std
::
size_t
x
=
0
;
x
<
width
;
++
x
)
{
const
std
::
size_t
gob_address
=
gob_y_address
+
(
x
*
bytes_per_pixel
/
gobs_in_x
)
*
gobs_size
*
block_height
;
const
std
::
size_t
x2
=
x
*
bytes_per_pixel
;
const
std
::
size_t
swizzle_offset
=
gob_address
+
table
[
x2
%
gobs_in_x
];
const
std
::
size_t
pixel_index
=
(
x
+
y
*
width
)
*
out_bytes_per_pixel
;
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
data_ptrs
[
!
unswizzle
]
=
unswizzled_data
+
pixel_index
;
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
bytes_per_pixel
);
}
}
}
static
void
FastSwizzleData
(
u32
width
,
u32
height
,
u32
bytes_per_pixel
,
u32
out_bytes_per_pixel
,
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
u32
block_height
)
{
std
::
array
<
u8
*
,
2
>
data_ptrs
;
const
std
::
size_t
stride
{
width
*
bytes_per_pixel
};
const
std
::
size_t
gobs_in_x
=
64
;
const
std
::
size_t
gobs_in_y
=
8
;
const
std
::
size_t
gobs_size
=
gobs_in_x
*
gobs_in_y
;
const
std
::
size_t
image_width_in_gobs
{(
stride
+
gobs_in_x
-
1
)
/
gobs_in_x
};
const
std
::
size_t
copy_size
{
16
};
for
(
std
::
size_t
y
=
0
;
y
<
height
;
++
y
)
{
const
std
::
size_t
initial_gob
=
(
y
/
(
gobs_in_y
*
block_height
))
*
gobs_size
*
block_height
*
image_width_in_gobs
+
(
y
%
(
gobs_in_y
*
block_height
)
/
gobs_in_y
)
*
gobs_size
;
const
std
::
size_t
pixel_base
{
y
*
width
*
out_bytes_per_pixel
};
const
auto
&
table
=
fast_swizzle_table
[
y
%
gobs_in_y
];
for
(
std
::
size_t
xb
=
0
;
xb
<
stride
;
xb
+=
copy_size
)
{
const
std
::
size_t
gob_address
{
initial_gob
+
(
xb
/
gobs_in_x
)
*
gobs_size
*
block_height
};
const
std
::
size_t
swizzle_offset
{
gob_address
+
table
[(
xb
/
16
)
%
4
]};
const
std
::
size_t
out_x
=
xb
*
out_bytes_per_pixel
/
bytes_per_pixel
;
const
std
::
size_t
pixel_index
{
out_x
+
pixel_base
};
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
data_ptrs
[
!
unswizzle
]
=
unswizzled_data
+
pixel_index
;
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
copy_size
);
}
}
}
void
Precise3DProcessGobs
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
const
u32
x_start
,
const
u32
y_start
,
const
u32
z_start
,
const
u32
x_end
,
const
u32
y_end
,
const
u32
z_end
,
const
u32
tile_offset
,
const
u32
xy_block_size
,
const
u32
layer_z
,
const
u32
stride_x
,
const
u32
bytes_per_pixel
,
const
u32
out_bytes_per_pixel
)
{
std
::
array
<
u8
*
,
2
>
data_ptrs
;
std
::
array
<
u8
*
,
2
>
data_ptrs
;
u32
z_adress
=
tile_offset
;
u32
z_adress
=
tile_offset
;
const
u32
gob_size
=
64
*
8
*
1
;
const
u32
gob_size_x
=
64
;
const
u32
gob_size_y
=
8
;
const
u32
gob_size_z
=
1
;
const
u32
gob_size
=
gob_size_x
*
gob_size_y
*
gob_size_z
;
for
(
u32
z
=
z_start
;
z
<
z_end
;
z
++
)
{
for
(
u32
z
=
z_start
;
z
<
z_end
;
z
++
)
{
u32
y_adress
=
z_adress
;
u32
y_adress
=
z_adress
;
u32
pixel_base
=
layer_z
*
z
+
y_start
*
stride_x
;
u32
pixel_base
=
layer_z
*
z
+
y_start
*
stride_x
;
for
(
u32
y
=
y_start
;
y
<
y_end
;
y
++
)
{
for
(
u32
y
=
y_start
;
y
<
y_end
;
y
++
)
{
const
auto
&
table
=
legacy_swizzle_table
[
y
%
8
];
const
auto
&
table
=
legacy_swizzle_table
[
y
%
gob_size_y
];
for
(
u32
x
=
x_start
;
x
<
x_end
;
x
++
)
{
for
(
u32
x
=
x_start
;
x
<
x_end
;
x
++
)
{
const
u32
swizzle_offset
{
y_adress
+
table
[
x
*
bytes_per_pixel
%
64
]};
const
u32
swizzle_offset
{
y_adress
+
table
[
x
*
bytes_per_pixel
%
gob_size_x
]};
const
u32
pixel_index
{
x
*
out_bytes_per_pixel
+
pixel_base
};
const
u32
pixel_index
{
x
*
out_bytes_per_pixel
+
pixel_base
};
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
data_ptrs
[
!
unswizzle
]
=
unswizzled_data
+
pixel_index
;
data_ptrs
[
!
unswizzle
]
=
unswizzled_data
+
pixel_index
;
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
bytes_per_pixel
);
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
bytes_per_pixel
);
}
}
pixel_base
+=
stride_x
;
pixel_base
+=
stride_x
;
if
((
y
+
1
)
%
8
==
0
)
if
((
y
+
1
)
%
gob_size_y
==
0
)
y_adress
+=
gob_size
;
y_adress
+=
gob_size
;
}
}
z_adress
+=
xy_block_size
;
z_adress
+=
xy_block_size
;
}
}
}
}
void
Precise3DSwizzledData
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
u32
width
,
/**
u32
height
,
u32
depth
,
u32
bytes_per_pixel
,
u32
out_bytes_per_pixel
,
* This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
u32
block_height
,
u32
block_depth
)
{
* The body of this function takes care of splitting the swizzled texture into blocks,
auto
div_ceil
=
[](
u32
x
,
u32
y
)
{
return
((
x
+
y
-
1
)
/
y
);
};
* and managing the extents of it. Once all the parameters of a single block are obtained,
* the function calls '3DProcessBlock' to process that particular Block.
*
* Documentation for the memory layout and decoding can be found at:
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
void
Precise3DSwizzledData
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
const
bool
unswizzle
,
const
u32
width
,
const
u32
height
,
const
u32
depth
,
const
u32
bytes_per_pixel
,
const
u32
out_bytes_per_pixel
,
const
u32
block_height
,
const
u32
block_depth
)
{
auto
div_ceil
=
[](
const
u32
x
,
const
u32
y
)
{
return
((
x
+
y
-
1
)
/
y
);
};
const
u32
stride_x
=
width
*
out_bytes_per_pixel
;
const
u32
stride_x
=
width
*
out_bytes_per_pixel
;
const
u32
layer_z
=
height
*
stride_x
;
const
u32
layer_z
=
height
*
stride_x
;
const
u32
gob_x_bytes
=
64
;
const
u32
gob_x_bytes
=
64
;
...
@@ -157,33 +116,41 @@ void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzl
...
@@ -157,33 +116,41 @@ void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzl
for
(
u32
xb
=
0
;
xb
<
blocks_on_x
;
xb
++
)
{
for
(
u32
xb
=
0
;
xb
<
blocks_on_x
;
xb
++
)
{
const
u32
x_start
=
xb
*
block_x_elements
;
const
u32
x_start
=
xb
*
block_x_elements
;
const
u32
x_end
=
std
::
min
(
width
,
x_start
+
block_x_elements
);
const
u32
x_end
=
std
::
min
(
width
,
x_start
+
block_x_elements
);
Precise3DProcess
Gobs
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
x_start
,
y_start
,
Precise3DProcess
Block
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
x_start
,
y_start
,
z_start
,
x_end
,
y_end
,
z_end
,
tile_offset
,
xy_block_size
,
layer_z
,
z_start
,
x_end
,
y_end
,
z_end
,
tile_offset
,
xy_block_size
,
stride_x
,
bytes_per_pixel
,
out_bytes_per_pixel
);
layer_z
,
stride_x
,
bytes_per_pixel
,
out_bytes_per_pixel
);
tile_offset
+=
block_size
;
tile_offset
+=
block_size
;
}
}
}
}
}
}
}
}
void
Fast3DProcessGobs
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
const
u32
x_start
,
/**
const
u32
y_start
,
const
u32
z_start
,
const
u32
x_end
,
const
u32
y_end
,
* This function manages ALL the GOBs (Group of Bytes) inside a single block.
const
u32
z_end
,
const
u32
tile_offset
,
const
u32
xy_block_size
,
* Instead of going gob by gob, we map the coordinates inside a block and manage from
const
u32
layer_z
,
const
u32
stride_x
,
const
u32
bytes_per_pixel
,
* those. Block_Width is assumed to be 1.
const
u32
out_bytes_per_pixel
)
{
*/
void
Fast3DProcessBlock
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
const
bool
unswizzle
,
const
u32
x_start
,
const
u32
y_start
,
const
u32
z_start
,
const
u32
x_end
,
const
u32
y_end
,
const
u32
z_end
,
const
u32
tile_offset
,
const
u32
xy_block_size
,
const
u32
layer_z
,
const
u32
stride_x
,
const
u32
bytes_per_pixel
,
const
u32
out_bytes_per_pixel
)
{
std
::
array
<
u8
*
,
2
>
data_ptrs
;
std
::
array
<
u8
*
,
2
>
data_ptrs
;
u32
z_adress
=
tile_offset
;
u32
z_adress
=
tile_offset
;
const
u32
x_startb
=
x_start
*
bytes_per_pixel
;
const
u32
x_startb
=
x_start
*
bytes_per_pixel
;
const
u32
x_endb
=
x_end
*
bytes_per_pixel
;
const
u32
x_endb
=
x_end
*
bytes_per_pixel
;
const
u32
copy_size
=
16
;
const
u32
copy_size
=
16
;
const
u32
gob_size
=
64
*
8
*
1
;
const
u32
gob_size_x
=
64
;
const
u32
gob_size_y
=
8
;
const
u32
gob_size_z
=
1
;
const
u32
gob_size
=
gob_size_x
*
gob_size_y
*
gob_size_z
;
for
(
u32
z
=
z_start
;
z
<
z_end
;
z
++
)
{
for
(
u32
z
=
z_start
;
z
<
z_end
;
z
++
)
{
u32
y_adress
=
z_adress
;
u32
y_adress
=
z_adress
;
u32
pixel_base
=
layer_z
*
z
+
y_start
*
stride_x
;
u32
pixel_base
=
layer_z
*
z
+
y_start
*
stride_x
;
for
(
u32
y
=
y_start
;
y
<
y_end
;
y
++
)
{
for
(
u32
y
=
y_start
;
y
<
y_end
;
y
++
)
{
const
auto
&
table
=
fast_swizzle_table
[
y
%
8
];
const
auto
&
table
=
fast_swizzle_table
[
y
%
gob_size_y
];
for
(
u32
xb
=
x_startb
;
xb
<
x_endb
;
xb
+=
copy_size
)
{
for
(
u32
xb
=
x_startb
;
xb
<
x_endb
;
xb
+=
copy_size
)
{
const
u32
swizzle_offset
{
y_adress
+
table
[(
xb
/
16
)
%
4
]};
const
u32
swizzle_offset
{
y_adress
+
table
[(
xb
/
copy_size
)
%
4
]};
const
u32
out_x
=
xb
*
out_bytes_per_pixel
/
bytes_per_pixel
;
const
u32
out_x
=
xb
*
out_bytes_per_pixel
/
bytes_per_pixel
;
const
u32
pixel_index
{
out_x
+
pixel_base
};
const
u32
pixel_index
{
out_x
+
pixel_base
};
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
data_ptrs
[
unswizzle
]
=
swizzled_data
+
swizzle_offset
;
...
@@ -191,18 +158,27 @@ void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, c
...
@@ -191,18 +158,27 @@ void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, c
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
copy_size
);
std
::
memcpy
(
data_ptrs
[
0
],
data_ptrs
[
1
],
copy_size
);
}
}
pixel_base
+=
stride_x
;
pixel_base
+=
stride_x
;
if
((
y
+
1
)
%
8
==
0
)
if
((
y
+
1
)
%
gob_size_y
==
0
)
y_adress
+=
gob_size
;
y_adress
+=
gob_size
;
}
}
z_adress
+=
xy_block_size
;
z_adress
+=
xy_block_size
;
}
}
}
}
void
Fast3DSwizzledData
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
bool
unswizzle
,
u32
width
,
/**
u32
height
,
u32
depth
,
u32
bytes_per_pixel
,
u32
out_bytes_per_pixel
,
* This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
u32
block_height
,
u32
block_depth
)
{
* The body of this function takes care of splitting the swizzled texture into blocks,
auto
div_ceil
=
[](
u32
x
,
u32
y
)
{
return
((
x
+
y
-
1
)
/
y
);
};
* and managing the extents of it. Once all the parameters of a single block are obtained,
* the function calls '3DProcessBlock' to process that particular Block.
*
* Documentation for the memory layout and decoding can be found at:
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
void
Fast3DSwizzledData
(
u8
*
swizzled_data
,
u8
*
unswizzled_data
,
const
bool
unswizzle
,
const
u32
width
,
const
u32
height
,
const
u32
depth
,
const
u32
bytes_per_pixel
,
const
u32
out_bytes_per_pixel
,
const
u32
block_height
,
const
u32
block_depth
)
{
auto
div_ceil
=
[](
const
u32
x
,
const
u32
y
)
{
return
((
x
+
y
-
1
)
/
y
);
};
const
u32
stride_x
=
width
*
out_bytes_per_pixel
;
const
u32
stride_x
=
width
*
out_bytes_per_pixel
;
const
u32
layer_z
=
height
*
stride_x
;
const
u32
layer_z
=
height
*
stride_x
;
const
u32
gob_x_bytes
=
64
;
const
u32
gob_x_bytes
=
64
;
...
@@ -229,9 +205,9 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
...
@@ -229,9 +205,9 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
for
(
u32
xb
=
0
;
xb
<
blocks_on_x
;
xb
++
)
{
for
(
u32
xb
=
0
;
xb
<
blocks_on_x
;
xb
++
)
{
const
u32
x_start
=
xb
*
block_x_elements
;
const
u32
x_start
=
xb
*
block_x_elements
;
const
u32
x_end
=
std
::
min
(
width
,
x_start
+
block_x_elements
);
const
u32
x_end
=
std
::
min
(
width
,
x_start
+
block_x_elements
);
Fast3DProcess
Gobs
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
x_start
,
y_start
,
Fast3DProcess
Block
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
x_start
,
y_start
,
z_start
,
x_end
,
y_end
,
z_end
,
tile_offset
,
xy_block_size
,
layer_z
,
z_start
,
x_end
,
y_end
,
z_end
,
tile_offset
,
xy_block_size
,
stride_x
,
bytes_per_pixel
,
out_bytes_per_pixel
);
layer_z
,
stride_x
,
bytes_per_pixel
,
out_bytes_per_pixel
);
tile_offset
+=
block_size
;
tile_offset
+=
block_size
;
}
}
}
}
...
@@ -245,7 +221,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
...
@@ -245,7 +221,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
bytes_per_pixel
,
out_bytes_per_pixel
,
block_height
,
1U
);
bytes_per_pixel
,
out_bytes_per_pixel
,
block_height
,
1U
);
}
else
{
}
else
{
Precise3DSwizzledData
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
width
,
height
,
1U
,
Precise3DSwizzledData
(
swizzled_data
,
unswizzled_data
,
unswizzle
,
width
,
height
,
1U
,
bytes_per_pixel
,
out_bytes_per_pixel
,
block_height
,
1U
);
bytes_per_pixel
,
out_bytes_per_pixel
,
block_height
,
1U
);
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment