From af89723fa3d4ca13fc2ce7be545170d093eb4c31 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 11 Dec 2019 18:12:29 -0400
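Subject: [PATCH] Shader_Ir: Correct TLD4S encoding and implement f16 flag.

Change the TLD4S opcode pattern from "1101111100------" to
"11011111--00----" and add a 1-bit fp16_flag field at bit 55 of the
instruction layout used by TLD4S.

When fp16_flag is set, the TLD4S results are written with
WriteTexsInstructionHalfFloat instead of WriteTexsInstructionFloat.
WriteTexsInstructionHalfFloat gains an ignore_mask parameter (default
false, so existing callers are unaffected); TLD4S passes true so that no
component is skipped based on the TEXS component mask.

The remaining hunks in texture.cpp only realign continuation lines and
drop a stray blank line.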
---
 src/video_core/engines/shader_bytecode.h |  3 ++-
 src/video_core/shader/decode/texture.cpp | 21 ++++++++++++---------
 src/video_core/shader/shader_ir.h        |  2 +-
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 290d929df1..d6a2cc8b8a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1292,6 +1292,7 @@ union Instruction {
         BitField<50, 1, u64> dc_flag;
         BitField<51, 1, u64> aoffi_flag;
         BitField<52, 2, u64> component;
+        BitField<55, 1, u64> fp16_flag;
 
         bool UsesMiscMode(TextureMiscMode mode) const {
             switch (mode) {
@@ -1972,7 +1973,7 @@ private:
             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
             INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"),
             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
             INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 67926afcbe..9aef5ddd55 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -138,7 +138,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
-        WriteTexsInstructionFloat(bb, instr, values, true);
+        if (instr.tld4s.fp16_flag) {
+            WriteTexsInstructionHalfFloat(bb, instr, values, true);
+        } else {
+            WriteTexsInstructionFloat(bb, instr, values, true);
+        }
         break;
     }
     case OpCode::Id::TXD_B:
@@ -155,8 +159,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto coord_count = GetCoordCount(texture_type);
 
         const Sampler* sampler = is_bindless
-                                  ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
-                                  : GetSampler(instr.sampler, {{texture_type, false, false}});
+                                     ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+                                     : GetSampler(instr.sampler, {{texture_type, false, false}});
         Node4 values;
         if (sampler == nullptr) {
             for (u32 element = 0; element < values.size(); ++element) {
@@ -362,7 +366,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
     // Otherwise create a new mapping for this sampler
     const auto next_index = static_cast<u32>(used_samplers.size());
     return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
-                                      info.is_buffer);
+                                       info.is_buffer);
 }
 
 const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
@@ -392,7 +396,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
     // Otherwise create a new mapping for this sampler
     const auto next_index = static_cast<u32>(used_samplers.size());
     return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                      info.is_shadow, info.is_buffer);
+                                       info.is_shadow, info.is_buffer);
 }
 
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -435,14 +439,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const
 }
 
 void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
-                                             const Node4& components) {
+                                             const Node4& components, bool ignore_mask) {
     // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
     // float instruction).
 
     Node4 values;
     u32 dest_elem = 0;
     for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
+        if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
             continue;
         values[dest_elem++] = components[component];
     }
@@ -525,7 +529,6 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
         }
     }
 
-
     for (u32 element = 0; element < values.size(); ++element) {
         auto copy_coords = coords;
         MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
@@ -642,7 +645,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
 
     const SamplerInfo info{texture_type, is_array, depth_compare, false};
     const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
-                                      : GetSampler(instr.sampler, info);
+                                         : GetSampler(instr.sampler, info);
     Node4 values;
     if (sampler == nullptr) {
         for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 8324432ae4..04ae5f8222 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -338,7 +338,7 @@ private:
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                    const Node4& components, bool ignore_mask = false);
     void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                       const Node4& components);
+                                       const Node4& components, bool ignore_mask = false);
 
     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-- 
GitLab