From d8d557df86139ea0011fee0665d76e740681f7c0 Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Sat, 27 Oct 2018 20:09:26 -0400
Subject: [PATCH] Improved Shader accuracy on Vertex and Geometry Shaders with
 FFMA, FMUL and FADD

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 55 ++++++++++++++++++-
 .../renderer_opengl/gl_shader_gen.cpp         |  9 ++-
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index dec291a7d9..002ae90a7d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1510,8 +1510,25 @@ private:
                 ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
 
                 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
+
+                shader.AddLine('{');
+                ++shader.scope;
+
+                // `precise` stops the compiler from applying constant propagation to this operation.
+                // Fragment shaders omit it because the keyword causes "linking" errors there.
+                if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
+                    shader.AddLine("float tmp = " + op_a + " * " + op_b + ';');
+                } else {
+                    shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';');
+                }
+
+
+                regs.SetRegisterToFloat(instr.gpr0, 0,  "tmp", 1, 1,
                                         instr.alu.saturate_d);
+
+
+                --shader.scope;
+                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::FADD_C:
@@ -1519,8 +1536,25 @@ private:
             case OpCode::Id::FADD_IMM: {
                 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
                 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
+
+                shader.AddLine('{');
+                ++shader.scope;
+
+                // `precise` stops the compiler from applying constant propagation to this operation.
+                // Fragment shaders omit it because the keyword causes "linking" errors there.
+                if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
+                    shader.AddLine("float tmp = " + op_a + " + " + op_b + ';');
+                } else {
+                    shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';');
+                }
+                regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
                                         instr.alu.saturate_d);
+
+
+                --shader.scope;
+                shader.AddLine('}');
+
+
                 break;
             }
             case OpCode::Id::MUFU: {
@@ -2087,8 +2121,23 @@ private:
             }
             }
 
-            regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
+            shader.AddLine('{');
+            ++shader.scope;
+
+            // `precise` stops the compiler from applying constant propagation to this operation.
+            // Fragment shaders omit it because the keyword causes "linking" errors there.
+            if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
+                shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
+            } else {
+                shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
+            }
+
+            regs.SetRegisterToFloat(instr.gpr0, 0,  "tmp", 1, 1,
                                     instr.alu.saturate_d);
+
+
+            --shader.scope;
+            shader.AddLine('}');
             break;
         }
         case OpCode::Type::Hfma2: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index dfb5627067..08dd8dc6c5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -15,7 +15,8 @@ static constexpr u32 PROGRAM_OFFSET{10};
 
 ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
     std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n";
+    out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
     out += Decompiler::GetCommonDeclarations();
 
     out += R"(
@@ -83,7 +84,8 @@ void main() {
 
 ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
     std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n";
+    out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
     out += Decompiler::GetCommonDeclarations();
     out += "bool exec_geometry();\n";
 
@@ -117,7 +119,8 @@ void main() {
 
 ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
     std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n";
+    out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
     out += Decompiler::GetCommonDeclarations();
     out += "bool exec_fragment();\n";
 
-- 
GitLab