Skip to content
Snippets Groups Projects
Commit 623b2e4b authored by Fernando Sahmkow, committed by FernandoS27
Browse files

Correct half-float operations on const buffers and implement saturation.

parent 650d9b10
No related branches found
No related tags found
No related merge requests found
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction; using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode; using Tegra::Shader::OpCode;
...@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { ...@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
} }
} }
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a = const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
...@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { ...@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
Node op_b = [&]() { auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
switch (opcode->get().GetId()) { switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HADD2_R: case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R: case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20); return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
default: default:
UNREACHABLE(); UNREACHABLE();
return Immediate(0); return {HalfType::F32, Immediate(0)};
} }
}(); }();
op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); op_b = UnpackHalfFloat(op_b, type_b);
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() { Node value = [&]() {
switch (opcode->get().GetId()) { switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R: case OpCode::Id::HADD2_R:
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R: case OpCode::Id::HMUL2_R:
return Operation(OperationCode::HMul, PRECISE, op_a, op_b); return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
default: default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0); return Immediate(0);
} }
}(); }();
value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value); SetRegister(bb, instr.gpr0, value);
...@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { ...@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
} // namespace VideoCommon::Shader } // namespace VideoCommon::Shader
\ No newline at end of file
...@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { ...@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
case OpCode::Id::HFMA2_CR: case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b; neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c; neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b, return {instr.hfma2.saturate, HalfType::F32,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC: case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b; neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c; neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b, HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR: case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b; neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c; neg_c = instr.hfma2.rr.negate_c;
...@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { ...@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return {false, identity, Immediate(0), identity, Immediate(0)}; return {false, identity, Immediate(0), identity, Immediate(0)};
} }
}(); }();
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
value = GetSaturatedHalfFloat(value, saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value); SetRegister(bb, instr.gpr0, value);
...@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { ...@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
} // namespace VideoCommon::Shader } // namespace VideoCommon::Shader
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment