mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-31 06:46:40 +08:00 
			
		
		
		
	shader: Implement FSWZADD
This commit is contained in:
		
							parent
							
								
									34aba9627a
								
							
						
					
					
						commit
						6c51f49632
					
				| @ -89,6 +89,7 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/maxwell/translate/impl/floating_point_multiply.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | ||||
|     frontend/maxwell/translate/impl/half_floating_point_add.cpp | ||||
|     frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | ||||
|     frontend/maxwell/translate/impl/half_floating_point_helper.cpp | ||||
|  | ||||
| @ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) { | ||||
|         subgroup_local_invocation_id = | ||||
|             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | ||||
|     } | ||||
|     if (info.uses_fswzadd) { | ||||
|         const Id f32_one{Constant(F32[1], 1.0f)}; | ||||
|         const Id f32_minus_one{Constant(F32[1], -1.0f)}; | ||||
|         const Id f32_zero{Constant(F32[1], 0.0f)}; | ||||
|         fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); | ||||
|         fswzadd_lut_b = | ||||
|             ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); | ||||
|     } | ||||
|     if (info.loads_position) { | ||||
|         const bool is_fragment{stage != Stage::Fragment}; | ||||
|         const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; | ||||
|  | ||||
| @ -103,6 +103,8 @@ public: | ||||
|     Id vertex_index{}; | ||||
|     Id base_vertex{}; | ||||
|     Id front_face{}; | ||||
|     Id fswzadd_lut_a{}; | ||||
|     Id fswzadd_lut_b{}; | ||||
| 
 | ||||
|     Id local_memory{}; | ||||
| 
 | ||||
|  | ||||
| @ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam | ||||
|                    Id segmentation_mask); | ||||
| Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                         Id segmentation_mask); | ||||
| Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
|  | ||||
| @ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
| 
 | ||||
| Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { | ||||
|     const Id three{ctx.Constant(ctx.U32[1], 3)}; | ||||
|     Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||||
|     mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1)); | ||||
|     mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); | ||||
|     mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||||
| 
 | ||||
|     const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; | ||||
|     const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; | ||||
| 
 | ||||
|     const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; | ||||
|     const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; | ||||
|     return ctx.OpFAdd(ctx.F32[1], result_a, result_b); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
|  | ||||
| @ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons | ||||
|                                 const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||||
| } | ||||
| F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { | ||||
|     return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); | ||||
| } | ||||
| } // namespace Shader::IR
 | ||||
|  | ||||
| @ -277,6 +277,8 @@ public: | ||||
|                                   const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||||
|                                        const IR::U32& clamp, const IR::U32& seg_mask); | ||||
|     [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, | ||||
|                                   FpControl control = {}); | ||||
| 
 | ||||
| private: | ||||
|     IR::Block::iterator insertion_point; | ||||
|  | ||||
| @ -408,3 +408,4 @@ OPCODE(ShuffleIndex,                                        U32,            U32, | ||||
| OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(FSwizzleAdd,                                         F32,            F32,            F32,            U32,                                            ) | ||||
|  | ||||
| @ -0,0 +1,44 @@ | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| void TranslatorVisitor::FSWZADD(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<28, 8, u64> swizzle; | ||||
|         BitField<38, 1, u64> ndv; | ||||
|         BitField<39, 2, FpRounding> round; | ||||
|         BitField<44, 1, u64> ftz; | ||||
|         BitField<47, 1, u64> cc; | ||||
|     } const fswzadd{insn}; | ||||
| 
 | ||||
|     if (fswzadd.ndv != 0) { | ||||
|         throw NotImplementedException("FSWZADD NDV"); | ||||
|     } | ||||
| 
 | ||||
|     const IR::F32 src_a{GetFloatReg8(insn)}; | ||||
|     const IR::F32 src_b{GetFloatReg20(insn)}; | ||||
|     const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; | ||||
| 
 | ||||
|     const IR::FpControl fp_control{ | ||||
|         .no_contraction{false}, | ||||
|         .rounding{CastFpRounding(fswzadd.round)}, | ||||
|         .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||||
|     }; | ||||
| 
 | ||||
|     const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; | ||||
|     F(fswzadd.dest_reg, result); | ||||
| 
 | ||||
|     if (fswzadd.cc != 0) { | ||||
|         throw NotImplementedException("FSWZADD CC"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
| @ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||||
|     return X(reg.index); | ||||
| } | ||||
| 
 | ||||
| IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { | ||||
|     return ir.BitCast<IR::F32>(GetReg8(insn)); | ||||
| } | ||||
| 
 | ||||
| IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { | ||||
|     return ir.BitCast<IR::F32>(GetReg20(insn)); | ||||
| } | ||||
|  | ||||
| @ -353,6 +353,7 @@ public: | ||||
|     [[nodiscard]] IR::U32 GetReg8(u64 insn); | ||||
|     [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||||
|     [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||||
|     [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); | ||||
|     [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); | ||||
|     [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); | ||||
|     [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); | ||||
|  | ||||
| @ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FCHK_imm); | ||||
| } | ||||
| 
 | ||||
// Placeholder handler: ignores the encoding and reports FSWZADD through
// ThrowNotImplemented.
// NOTE(review): the hunk header (-89,10 +89,6) shows four lines leaving this
// file, and the same commit adds a full FSWZADD translator in
// floating_point_swizzled_add.cpp, so this stub appears to be the removed
// side of the diff -- confirm before keeping both definitions.
void TranslatorVisitor::FSWZADD(u64) {
    ThrowNotImplemented(Opcode::FSWZADD);
}
| 
 | ||||
// Placeholder handler: ignores the encoding and reports GETCRSPTR through
// ThrowNotImplemented.
void TranslatorVisitor::GETCRSPTR(u64) {
    ThrowNotImplemented(Opcode::GETCRSPTR);
}
|  | ||||
| @ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::SubgroupBallot: | ||||
|         info.uses_subgroup_vote = true; | ||||
|         break; | ||||
|     case IR::Opcode::FSwizzleAdd: | ||||
|         info.uses_fswzadd = true; | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|  | ||||
| @ -94,6 +94,7 @@ struct Info { | ||||
|     bool uses_sparse_residency{}; | ||||
|     bool uses_demote_to_helper_invocation{}; | ||||
|     bool uses_subgroup_vote{}; | ||||
|     bool uses_fswzadd{}; | ||||
| 
 | ||||
|     IR::Type used_constant_buffer_types{}; | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user