mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-25 20:06:58 +08:00 
			
		
		
		
	shader: Implement SHFL
This commit is contained in:
		
							parent
							
								
									49e87ea8ab
								
							
						
					
					
						commit
						32c5483beb
					
				| @ -16,7 +16,7 @@ add_library(shader_recompiler STATIC | ||||
|     backend/spirv/emit_spirv_select.cpp | ||||
|     backend/spirv/emit_spirv_special.cpp | ||||
|     backend/spirv/emit_spirv_undefined.cpp | ||||
|     backend/spirv/emit_spirv_vote.cpp | ||||
|     backend/spirv/emit_spirv_warp.cpp | ||||
|     environment.h | ||||
|     exception.h | ||||
|     file_environment.cpp | ||||
| @ -125,6 +125,7 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/maxwell/translate/impl/texture_fetch.cpp | ||||
|     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | ||||
|     frontend/maxwell/translate/impl/vote.cpp | ||||
|     frontend/maxwell/translate/impl/warp_shuffle.cpp | ||||
|     frontend/maxwell/translate/translate.cpp | ||||
|     frontend/maxwell/translate/translate.h | ||||
|     ir_opt/collect_shader_info_pass.cpp | ||||
|  | ||||
| @ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { | ||||
|     if (info.uses_local_invocation_id) { | ||||
|         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); | ||||
|     } | ||||
|     if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { | ||||
|     if (info.uses_subgroup_invocation_id || | ||||
|         (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { | ||||
|         subgroup_local_invocation_id = | ||||
|             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | ||||
|     } | ||||
|  | ||||
| @ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | ||||
|         ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | ||||
|         ctx.AddCapability(spv::Capability::DrawParameters); | ||||
|     } | ||||
|     if (info.uses_subgroup_vote && profile.support_vote) { | ||||
|     if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { | ||||
|         ctx.AddExtension("SPV_KHR_shader_ballot"); | ||||
|         ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | ||||
|         if (!profile.warp_size_potentially_larger_than_guest) { | ||||
| @ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { | ||||
|     throw LogicError("Unreachable instruction"); | ||||
| } | ||||
| 
 | ||||
| void EmitGetInBoundsFromOp(EmitContext&) { | ||||
|     throw LogicError("Unreachable instruction"); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
|  | ||||
| @ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); | ||||
| void EmitGetCarryFromOp(EmitContext& ctx); | ||||
| void EmitGetOverflowFromOp(EmitContext& ctx); | ||||
| void EmitGetSparseFromOp(EmitContext& ctx); | ||||
| void EmitGetInBoundsFromOp(EmitContext& ctx); | ||||
| Id EmitFPAbs16(EmitContext& ctx, Id value); | ||||
| Id EmitFPAbs32(EmitContext& ctx, Id value); | ||||
| Id EmitFPAbs64(EmitContext& ctx, Id value); | ||||
| @ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred); | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred); | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | ||||
| Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                     Id segmentation_mask); | ||||
| Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                  Id segmentation_mask); | ||||
| Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                    Id segmentation_mask); | ||||
| Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                         Id segmentation_mask); | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
|  | ||||
| @ -1,58 +0,0 @@ | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
| 
 | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||||
|     const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||||
|     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||||
| } | ||||
| 
 | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||||
| } | ||||
| 
 | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||||
|                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||||
| } | ||||
| 
 | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||||
|     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||||
|     } | ||||
|     return LargeWarpBallot(ctx, ballot); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
							
								
								
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,135 @@ | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
| 
 | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||||
|     const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||||
|     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||||
| } | ||||
| 
 | ||||
| void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||||
|     IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||||
|     if (!in_bounds) { | ||||
|         return; | ||||
|     } | ||||
|     in_bounds->SetDefinition(result); | ||||
|     in_bounds->Invalidate(); | ||||
| } | ||||
| 
 | ||||
| Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||||
|     return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||||
| } | ||||
| 
 | ||||
| Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||||
|     return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||||
|                            ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||||
| } | ||||
| 
 | ||||
| Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||||
|     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||
|     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||
|     return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||||
| } | ||||
| 
 | ||||
| Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||||
|     return ctx.OpSelect(ctx.U32[1], in_range, | ||||
|                         ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||||
| } | ||||
| 
 | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||||
| } | ||||
| 
 | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||||
|                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||||
| } | ||||
| 
 | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||||
|     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||||
|     } | ||||
|     return LargeWarpBallot(ctx, ballot); | ||||
| } | ||||
| 
 | ||||
| Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                     Id segmentation_mask) { | ||||
|     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||
|     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||||
| 
 | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||||
|     const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
| 
 | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
| 
 | ||||
| Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                  Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
| 
 | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
| 
 | ||||
| Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                    Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
| 
 | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
| 
 | ||||
| Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                         Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
| 
 | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Backend::SPIRV
 | ||||
| @ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { | ||||
|     return Inst<U1>(Opcode::GetSparseFromOp, op); | ||||
| } | ||||
| 
 | ||||
| U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||||
|     return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||||
| } | ||||
| 
 | ||||
| F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||||
|     if (a.Type() != b.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||
| @ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { | ||||
|     return Inst<U32>(Opcode::SubgroupBallot, value); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                             const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                          const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                            const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                 const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||||
| } | ||||
| } // namespace Shader::IR
 | ||||
|  | ||||
| @ -104,6 +104,7 @@ public: | ||||
|     [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||||
| 
 | ||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||||
| @ -147,7 +148,8 @@ public: | ||||
|     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | ||||
|     [[nodiscard]] F32 FPSqrt(const F32& value); | ||||
|     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); | ||||
|     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||||
|                                     const F16F32F64& max_value); | ||||
|     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||||
| @ -242,6 +244,14 @@ public: | ||||
|     [[nodiscard]] U1 VoteAny(const U1& value); | ||||
|     [[nodiscard]] U1 VoteEqual(const U1& value); | ||||
|     [[nodiscard]] U32 SubgroupBallot(const U1& value); | ||||
|     [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                    const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                 const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                   const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||||
|                                        const IR::U32& clamp, const IR::U32& seg_mask); | ||||
| 
 | ||||
| private: | ||||
|     IR::Block::iterator insertion_point; | ||||
|  | ||||
| @ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | ||||
|     case Opcode::GetCarryFromOp: | ||||
|     case Opcode::GetOverflowFromOp: | ||||
|     case Opcode::GetSparseFromOp: | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
| @ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | ||||
|     case Opcode::GetSparseFromOp: | ||||
|         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||||
|         return associated_insts->sparse_inst; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||
|         return associated_insts->in_bounds_inst; | ||||
|     default: | ||||
|         throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||||
|     } | ||||
| @ -262,6 +266,10 @@ void Inst::Use(const Value& value) { | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||||
|         break; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| @ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||||
|         break; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|  | ||||
| @ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||||
| 
 | ||||
| struct AssociatedInsts { | ||||
|     union { | ||||
|         Inst* in_bounds_inst; | ||||
|         Inst* sparse_inst; | ||||
|         Inst* zero_inst{}; | ||||
|     }; | ||||
|  | ||||
| @ -159,6 +159,7 @@ OPCODE(GetSignFromOp,                                       U1,             Opaq | ||||
| OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetInBoundsFromOp,                                   U1,             Opaque,                                                                         ) | ||||
| 
 | ||||
| // Floating-point operations
 | ||||
| OPCODE(FPAbs16,                                             F16,            F16,                                                                            ) | ||||
| @ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod,                              F32x4,          U32, | ||||
| OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| 
 | ||||
| // Vote operations
 | ||||
| // Warp operations
 | ||||
| OPCODE(VoteAll,                                             U1,             U1,                                                                             ) | ||||
| OPCODE(VoteAny,                                             U1,             U1,                                                                             ) | ||||
| OPCODE(VoteEqual,                                           U1,             U1,                                                                             ) | ||||
| OPCODE(SubgroupBallot,                                      U32,            U1,                                                                             ) | ||||
| OPCODE(ShuffleIndex,                                        U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            ) | ||||
|  | ||||
| @ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||||
|     ISCADD(*this, insn, GetReg20(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64) { | ||||
|     throw NotImplementedException("ISCADD (cbuf)"); | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||||
|     ISCADD(*this, insn, GetCbuf(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||||
|  | ||||
| @ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { | ||||
|     ThrowNotImplemented(Opcode::SETLMEMBASE); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::SHFL(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHFL); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::SSY() { | ||||
|     // SSY is a no-op
 | ||||
| } | ||||
|  | ||||
| @ -0,0 +1,69 @@ | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <optional> | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class ShuffleMode : u64 { | ||||
|     IDX, | ||||
|     UP, | ||||
|     DOWN, | ||||
|     BFLY, | ||||
| }; | ||||
| 
 | ||||
| [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||||
|                                        const IR::U32& index, const IR::U32& mask, | ||||
|                                        ShuffleMode shfl_op) { | ||||
|     const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||||
|     const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||||
|     switch (shfl_op) { | ||||
|     case ShuffleMode::IDX: | ||||
|         return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::UP: | ||||
|         return ir.ShuffleUp(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::DOWN: | ||||
|         return ir.ShuffleDown(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::BFLY: | ||||
|         return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg; | ||||
|         BitField<30, 2, ShuffleMode> mode; | ||||
|         BitField<48, 3, IR::Pred> pred; | ||||
|     } const shfl{insn}; | ||||
| 
 | ||||
|     const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||||
|     v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||||
|     v.X(shfl.dest_reg, result); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::SHFL(u64 insn) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<20, 5, u64> src_a_imm; | ||||
|         BitField<28, 1, u64> src_a_flag; | ||||
|         BitField<29, 1, u64> src_b_flag; | ||||
|         BitField<34, 13, u64> src_b_imm; | ||||
|     } const flags{insn}; | ||||
|     const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||||
|                                               : GetReg20(insn)}; | ||||
|     const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||||
|                                               : GetReg39(insn)}; | ||||
|     Shuffle(*this, insn, src_a, src_b); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
| @ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::LocalInvocationId: | ||||
|         info.uses_local_invocation_id = true; | ||||
|         break; | ||||
|     case IR::Opcode::ShuffleIndex: | ||||
|     case IR::Opcode::ShuffleUp: | ||||
|     case IR::Opcode::ShuffleDown: | ||||
|     case IR::Opcode::ShuffleButterfly: | ||||
|         info.uses_subgroup_invocation_id = true; | ||||
|         break; | ||||
|     case IR::Opcode::GetCbufU8: | ||||
|     case IR::Opcode::GetCbufS8: | ||||
|     case IR::Opcode::GetCbufU16: | ||||
|  | ||||
| @ -56,6 +56,7 @@ struct Info { | ||||
| 
 | ||||
|     bool uses_workgroup_id{}; | ||||
|     bool uses_local_invocation_id{}; | ||||
|     bool uses_subgroup_invocation_id{}; | ||||
| 
 | ||||
|     std::array<bool, 32> loads_generics{}; | ||||
|     bool loads_position{}; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user