mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-25 20:06:58 +08:00 
			
		
		
		
	shader: Implement SHFL
This commit is contained in:
		
							parent
							
								
									49e87ea8ab
								
							
						
					
					
						commit
						32c5483beb
					
				| @ -16,7 +16,7 @@ add_library(shader_recompiler STATIC | |||||||
|     backend/spirv/emit_spirv_select.cpp |     backend/spirv/emit_spirv_select.cpp | ||||||
|     backend/spirv/emit_spirv_special.cpp |     backend/spirv/emit_spirv_special.cpp | ||||||
|     backend/spirv/emit_spirv_undefined.cpp |     backend/spirv/emit_spirv_undefined.cpp | ||||||
|     backend/spirv/emit_spirv_vote.cpp |     backend/spirv/emit_spirv_warp.cpp | ||||||
|     environment.h |     environment.h | ||||||
|     exception.h |     exception.h | ||||||
|     file_environment.cpp |     file_environment.cpp | ||||||
| @ -125,6 +125,7 @@ add_library(shader_recompiler STATIC | |||||||
|     frontend/maxwell/translate/impl/texture_fetch.cpp |     frontend/maxwell/translate/impl/texture_fetch.cpp | ||||||
|     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp |     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | ||||||
|     frontend/maxwell/translate/impl/vote.cpp |     frontend/maxwell/translate/impl/vote.cpp | ||||||
|  |     frontend/maxwell/translate/impl/warp_shuffle.cpp | ||||||
|     frontend/maxwell/translate/translate.cpp |     frontend/maxwell/translate/translate.cpp | ||||||
|     frontend/maxwell/translate/translate.h |     frontend/maxwell/translate/translate.h | ||||||
|     ir_opt/collect_shader_info_pass.cpp |     ir_opt/collect_shader_info_pass.cpp | ||||||
|  | |||||||
| @ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { | |||||||
|     if (info.uses_local_invocation_id) { |     if (info.uses_local_invocation_id) { | ||||||
|         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); |         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); | ||||||
|     } |     } | ||||||
|     if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { |     if (info.uses_subgroup_invocation_id || | ||||||
|  |         (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { | ||||||
|         subgroup_local_invocation_id = |         subgroup_local_invocation_id = | ||||||
|             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); |             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | ||||||
|     } |     } | ||||||
|  | |||||||
| @ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||||||
|         ctx.AddExtension("SPV_KHR_shader_draw_parameters"); |         ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | ||||||
|         ctx.AddCapability(spv::Capability::DrawParameters); |         ctx.AddCapability(spv::Capability::DrawParameters); | ||||||
|     } |     } | ||||||
|     if (info.uses_subgroup_vote && profile.support_vote) { |     if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { | ||||||
|         ctx.AddExtension("SPV_KHR_shader_ballot"); |         ctx.AddExtension("SPV_KHR_shader_ballot"); | ||||||
|         ctx.AddCapability(spv::Capability::SubgroupBallotKHR); |         ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | ||||||
|         if (!profile.warp_size_potentially_larger_than_guest) { |         if (!profile.warp_size_potentially_larger_than_guest) { | ||||||
| @ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { | |||||||
|     throw LogicError("Unreachable instruction"); |     throw LogicError("Unreachable instruction"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void EmitGetInBoundsFromOp(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  | |||||||
| @ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); | |||||||
| void EmitGetCarryFromOp(EmitContext& ctx); | void EmitGetCarryFromOp(EmitContext& ctx); | ||||||
| void EmitGetOverflowFromOp(EmitContext& ctx); | void EmitGetOverflowFromOp(EmitContext& ctx); | ||||||
| void EmitGetSparseFromOp(EmitContext& ctx); | void EmitGetSparseFromOp(EmitContext& ctx); | ||||||
|  | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||||||
| Id EmitFPAbs16(EmitContext& ctx, Id value); | Id EmitFPAbs16(EmitContext& ctx, Id value); | ||||||
| Id EmitFPAbs32(EmitContext& ctx, Id value); | Id EmitFPAbs32(EmitContext& ctx, Id value); | ||||||
| Id EmitFPAbs64(EmitContext& ctx, Id value); | Id EmitFPAbs64(EmitContext& ctx, Id value); | ||||||
| @ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); | |||||||
| Id EmitVoteAny(EmitContext& ctx, Id pred); | Id EmitVoteAny(EmitContext& ctx, Id pred); | ||||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred); | Id EmitVoteEqual(EmitContext& ctx, Id pred); | ||||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | ||||||
|  | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                     Id segmentation_mask); | ||||||
|  | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                  Id segmentation_mask); | ||||||
|  | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                    Id segmentation_mask); | ||||||
|  | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                         Id segmentation_mask); | ||||||
| 
 | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  | |||||||
| @ -1,58 +0,0 @@ | |||||||
| // Copyright 2021 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" |  | ||||||
| 
 |  | ||||||
| namespace Shader::Backend::SPIRV { |  | ||||||
| namespace { |  | ||||||
| Id LargeWarpBallot(EmitContext& ctx, Id ballot) { |  | ||||||
|     const Id shift{ctx.Constant(ctx.U32[1], 5)}; |  | ||||||
|     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; |  | ||||||
|     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); |  | ||||||
| } |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| Id EmitVoteAll(EmitContext& ctx, Id pred) { |  | ||||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { |  | ||||||
|         return ctx.OpSubgroupAllKHR(ctx.U1, pred); |  | ||||||
|     } |  | ||||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; |  | ||||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; |  | ||||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; |  | ||||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; |  | ||||||
|     return ctx.OpIEqual(ctx.U1, lhs, active_mask); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Id EmitVoteAny(EmitContext& ctx, Id pred) { |  | ||||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { |  | ||||||
|         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); |  | ||||||
|     } |  | ||||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; |  | ||||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; |  | ||||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; |  | ||||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; |  | ||||||
|     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred) { |  | ||||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { |  | ||||||
|         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); |  | ||||||
|     } |  | ||||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; |  | ||||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; |  | ||||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; |  | ||||||
|     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; |  | ||||||
|     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), |  | ||||||
|                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { |  | ||||||
|     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; |  | ||||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { |  | ||||||
|         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); |  | ||||||
|     } |  | ||||||
|     return LargeWarpBallot(ctx, ballot); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Shader::Backend::SPIRV
 |  | ||||||
							
								
								
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,135 @@ | |||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||||
|  | 
 | ||||||
|  | namespace Shader::Backend::SPIRV { | ||||||
|  | namespace { | ||||||
|  | Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||||||
|  |     const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||||||
|  |     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||||
|  |     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||||||
|  |     IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||||||
|  |     if (!in_bounds) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     in_bounds->SetDefinition(result); | ||||||
|  |     in_bounds->Invalidate(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||||||
|  |     return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||||||
|  |     return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||||||
|  |                            ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||||||
|  |     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||||
|  |     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||||
|  |     return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||||||
|  |     return ctx.OpSelect(ctx.U32[1], in_range, | ||||||
|  |                         ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||||||
|  |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|  |         return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||||||
|  |     } | ||||||
|  |     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||||
|  |     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||||
|  |     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||||
|  |     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||||
|  |     return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||||||
|  |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|  |         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||||||
|  |     } | ||||||
|  |     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||||
|  |     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||||
|  |     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||||
|  |     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||||
|  |     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||||||
|  |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|  |         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||||||
|  |     } | ||||||
|  |     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||||
|  |     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||||
|  |     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||||
|  |     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||||||
|  |     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||||||
|  |                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||||||
|  |     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||||||
|  |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|  |         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||||||
|  |     } | ||||||
|  |     return LargeWarpBallot(ctx, ballot); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                     Id segmentation_mask) { | ||||||
|  |     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||||
|  |     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||||
|  |     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||||
|  |     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||||||
|  | 
 | ||||||
|  |     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||||||
|  |     const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||||||
|  |     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||||
|  | 
 | ||||||
|  |     SetInBoundsFlag(inst, in_range); | ||||||
|  |     return SelectValue(ctx, in_range, value, src_thread_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                  Id segmentation_mask) { | ||||||
|  |     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||||
|  |     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||||
|  |     const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||||||
|  |     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||||
|  | 
 | ||||||
|  |     SetInBoundsFlag(inst, in_range); | ||||||
|  |     return SelectValue(ctx, in_range, value, src_thread_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                    Id segmentation_mask) { | ||||||
|  |     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||||
|  |     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||||
|  |     const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||||||
|  |     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||||
|  | 
 | ||||||
|  |     SetInBoundsFlag(inst, in_range); | ||||||
|  |     return SelectValue(ctx, in_range, value, src_thread_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||||
|  |                         Id segmentation_mask) { | ||||||
|  |     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||||
|  |     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||||
|  |     const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||||||
|  |     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||||
|  | 
 | ||||||
|  |     SetInBoundsFlag(inst, in_range); | ||||||
|  |     return SelectValue(ctx, in_range, value, src_thread_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Shader::Backend::SPIRV
 | ||||||
| @ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { | |||||||
|     return Inst<U1>(Opcode::GetSparseFromOp, op); |     return Inst<U1>(Opcode::GetSparseFromOp, op); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||||||
|  |     return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||||||
|     if (a.Type() != b.Type()) { |     if (a.Type() != b.Type()) { | ||||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||||
| @ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { | |||||||
|     return Inst<U32>(Opcode::SubgroupBallot, value); |     return Inst<U32>(Opcode::SubgroupBallot, value); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                             const IR::U32& seg_mask) { | ||||||
|  |     return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                          const IR::U32& seg_mask) { | ||||||
|  |     return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                            const IR::U32& seg_mask) { | ||||||
|  |     return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                                 const IR::U32& seg_mask) { | ||||||
|  |     return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||||||
|  | } | ||||||
| } // namespace Shader::IR
 | } // namespace Shader::IR
 | ||||||
|  | |||||||
| @ -104,6 +104,7 @@ public: | |||||||
|     [[nodiscard]] U1 GetCarryFromOp(const Value& op); |     [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||||||
|     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); |     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||||||
|     [[nodiscard]] U1 GetSparseFromOp(const Value& op); |     [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||||||
|  |     [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); |     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); |     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||||||
| @ -147,7 +148,8 @@ public: | |||||||
|     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); |     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | ||||||
|     [[nodiscard]] F32 FPSqrt(const F32& value); |     [[nodiscard]] F32 FPSqrt(const F32& value); | ||||||
|     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); |     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||||||
|     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); |     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||||||
|  |                                     const F16F32F64& max_value); | ||||||
|     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); |     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||||||
|     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); |     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||||||
|     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); |     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||||||
| @ -242,6 +244,14 @@ public: | |||||||
|     [[nodiscard]] U1 VoteAny(const U1& value); |     [[nodiscard]] U1 VoteAny(const U1& value); | ||||||
|     [[nodiscard]] U1 VoteEqual(const U1& value); |     [[nodiscard]] U1 VoteEqual(const U1& value); | ||||||
|     [[nodiscard]] U32 SubgroupBallot(const U1& value); |     [[nodiscard]] U32 SubgroupBallot(const U1& value); | ||||||
|  |     [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                                    const IR::U32& seg_mask); | ||||||
|  |     [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                                 const IR::U32& seg_mask); | ||||||
|  |     [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||||
|  |                                   const IR::U32& seg_mask); | ||||||
|  |     [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||||||
|  |                                        const IR::U32& clamp, const IR::U32& seg_mask); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     IR::Block::iterator insertion_point; |     IR::Block::iterator insertion_point; | ||||||
|  | |||||||
| @ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | |||||||
|     case Opcode::GetCarryFromOp: |     case Opcode::GetCarryFromOp: | ||||||
|     case Opcode::GetOverflowFromOp: |     case Opcode::GetOverflowFromOp: | ||||||
|     case Opcode::GetSparseFromOp: |     case Opcode::GetSparseFromOp: | ||||||
|  |     case Opcode::GetInBoundsFromOp: | ||||||
|         return true; |         return true; | ||||||
|     default: |     default: | ||||||
|         return false; |         return false; | ||||||
| @ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | |||||||
|     case Opcode::GetSparseFromOp: |     case Opcode::GetSparseFromOp: | ||||||
|         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); |         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||||||
|         return associated_insts->sparse_inst; |         return associated_insts->sparse_inst; | ||||||
|  |     case Opcode::GetInBoundsFromOp: | ||||||
|  |         CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||||
|  |         return associated_insts->in_bounds_inst; | ||||||
|     default: |     default: | ||||||
|         throw InvalidArgument("{} is not a pseudo-instruction", opcode); |         throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||||||
|     } |     } | ||||||
| @ -262,6 +266,10 @@ void Inst::Use(const Value& value) { | |||||||
|         AllocAssociatedInsts(assoc_inst); |         AllocAssociatedInsts(assoc_inst); | ||||||
|         SetPseudoInstruction(assoc_inst->sparse_inst, this); |         SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||||||
|         break; |         break; | ||||||
|  |     case Opcode::GetInBoundsFromOp: | ||||||
|  |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||||||
|  |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| @ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { | |||||||
|         AllocAssociatedInsts(assoc_inst); |         AllocAssociatedInsts(assoc_inst); | ||||||
|         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); |         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||||||
|         break; |         break; | ||||||
|  |     case Opcode::GetInBoundsFromOp: | ||||||
|  |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||||
|  |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | |||||||
| @ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | |||||||
| 
 | 
 | ||||||
| struct AssociatedInsts { | struct AssociatedInsts { | ||||||
|     union { |     union { | ||||||
|  |         Inst* in_bounds_inst; | ||||||
|         Inst* sparse_inst; |         Inst* sparse_inst; | ||||||
|         Inst* zero_inst{}; |         Inst* zero_inst{}; | ||||||
|     }; |     }; | ||||||
|  | |||||||
| @ -159,6 +159,7 @@ OPCODE(GetSignFromOp,                                       U1,             Opaq | |||||||
| OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         ) | OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         ) | ||||||
| OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         ) | OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         ) | ||||||
| OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         ) | OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         ) | ||||||
|  | OPCODE(GetInBoundsFromOp,                                   U1,             Opaque,                                                                         ) | ||||||
| 
 | 
 | ||||||
| // Floating-point operations
 | // Floating-point operations
 | ||||||
| OPCODE(FPAbs16,                                             F16,            F16,                                                                            ) | OPCODE(FPAbs16,                                             F16,            F16,                                                                            ) | ||||||
| @ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod,                              F32x4,          U32, | |||||||
| OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
| OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
| 
 | 
 | ||||||
| // Vote operations
 | // Warp operations
 | ||||||
| OPCODE(VoteAll,                                             U1,             U1,                                                                             ) | OPCODE(VoteAll,                                             U1,             U1,                                                                             ) | ||||||
| OPCODE(VoteAny,                                             U1,             U1,                                                                             ) | OPCODE(VoteAny,                                             U1,             U1,                                                                             ) | ||||||
| OPCODE(VoteEqual,                                           U1,             U1,                                                                             ) | OPCODE(VoteEqual,                                           U1,             U1,                                                                             ) | ||||||
| OPCODE(SubgroupBallot,                                      U32,            U1,                                                                             ) | OPCODE(SubgroupBallot,                                      U32,            U1,                                                                             ) | ||||||
|  | OPCODE(ShuffleIndex,                                        U32,            U32,            U32,            U32,            U32,                            ) | ||||||
|  | OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            ) | ||||||
|  | OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            ) | ||||||
|  | OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            ) | ||||||
|  | |||||||
| @ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { | |||||||
|     ISCADD(*this, insn, GetReg20(insn)); |     ISCADD(*this, insn, GetReg20(insn)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::ISCADD_cbuf(u64) { | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||||||
|     throw NotImplementedException("ISCADD (cbuf)"); |     ISCADD(*this, insn, GetCbuf(insn)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::ISCADD_imm(u64 insn) { | void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||||||
|  | |||||||
| @ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { | |||||||
|     ThrowNotImplemented(Opcode::SETLMEMBASE); |     ThrowNotImplemented(Opcode::SETLMEMBASE); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::SHFL(u64) { |  | ||||||
|     ThrowNotImplemented(Opcode::SHFL); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void TranslatorVisitor::SSY() { | void TranslatorVisitor::SSY() { | ||||||
|     // SSY is a no-op
 |     // SSY is a no-op
 | ||||||
| } | } | ||||||
|  | |||||||
| @ -0,0 +1,69 @@ | |||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <optional> | ||||||
|  | 
 | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||||
|  | 
 | ||||||
|  | namespace Shader::Maxwell { | ||||||
|  | namespace { | ||||||
|  | enum class ShuffleMode : u64 { | ||||||
|  |     IDX, | ||||||
|  |     UP, | ||||||
|  |     DOWN, | ||||||
|  |     BFLY, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||||||
|  |                                        const IR::U32& index, const IR::U32& mask, | ||||||
|  |                                        ShuffleMode shfl_op) { | ||||||
|  |     const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||||||
|  |     const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||||||
|  |     switch (shfl_op) { | ||||||
|  |     case ShuffleMode::IDX: | ||||||
|  |         return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||||||
|  |     case ShuffleMode::UP: | ||||||
|  |         return ir.ShuffleUp(value, index, clamp, seg_mask); | ||||||
|  |     case ShuffleMode::DOWN: | ||||||
|  |         return ir.ShuffleDown(value, index, clamp, seg_mask); | ||||||
|  |     case ShuffleMode::BFLY: | ||||||
|  |         return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||||||
|  |     default: | ||||||
|  |         throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||||||
|  |     union { | ||||||
|  |         u64 insn; | ||||||
|  |         BitField<0, 8, IR::Reg> dest_reg; | ||||||
|  |         BitField<8, 8, IR::Reg> src_reg; | ||||||
|  |         BitField<30, 2, ShuffleMode> mode; | ||||||
|  |         BitField<48, 3, IR::Pred> pred; | ||||||
|  |     } const shfl{insn}; | ||||||
|  | 
 | ||||||
|  |     const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||||||
|  |     v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||||||
|  |     v.X(shfl.dest_reg, result); | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | void TranslatorVisitor::SHFL(u64 insn) { | ||||||
|  |     union { | ||||||
|  |         u64 insn; | ||||||
|  |         BitField<20, 5, u64> src_a_imm; | ||||||
|  |         BitField<28, 1, u64> src_a_flag; | ||||||
|  |         BitField<29, 1, u64> src_b_flag; | ||||||
|  |         BitField<34, 13, u64> src_b_imm; | ||||||
|  |     } const flags{insn}; | ||||||
|  |     const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||||||
|  |                                               : GetReg20(insn)}; | ||||||
|  |     const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||||||
|  |                                               : GetReg39(insn)}; | ||||||
|  |     Shuffle(*this, insn, src_a, src_b); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Shader::Maxwell
 | ||||||
| @ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||||||
|     case IR::Opcode::LocalInvocationId: |     case IR::Opcode::LocalInvocationId: | ||||||
|         info.uses_local_invocation_id = true; |         info.uses_local_invocation_id = true; | ||||||
|         break; |         break; | ||||||
|  |     case IR::Opcode::ShuffleIndex: | ||||||
|  |     case IR::Opcode::ShuffleUp: | ||||||
|  |     case IR::Opcode::ShuffleDown: | ||||||
|  |     case IR::Opcode::ShuffleButterfly: | ||||||
|  |         info.uses_subgroup_invocation_id = true; | ||||||
|  |         break; | ||||||
|     case IR::Opcode::GetCbufU8: |     case IR::Opcode::GetCbufU8: | ||||||
|     case IR::Opcode::GetCbufS8: |     case IR::Opcode::GetCbufS8: | ||||||
|     case IR::Opcode::GetCbufU16: |     case IR::Opcode::GetCbufU16: | ||||||
|  | |||||||
| @ -56,6 +56,7 @@ struct Info { | |||||||
| 
 | 
 | ||||||
|     bool uses_workgroup_id{}; |     bool uses_workgroup_id{}; | ||||||
|     bool uses_local_invocation_id{}; |     bool uses_local_invocation_id{}; | ||||||
|  |     bool uses_subgroup_invocation_id{}; | ||||||
| 
 | 
 | ||||||
|     std::array<bool, 32> loads_generics{}; |     std::array<bool, 32> loads_generics{}; | ||||||
|     bool loads_position{}; |     bool loads_position{}; | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user