mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-31 06:46:40 +08:00 
			
		
		
		
	shader: Implement TLD4.PTP
This commit is contained in:
		
							parent
							
								
									981eb6f43b
								
							
						
					
					
						commit
						742d11c2ad
					
				| @ -169,6 +169,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { | |||||||
|         AddCapability(spv::Capability::Float64); |         AddCapability(spv::Capability::Float64); | ||||||
|         F64.Define(*this, TypeFloat(64), "f64"); |         F64.Define(*this, TypeFloat(64), "f64"); | ||||||
|     } |     } | ||||||
|  |     array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void EmitContext::DefineCommonConstants() { | void EmitContext::DefineCommonConstants() { | ||||||
|  | |||||||
| @ -65,6 +65,7 @@ public: | |||||||
|     VectorTypes U32; |     VectorTypes U32; | ||||||
|     VectorTypes F16; |     VectorTypes F16; | ||||||
|     VectorTypes F64; |     VectorTypes F64; | ||||||
|  |     Id array_U32x2; | ||||||
| 
 | 
 | ||||||
|     Id true_value{}; |     Id true_value{}; | ||||||
|     Id false_value{}; |     Id false_value{}; | ||||||
|  | |||||||
| @ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||||||
|                         Id value); |                         Id value); | ||||||
| void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|                          Id value); |                          Id value); | ||||||
| Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); | ||||||
| Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||||||
| Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||||||
| Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | ||||||
| @ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); | |||||||
| Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); | Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); | ||||||
| Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||||||
| Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||||||
| Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | ||||||
| @ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); | |||||||
| Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); | Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); | ||||||
| Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||||||
| Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||||||
| Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | ||||||
| @ -122,6 +122,7 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); | |||||||
| Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
| Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||||||
|  | Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); | ||||||
| void EmitCompositeConstructF64x2(EmitContext& ctx); | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||||||
| void EmitCompositeConstructF64x3(EmitContext& ctx); | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||||||
| void EmitCompositeConstructF64x4(EmitContext& ctx); | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||||||
|  | |||||||
| @ -3,10 +3,15 @@ | |||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/modifiers.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader::Backend::SPIRV { | namespace Shader::Backend::SPIRV { | ||||||
| 
 | 
 | ||||||
| Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { | Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { | ||||||
|  |     const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||||||
|  |     if (info.is_constant) { | ||||||
|  |         return ctx.ConstantComposite(ctx.U32[2], e1, e2); | ||||||
|  |     } | ||||||
|     return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); |     return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -42,7 +47,12 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index | |||||||
|     return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); |     return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { | Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { | ||||||
|  | 
 | ||||||
|  |     const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||||||
|  |     if (info.is_constant) { | ||||||
|  |         return ctx.ConstantComposite(ctx.F16[2], e1, e2); | ||||||
|  |     } | ||||||
|     return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); |     return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -78,7 +88,11 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index | |||||||
|     return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); |     return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { | Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { | ||||||
|  |     const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||||||
|  |     if (info.is_constant) { | ||||||
|  |         return ctx.ConstantComposite(ctx.F32[2], e1, e2); | ||||||
|  |     } | ||||||
|     return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); |     return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -150,4 +164,15 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index | |||||||
|     return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); |     return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { | ||||||
|  |     const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||||||
|  |     if (info.is_constant) { | ||||||
|  |         return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); | ||||||
|  |     } | ||||||
|  |     if (ctx.profile.support_variadic_ptp) { | ||||||
|  |         return OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); | ||||||
|  |     } | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  | |||||||
| @ -30,10 +30,13 @@ public: | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { |     explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) { | ||||||
|         if (Sirit::ValidId(offset)) { |         if (Sirit::ValidId(offset)) { | ||||||
|             Add(spv::ImageOperandsMask::Offset, offset); |             Add(spv::ImageOperandsMask::Offset, offset); | ||||||
|         } |         } | ||||||
|  |         if (Sirit::ValidId(offset2)) { | ||||||
|  |             Add(spv::ImageOperandsMask::ConstOffsets, offset2); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void Add(spv::ImageOperandsMask new_mask, Id value) { |     void Add(spv::ImageOperandsMask new_mask, Id value) { | ||||||
| @ -177,7 +180,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va | |||||||
| Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||||||
|                    [[maybe_unused]] Id offset2) { |                    [[maybe_unused]] Id offset2) { | ||||||
|     const auto info{inst->Flags<IR::TextureInstInfo>()}; |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|     const ImageOperands operands(ctx, offset); |     const ImageOperands operands(ctx, offset, offset2); | ||||||
|     return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, |     return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | ||||||
|                 ctx.F32[4], Texture(ctx, index), coords, |                 ctx.F32[4], Texture(ctx, index), coords, | ||||||
|                 ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), |                 ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), | ||||||
| @ -187,7 +190,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||||||
| Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||||||
|                        Id offset, [[maybe_unused]] Id offset2, Id dref) { |                        Id offset, [[maybe_unused]] Id offset2, Id dref) { | ||||||
|     const auto info{inst->Flags<IR::TextureInstInfo>()}; |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|     const ImageOperands operands(ctx, offset); |     const ImageOperands operands(ctx, offset, offset2); | ||||||
|     return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, |     return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | ||||||
|                 ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); |                 ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||||||
| } | } | ||||||
|  | |||||||
| @ -398,15 +398,16 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { | |||||||
|     if (e1.Type() != e2.Type()) { |     if (e1.Type() != e2.Type()) { | ||||||
|         throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); |         throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); | ||||||
|     } |     } | ||||||
|  |     CompositeDecoration decor{}; | ||||||
|     switch (e1.Type()) { |     switch (e1.Type()) { | ||||||
|     case Type::U32: |     case Type::U32: | ||||||
|         return Inst(Opcode::CompositeConstructU32x2, e1, e2); |         return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); | ||||||
|     case Type::F16: |     case Type::F16: | ||||||
|         return Inst(Opcode::CompositeConstructF16x2, e1, e2); |         return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); | ||||||
|     case Type::F32: |     case Type::F32: | ||||||
|         return Inst(Opcode::CompositeConstructF32x2, e1, e2); |         return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); | ||||||
|     case Type::F64: |     case Type::F64: | ||||||
|         return Inst(Opcode::CompositeConstructF64x2, e1, e2); |         return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); | ||||||
|     default: |     default: | ||||||
|         ThrowInvalidType(e1.Type()); |         ThrowInvalidType(e1.Type()); | ||||||
|     } |     } | ||||||
| @ -436,6 +437,7 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||||||
|         throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), |         throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), | ||||||
|                               e3.Type(), e4.Type()); |                               e3.Type(), e4.Type()); | ||||||
|     } |     } | ||||||
|  |     CompositeDecoration decor{}; | ||||||
|     switch (e1.Type()) { |     switch (e1.Type()) { | ||||||
|     case Type::U32: |     case Type::U32: | ||||||
|         return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); |         return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); | ||||||
| @ -445,6 +447,8 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||||||
|         return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); |         return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); | ||||||
|     case Type::F64: |     case Type::F64: | ||||||
|         return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); |         return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); | ||||||
|  |     case Type::U32x2: | ||||||
|  |         return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); | ||||||
|     default: |     default: | ||||||
|         ThrowInvalidType(e1.Type()); |         ThrowInvalidType(e1.Type()); | ||||||
|     } |     } | ||||||
| @ -1481,7 +1485,7 @@ Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Val | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | ||||||
|                     const Value& offset2, const F32& dref, TextureInstInfo info) { |                                  const Value& offset2, const F32& dref, TextureInstInfo info) { | ||||||
|     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref |     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref | ||||||
|                                          : Opcode::BindlessImageGatherDref}; |                                          : Opcode::BindlessImageGatherDref}; | ||||||
|     return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); |     return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); | ||||||
|  | |||||||
| @ -99,6 +99,12 @@ public: | |||||||
|         return ret; |         return ret; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     template <typename FlagsType> | ||||||
|  |     requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||||||
|  |         [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { | ||||||
|  |         std::memcpy(&flags, &new_val, sizeof(new_val)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Intrusively store the host definition of this instruction.
 |     /// Intrusively store the host definition of this instruction.
 | ||||||
|     template <typename DefinitionType> |     template <typename DefinitionType> | ||||||
|     void SetDefinition(DefinitionType def) { |     void SetDefinition(DefinitionType def) { | ||||||
|  | |||||||
| @ -32,6 +32,11 @@ struct FpControl { | |||||||
| }; | }; | ||||||
| static_assert(sizeof(FpControl) <= sizeof(u32)); | static_assert(sizeof(FpControl) <= sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|  | struct CompositeDecoration { | ||||||
|  |     bool is_constant{false}; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); | ||||||
|  | 
 | ||||||
| union TextureInstInfo { | union TextureInstInfo { | ||||||
|     u32 raw; |     u32 raw; | ||||||
|     BitField<0, 8, TextureType> type; |     BitField<0, 8, TextureType> type; | ||||||
|  | |||||||
| @ -126,6 +126,7 @@ OPCODE(CompositeExtractF64x4,                               F64,            F64x | |||||||
| OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                                            ) | OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                                            ) | OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                                            ) | OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                                            ) | ||||||
|  | OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) | ||||||
| 
 | 
 | ||||||
| // Select operations
 | // Select operations
 | ||||||
| OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             ) | OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             ) | ||||||
|  | |||||||
| @ -44,6 +44,20 @@ bool Value::IsEmpty() const noexcept { | |||||||
|     return type == Type::Void; |     return type == Type::Void; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool Value::IsConstantContainer() const { | ||||||
|  |     if (IsImmediate()) { | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     ValidateAccess(Type::Opaque); | ||||||
|  |     auto num_args = inst->NumArgs(); | ||||||
|  |     for (size_t i = 0; i < num_args; i++) { | ||||||
|  |         if (!inst->Arg(i).IsConstantContainer()) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool Value::IsImmediate() const noexcept { | bool Value::IsImmediate() const noexcept { | ||||||
|     if (IsIdentity()) { |     if (IsIdentity()) { | ||||||
|         return inst->Arg(0).IsImmediate(); |         return inst->Arg(0).IsImmediate(); | ||||||
|  | |||||||
| @ -38,6 +38,7 @@ public: | |||||||
|     [[nodiscard]] bool IsImmediate() const noexcept; |     [[nodiscard]] bool IsImmediate() const noexcept; | ||||||
|     [[nodiscard]] bool IsLabel() const noexcept; |     [[nodiscard]] bool IsLabel() const noexcept; | ||||||
|     [[nodiscard]] IR::Type Type() const noexcept; |     [[nodiscard]] IR::Type Type() const noexcept; | ||||||
|  |     [[nodiscard]] bool IsConstantContainer() const; | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] IR::Inst* Inst() const; |     [[nodiscard]] IR::Inst* Inst() const; | ||||||
|     [[nodiscard]] IR::Block* Label() const; |     [[nodiscard]] IR::Block* Label() const; | ||||||
|  | |||||||
| @ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | |||||||
|     switch (type) { |     switch (type) { | ||||||
|     case TextureType::_1D: |     case TextureType::_1D: | ||||||
|     case TextureType::ARRAY_1D: |     case TextureType::ARRAY_1D: | ||||||
|         return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); |         return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||||||
|     case TextureType::_2D: |     case TextureType::_2D: | ||||||
|     case TextureType::ARRAY_2D: |     case TextureType::ARRAY_2D: | ||||||
|         return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), |         return v.ir.CompositeConstruct( | ||||||
|                                        v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); |             v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||||||
|  |             v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||||||
|     case TextureType::_3D: |     case TextureType::_3D: | ||||||
|     case TextureType::ARRAY_3D: |     case TextureType::ARRAY_3D: | ||||||
|         return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), |         return v.ir.CompositeConstruct( | ||||||
|                                        v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), |             v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||||||
|                                        v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); |             v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||||||
|  |             v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||||||
|     case TextureType::CUBE: |     case TextureType::CUBE: | ||||||
|     case TextureType::ARRAY_CUBE: |     case TextureType::ARRAY_CUBE: | ||||||
|         throw NotImplementedException("Illegal offset on CUBE sample"); |         throw NotImplementedException("Illegal offset on CUBE sample"); | ||||||
|  | |||||||
| @ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | |||||||
|     throw NotImplementedException("Invalid texture type {}", type); |     throw NotImplementedException("Invalid texture type {}", type); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||||||
|     const IR::U32 value1{v.X(reg++)}; |     const IR::U32 value1{v.X(reg++)}; | ||||||
|     const IR::U32 value2{v.X(reg++)}; |     const IR::U32 value2{v.X(reg++)}; | ||||||
|     const auto getVector = ([&v](const IR::U32& value) { |     const IR::U32 bitsize = v.ir.Imm32(6); | ||||||
|  |     const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { | ||||||
|         return v.ir.CompositeConstruct( |         return v.ir.CompositeConstruct( | ||||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), |             v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), | ||||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), |             v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); | ||||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), |  | ||||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); |  | ||||||
|     }); |     }); | ||||||
|     return {getVector(value1), getVector(value2)}; |     return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), | ||||||
|  |                                    getVector(value2, 0), getVector(value2, 16)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||||||
| @ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy | |||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OffsetType::PTP: { |     case OffsetType::PTP: { | ||||||
|         std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); |         offset2 = MakeOffsetPTP(v, meta_reg); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     default: |     default: | ||||||
|  | |||||||
| @ -355,6 +355,17 @@ void FoldBranchConditional(IR::Inst& inst) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { | ||||||
|  |     for (size_t i = 0; i < amount; i++) { | ||||||
|  |         if (!inst.Arg(i).IsConstantContainer()) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     auto info{inst.Flags<IR::CompositeDecoration>()}; | ||||||
|  |     info.is_constant = true; | ||||||
|  |     inst.SetFlags(info); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||||
|     switch (inst.Opcode()) { |     switch (inst.Opcode()) { | ||||||
|     case IR::Opcode::GetRegister: |     case IR::Opcode::GetRegister: | ||||||
| @ -380,6 +391,13 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||||||
|     case IR::Opcode::SelectF32: |     case IR::Opcode::SelectF32: | ||||||
|     case IR::Opcode::SelectF64: |     case IR::Opcode::SelectF64: | ||||||
|         return FoldSelect(inst); |         return FoldSelect(inst); | ||||||
|  |     case IR::Opcode::CompositeConstructU32x2: | ||||||
|  |     case IR::Opcode::CompositeConstructF16x2: | ||||||
|  |     case IR::Opcode::CompositeConstructF32x2: | ||||||
|  |     case IR::Opcode::CompositeConstructF64x2: | ||||||
|  |         return FoldConstantComposite(inst, 2); | ||||||
|  |     case IR::Opcode::CompositeConstructArrayU32x2: | ||||||
|  |         return FoldConstantComposite(inst, 4); | ||||||
|     case IR::Opcode::FPMul32: |     case IR::Opcode::FPMul32: | ||||||
|         return FoldFPMul32(inst); |         return FoldFPMul32(inst); | ||||||
|     case IR::Opcode::LogicalAnd: |     case IR::Opcode::LogicalAnd: | ||||||
|  | |||||||
| @ -30,6 +30,7 @@ struct Profile { | |||||||
|     bool support_fp32_signed_zero_nan_preserve{}; |     bool support_fp32_signed_zero_nan_preserve{}; | ||||||
|     bool support_fp64_signed_zero_nan_preserve{}; |     bool support_fp64_signed_zero_nan_preserve{}; | ||||||
|     bool support_vote{}; |     bool support_vote{}; | ||||||
|  |     bool support_variadic_ptp{}; | ||||||
|     bool warp_size_potentially_larger_than_guest{}; |     bool warp_size_potentially_larger_than_guest{}; | ||||||
| 
 | 
 | ||||||
|     // FClamp is broken and OpFMax + OpFMin should be used instead
 |     // FClamp is broken and OpFMax + OpFMin should be used instead
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user