suyu/src/shader_recompiler/backend/msl/msl_emit_context.cpp
2024-10-05 17:02:46 +02:00

605 lines
22 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/div_ceil.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/msl/msl_emit_context.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::MSL {
namespace {
u32 CbufIndex(size_t offset) {
return (offset / 4) % 4;
}
char Swizzle(size_t offset) {
return "xyzw"[CbufIndex(offset)];
}
// TODO
/*
std::string_view InterpDecorator(Interpolation interp) {
switch (interp) {
case Interpolation::Smooth:
return "";
case Interpolation::Flat:
return "flat ";
case Interpolation::NoPerspective:
return "noperspective ";
}
throw InvalidArgument("Invalid interpolation {}", interp);
}
*/
// TODO
/*
std::string_view InputArrayDecorator(Stage stage) {
switch (stage) {
case Stage::Geometry:
case Stage::TessellationControl:
case Stage::TessellationEval:
return "[]";
default:
return "";
}
}
*/
// TODO
std::string OutputDecorator(Stage stage, u32 size) {
switch (stage) {
case Stage::TessellationControl:
return fmt::format("[{}]", size);
default:
return "";
}
}
/*
// TODO
std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) {
case TessPrimitive::Triangles:
return "triangles";
case TessPrimitive::Quads:
return "quads";
case TessPrimitive::Isolines:
return "isolines";
}
throw InvalidArgument("Invalid tessellation primitive {}", primitive);
}
// TODO
std::string_view GetTessSpacing(TessSpacing spacing) {
switch (spacing) {
case TessSpacing::Equal:
return "equal_spacing";
case TessSpacing::FractionalOdd:
return "fractional_odd_spacing";
case TessSpacing::FractionalEven:
return "fractional_even_spacing";
}
throw InvalidArgument("Invalid tessellation spacing {}", spacing);
}
// TODO
std::string_view InputPrimitive(InputTopology topology) {
switch (topology) {
case InputTopology::Points:
return "points";
case InputTopology::Lines:
return "lines";
case InputTopology::LinesAdjacency:
return "lines_adjacency";
case InputTopology::Triangles:
return "triangles";
case InputTopology::TrianglesAdjacency:
return "triangles_adjacency";
}
throw InvalidArgument("Invalid input topology {}", topology);
}
// TODO
std::string_view OutputPrimitive(OutputTopology topology) {
switch (topology) {
case OutputTopology::PointList:
return "points";
case OutputTopology::LineStrip:
return "line_strip";
case OutputTopology::TriangleStrip:
return "triangle_strip";
}
throw InvalidArgument("Invalid output topology {}", topology);
}
*/
// TODO
std::string_view DepthSamplerType(TextureType type) {
switch (type) {
case TextureType::Color1D:
return "sampler1DShadow";
case TextureType::ColorArray1D:
return "sampler1DArrayShadow";
case TextureType::Color2D:
return "sampler2DShadow";
case TextureType::ColorArray2D:
return "sampler2DArrayShadow";
case TextureType::ColorCube:
return "samplerCubeShadow";
case TextureType::ColorArrayCube:
return "samplerCubeArrayShadow";
default:
throw NotImplementedException("Texture type: {}", type);
}
}
// TODO: emit sampler as well
// TODO: handle multisample
// TODO: handle texture buffer
std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) {
if (is_multisample) {
ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D);
}
switch (type) {
case TextureType::Color1D:
return "texture1d";
case TextureType::ColorArray1D:
return "texture1d_array";
case TextureType::Color2D:
case TextureType::Color2DRect:
return "texture2d";
case TextureType::ColorArray2D:
return "texture2d_array";
case TextureType::Color3D:
return "texture3d";
case TextureType::ColorCube:
return "texturecube";
case TextureType::ColorArrayCube:
return "texturecube_array";
default:
throw NotImplementedException("Texture type: {}", type);
}
}
// TODO: handle texture buffer
std::string_view ImageType(TextureType type) {
switch (type) {
case TextureType::Color1D:
return "texture1d";
case TextureType::ColorArray1D:
return "texture1d_array";
case TextureType::Color2D:
return "texture2d";
case TextureType::ColorArray2D:
return "texture2d_array";
case TextureType::Color3D:
return "texture3d";
case TextureType::ColorCube:
return "texturecube";
case TextureType::ColorArrayCube:
return "texturecube_array";
default:
throw NotImplementedException("Image type: {}", type);
}
}
// TODO: is this needed?
/*
std::string_view ImageFormatString(ImageFormat format) {
switch (format) {
case ImageFormat::Typeless:
return "";
case ImageFormat::R8_UINT:
return ",r8ui";
case ImageFormat::R8_SINT:
return ",r8i";
case ImageFormat::R16_UINT:
return ",r16ui";
case ImageFormat::R16_SINT:
return ",r16i";
case ImageFormat::R32_UINT:
return ",r32ui";
case ImageFormat::R32G32_UINT:
return ",rg32ui";
case ImageFormat::R32G32B32A32_UINT:
return ",rgba32ui";
default:
throw NotImplementedException("Image format: {}", format);
}
}
*/
std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
if (is_written && is_read) {
return ",access::read,access::write";
}
if (is_written) {
return ",access::write";
}
if (is_read) {
return ",access::read";
}
return "";
}
} // Anonymous namespace
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
const RuntimeInfo& runtime_info_)
: info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage},
uses_geometry_passthrough{program.is_geometry_passthrough &&
profile.support_geometry_shader_passthrough} {
if (profile.need_fastmath_off) {
// TODO
}
switch (program.stage) {
case Stage::VertexA:
case Stage::VertexB:
stage_name = "vertex";
break;
case Stage::TessellationControl:
stage_name = "kernel";
break;
case Stage::TessellationEval:
stage_name = "vertex";
break;
case Stage::Geometry:
stage_name = "vertex";
break;
case Stage::Fragment:
stage_name = "fragment";
break;
case Stage::Compute:
stage_name = "kernel";
const u32 local_x{std::max(program.workgroup_size[0], 1u)};
const u32 local_y{std::max(program.workgroup_size[1], 1u)};
const u32 local_z{std::max(program.workgroup_size[2], 1u)};
header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
local_x, local_y, local_z);
break;
}
// TODO
// SetupOutPerVertex(*this, header);
// SetupInPerVertex(*this, header);
// Stage input
bool has_stage_input{};
header += "struct __Input {\n";
if (stage == Stage::Fragment) {
header += "float4 position [[position]];\n";
has_stage_input = true;
}
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
continue;
}
DefineStageInOut(index, program.invocations, true);
has_stage_input = true;
}
for (size_t index = 0; index < info.uses_patches.size(); ++index) {
// TODO: what is this
if (!info.uses_patches[index]) {
continue;
}
// TODO: implement
}
header += "};\n";
if (has_stage_input) {
input_str = "__Input __in [[stage_in]]";
has_at_least_one_input = true;
}
if (stage == Stage::VertexA || stage == Stage::VertexB) {
// TODO: don't always declare these
if (has_at_least_one_input)
input_str += ",";
input_str += "uint vid [[vertex_id]]";
has_at_least_one_input = true;
if (has_at_least_one_input)
input_str += ",";
input_str += "uint iid [[instance_id]]";
has_at_least_one_input = true;
}
// Stage output
header += "struct __Output {\n";
if (stage == Stage::VertexB || stage == Stage::Geometry) {
header += "float4 position [[position]];\n";
}
if (stage == Stage::Fragment) {
for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
continue;
}
header += fmt::format("float4 color{} [[color({})]];", index, index);
}
}
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) {
DefineStageInOut(index, program.invocations, false);
}
}
header += "};\n";
DefineInputs(bindings);
if (info.uses_rescaling_uniform) {
if (has_at_least_one_input)
input_str += ",";
input_str += "constant float4& scaling";
has_at_least_one_input = true;
}
if (info.uses_render_area) {
if (has_at_least_one_input)
input_str += ",";
input_str += "constant float4& render_area";
has_at_least_one_input = true;
}
DefineHelperFunctions();
DefineConstants();
}
void EmitContext::DefineInputs(Bindings& bindings) {
// Constant buffers
bindings.uniform_buffer = 0; // HACK
for (const auto& desc : info.constant_buffer_descriptors) {
const u32 cbuf_used_size{Common::DivCeil(info.constant_buffer_used_sizes[desc.index], 16U)};
const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size};
const std::string cbuf_struct_name{fmt::format("{}_cbuf{}_t", stage_name, desc.index)};
header +=
fmt::format("struct {} {{float4 data[{}];}};\n", cbuf_struct_name, cbuf_binding_size);
if (has_at_least_one_input)
input_str += ",";
input_str += fmt::format("constant {}& {}_cbuf{} [[buffer({})]]", cbuf_struct_name,
stage_name, desc.index, bindings.uniform_buffer);
bindings.uniform_buffer += desc.count;
has_at_least_one_input = true;
}
// Constant buffer indirect
// TODO
// Storage space buffers
bindings.uniform_buffer = 8; // HACK
u32 index{};
for (const auto& desc : info.storage_buffers_descriptors) {
if (has_at_least_one_input)
input_str += ",";
const std::string address_space{desc.is_written ? "device" : "constant"};
input_str += fmt::format("{} uint* {}_ssbo{} [[buffer({})]]", address_space, stage_name,
index, bindings.uniform_buffer);
bindings.uniform_buffer += desc.count;
index += desc.count;
has_at_least_one_input = true;
}
// Images
// TODO
/*
image_buffers.reserve(info.image_buffer_descriptors.size());
for (const auto& desc : info.image_buffer_descriptors) {
image_buffers.push_back({bindings.image, desc.count});
const auto format{ImageFormatString(desc.format)};
const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
input_str += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
bindings.image, format, qualifier, bindings.image,
array_decorator); bindings.image += desc.count;
}
*/
images.reserve(info.image_descriptors.size());
for (const auto& desc : info.image_descriptors) {
images.push_back({bindings.texture, desc.count});
// TODO: do we need format?
// const auto format{ImageFormatString(desc.format)};
const auto image_type{ImageType(desc.type)};
const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
if (has_at_least_one_input)
input_str += ",";
input_str += fmt::format("{}<float{}> img{}{} [[texture({})]]", qualifier, image_type,
bindings.texture, array_decorator, bindings.texture);
bindings.texture += desc.count;
has_at_least_one_input = true;
}
// Textures
// TODO
/*
texture_buffers.reserve(info.texture_buffer_descriptors.size());
for (const auto& desc : info.texture_buffer_descriptors) {
texture_buffers.push_back({bindings.texture, desc.count});
const auto sampler_type{ColorSamplerType(TextureType::Buffer)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
input_str += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
sampler_type, bindings.texture, array_decorator);
bindings.texture += desc.count;
}
*/
textures.reserve(info.texture_descriptors.size());
for (const auto& desc : info.texture_descriptors) {
textures.push_back({bindings.texture, desc.count});
const auto texture_type{desc.is_depth ? DepthSamplerType(desc.type)
: ColorSamplerType(desc.type, desc.is_multisample)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
if (has_at_least_one_input)
input_str += ",";
input_str += fmt::format("{}<float> tex{}{} [[texture({})]]", texture_type,
bindings.texture, array_decorator, bindings.texture);
input_str += fmt::format(",sampler samp{}{} [[sampler({})]]", bindings.texture,
array_decorator, bindings.texture);
bindings.texture += desc.count;
has_at_least_one_input = true;
}
}
// TODO
void EmitContext::DefineStageInOut(size_t index, u32 invocations, bool is_input) {
const auto type{fmt::format("float{}", 4)};
std::string name{fmt::format("attr{}", index)};
header += fmt::format("{} {}{} [[user(locn{})]];\n", type, name,
OutputDecorator(stage, invocations), index);
const GenericElementInfo element_info{
.name = (is_input ? "__in." : "__out.") + name,
.first_element = 0,
.num_components = 4,
};
std::fill_n(output_generics[index].begin(), 4, element_info);
}
void EmitContext::DefineHelperFunctions() {
header +=
"uint bitfieldExtract(uint value, int offset, int bits) {\nreturn (value >> offset) & "
"((1 << bits) - 1);\n}\n";
if (info.uses_global_increment || info.uses_shared_increment) {
header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
}
if (info.uses_global_decrement || info.uses_shared_decrement) {
header += "uint CasDecrement(uint op_a,uint op_b){"
"return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
}
if (info.uses_atomic_f32_add) {
header += "uint CasFloatAdd(uint op_a,float op_b){"
"return as_type<uint>(as_type<float>(op_a)+op_b);}";
}
if (info.uses_atomic_f32x2_add) {
header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
"return packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
}
if (info.uses_atomic_f32x2_min) {
header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
"packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
}
if (info.uses_atomic_f32x2_max) {
header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
"packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
}
if (info.uses_atomic_f16x2_add) {
header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
"packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
}
if (info.uses_atomic_f16x2_min) {
header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
"packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
}
if (info.uses_atomic_f16x2_max) {
header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
"packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
}
if (info.uses_atomic_s32_min) {
header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
}
if (info.uses_atomic_s32_max) {
header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
}
if (info.uses_global_memory && profile.support_int64) {
header += DefineGlobalMemoryFunctions();
}
if (info.loads_indexed_attributes) {
const bool is_array{stage == Stage::Geometry};
const auto vertex_arg{is_array ? ",uint vertex" : ""};
std::string func{
fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint "
"masked_index=uint(base_index)&3u;switch(base_index>>2){{",
vertex_arg)};
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
const auto position_idx{is_array ? "gl_in[vertex]." : ""};
func += fmt::format("case {}:return {}{}[masked_index];",
static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx,
"__out.position");
}
const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
for (u32 index = 0; index < IR::NUM_GENERICS; ++index) {
if (!info.loads.Generic(index)) {
continue;
}
const auto vertex_idx{is_array ? "[vertex]" : ""};
func += fmt::format("case {}:return in_attr{}{}[masked_index];",
base_attribute_value + index, index, vertex_idx);
}
func += "default: return 0.0;}}";
header += func;
}
if (info.stores_indexed_attributes) {
// TODO
}
}
std::string EmitContext::DefineGlobalMemoryFunctions() {
const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
const auto& ssbo{info.storage_buffers_descriptors[index]};
const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
const auto cbuf{fmt::format("{}_cbuf{}.data", stage_name, ssbo.cbuf_index)};
std::array<std::string, 2> addr_xy;
std::array<std::string, 2> size_xy;
for (size_t i = 0; i < addr_xy.size(); ++i) {
const auto addr_loc{ssbo.cbuf_offset + 4 * i};
const auto size_loc{size_cbuf_offset + 4 * i};
addr_xy[i] =
fmt::format("as_type<uint>({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
size_xy[i] =
fmt::format("as_type<uint>({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
}
const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
func += addr_statement;
const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
func += comparison;
const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr);
}};
std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
const size_t num_buffers{info.storage_buffers_descriptors.size()};
for (size_t index = 0; index < num_buffers; ++index) {
if (!info.nvn_buffer_used[index]) {
continue;
}
define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
define_body(write_func_64, index,
"{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
define_body(write_func_128, index,
"{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
"addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
define_body(load_func_64, index,
"return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
define_body(load_func_128, index,
"return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
"uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
}
write_func += '}';
write_func_64 += '}';
write_func_128 += '}';
load_func += "return 0u;}";
load_func_64 += "return uint2(0);}";
load_func_128 += "return uint4(0);}";
return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
}
void EmitContext::DefineConstants() {
if (info.uses_fswzadd) {
header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);"
"const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);";
}
}
} // namespace Shader::Backend::MSL