Mirror of https://git.suyu.dev/suyu/suyu.git (synced 2025-10-31 06:46:40 +08:00)
			
		
		
		
	Merge pull request #9423 from vonchenplus/vulkan_quad_strip
video_core: Implement all vulkan topology
This commit is contained in:
		
						commit
						9fdacb5e3a
					
				| @ -666,8 +666,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { | |||||||
|         BindHostIndexBuffer(); |         BindHostIndexBuffer(); | ||||||
|     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { |     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||||||
|         const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |         const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | ||||||
|         if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { |         if (draw_state.topology == Maxwell::PrimitiveTopology::Quads || | ||||||
|             runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first, |             draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { | ||||||
|  |             runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first, | ||||||
|                                         draw_state.vertex_buffer.count); |                                         draw_state.vertex_buffer.count); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | |||||||
| @ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | |||||||
| layout (push_constant) uniform PushConstants { | layout (push_constant) uniform PushConstants { | ||||||
|     uint base_vertex; |     uint base_vertex; | ||||||
|     int index_shift; // 0: uint8, 1: uint16, 2: uint32 |     int index_shift; // 0: uint8, 1: uint16, 2: uint32 | ||||||
|  |     int is_strip; // 0: quads 1: quadstrip | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| void main() { | void main() { | ||||||
| @ -28,9 +29,10 @@ void main() { | |||||||
|     int flipped_shift = 2 - index_shift; |     int flipped_shift = 2 - index_shift; | ||||||
|     int mask = (1 << flipped_shift) - 1; |     int mask = (1 << flipped_shift) - 1; | ||||||
| 
 | 
 | ||||||
|     const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); |     const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3); | ||||||
|  |     const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3); | ||||||
|     for (uint vertex = 0; vertex < 6; ++vertex) { |     for (uint vertex = 0; vertex < 6; ++vertex) { | ||||||
|         int offset = primitive * 4 + quad_swizzle[vertex]; |         int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]); | ||||||
|         int int_offset = offset >> flipped_shift; |         int int_offset = offset >> flipped_shift; | ||||||
|         int bit_offset = (offset & mask) * index_size; |         int bit_offset = (offset & mask) * index_size; | ||||||
|         uint packed_input = input_indexes[int_offset]; |         uint packed_input = input_indexes[int_offset]; | ||||||
|  | |||||||
| @ -301,6 +301,8 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, | |||||||
|         return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; |         return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; | ||||||
|     case Maxwell::PrimitiveTopology::Lines: |     case Maxwell::PrimitiveTopology::Lines: | ||||||
|         return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; |         return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; | ||||||
|  |     case Maxwell::PrimitiveTopology::LineLoop: | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; | ||||||
|     case Maxwell::PrimitiveTopology::LineStrip: |     case Maxwell::PrimitiveTopology::LineStrip: | ||||||
|         return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; |         return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; | ||||||
|     case Maxwell::PrimitiveTopology::Triangles: |     case Maxwell::PrimitiveTopology::Triangles: | ||||||
| @ -309,15 +311,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, | |||||||
|         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; | ||||||
|     case Maxwell::PrimitiveTopology::TriangleFan: |     case Maxwell::PrimitiveTopology::TriangleFan: | ||||||
|         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; | ||||||
|  |     case Maxwell::PrimitiveTopology::LinesAdjacency: | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; | ||||||
|  |     case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; | ||||||
|  |     case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY; | ||||||
|  |     case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; | ||||||
|     case Maxwell::PrimitiveTopology::Quads: |     case Maxwell::PrimitiveTopology::Quads: | ||||||
|         // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
 |     case Maxwell::PrimitiveTopology::QuadStrip: | ||||||
|  |         // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
 | ||||||
|  |         // whenever it releases
 | ||||||
|         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; | ||||||
|     case Maxwell::PrimitiveTopology::Patches: |     case Maxwell::PrimitiveTopology::Patches: | ||||||
|         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; |         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; | ||||||
|     default: |     case Maxwell::PrimitiveTopology::Polygon: | ||||||
|  |         LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a " | ||||||
|  |                                    "single body and not a bunch of triangles."); | ||||||
|  |         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; | ||||||
|  |     } | ||||||
|     UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); |     UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); | ||||||
|     return {}; |     return {}; | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, | VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, | ||||||
|  | |||||||
| @ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Review note: left (old) column only -- this free function is removed by the commit.
// It returns the six indices for one quad: the pattern {0, 1, 2, 0, 2, 3} with
// `first + quad * 4` added to every entry, converted to the index type T.
// QuadArrayIndexBuffer::MakeIndices in the right column has the same body.
| template <typename T> |  | ||||||
| std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { |  | ||||||
|     std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; |  | ||||||
|     for (T& index : indices) { |  | ||||||
|         index = static_cast<T>(first + index + quad * 4); |  | ||||||
|     } |  | ||||||
|     return indices; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::Buffer CreateBuffer(const Device& device, u64 size) { | vk::Buffer CreateBuffer(const Device& device, u64 size) { | ||||||
|     VkBufferUsageFlags flags = |     VkBufferUsageFlags flags = | ||||||
|         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||||||
| @ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat | |||||||
|     return *views.back().handle; |     return *views.back().handle; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Review note: right (new) column only -- this class is added by the commit.
// QuadIndexBuffer is an abstract base that owns one index buffer (debug name "Quad LUT")
// holding six indices per quad. Subclasses supply two things:
//   - GetQuadsNum: how many quads a given index count produces;
//   - MakeAndUpdateIndices: writes the six indices of one quad into staging memory.
|  | class QuadIndexBuffer { | ||||||
|  | public: | ||||||
|  |     QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||||
|  |                     Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||||
|  |         : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||||
|  |           staging_pool{staging_pool_} {} | ||||||
|  | 
 | ||||||
|  |     virtual ~QuadIndexBuffer() = default; | ||||||
|  | 
 | ||||||
// UpdateBuffer: makes sure the table covers at least num_indices_ indices.
// It returns immediately when the current table is already that large; otherwise it
// calls scheduler.Finish(), re-selects index_type with IndexTypeFromNumElements and
// rebuilds the buffer from scratch (the table only ever grows).
|  |     void UpdateBuffer(u32 num_indices_) { | ||||||
|  |         if (num_indices_ <= num_indices) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         scheduler.Finish(); | ||||||
|  | 
 | ||||||
|  |         num_indices = num_indices_; | ||||||
|  |         index_type = IndexTypeFromNumElements(device, num_indices); | ||||||
|  | 
 | ||||||
// Size: six indices per quad, stored num_first_offset_copies (4) times -- one copy for
// each `first` value (0..3) that the fill loop below passes to MakeAndUpdateIndices.
|  |         const u32 num_quads = GetQuadsNum(num_indices); | ||||||
|  |         const u32 num_triangle_indices = num_quads * 6; | ||||||
|  |         const u32 num_first_offset_copies = 4; | ||||||
|  |         const size_t bytes_per_index = BytesPerIndex(index_type); | ||||||
|  |         const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||||||
|  |         buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .flags = 0, | ||||||
|  |             .size = size_bytes, | ||||||
|  |             .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||||
|  |             .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||||
|  |             .queueFamilyIndexCount = 0, | ||||||
|  |             .pQueueFamilyIndices = nullptr, | ||||||
|  |         }); | ||||||
|  |         if (device.HasDebuggingToolAttached()) { | ||||||
|  |             buffer.SetObjectNameEXT("Quad LUT"); | ||||||
|  |         } | ||||||
|  |         memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||||||
|  | 
 | ||||||
// Fill the staging memory copy by copy, quad by quad; every quad takes quad_size bytes.
|  |         const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||||||
|  |         u8* staging_data = staging.mapped_span.data(); | ||||||
|  |         const size_t quad_size = bytes_per_index * 6; | ||||||
|  | 
 | ||||||
|  |         for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||||||
|  |             for (u32 quad = 0; quad < num_quads; ++quad) { | ||||||
|  |                 MakeAndUpdateIndices(staging_data, quad_size, quad, first); | ||||||
|  |                 staging_data += quad_size; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
// Recorded commands: copy staging -> buffer, then a buffer memory barrier from transfer
// writes to index reads (TRANSFER stage -> VERTEX_INPUT stage).
|  |         scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |         scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | ||||||
|  |                           dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { | ||||||
|  |             const VkBufferCopy copy{ | ||||||
|  |                 .srcOffset = src_offset, | ||||||
|  |                 .dstOffset = 0, | ||||||
|  |                 .size = size_bytes, | ||||||
|  |             }; | ||||||
|  |             const VkBufferMemoryBarrier write_barrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |                 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .buffer = dst_buffer, | ||||||
|  |                 .offset = 0, | ||||||
|  |                 .size = size_bytes, | ||||||
|  |             }; | ||||||
|  |             cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||||||
|  |             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||||
|  |                                    VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// BindBuffer: records a BindIndexBuffer at the byte offset that corresponds to `first`.
// `first % 4` picks one of the four copies (each GetQuadsNum(num_indices) quads long),
// GetQuadsNum(first) then skips that many quads inside the copy; a quad is 6 indices.
// NOTE(review): GetQuadsNum is subclass-specific. QuadStripIndexBuffer returns
// (n - 2) / 2 (0 below 4) while its table advances two vertices per quad -- confirm the
// resulting offset is the intended one for a nonzero `first`.
|  |     void BindBuffer(u32 first) { | ||||||
|  |         const VkIndexType index_type_ = index_type; | ||||||
|  |         const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices); | ||||||
|  |         const size_t offset = | ||||||
|  |             (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type); | ||||||
|  |         scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { | ||||||
|  |             cmdbuf.BindIndexBuffer(buffer, offset, index_type_); | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | protected: | ||||||
// Number of quads produced by `num_indices` input indices (topology-specific).
|  |     virtual u32 GetQuadsNum(u32 num_indices) const = 0; | ||||||
|  | 
 | ||||||
// Writes the six indices of quad number `quad`, for first-offset `first`, to staging_data.
|  |     virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0; | ||||||
|  | 
 | ||||||
|  |     const Device& device; | ||||||
|  |     MemoryAllocator& memory_allocator; | ||||||
|  |     Scheduler& scheduler; | ||||||
|  |     StagingBufferPool& staging_pool; | ||||||
|  | 
 | ||||||
// State of the current table: the buffer, its memory, the index width chosen for it and
// the index count it was built for (0 until the first UpdateBuffer call).
|  |     vk::Buffer buffer{}; | ||||||
|  |     MemoryCommit memory_commit{}; | ||||||
|  |     VkIndexType index_type{}; | ||||||
|  |     u32 num_indices = 0; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
// Review note: right (new) column only -- added by the commit.
// QuadArrayIndexBuffer is the QuadIndexBuffer variant for independent quads:
// every four input indices form one quad.
|  | class QuadArrayIndexBuffer : public QuadIndexBuffer { | ||||||
|  | public: | ||||||
|  |     QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||||
|  |                          Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||||
|  |         : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} | ||||||
|  | 
 | ||||||
|  |     ~QuadArrayIndexBuffer() = default; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
// Integer division: leftover indices that do not complete a quad are dropped.
|  |     u32 GetQuadsNum(u32 num_indices_) const override { | ||||||
|  |         return num_indices_ / 4; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// Six indices for one quad: pattern {0, 1, 2, 0, 2, 3} shifted by `first + quad * 4`,
// converted to the index type T.
|  |     template <typename T> | ||||||
|  |     static std::array<T, 6> MakeIndices(u32 quad, u32 first) { | ||||||
|  |         std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||||||
|  |         for (T& index : indices) { | ||||||
|  |             index = static_cast<T>(first + index + quad * 4); | ||||||
|  |         } | ||||||
|  |         return indices; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// Copies quad_size bytes of MakeIndices output into staging_data, using the element
// width that matches the inherited index_type; any other index type trips ASSERT(false).
// NOTE(review): this has the same signature as the base class's pure virtual but, unlike
// GetQuadsNum above, is not marked `override`.
|  |     void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { | ||||||
|  |         switch (index_type) { | ||||||
|  |         case VK_INDEX_TYPE_UINT8_EXT: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         case VK_INDEX_TYPE_UINT16: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         case VK_INDEX_TYPE_UINT32: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         default: | ||||||
|  |             ASSERT(false); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
// Review note: right (new) column only -- added by the commit.
// QuadStripIndexBuffer is the QuadIndexBuffer variant for quad strips: consecutive quads
// are generated two vertices apart (see `quad * 2` in MakeIndices).
|  | class QuadStripIndexBuffer : public QuadIndexBuffer { | ||||||
|  | public: | ||||||
|  |     QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||||
|  |                          Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||||
|  |         : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} | ||||||
|  | 
 | ||||||
|  |     ~QuadStripIndexBuffer() = default; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
// Fewer than four indices yield no quad; otherwise (n - 2) / 2 quads. The explicit
// guard also keeps `n - 2` from wrapping around for n < 2.
|  |     u32 GetQuadsNum(u32 num_indices_) const override { | ||||||
|  |         return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// Six indices for one strip quad: pattern {0, 3, 1, 0, 2, 3} shifted by
// `first + quad * 2`, converted to the index type T.
|  |     template <typename T> | ||||||
|  |     static std::array<T, 6> MakeIndices(u32 quad, u32 first) { | ||||||
|  |         std::array<T, 6> indices{0, 3, 1, 0, 2, 3}; | ||||||
|  |         for (T& index : indices) { | ||||||
|  |             index = static_cast<T>(first + index + quad * 2); | ||||||
|  |         } | ||||||
|  |         return indices; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// Copies quad_size bytes of MakeIndices output into staging_data, using the element
// width that matches the inherited index_type; any other index type trips ASSERT(false).
// NOTE(review): this has the same signature as the base class's pure virtual but, unlike
// GetQuadsNum above, is not marked `override`.
|  |     void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) { | ||||||
|  |         switch (index_type) { | ||||||
|  |         case VK_INDEX_TYPE_UINT8_EXT: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         case VK_INDEX_TYPE_UINT16: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         case VK_INDEX_TYPE_UINT32: | ||||||
|  |             std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); | ||||||
|  |             break; | ||||||
|  |         default: | ||||||
|  |             ASSERT(false); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | ||||||
|                                        Scheduler& scheduler_, StagingBufferPool& staging_pool_, |                                        Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||||
|                                        UpdateDescriptorQueue& update_descriptor_queue_, |                                        UpdateDescriptorQueue& update_descriptor_queue_, | ||||||
| @ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m | |||||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, |     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||||
|       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, |       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||||||
|       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||||
|       quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} |       quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) { | ||||||
|  |     quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, | ||||||
|  |                                                                      scheduler_, staging_pool_); | ||||||
|  |     quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, | ||||||
|  |                                                                      scheduler_, staging_pool_); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | ||||||
|     return staging_pool.Request(size, MemoryUsage::Upload); |     return staging_pool.Request(size, MemoryUsage::Upload); | ||||||
| @ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat | |||||||
|     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); |     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); | ||||||
|     VkDeviceSize vk_offset = offset; |     VkDeviceSize vk_offset = offset; | ||||||
|     VkBuffer vk_buffer = buffer; |     VkBuffer vk_buffer = buffer; | ||||||
|     if (topology == PrimitiveTopology::Quads) { |     if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) { | ||||||
|         vk_index_type = VK_INDEX_TYPE_UINT32; |         vk_index_type = VK_INDEX_TYPE_UINT32; | ||||||
|         std::tie(vk_buffer, vk_offset) = |         std::tie(vk_buffer, vk_offset) = | ||||||
|             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); |             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset, | ||||||
|  |                                      topology == PrimitiveTopology::QuadStrip); | ||||||
|     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { |     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||||||
|         vk_index_type = VK_INDEX_TYPE_UINT16; |         vk_index_type = VK_INDEX_TYPE_UINT16; | ||||||
|         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); |         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||||||
| @ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat | |||||||
|     }); |     }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { | void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) { | ||||||
|     if (count == 0) { |     if (count == 0) { | ||||||
|         ReserveNullBuffer(); |         ReserveNullBuffer(); | ||||||
|         scheduler.Record([this](vk::CommandBuffer cmdbuf) { |         scheduler.Record([this](vk::CommandBuffer cmdbuf) { | ||||||
| @ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { | |||||||
|         }); |         }); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     ReserveQuadArrayLUT(first + count, true); |  | ||||||
| 
 | 
 | ||||||
|     // The LUT has the indices 0, 1, 2, and 3 copied as an array
 |     if (topology == PrimitiveTopology::Quads) { | ||||||
|     // To apply these 'first' offsets we can apply an offset based on the modulus.
 |         quad_array_index_buffer->UpdateBuffer(first + count); | ||||||
|     const VkIndexType index_type = quad_array_lut_index_type; |         quad_array_index_buffer->BindBuffer(first); | ||||||
|     const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); |     } else if (topology == PrimitiveTopology::QuadStrip) { | ||||||
|     const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); |         quad_strip_index_buffer->UpdateBuffer(first + count); | ||||||
|     scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { |         quad_strip_index_buffer->BindBuffer(first); | ||||||
|         cmdbuf.BindIndexBuffer(buffer, offset, index_type); |     } | ||||||
|     }); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, | void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, | ||||||
| @ -323,83 +499,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, | |||||||
|     }); |     }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Review note: left (old) column only -- this member function is removed by the commit.
// It grows the quad index table held in quad_array_lut: early-out when num_indices is
// already covered, optional scheduler.Finish() when wait_for_idle is set, buffer
// re-creation, staging fill (4 copies x num_indices / 4 quads x 6 indices) and a recorded
// copy followed by a transfer-write -> index-read barrier.
// QuadIndexBuffer::UpdateBuffer in the right column carries the same steps; the visible
// differences are that Finish() is unconditional there and the quad count comes from the
// virtual GetQuadsNum instead of `num_indices / 4`.
| void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { |  | ||||||
|     if (num_indices <= current_num_indices) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     if (wait_for_idle) { |  | ||||||
|         scheduler.Finish(); |  | ||||||
|     } |  | ||||||
|     current_num_indices = num_indices; |  | ||||||
|     quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); |  | ||||||
| 
 |  | ||||||
|     const u32 num_quads = num_indices / 4; |  | ||||||
|     const u32 num_triangle_indices = num_quads * 6; |  | ||||||
|     const u32 num_first_offset_copies = 4; |  | ||||||
|     const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); |  | ||||||
|     const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; |  | ||||||
|     quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .size = size_bytes, |  | ||||||
|         .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, |  | ||||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |  | ||||||
|         .queueFamilyIndexCount = 0, |  | ||||||
|         .pQueueFamilyIndices = nullptr, |  | ||||||
|     }); |  | ||||||
|     if (device.HasDebuggingToolAttached()) { |  | ||||||
|         quad_array_lut.SetObjectNameEXT("Quad LUT"); |  | ||||||
|     } |  | ||||||
|     quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); |  | ||||||
| 
 |  | ||||||
|     const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); |  | ||||||
|     u8* staging_data = staging.mapped_span.data(); |  | ||||||
|     const size_t quad_size = bytes_per_index * 6; |  | ||||||
|     for (u32 first = 0; first < num_first_offset_copies; ++first) { |  | ||||||
|         for (u32 quad = 0; quad < num_quads; ++quad) { |  | ||||||
|             switch (quad_array_lut_index_type) { |  | ||||||
|             case VK_INDEX_TYPE_UINT8_EXT: |  | ||||||
|                 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); |  | ||||||
|                 break; |  | ||||||
|             case VK_INDEX_TYPE_UINT16: |  | ||||||
|                 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); |  | ||||||
|                 break; |  | ||||||
|             case VK_INDEX_TYPE_UINT32: |  | ||||||
|                 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); |  | ||||||
|                 break; |  | ||||||
|             default: |  | ||||||
|                 ASSERT(false); |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             staging_data += quad_size; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |  | ||||||
|     scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, |  | ||||||
|                       dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { |  | ||||||
|         const VkBufferCopy copy{ |  | ||||||
|             .srcOffset = src_offset, |  | ||||||
|             .dstOffset = 0, |  | ||||||
|             .size = size_bytes, |  | ||||||
|         }; |  | ||||||
|         const VkBufferMemoryBarrier write_barrier{ |  | ||||||
|             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, |  | ||||||
|             .pNext = nullptr, |  | ||||||
|             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |  | ||||||
|             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, |  | ||||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |  | ||||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |  | ||||||
|             .buffer = dst_buffer, |  | ||||||
|             .offset = 0, |  | ||||||
|             .size = size_bytes, |  | ||||||
|         }; |  | ||||||
|         cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); |  | ||||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, |  | ||||||
|                                0, write_barrier); |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void BufferCacheRuntime::ReserveNullBuffer() { | void BufferCacheRuntime::ReserveNullBuffer() { | ||||||
|     if (null_buffer) { |     if (null_buffer) { | ||||||
|         return; |         return; | ||||||
|  | |||||||
| @ -50,6 +50,9 @@ private: | |||||||
|     std::vector<BufferView> views; |     std::vector<BufferView> views; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | class QuadArrayIndexBuffer; | ||||||
|  | class QuadStripIndexBuffer; | ||||||
|  | 
 | ||||||
| class BufferCacheRuntime { | class BufferCacheRuntime { | ||||||
|     friend Buffer; |     friend Buffer; | ||||||
| 
 | 
 | ||||||
| @ -86,7 +89,7 @@ public: | |||||||
|     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, |     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, | ||||||
|                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); |                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); | ||||||
| 
 | 
 | ||||||
|     void BindQuadArrayIndexBuffer(u32 first, u32 count); |     void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count); | ||||||
| 
 | 
 | ||||||
|     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); |     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); | ||||||
| 
 | 
 | ||||||
| @ -118,8 +121,6 @@ private: | |||||||
|         update_descriptor_queue.AddBuffer(buffer, offset, size); |         update_descriptor_queue.AddBuffer(buffer, offset, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); |  | ||||||
| 
 |  | ||||||
|     void ReserveNullBuffer(); |     void ReserveNullBuffer(); | ||||||
| 
 | 
 | ||||||
|     const Device& device; |     const Device& device; | ||||||
| @ -128,10 +129,8 @@ private: | |||||||
|     StagingBufferPool& staging_pool; |     StagingBufferPool& staging_pool; | ||||||
|     UpdateDescriptorQueue& update_descriptor_queue; |     UpdateDescriptorQueue& update_descriptor_queue; | ||||||
| 
 | 
 | ||||||
|     vk::Buffer quad_array_lut; |     std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer; | ||||||
|     MemoryCommit quad_array_lut_commit; |     std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; | ||||||
|     VkIndexType quad_array_lut_index_type{}; |  | ||||||
|     u32 current_num_indices = 0; |  | ||||||
| 
 | 
 | ||||||
|     vk::Buffer null_buffer; |     vk::Buffer null_buffer; | ||||||
|     MemoryCommit null_buffer_commit; |     MemoryCommit null_buffer_commit; | ||||||
|  | |||||||
| @ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_, | |||||||
|                                  UpdateDescriptorQueue& update_descriptor_queue_) |                                  UpdateDescriptorQueue& update_descriptor_queue_) | ||||||
|     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||||||
|                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, |                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, | ||||||
|                   COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), |                   COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV), | ||||||
|       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||||||
|       update_descriptor_queue{update_descriptor_queue_} {} |       update_descriptor_queue{update_descriptor_queue_} {} | ||||||
| 
 | 
 | ||||||
| @ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default; | |||||||
| 
 | 
 | ||||||
| std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||||
|     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, |     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | ||||||
|     VkBuffer src_buffer, u32 src_offset) { |     VkBuffer src_buffer, u32 src_offset, bool is_strip) { | ||||||
|     const u32 index_shift = [index_format] { |     const u32 index_shift = [index_format] { | ||||||
|         switch (index_format) { |         switch (index_format) { | ||||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: |         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | ||||||
| @ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|         return 2; |         return 2; | ||||||
|     }(); |     }(); | ||||||
|     const u32 input_size = num_vertices << index_shift; |     const u32 input_size = num_vertices << index_shift; | ||||||
|     const u32 num_tri_vertices = (num_vertices / 4) * 6; |     const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6; | ||||||
| 
 | 
 | ||||||
|     const std::size_t staging_size = num_tri_vertices * sizeof(u32); |     const std::size_t staging_size = num_tri_vertices * sizeof(u32); | ||||||
|     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||||
| @ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|     const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |     const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||||||
| 
 | 
 | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|     scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, |     scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift, | ||||||
|                       index_shift](vk::CommandBuffer cmdbuf) { |                       is_strip](vk::CommandBuffer cmdbuf) { | ||||||
|         static constexpr u32 DISPATCH_SIZE = 1024; |         static constexpr u32 DISPATCH_SIZE = 1024; | ||||||
|         static constexpr VkMemoryBarrier WRITE_BARRIER{ |         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
| @ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, |             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||||
|         }; |         }; | ||||||
|         const std::array<u32, 2> push_constants{base_vertex, index_shift}; |         const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u}; | ||||||
|         const VkDescriptorSet set = descriptor_allocator.Commit(); |         const VkDescriptorSet set = descriptor_allocator.Commit(); | ||||||
|         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); |         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||||
|  | |||||||
| @ -74,7 +74,7 @@ public: | |||||||
| 
 | 
 | ||||||
|     std::pair<VkBuffer, VkDeviceSize> Assemble( |     std::pair<VkBuffer, VkDeviceSize> Assemble( | ||||||
|         Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, |         Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, | ||||||
|         u32 base_vertex, VkBuffer src_buffer, u32 src_offset); |         u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     Scheduler& scheduler; |     Scheduler& scheduler; | ||||||
|  | |||||||
| @ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | |||||||
|         .first_index = is_indexed ? draw_state.index_buffer.first : 0, |         .first_index = is_indexed ? draw_state.index_buffer.first : 0, | ||||||
|         .is_indexed = is_indexed, |         .is_indexed = is_indexed, | ||||||
|     }; |     }; | ||||||
|     if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { |  | ||||||
|     // 6 triangle vertices per quad, base vertex is part of the index
 |     // 6 triangle vertices per quad, base vertex is part of the index
 | ||||||
|         // See BindQuadArrayIndexBuffer for more details
 |     // See BindQuadIndexBuffer for more details
 | ||||||
|  |     if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { | ||||||
|         params.num_vertices = (params.num_vertices / 4) * 6; |         params.num_vertices = (params.num_vertices / 4) * 6; | ||||||
|         params.base_vertex = 0; |         params.base_vertex = 0; | ||||||
|         params.is_indexed = true; |         params.is_indexed = true; | ||||||
|  |     } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { | ||||||
|  |         params.num_vertices = (params.num_vertices - 2) / 2 * 6; | ||||||
|  |         params.base_vertex = 0; | ||||||
|  |         params.is_indexed = true; | ||||||
|     } |     } | ||||||
|     return params; |     return params; | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user