Mirror of https://git.suyu.dev/suyu/suyu.git (synced 2025-10-30 22:36:46 +08:00)
			
		
		
		
	Merge pull request #11544 from Kelebek1/reduce_stream_buffer_renderdoc
Allow GPUs without rebar to open multiple RenderDoc captures
This commit is contained in:
		
						commit
						15a5bdd979
					
				| @ -24,25 +24,38 @@ using namespace Common::Literals; | |||||||
| 
 | 
 | ||||||
| // Maximum potential alignment of a Vulkan buffer
 | // Maximum potential alignment of a Vulkan buffer
 | ||||||
| constexpr VkDeviceSize MAX_ALIGNMENT = 256; | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||||||
| // Maximum size to put elements in the stream buffer
 |  | ||||||
| constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; |  | ||||||
| // Stream buffer size in bytes
 | // Stream buffer size in bytes
 | ||||||
| constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; | ||||||
| constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; |  | ||||||
| 
 | 
 | ||||||
| size_t Region(size_t iterator) noexcept { | size_t GetStreamBufferSize(const Device& device) { | ||||||
|     return iterator / REGION_SIZE; |     VkDeviceSize size{0}; | ||||||
|  |     if (device.HasDebuggingToolAttached()) { | ||||||
|  |         ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { | ||||||
|  |             size = std::max(size, heap.size); | ||||||
|  |         }); | ||||||
|  |         // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
 | ||||||
|  |         // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
 | ||||||
|  |         // as the heap will be much larger.
 | ||||||
|  |         if (size <= 256_MiB) { | ||||||
|  |             size = size * 40 / 100; | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         size = MAX_STREAM_BUFFER_SIZE; | ||||||
|  |     } | ||||||
|  |     return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); | ||||||
| } | } | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | ||||||
|                                      Scheduler& scheduler_) |                                      Scheduler& scheduler_) | ||||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||||
|  |       stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / | ||||||
|  |                                                                    StagingBufferPool::NUM_SYNCS} { | ||||||
|     VkBufferCreateInfo stream_ci = { |     VkBufferCreateInfo stream_ci = { | ||||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
|         .flags = 0, |         .flags = 0, | ||||||
|         .size = STREAM_BUFFER_SIZE, |         .size = stream_buffer_size, | ||||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||||||
|                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||||
| @ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||||||
| StagingBufferPool::~StagingBufferPool() = default; | StagingBufferPool::~StagingBufferPool() = default; | ||||||
| 
 | 
 | ||||||
| StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { | ||||||
|     if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { |     if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { | ||||||
|         return GetStreamBuffer(size); |         return GetStreamBuffer(size); | ||||||
|     } |     } | ||||||
|     return GetStagingBuffer(size, usage, deferred); |     return GetStagingBuffer(size, usage, deferred); | ||||||
| @ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | |||||||
|     used_iterator = iterator; |     used_iterator = iterator; | ||||||
|     free_iterator = std::max(free_iterator, iterator + size); |     free_iterator = std::max(free_iterator, iterator + size); | ||||||
| 
 | 
 | ||||||
|     if (iterator + size >= STREAM_BUFFER_SIZE) { |     if (iterator + size >= stream_buffer_size) { | ||||||
|         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, |         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||||||
|                   current_tick); |                   current_tick); | ||||||
|         used_iterator = 0; |         used_iterator = 0; | ||||||
|  | |||||||
| @ -90,6 +90,9 @@ private: | |||||||
|     void ReleaseCache(MemoryUsage usage); |     void ReleaseCache(MemoryUsage usage); | ||||||
| 
 | 
 | ||||||
|     void ReleaseLevel(StagingBuffersCache& cache, size_t log2); |     void ReleaseLevel(StagingBuffersCache& cache, size_t log2); | ||||||
|  |     size_t Region(size_t iter) const noexcept { | ||||||
|  |         return iter / region_size; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     MemoryAllocator& memory_allocator; |     MemoryAllocator& memory_allocator; | ||||||
| @ -97,6 +100,8 @@ private: | |||||||
| 
 | 
 | ||||||
|     vk::Buffer stream_buffer; |     vk::Buffer stream_buffer; | ||||||
|     std::span<u8> stream_pointer; |     std::span<u8> stream_pointer; | ||||||
|  |     VkDeviceSize stream_buffer_size; | ||||||
|  |     VkDeviceSize region_size; | ||||||
| 
 | 
 | ||||||
|     size_t iterator = 0; |     size_t iterator = 0; | ||||||
|     size_t used_iterator = 0; |     size_t used_iterator = 0; | ||||||
|  | |||||||
| @ -9,6 +9,7 @@ | |||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "common/literals.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/polyfill_ranges.h" | #include "common/polyfill_ranges.h" | ||||||
| #include "video_core/vulkan_common/vma.h" | #include "video_core/vulkan_common/vma.h" | ||||||
| @ -69,8 +70,7 @@ struct Range { | |||||||
|     case MemoryUsage::Download: |     case MemoryUsage::Download: | ||||||
|         return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; |         return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; | ||||||
|     case MemoryUsage::DeviceLocal: |     case MemoryUsage::DeviceLocal: | ||||||
|         return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | |         return {}; | ||||||
|                VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; |  | ||||||
|     } |     } | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
| @ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) | |||||||
|     : device{device_}, allocator{device.GetAllocator()}, |     : device{device_}, allocator{device.GetAllocator()}, | ||||||
|       properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, |       properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, | ||||||
|       buffer_image_granularity{ |       buffer_image_granularity{ | ||||||
|           device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} |           device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { | ||||||
|  |     // GPUs not supporting rebar may only have a region with less than 256MB host visible/device
 | ||||||
|  |     // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
 | ||||||
|  |     // the heap running out of memory. With RenderDoc attached and only a small host/device region,
 | ||||||
|  |     // only allow the stream buffer in this memory heap.
 | ||||||
|  |     if (device.HasDebuggingToolAttached()) { | ||||||
|  |         using namespace Common::Literals; | ||||||
|  |         ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { | ||||||
|  |             if (heap.size <= 256_MiB) { | ||||||
|  |                 valid_memory_types &= ~(1u << index); | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| MemoryAllocator::~MemoryAllocator() = default; | MemoryAllocator::~MemoryAllocator() = default; | ||||||
| 
 | 
 | ||||||
| @ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa | |||||||
|         .usage = MemoryUsageVma(usage), |         .usage = MemoryUsageVma(usage), | ||||||
|         .requiredFlags = 0, |         .requiredFlags = 0, | ||||||
|         .preferredFlags = MemoryUsagePreferedVmaFlags(usage), |         .preferredFlags = MemoryUsagePreferedVmaFlags(usage), | ||||||
|         .memoryTypeBits = 0, |         .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, | ||||||
|         .pool = VK_NULL_HANDLE, |         .pool = VK_NULL_HANDLE, | ||||||
|         .pUserData = nullptr, |         .pUserData = nullptr, | ||||||
|         .priority = 0.f, |         .priority = 0.f, | ||||||
|  | |||||||
| @ -7,6 +7,7 @@ | |||||||
| #include <span> | #include <span> | ||||||
| #include <vector> | #include <vector> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
| VK_DEFINE_HANDLE(VmaAllocator) | VK_DEFINE_HANDLE(VmaAllocator) | ||||||
| @ -26,6 +27,18 @@ enum class MemoryUsage { | |||||||
|     Stream,      ///< Requests device local host visible buffer, falling back host memory.
 |     Stream,      ///< Requests device local host visible buffer, falling back host memory.
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | template <typename F> | ||||||
|  | void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { | ||||||
|  |     auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; | ||||||
|  |     for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { | ||||||
|  |         auto& memory_type = memory_props.memoryTypes[i]; | ||||||
|  |         if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && | ||||||
|  |             (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { | ||||||
|  |             f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /// Ownership handle of a memory commitment.
 | /// Ownership handle of a memory commitment.
 | ||||||
| /// Points to a subregion of a memory allocation.
 | /// Points to a subregion of a memory allocation.
 | ||||||
| class MemoryCommit { | class MemoryCommit { | ||||||
| @ -124,6 +137,7 @@ private: | |||||||
|     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
 |     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
 | ||||||
|     VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
 |     VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
 | ||||||
|                                            // and optimal images
 |                                            // and optimal images
 | ||||||
|  |     u32 valid_memory_types{~0u}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Vulkan
 | } // namespace Vulkan
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user