mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-31 14:56:40 +08:00 
			
		
		
		
	Texture Cache: Initial Implementation of Sparse Textures.
This commit is contained in:
		
							parent
							
								
									eb0e10cff2
								
							
						
					
					
						commit
						38165fb7e3
					
				| @ -127,8 +127,13 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s | ||||
| 
 | ||||
|     //// Lock the new page
 | ||||
|     // TryLockPage(page_entry, size);
 | ||||
|     auto& current_page = page_table[PageEntryIndex(gpu_addr)]; | ||||
|     if (current_page.IsValid() != page_entry.IsValid() || | ||||
|         current_page.ToAddress() != page_entry.ToAddress()) { | ||||
|         rasterizer->ModifyGPUMemory(gpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     page_table[PageEntryIndex(gpu_addr)] = page_entry; | ||||
|     current_page = page_entry; | ||||
| } | ||||
| 
 | ||||
| std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, | ||||
|  | ||||
| @ -87,6 +87,9 @@ public: | ||||
|     /// Unmap memory range
 | ||||
|     virtual void UnmapMemory(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that the mapping of a GPU virtual memory range changed
 | ||||
|     virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     /// and invalidated
 | ||||
|     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | ||||
|  | ||||
| @ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapGPUMemory(addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         gpu_memory.Write<u32>(addr, value); | ||||
|  | ||||
| @ -80,6 +80,7 @@ public: | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(GPUVAddr addr, u64 size) override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void ReleaseFences() override; | ||||
|  | ||||
| @ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapGPUMemory(addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         gpu_memory.Write<u32>(addr, value); | ||||
|  | ||||
| @ -72,6 +72,7 @@ public: | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(GPUVAddr addr, u64 size) override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void ReleaseFences() override; | ||||
|  | ||||
| @ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) | ||||
|     : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} | ||||
| 
 | ||||
| std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { | ||||
|     if (other_addr < gpu_addr) { | ||||
|         // Subresource address can't be lower than the base
 | ||||
|  | ||||
| @ -57,6 +57,12 @@ struct ImageBase { | ||||
|         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { | ||||
|         const VAddr overlap_end = overlap_gpu_addr + overlap_size; | ||||
|         const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; | ||||
|         return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     void CheckBadOverlapState(); | ||||
|     void CheckAliasState(); | ||||
| 
 | ||||
| @ -84,6 +90,8 @@ struct ImageBase { | ||||
| 
 | ||||
|     std::vector<AliasedImage> aliased_images; | ||||
|     std::vector<ImageId> overlapping_images; | ||||
|     ImageMapId map_view_id{}; | ||||
|     bool is_sparse{}; | ||||
| }; | ||||
| 
 | ||||
| struct ImageAllocBase { | ||||
|  | ||||
| @ -152,6 +152,9 @@ public: | ||||
|     /// Remove images in a region
 | ||||
|     void UnmapMemory(VAddr cpu_addr, size_t size); | ||||
| 
 | ||||
|     /// Remove images in a GPU virtual address region
 | ||||
|     void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); | ||||
| 
 | ||||
|     /// Blit an image with the given parameters
 | ||||
|     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                    const Tegra::Engines::Fermi2D::Surface& src, | ||||
| @ -190,7 +193,22 @@ public: | ||||
| private: | ||||
|     /// Iterate over all page indices in a range
 | ||||
|     template <typename Func> | ||||
|     static void ForEachPage(VAddr addr, size_t size, Func&& func) { | ||||
|     static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | ||||
|         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | ||||
|         const u64 page_end = (addr + size - 1) >> PAGE_BITS; | ||||
|         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | ||||
|             if constexpr (RETURNS_BOOL) { | ||||
|                 if (func(page)) { | ||||
|                     break; | ||||
|                 } | ||||
|             } else { | ||||
|                 func(page); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { | ||||
|         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | ||||
|         const u64 page_end = (addr + size - 1) >> PAGE_BITS; | ||||
|         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | ||||
| @ -269,6 +287,13 @@ private: | ||||
|     template <typename Func> | ||||
|     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); | ||||
| 
 | ||||
|     /// Iterates over the mapped mip level segments of an image calling func
 | ||||
|     template <typename Func> | ||||
|     void ForEachSparseSegment(ImageBase& image, Func&& func); | ||||
| 
 | ||||
|     /// Find or create an image view in the given image with the passed parameters
 | ||||
|     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); | ||||
| 
 | ||||
| @ -340,7 +365,8 @@ private: | ||||
|     std::unordered_map<TSCEntry, SamplerId> samplers; | ||||
|     std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||||
| 
 | ||||
|     std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | ||||
|     std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; | ||||
|     std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | ||||
| 
 | ||||
|     bool has_deleted_images = false; | ||||
|     u64 total_used_memory = 0; | ||||
| @ -349,6 +375,7 @@ private: | ||||
|     u64 critical_memory; | ||||
| 
 | ||||
|     SlotVector<Image> slot_images; | ||||
|     SlotVector<ImageMapView> slot_map_views; | ||||
|     SlotVector<ImageView> slot_image_views; | ||||
|     SlotVector<ImageAlloc> slot_image_allocs; | ||||
|     SlotVector<Sampler> slot_samplers; | ||||
| @ -702,6 +729,21 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | ||||
|     std::vector<ImageId> deleted_images; | ||||
|     ForEachImageInRegionGPU(gpu_addr, size, | ||||
|                             [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||||
|     for (const ImageId id : deleted_images) { | ||||
|         Image& image = slot_images[id]; | ||||
|         if (True(image.flags & ImageFlagBits::Tracked)) { | ||||
|             UntrackImage(image); | ||||
|         } | ||||
|         UnregisterImage(id); | ||||
|         DeleteImage(id); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                                 const Tegra::Engines::Fermi2D::Surface& src, | ||||
| @ -833,9 +875,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad | ||||
|     if (it == page_table.end()) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     const auto& image_ids = it->second; | ||||
|     for (const ImageId image_id : image_ids) { | ||||
|         const ImageBase& image = slot_images[image_id]; | ||||
|     const auto& image_map_ids = it->second; | ||||
|     for (const ImageMapId map_id : image_map_ids) { | ||||
|         const ImageMapView& map = slot_map_views[map_id]; | ||||
|         const ImageBase& image = slot_images[map.image_id]; | ||||
|         if (image.cpu_addr != cpu_addr) { | ||||
|             continue; | ||||
|         } | ||||
| @ -958,7 +1001,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | ||||
| 
 | ||||
| template <class P> | ||||
| ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | ||||
|     if (!IsValidAddress(gpu_memory, config)) { | ||||
|     if (!IsValidEntry(gpu_memory, config)) { | ||||
|         return NULL_IMAGE_VIEW_ID; | ||||
|     } | ||||
|     const auto [pair, is_new] = image_views.try_emplace(config); | ||||
| @ -1026,7 +1069,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||
|         } | ||||
|         return false; | ||||
|     }; | ||||
|     ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||||
|     ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||||
|     return image_id; | ||||
| } | ||||
| 
 | ||||
| @ -1056,7 +1099,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | ||||
|     std::vector<ImageId> left_aliased_ids; | ||||
|     std::vector<ImageId> right_aliased_ids; | ||||
|     std::vector<ImageId> bad_overlap_ids; | ||||
|     ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | ||||
|     ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | ||||
|         if (info.type == ImageType::Linear) { | ||||
|             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | ||||
|                 // Alias linear images with the same pitch
 | ||||
| @ -1091,6 +1134,24 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | ||||
|     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||||
|     Image& new_image = slot_images[new_image_id]; | ||||
| 
 | ||||
|     new_image.is_sparse = false; | ||||
|     if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) { | ||||
|         const LevelArray offsets = CalculateMipLevelOffsets(new_image.info); | ||||
|         size_t level; | ||||
|         const size_t levels = static_cast<size_t>(new_image.info.resources.levels); | ||||
|         VAddr n_cpu_addr = new_image.cpu_addr; | ||||
|         GPUVAddr n_gpu_addr = new_image.gpu_addr; | ||||
|         for (level = 0; level < levels; level++) { | ||||
|             n_gpu_addr += offsets[level]; | ||||
|             n_cpu_addr += offsets[level]; | ||||
|             std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr); | ||||
|             if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) { | ||||
|                 new_image.is_sparse = true; | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // TODO: Only upload what we need
 | ||||
|     RefreshContents(new_image); | ||||
| 
 | ||||
| @ -1239,7 +1300,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f | ||||
|     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||
|     boost::container::small_vector<ImageId, 32> images; | ||||
|     ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { | ||||
|     boost::container::small_vector<ImageMapId, 32> maps; | ||||
|     ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { | ||||
|         const auto it = page_table.find(page); | ||||
|         if (it == page_table.end()) { | ||||
|             if constexpr (BOOL_BREAK) { | ||||
| @ -1248,12 +1310,63 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         for (const ImageMapId map_id : it->second) { | ||||
|             ImageMapView& map = slot_map_views[map_id]; | ||||
|             if (map.picked) { | ||||
|                 continue; | ||||
|             } | ||||
|             if (!map.Overlaps(cpu_addr, size)) { | ||||
|                 continue; | ||||
|             } | ||||
|             map.picked = true; | ||||
|             maps.push_back(map_id); | ||||
|             Image& image = slot_images[map.image_id]; | ||||
|             if (True(image.flags & ImageFlagBits::Picked)) { | ||||
|                 continue; | ||||
|             } | ||||
|             image.flags |= ImageFlagBits::Picked; | ||||
|             images.push_back(map.image_id); | ||||
|             if constexpr (BOOL_BREAK) { | ||||
|                 if (func(map.image_id, image)) { | ||||
|                     return true; | ||||
|                 } | ||||
|             } else { | ||||
|                 func(map.image_id, image); | ||||
|             } | ||||
|         } | ||||
|         if constexpr (BOOL_BREAK) { | ||||
|             return false; | ||||
|         } | ||||
|     }); | ||||
|     for (const ImageId image_id : images) { | ||||
|         slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||||
|     } | ||||
|     for (const ImageMapId map_id : maps) { | ||||
|         slot_map_views[map_id].picked = false; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| template <typename Func> | ||||
| void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||||
|     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||
|     boost::container::small_vector<ImageId, 8> images; | ||||
|     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||||
|         const auto it = gpu_page_table.find(page); | ||||
|         if (it == gpu_page_table.end()) { | ||||
|             if constexpr (BOOL_BREAK) { | ||||
|                 return false; | ||||
|             } else { | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         for (const ImageId image_id : it->second) { | ||||
|             Image& image = slot_images[image_id]; | ||||
|             if (True(image.flags & ImageFlagBits::Picked)) { | ||||
|                 continue; | ||||
|             } | ||||
|             if (!image.Overlaps(cpu_addr, size)) { | ||||
|             if (!image.OverlapsGPU(gpu_addr, size)) { | ||||
|                 continue; | ||||
|             } | ||||
|             image.flags |= ImageFlagBits::Picked; | ||||
| @ -1275,6 +1388,30 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| template <typename Func> | ||||
| void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||||
|     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||
|     GPUVAddr gpu_addr = image.gpu_addr; | ||||
|     const size_t levels = image.info.resources.levels; | ||||
|     const auto mipmap_sizes = CalculateMipLevelSizes(image.info); | ||||
|     for (size_t level = 0; level < levels; level++) { | ||||
|         const size_t size = mipmap_sizes[level]; | ||||
|         std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||||
|         if (cpu_addr && *cpu_addr != 0) { | ||||
|             if constexpr (BOOL_BREAK) { | ||||
|                 if (func(gpu_addr, *cpu_addr, size)) { | ||||
|                     return true; | ||||
|                 } | ||||
|             } else { | ||||
|                 func(gpu_addr, *cpu_addr, size); | ||||
|             } | ||||
|         } | ||||
|         gpu_addr += size; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { | ||||
|     Image& image = slot_images[image_id]; | ||||
| @ -1292,8 +1429,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | ||||
|     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||||
|                "Trying to register an already registered image"); | ||||
|     image.flags |= ImageFlagBits::Registered; | ||||
|     ForEachPage(image.cpu_addr, image.guest_size_bytes, | ||||
|                 [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | ||||
|     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||||
|     if ((IsPixelFormatASTC(image.info.format) && | ||||
|          True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||||
| @ -1301,6 +1436,21 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | ||||
|         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||||
|     } | ||||
|     total_used_memory += Common::AlignUp(tentative_size, 1024); | ||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||||
|                    [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||||
|     if (!image.is_sparse) { | ||||
|         auto map_id = | ||||
|             slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||||
|         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||||
|                        [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||||
|         image.map_view_id = map_id; | ||||
|         return; | ||||
|     } | ||||
|     ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||||
|         auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||||
|         ForEachCPUPage(cpu_addr, size, | ||||
|                        [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| @ -1317,9 +1467,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||||
|         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||||
|     } | ||||
|     total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||||
|     ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||||
|         const auto page_it = page_table.find(page); | ||||
|         if (page_it == page_table.end()) { | ||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||||
|         const auto page_it = gpu_page_table.find(page); | ||||
|         if (page_it == gpu_page_table.end()) { | ||||
|             UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||||
|             return; | ||||
|         } | ||||
| @ -1331,20 +1481,84 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||||
|         } | ||||
|         image_ids.erase(vector_it); | ||||
|     }); | ||||
|     if (!image.is_sparse) { | ||||
|         const auto map_id = image.map_view_id; | ||||
|         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||||
|             const auto page_it = page_table.find(page); | ||||
|             if (page_it == page_table.end()) { | ||||
|                 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||||
|                 return; | ||||
|             } | ||||
|             std::vector<ImageMapId>& image_map_ids = page_it->second; | ||||
|             const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||||
|             if (vector_it == image_map_ids.end()) { | ||||
|                 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||||
|                                 page << PAGE_BITS); | ||||
|                 return; | ||||
|             } | ||||
|             image_map_ids.erase(vector_it); | ||||
|         }); | ||||
|         slot_map_views.erase(map_id); | ||||
|         return; | ||||
|     } | ||||
|     boost::container::small_vector<ImageMapId, 8> maps_to_delete; | ||||
|     ForEachSparseSegment( | ||||
|         image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, | ||||
|                                                  size_t size) { | ||||
|             ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) { | ||||
|                 const auto page_it = page_table.find(page); | ||||
|                 if (page_it == page_table.end()) { | ||||
|                     UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||||
|                     return; | ||||
|                 } | ||||
|                 std::vector<ImageMapId>& image_map_ids = page_it->second; | ||||
|                 auto vector_it = image_map_ids.begin(); | ||||
|                 while (vector_it != image_map_ids.end()) { | ||||
|                     ImageMapView& map = slot_map_views[*vector_it]; | ||||
|                     if (map.image_id != image_id) { | ||||
|                         vector_it++; | ||||
|                         continue; | ||||
|                     } | ||||
|                     if (!map.picked) { | ||||
|                         maps_to_delete.push_back(*vector_it); | ||||
|                         map.picked = true; | ||||
|                     } | ||||
|                     vector_it = image_map_ids.erase(vector_it); | ||||
|                 } | ||||
|             }); | ||||
|         }); | ||||
| 
 | ||||
|     for (const ImageMapId map_id : maps_to_delete) { | ||||
|         slot_map_views.erase(map_id); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::TrackImage(ImageBase& image) { | ||||
|     ASSERT(False(image.flags & ImageFlagBits::Tracked)); | ||||
|     image.flags |= ImageFlagBits::Tracked; | ||||
|     rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||||
|     if (!image.is_sparse) { | ||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||||
|         return; | ||||
|     } | ||||
|     ForEachSparseSegment(image, | ||||
|                          [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||||
|                              rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|                          }); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::UntrackImage(ImageBase& image) { | ||||
|     ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||||
|     image.flags &= ~ImageFlagBits::Tracked; | ||||
|     rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||||
|     if (!image.is_sparse) { | ||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||||
|         return; | ||||
|     } | ||||
|     ForEachSparseSegment(image, | ||||
|                          [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||||
|                              rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||
|                          }); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
|  | ||||
| @ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; | ||||
| constexpr SlotId CORRUPT_ID{0xfffffffe}; | ||||
| 
 | ||||
| using ImageId = SlotId; | ||||
| using ImageMapId = SlotId; | ||||
| using ImageViewId = SlotId; | ||||
| using ImageAllocId = SlotId; | ||||
| using SamplerId = SlotId; | ||||
|  | ||||
| @ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { | ||||
|     return offsets; | ||||
| } | ||||
| 
 | ||||
| LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | ||||
|     const u32 num_levels = info.resources.levels; | ||||
|     const LevelInfo level_info = MakeLevelInfo(info); | ||||
|     LevelArray sizes{}; | ||||
|     for (u32 level = 0; level < num_levels; ++level) { | ||||
|         sizes[level] = CalculateLevelSize(level_info, level); | ||||
|     } | ||||
|     return sizes; | ||||
| } | ||||
| 
 | ||||
| std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | ||||
|     ASSERT(info.type == ImageType::e3D); | ||||
|     std::vector<u32> offsets; | ||||
| @ -776,14 +786,37 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | ||||
|     return copies; | ||||
| } | ||||
| 
 | ||||
| bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||||
|     if (config.Address() == 0) { | ||||
| bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) { | ||||
|     if (gpu_addr == 0) { | ||||
|         return false; | ||||
|     } | ||||
|     if (config.Address() > (u64(1) << 48)) { | ||||
|     if (gpu_addr > (u64(1) << 48)) { | ||||
|         return false; | ||||
|     } | ||||
|     return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); | ||||
|     const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||||
|     return cpu_addr.has_value() && *cpu_addr != 0; | ||||
| } | ||||
| 
 | ||||
| bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||||
|     const GPUVAddr gpu_addr = config.Address(); | ||||
|     if (IsValidAddress(gpu_memory, gpu_addr)) { | ||||
|         return true; | ||||
|     } | ||||
|     if (!config.IsBlockLinear()) { | ||||
|         return false; | ||||
|     } | ||||
|     const size_t levels = config.max_mip_level + 1; | ||||
|     if (levels <= 1) { | ||||
|         return false; | ||||
|     } | ||||
|     const ImageInfo info{config}; | ||||
|     const LevelArray offsets = CalculateMipLevelOffsets(info); | ||||
|     for (size_t level = 1; level < levels; level++) { | ||||
|         if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||||
|  | ||||
| @ -40,6 +40,8 @@ struct OverlapResult { | ||||
| 
 | ||||
| [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; | ||||
| 
 | ||||
| [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | ||||
| 
 | ||||
| [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | ||||
| 
 | ||||
| [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | ||||
| @ -55,7 +57,9 @@ struct OverlapResult { | ||||
|                                                            const ImageInfo& src, | ||||
|                                                            SubresourceBase base); | ||||
| 
 | ||||
| [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | ||||
| [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr); | ||||
| 
 | ||||
| [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | ||||
| 
 | ||||
| [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | ||||
|                                                           GPUVAddr gpu_addr, const ImageInfo& info, | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user