mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-26 04:17:12 +08:00 
			
		
		
		
	Implement Block Linear copies in Kepler Memory.
This commit is contained in:
		
							parent
							
								
									8a099ac99f
								
							
						
					
					
						commit
						bec28d692d
					
				| @ -10,7 +10,6 @@ | |||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/textures/convert.h" |  | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| @ -47,13 +46,12 @@ void KeplerMemory::ProcessExec() { | |||||||
| 
 | 
 | ||||||
| void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||||||
|     const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); |     const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||||||
|     std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); |     std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||||||
|     state.write_offset += sub_copy_size; |     state.write_offset += sub_copy_size; | ||||||
|     if (is_last_call) { |     if (is_last_call) { | ||||||
|         UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); |  | ||||||
|         if (regs.exec.linear != 0) { |  | ||||||
|         const GPUVAddr address{regs.dest.Address()}; |         const GPUVAddr address{regs.dest.Address()}; | ||||||
|         const auto host_ptr = memory_manager.GetPointer(address); |         const auto host_ptr = memory_manager.GetPointer(address); | ||||||
|  |         if (regs.exec.linear != 0) { | ||||||
|             // We have to invalidate the destination region to evict any outdated surfaces from the
 |             // We have to invalidate the destination region to evict any outdated surfaces from the
 | ||||||
|             // cache. We do this before actually writing the new data because the destination
 |             // cache. We do this before actually writing the new data because the destination
 | ||||||
|             // address might contain a dirty surface that will have to be written back to memory.
 |             // address might contain a dirty surface that will have to be written back to memory.
 | ||||||
| @ -61,6 +59,17 @@ void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | |||||||
|             rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); |             rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); | ||||||
|             std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); |             std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); | ||||||
|             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||||
|  |         } else { | ||||||
|  |             UNIMPLEMENTED_IF(regs.dest.z != 0); | ||||||
|  |             UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||||||
|  |             UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||||||
|  |             UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||||||
|  |             const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||||||
|  |                 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||||||
|  |             rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), dst_size); | ||||||
|  |             Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||||||
|  |                                           regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||||||
|  |                                           state.inner_buffer.data(), host_ptr); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | |||||||
| @ -288,6 +288,27 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                    std::size_t copy_size, u8* source_data, u8* swizzle_data) { | ||||||
|  |     const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | ||||||
|  |     std::size_t count = 0; | ||||||
|  |     for (u32 y = dst_y; y < height && count < copy_size; ++y) { | ||||||
|  |         const u32 gob_address_y = | ||||||
|  |             (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | ||||||
|  |             ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | ||||||
|  |         const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||||||
|  |         for (u32 x = dst_x; x < width && count < copy_size; ++x) { | ||||||
|  |             const u32 gob_address = gob_address_y + (x / gob_size_x) * gob_size * block_height; | ||||||
|  |             const u32 swizzled_offset = gob_address + table[x % gob_size_x]; | ||||||
|  |             const u8* source_line = source_data + count; | ||||||
|  |             u8* dest_addr = swizzle_data + swizzled_offset; | ||||||
|  |             count++; | ||||||
|  | 
 | ||||||
|  |             std::memcpy(dest_addr, source_line, 1); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | ||||||
|                               u32 height) { |                               u32 height) { | ||||||
|     std::vector<u8> rgba_data; |     std::vector<u8> rgba_data; | ||||||
|  | |||||||
| @ -51,4 +51,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||||||
|                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | ||||||
|                       u32 offset_x, u32 offset_y); |                       u32 offset_x, u32 offset_y); | ||||||
| 
 | 
 | ||||||
|  | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                    std::size_t copy_size, u8* source_data, u8* swizzle_data); | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user