mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-11-04 20:44:02 +08:00 
			
		
		
		
	Merge pull request #6465 from FernandoS27/sex-on-the-beach
GPU: Implement a garbage collector for GPU Caches (project Reaper+)
This commit is contained in:
		
						commit
						17fff10e06
					
				@ -24,6 +24,7 @@ enum : u64 {
 | 
			
		||||
    Size_128_MB = 128ULL * Size_1_MB,
 | 
			
		||||
    Size_448_MB = 448ULL * Size_1_MB,
 | 
			
		||||
    Size_507_MB = 507ULL * Size_1_MB,
 | 
			
		||||
    Size_512_MB = 512ULL * Size_1_MB,
 | 
			
		||||
    Size_562_MB = 562ULL * Size_1_MB,
 | 
			
		||||
    Size_1554_MB = 1554ULL * Size_1_MB,
 | 
			
		||||
    Size_2048_MB = 2048ULL * Size_1_MB,
 | 
			
		||||
 | 
			
		||||
@ -59,6 +59,7 @@ void LogSettings() {
 | 
			
		||||
    log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
 | 
			
		||||
    log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
 | 
			
		||||
    log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
 | 
			
		||||
    log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
 | 
			
		||||
    log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
 | 
			
		||||
    log_setting("Audio_OutputEngine", values.sink_id);
 | 
			
		||||
    log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
 | 
			
		||||
@ -142,6 +143,7 @@ void RestoreGlobalState(bool is_powered_on) {
 | 
			
		||||
    values.use_assembly_shaders.SetGlobal(true);
 | 
			
		||||
    values.use_asynchronous_shaders.SetGlobal(true);
 | 
			
		||||
    values.use_fast_gpu_time.SetGlobal(true);
 | 
			
		||||
    values.use_caches_gc.SetGlobal(true);
 | 
			
		||||
    values.bg_red.SetGlobal(true);
 | 
			
		||||
    values.bg_green.SetGlobal(true);
 | 
			
		||||
    values.bg_blue.SetGlobal(true);
 | 
			
		||||
 | 
			
		||||
@ -154,6 +154,7 @@ struct Values {
 | 
			
		||||
    Setting<bool> use_assembly_shaders;
 | 
			
		||||
    Setting<bool> use_asynchronous_shaders;
 | 
			
		||||
    Setting<bool> use_fast_gpu_time;
 | 
			
		||||
    Setting<bool> use_caches_gc;
 | 
			
		||||
 | 
			
		||||
    Setting<float> bg_red;
 | 
			
		||||
    Setting<float> bg_green;
 | 
			
		||||
 | 
			
		||||
@ -256,6 +256,16 @@ public:
 | 
			
		||||
        stream_score += score;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Sets the new frame tick
 | 
			
		||||
    void SetFrameTick(u64 new_frame_tick) noexcept {
 | 
			
		||||
        frame_tick = new_frame_tick;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the new frame tick
 | 
			
		||||
    [[nodiscard]] u64 FrameTick() const noexcept {
 | 
			
		||||
        return frame_tick;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the likeliness of this being a stream buffer
 | 
			
		||||
    [[nodiscard]] int StreamScore() const noexcept {
 | 
			
		||||
        return stream_score;
 | 
			
		||||
@ -586,6 +596,7 @@ private:
 | 
			
		||||
    RasterizerInterface* rasterizer = nullptr;
 | 
			
		||||
    VAddr cpu_addr = 0;
 | 
			
		||||
    Words words;
 | 
			
		||||
    u64 frame_tick = 0;
 | 
			
		||||
    BufferFlagBits flags{};
 | 
			
		||||
    int stream_score = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -16,6 +16,7 @@
 | 
			
		||||
 | 
			
		||||
#include <boost/container/small_vector.hpp>
 | 
			
		||||
 | 
			
		||||
#include "common/common_sizes.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/div_ceil.h"
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
@ -65,6 +66,9 @@ class BufferCache {
 | 
			
		||||
 | 
			
		||||
    static constexpr BufferId NULL_BUFFER_ID{0};
 | 
			
		||||
 | 
			
		||||
    static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB;
 | 
			
		||||
    static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB;
 | 
			
		||||
 | 
			
		||||
    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
			
		||||
 | 
			
		||||
    using Runtime = typename P::Runtime;
 | 
			
		||||
@ -102,6 +106,8 @@ public:
 | 
			
		||||
 | 
			
		||||
    void TickFrame();
 | 
			
		||||
 | 
			
		||||
    void RunGarbageCollector();
 | 
			
		||||
 | 
			
		||||
    void WriteMemory(VAddr cpu_addr, u64 size);
 | 
			
		||||
 | 
			
		||||
    void CachedWriteMemory(VAddr cpu_addr, u64 size);
 | 
			
		||||
@ -243,6 +249,8 @@ private:
 | 
			
		||||
    template <bool insert>
 | 
			
		||||
    void ChangeRegister(BufferId buffer_id);
 | 
			
		||||
 | 
			
		||||
    void TouchBuffer(Buffer& buffer) const noexcept;
 | 
			
		||||
 | 
			
		||||
    bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
 | 
			
		||||
 | 
			
		||||
    bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
 | 
			
		||||
@ -255,6 +263,10 @@ private:
 | 
			
		||||
 | 
			
		||||
    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
 | 
			
		||||
 | 
			
		||||
    void DownloadBufferMemory(Buffer& buffer_id);
 | 
			
		||||
 | 
			
		||||
    void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
 | 
			
		||||
 | 
			
		||||
    void DeleteBuffer(BufferId buffer_id);
 | 
			
		||||
 | 
			
		||||
    void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
 | 
			
		||||
@ -319,6 +331,10 @@ private:
 | 
			
		||||
    size_t immediate_buffer_capacity = 0;
 | 
			
		||||
    std::unique_ptr<u8[]> immediate_buffer_alloc;
 | 
			
		||||
 | 
			
		||||
    typename SlotVector<Buffer>::Iterator deletion_iterator;
 | 
			
		||||
    u64 frame_tick = 0;
 | 
			
		||||
    u64 total_used_memory = 0;
 | 
			
		||||
 | 
			
		||||
    std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -332,6 +348,28 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
 | 
			
		||||
      gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
 | 
			
		||||
    // Ensure the first slot is used for the null buffer
 | 
			
		||||
    void(slot_buffers.insert(runtime, NullBufferParams{}));
 | 
			
		||||
    deletion_iterator = slot_buffers.end();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::RunGarbageCollector() {
 | 
			
		||||
    const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
 | 
			
		||||
    const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
 | 
			
		||||
    int num_iterations = aggressive_gc ? 64 : 32;
 | 
			
		||||
    for (; num_iterations > 0; --num_iterations) {
 | 
			
		||||
        if (deletion_iterator == slot_buffers.end()) {
 | 
			
		||||
            deletion_iterator = slot_buffers.begin();
 | 
			
		||||
        }
 | 
			
		||||
        ++deletion_iterator;
 | 
			
		||||
        if (deletion_iterator == slot_buffers.end()) {
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        const auto [buffer_id, buffer] = *deletion_iterator;
 | 
			
		||||
        if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
 | 
			
		||||
            DownloadBufferMemory(*buffer);
 | 
			
		||||
            DeleteBuffer(buffer_id);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
@ -349,6 +387,10 @@ void BufferCache<P>::TickFrame() {
 | 
			
		||||
    const bool skip_preferred = hits * 256 < shots * 251;
 | 
			
		||||
    uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
 | 
			
		||||
 | 
			
		||||
    if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
 | 
			
		||||
        RunGarbageCollector();
 | 
			
		||||
    }
 | 
			
		||||
    ++frame_tick;
 | 
			
		||||
    delayed_destruction_ring.Tick();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -371,50 +413,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
 | 
			
		||||
    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
 | 
			
		||||
        boost::container::small_vector<BufferCopy, 1> copies;
 | 
			
		||||
        u64 total_size_bytes = 0;
 | 
			
		||||
        u64 largest_copy = 0;
 | 
			
		||||
        buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
 | 
			
		||||
            copies.push_back(BufferCopy{
 | 
			
		||||
                .src_offset = range_offset,
 | 
			
		||||
                .dst_offset = total_size_bytes,
 | 
			
		||||
                .size = range_size,
 | 
			
		||||
            });
 | 
			
		||||
            total_size_bytes += range_size;
 | 
			
		||||
            largest_copy = std::max(largest_copy, range_size);
 | 
			
		||||
        });
 | 
			
		||||
        if (total_size_bytes == 0) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        MICROPROFILE_SCOPE(GPU_DownloadMemory);
 | 
			
		||||
 | 
			
		||||
        if constexpr (USE_MEMORY_MAPS) {
 | 
			
		||||
            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
 | 
			
		||||
            const u8* const mapped_memory = download_staging.mapped_span.data();
 | 
			
		||||
            const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
 | 
			
		||||
            for (BufferCopy& copy : copies) {
 | 
			
		||||
                // Modify copies to have the staging offset in mind
 | 
			
		||||
                copy.dst_offset += download_staging.offset;
 | 
			
		||||
            }
 | 
			
		||||
            runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
 | 
			
		||||
            runtime.Finish();
 | 
			
		||||
            for (const BufferCopy& copy : copies) {
 | 
			
		||||
                const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
 | 
			
		||||
                // Undo the modified offset
 | 
			
		||||
                const u64 dst_offset = copy.dst_offset - download_staging.offset;
 | 
			
		||||
                const u8* copy_mapped_memory = mapped_memory + dst_offset;
 | 
			
		||||
                cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
 | 
			
		||||
            for (const BufferCopy& copy : copies) {
 | 
			
		||||
                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
 | 
			
		||||
                const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
 | 
			
		||||
                cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    });
 | 
			
		||||
    ForEachBufferInRange(cpu_addr, size,
 | 
			
		||||
                         [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
@ -640,6 +640,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::BindHostIndexBuffer() {
 | 
			
		||||
    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
 | 
			
		||||
    TouchBuffer(buffer);
 | 
			
		||||
    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
 | 
			
		||||
    const u32 size = index_buffer.size;
 | 
			
		||||
    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
 | 
			
		||||
@ -658,6 +659,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
 | 
			
		||||
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
 | 
			
		||||
        const Binding& binding = vertex_buffers[index];
 | 
			
		||||
        Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
        TouchBuffer(buffer);
 | 
			
		||||
        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
 | 
			
		||||
        if (!flags[Dirty::VertexBuffer0 + index]) {
 | 
			
		||||
            continue;
 | 
			
		||||
@ -693,6 +695,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
 | 
			
		||||
    const VAddr cpu_addr = binding.cpu_addr;
 | 
			
		||||
    const u32 size = binding.size;
 | 
			
		||||
    Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
    TouchBuffer(buffer);
 | 
			
		||||
    const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
 | 
			
		||||
                                 size <= uniform_buffer_skip_cache_size &&
 | 
			
		||||
                                 !buffer.IsRegionGpuModified(cpu_addr, size);
 | 
			
		||||
@ -744,6 +747,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
 | 
			
		||||
    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
 | 
			
		||||
        const Binding& binding = storage_buffers[stage][index];
 | 
			
		||||
        Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
        TouchBuffer(buffer);
 | 
			
		||||
        const u32 size = binding.size;
 | 
			
		||||
        SynchronizeBuffer(buffer, binding.cpu_addr, size);
 | 
			
		||||
 | 
			
		||||
@ -766,6 +770,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
 | 
			
		||||
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
 | 
			
		||||
        const Binding& binding = transform_feedback_buffers[index];
 | 
			
		||||
        Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
        TouchBuffer(buffer);
 | 
			
		||||
        const u32 size = binding.size;
 | 
			
		||||
        SynchronizeBuffer(buffer, binding.cpu_addr, size);
 | 
			
		||||
 | 
			
		||||
@ -784,6 +789,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
 | 
			
		||||
    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
 | 
			
		||||
        const Binding& binding = compute_uniform_buffers[index];
 | 
			
		||||
        Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
        TouchBuffer(buffer);
 | 
			
		||||
        const u32 size = binding.size;
 | 
			
		||||
        SynchronizeBuffer(buffer, binding.cpu_addr, size);
 | 
			
		||||
 | 
			
		||||
@ -803,6 +809,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
 | 
			
		||||
    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
 | 
			
		||||
        const Binding& binding = compute_storage_buffers[index];
 | 
			
		||||
        Buffer& buffer = slot_buffers[binding.buffer_id];
 | 
			
		||||
        TouchBuffer(buffer);
 | 
			
		||||
        const u32 size = binding.size;
 | 
			
		||||
        SynchronizeBuffer(buffer, binding.cpu_addr, size);
 | 
			
		||||
 | 
			
		||||
@ -1101,6 +1108,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
 | 
			
		||||
    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
 | 
			
		||||
    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
 | 
			
		||||
    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
 | 
			
		||||
    TouchBuffer(slot_buffers[new_buffer_id]);
 | 
			
		||||
    for (const BufferId overlap_id : overlap.ids) {
 | 
			
		||||
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
 | 
			
		||||
    }
 | 
			
		||||
@ -1122,8 +1130,14 @@ template <class P>
 | 
			
		||||
template <bool insert>
 | 
			
		||||
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
 | 
			
		||||
    const Buffer& buffer = slot_buffers[buffer_id];
 | 
			
		||||
    const auto size = buffer.SizeBytes();
 | 
			
		||||
    if (insert) {
 | 
			
		||||
        total_used_memory += Common::AlignUp(size, 1024);
 | 
			
		||||
    } else {
 | 
			
		||||
        total_used_memory -= Common::AlignUp(size, 1024);
 | 
			
		||||
    }
 | 
			
		||||
    const VAddr cpu_addr_begin = buffer.CpuAddr();
 | 
			
		||||
    const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
 | 
			
		||||
    const VAddr cpu_addr_end = cpu_addr_begin + size;
 | 
			
		||||
    const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
 | 
			
		||||
    const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
 | 
			
		||||
    for (u64 page = page_begin; page != page_end; ++page) {
 | 
			
		||||
@ -1135,6 +1149,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
 | 
			
		||||
    buffer.SetFrameTick(frame_tick);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
 | 
			
		||||
    if (buffer.CpuAddr() == 0) {
 | 
			
		||||
@ -1211,6 +1230,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
 | 
			
		||||
    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
 | 
			
		||||
    DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
 | 
			
		||||
    boost::container::small_vector<BufferCopy, 1> copies;
 | 
			
		||||
    u64 total_size_bytes = 0;
 | 
			
		||||
    u64 largest_copy = 0;
 | 
			
		||||
    buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
 | 
			
		||||
        copies.push_back(BufferCopy{
 | 
			
		||||
            .src_offset = range_offset,
 | 
			
		||||
            .dst_offset = total_size_bytes,
 | 
			
		||||
            .size = range_size,
 | 
			
		||||
        });
 | 
			
		||||
        total_size_bytes += range_size;
 | 
			
		||||
        largest_copy = std::max(largest_copy, range_size);
 | 
			
		||||
    });
 | 
			
		||||
    if (total_size_bytes == 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    MICROPROFILE_SCOPE(GPU_DownloadMemory);
 | 
			
		||||
 | 
			
		||||
    if constexpr (USE_MEMORY_MAPS) {
 | 
			
		||||
        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
 | 
			
		||||
        const u8* const mapped_memory = download_staging.mapped_span.data();
 | 
			
		||||
        const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
 | 
			
		||||
        for (BufferCopy& copy : copies) {
 | 
			
		||||
            // Modify copies to have the staging offset in mind
 | 
			
		||||
            copy.dst_offset += download_staging.offset;
 | 
			
		||||
        }
 | 
			
		||||
        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
 | 
			
		||||
        runtime.Finish();
 | 
			
		||||
        for (const BufferCopy& copy : copies) {
 | 
			
		||||
            const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
 | 
			
		||||
            // Undo the modified offset
 | 
			
		||||
            const u64 dst_offset = copy.dst_offset - download_staging.offset;
 | 
			
		||||
            const u8* copy_mapped_memory = mapped_memory + dst_offset;
 | 
			
		||||
            cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
 | 
			
		||||
        for (const BufferCopy& copy : copies) {
 | 
			
		||||
            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
 | 
			
		||||
            const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
 | 
			
		||||
            cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
 | 
			
		||||
    const auto scalar_replace = [buffer_id](Binding& binding) {
 | 
			
		||||
@ -1236,6 +1306,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
 | 
			
		||||
 | 
			
		||||
    Unregister(buffer_id);
 | 
			
		||||
    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
 | 
			
		||||
    slot_buffers.erase(buffer_id);
 | 
			
		||||
 | 
			
		||||
    NotifyBufferDeletion();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -737,6 +737,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Image::~Image() = default;
 | 
			
		||||
 | 
			
		||||
void Image::UploadMemory(const ImageBufferMap& map,
 | 
			
		||||
                         std::span<const VideoCommon::BufferImageCopy> copies) {
 | 
			
		||||
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
 | 
			
		||||
 | 
			
		||||
@ -143,6 +143,14 @@ public:
 | 
			
		||||
    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
 | 
			
		||||
                   VAddr cpu_addr);
 | 
			
		||||
 | 
			
		||||
    ~Image();
 | 
			
		||||
 | 
			
		||||
    Image(const Image&) = delete;
 | 
			
		||||
    Image& operator=(const Image&) = delete;
 | 
			
		||||
 | 
			
		||||
    Image(Image&&) = default;
 | 
			
		||||
    Image& operator=(Image&&) = default;
 | 
			
		||||
 | 
			
		||||
    void UploadMemory(const ImageBufferMap& map,
 | 
			
		||||
                      std::span<const VideoCommon::BufferImageCopy> copies);
 | 
			
		||||
 | 
			
		||||
@ -235,6 +243,7 @@ struct TextureCacheParams {
 | 
			
		||||
    static constexpr bool ENABLE_VALIDATION = true;
 | 
			
		||||
    static constexpr bool FRAMEBUFFER_BLITS = true;
 | 
			
		||||
    static constexpr bool HAS_EMULATED_COPIES = true;
 | 
			
		||||
    static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
 | 
			
		||||
 | 
			
		||||
    using Runtime = OpenGL::TextureCacheRuntime;
 | 
			
		||||
    using Image = OpenGL::Image;
 | 
			
		||||
 | 
			
		||||
@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Forwards the device-local memory value reported by the device object
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
    const u64 device_local_memory = device.GetDeviceLocalMemory();
    return device_local_memory;
}
 | 
			
		||||
 | 
			
		||||
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
 | 
			
		||||
             VAddr cpu_addr_)
 | 
			
		||||
    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
 | 
			
		||||
@ -876,6 +880,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Image::~Image() = default;
 | 
			
		||||
 | 
			
		||||
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
 | 
			
		||||
    // TODO: Move this to another API
 | 
			
		||||
    scheduler->RequestOutsideRenderPassOperationContext();
 | 
			
		||||
 | 
			
		||||
@ -97,6 +97,8 @@ struct TextureCacheRuntime {
 | 
			
		||||
        // All known Vulkan drivers can natively handle BGR textures
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u64 GetDeviceLocalMemory() const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class Image : public VideoCommon::ImageBase {
 | 
			
		||||
@ -104,6 +106,14 @@ public:
 | 
			
		||||
    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
 | 
			
		||||
                   VAddr cpu_addr);
 | 
			
		||||
 | 
			
		||||
    ~Image();
 | 
			
		||||
 | 
			
		||||
    Image(const Image&) = delete;
 | 
			
		||||
    Image& operator=(const Image&) = delete;
 | 
			
		||||
 | 
			
		||||
    Image(Image&&) = default;
 | 
			
		||||
    Image& operator=(Image&&) = default;
 | 
			
		||||
 | 
			
		||||
    void UploadMemory(const StagingBufferRef& map,
 | 
			
		||||
                      std::span<const VideoCommon::BufferImageCopy> copies);
 | 
			
		||||
 | 
			
		||||
@ -257,6 +267,7 @@ struct TextureCacheParams {
 | 
			
		||||
    static constexpr bool ENABLE_VALIDATION = true;
 | 
			
		||||
    static constexpr bool FRAMEBUFFER_BLITS = false;
 | 
			
		||||
    static constexpr bool HAS_EMULATED_COPIES = false;
 | 
			
		||||
    static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
 | 
			
		||||
 | 
			
		||||
    using Runtime = Vulkan::TextureCacheRuntime;
 | 
			
		||||
    using Image = Vulkan::Image;
 | 
			
		||||
 | 
			
		||||
@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
 | 
			
		||||
    return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Scales a size by the ratio between a block's size at four bytes per pixel and the
/// format's BytesPerBlock.
/// @param base_size Size in bytes to scale
/// @param format    Pixel format that supplies the block dimensions and block byte size
/// @return base_size * (block_width * block_height * 4) / BytesPerBlock(format)
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) {
    constexpr u64 RGBA8_PIXEL_SIZE = 4;
    const u64 block_width = DefaultBlockWidth(format);
    const u64 block_height = DefaultBlockHeight(format);
    const u64 decompressed_block_bytes = block_width * block_height * RGBA8_PIXEL_SIZE;
    return (base_size * decompressed_block_bytes) / BytesPerBlock(format);
}
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCore::Surface
 | 
			
		||||
 | 
			
		||||
@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format);
 | 
			
		||||
 | 
			
		||||
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
 | 
			
		||||
 | 
			
		||||
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCore::Surface
 | 
			
		||||
 | 
			
		||||
@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
 | 
			
		||||
    image_view_ids.push_back(image_view_id);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool ImageBase::IsSafeDownload() const noexcept {
 | 
			
		||||
    // Skip images that were not modified from the GPU
 | 
			
		||||
    if (False(flags & ImageFlagBits::GpuModified)) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    // Skip images that .are. modified from the CPU
 | 
			
		||||
    // We don't want to write sensitive data from the guest
 | 
			
		||||
    if (True(flags & ImageFlagBits::CpuModified)) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    if (info.num_samples > 1) {
 | 
			
		||||
        LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ImageBase::CheckBadOverlapState() {
 | 
			
		||||
    if (False(flags & ImageFlagBits::BadOverlap)) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    if (!overlapping_images.empty()) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    flags &= ~ImageFlagBits::BadOverlap;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ImageBase::CheckAliasState() {
 | 
			
		||||
    if (False(flags & ImageFlagBits::Alias)) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    if (!aliased_images.empty()) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    flags &= ~ImageFlagBits::Alias;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
 | 
			
		||||
    static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
 | 
			
		||||
    ASSERT(lhs.info.type == rhs.info.type);
 | 
			
		||||
 | 
			
		||||
@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
 | 
			
		||||
    Strong = 1 << 5,      ///< Exists in the image table, the dimensions can be trusted
 | 
			
		||||
    Registered = 1 << 6,  ///< True when the image is registered
 | 
			
		||||
    Picked = 1 << 7,      ///< Temporary flag to mark the image as picked
 | 
			
		||||
 | 
			
		||||
    // Garbage Collection Flags
 | 
			
		||||
    BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
 | 
			
		||||
                         ///< garbage collection priority
 | 
			
		||||
    Alias = 1 << 9,      ///< This image has aliases and has priority on garbage
 | 
			
		||||
                         ///< collection
 | 
			
		||||
};
 | 
			
		||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 | 
			
		||||
 | 
			
		||||
@ -44,11 +50,16 @@ struct ImageBase {
 | 
			
		||||
 | 
			
		||||
    void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
 | 
			
		||||
 | 
			
		||||
    [[nodiscard]] bool IsSafeDownload() const noexcept;
 | 
			
		||||
 | 
			
		||||
    /// Tells whether [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersects the
    /// half-open range [cpu_addr, cpu_addr_end) of this image
    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
        const bool starts_before_overlap_ends = cpu_addr < overlap_end;
        const bool ends_after_overlap_starts = overlap_cpu_addr < cpu_addr_end;
        return starts_before_overlap_ends && ends_after_overlap_starts;
    }
 | 
			
		||||
 | 
			
		||||
    void CheckBadOverlapState();
 | 
			
		||||
    void CheckAliasState();
 | 
			
		||||
 | 
			
		||||
    ImageInfo info;
 | 
			
		||||
 | 
			
		||||
    u32 guest_size_bytes = 0;
 | 
			
		||||
@ -72,6 +83,7 @@ struct ImageBase {
 | 
			
		||||
    std::vector<SubresourceBase> slice_subresources;
 | 
			
		||||
 | 
			
		||||
    std::vector<AliasedImage> aliased_images;
 | 
			
		||||
    std::vector<ImageId> overlapping_images;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct ImageAllocBase {
 | 
			
		||||
 | 
			
		||||
@ -5,6 +5,7 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bit>
 | 
			
		||||
#include <concepts>
 | 
			
		||||
#include <numeric>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
@ -32,6 +33,60 @@ template <class T>
 | 
			
		||||
requires std::is_nothrow_move_assignable_v<T>&&
 | 
			
		||||
    std::is_nothrow_move_constructible_v<T> class SlotVector {
 | 
			
		||||
public:
 | 
			
		||||
    class Iterator {
 | 
			
		||||
        friend SlotVector<T>;
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
        constexpr Iterator() = default;
 | 
			
		||||
 | 
			
		||||
        Iterator& operator++() noexcept {
 | 
			
		||||
            const u64* const bitset = slot_vector->stored_bitset.data();
 | 
			
		||||
            const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
 | 
			
		||||
            if (id.index < size) {
 | 
			
		||||
                do {
 | 
			
		||||
                    ++id.index;
 | 
			
		||||
                } while (id.index < size && !IsValid(bitset));
 | 
			
		||||
                if (id.index == size) {
 | 
			
		||||
                    id.index = SlotId::INVALID_INDEX;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            return *this;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Iterator operator++(int) noexcept {
 | 
			
		||||
            const Iterator copy{*this};
 | 
			
		||||
            ++*this;
 | 
			
		||||
            return copy;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        bool operator==(const Iterator& other) const noexcept {
 | 
			
		||||
            return id.index == other.id.index;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        bool operator!=(const Iterator& other) const noexcept {
 | 
			
		||||
            return id.index != other.id.index;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::pair<SlotId, T*> operator*() const noexcept {
 | 
			
		||||
            return {id, std::addressof((*slot_vector)[id])};
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        T* operator->() const noexcept {
 | 
			
		||||
            return std::addressof((*slot_vector)[id]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    private:
 | 
			
		||||
        Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
 | 
			
		||||
            : slot_vector{slot_vector_}, id{id_} {}
 | 
			
		||||
 | 
			
		||||
        bool IsValid(const u64* bitset) const noexcept {
 | 
			
		||||
            return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        SlotVector<T>* slot_vector;
 | 
			
		||||
        SlotId id;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ~SlotVector() noexcept {
 | 
			
		||||
        size_t index = 0;
 | 
			
		||||
        for (u64 bits : stored_bitset) {
 | 
			
		||||
@ -70,6 +125,20 @@ public:
 | 
			
		||||
        ResetStorageBit(id.index);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns an iterator to the first occupied slot, or end() when no slot is occupied.
    [[nodiscard]] Iterator begin() noexcept {
        // Find the first bitset word that has at least one stored bit set.
        const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
        if (it == stored_bitset.end()) {
            return end();
        }
        // std::distance takes (first, last). With the arguments reversed the result is negative
        // for every word but the first, and the cast to u32 turns that into a bogus slot index.
        const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
        // The lowest set bit inside that word is the first occupied slot.
        const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
        return Iterator(this, first_id);
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the past-the-end iterator, represented by the invalid slot index.
    [[nodiscard]] Iterator end() noexcept {
        const SlotId invalid_id{SlotId::INVALID_INDEX};
        return Iterator(this, invalid_id);
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    struct NonTrivialDummy {
 | 
			
		||||
        NonTrivialDummy() noexcept {}
 | 
			
		||||
@ -140,7 +209,6 @@ private:
 | 
			
		||||
 | 
			
		||||
    Entry* values = nullptr;
 | 
			
		||||
    size_t values_capacity = 0;
 | 
			
		||||
    size_t values_size = 0;
 | 
			
		||||
 | 
			
		||||
    std::vector<u64> stored_bitset;
 | 
			
		||||
    std::vector<u32> free_list;
 | 
			
		||||
 | 
			
		||||
@ -20,8 +20,10 @@
 | 
			
		||||
 | 
			
		||||
#include "common/alignment.h"
 | 
			
		||||
#include "common/common_funcs.h"
 | 
			
		||||
#include "common/common_sizes.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/settings.h"
 | 
			
		||||
#include "video_core/compatible_formats.h"
 | 
			
		||||
#include "video_core/delayed_destruction_ring.h"
 | 
			
		||||
#include "video_core/dirty_flags.h"
 | 
			
		||||
@ -69,12 +71,17 @@ class TextureCache {
 | 
			
		||||
    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
 | 
			
		||||
    /// True when some copies have to be emulated
 | 
			
		||||
    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
 | 
			
		||||
    /// True when the API can provide info about the memory of the device.
 | 
			
		||||
    static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
 | 
			
		||||
 | 
			
		||||
    /// Image view ID for null descriptors
 | 
			
		||||
    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
 | 
			
		||||
    /// Sampler ID for bugged sampler ids
 | 
			
		||||
    static constexpr SamplerId NULL_SAMPLER_ID{0};
 | 
			
		||||
 | 
			
		||||
    static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB;
 | 
			
		||||
    static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB;
 | 
			
		||||
 | 
			
		||||
    using Runtime = typename P::Runtime;
 | 
			
		||||
    using Image = typename P::Image;
 | 
			
		||||
    using ImageAlloc = typename P::ImageAlloc;
 | 
			
		||||
@ -103,6 +110,9 @@ public:
 | 
			
		||||
    /// Notify the cache that a new frame has been queued
 | 
			
		||||
    void TickFrame();
 | 
			
		||||
 | 
			
		||||
    /// Runs the Garbage Collector.
 | 
			
		||||
    void RunGarbageCollector();
 | 
			
		||||
 | 
			
		||||
    /// Return a constant reference to the given image view id
 | 
			
		||||
    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
 | 
			
		||||
 | 
			
		||||
@ -333,6 +343,10 @@ private:
 | 
			
		||||
    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
 | 
			
		||||
 | 
			
		||||
    bool has_deleted_images = false;
 | 
			
		||||
    u64 total_used_memory = 0;
 | 
			
		||||
    u64 minimum_memory;
 | 
			
		||||
    u64 expected_memory;
 | 
			
		||||
    u64 critical_memory;
 | 
			
		||||
 | 
			
		||||
    SlotVector<Image> slot_images;
 | 
			
		||||
    SlotVector<ImageView> slot_image_views;
 | 
			
		||||
@ -353,6 +367,7 @@ private:
 | 
			
		||||
 | 
			
		||||
    u64 modification_tick = 0;
 | 
			
		||||
    u64 frame_tick = 0;
 | 
			
		||||
    typename SlotVector<Image>::Iterator deletion_iterator;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
 | 
			
		||||
    // This way the null resource becomes a compile time constant
 | 
			
		||||
    void(slot_image_views.insert(runtime, NullImageParams{}));
 | 
			
		||||
    void(slot_samplers.insert(runtime, sampler_descriptor));
 | 
			
		||||
 | 
			
		||||
    deletion_iterator = slot_images.begin();
 | 
			
		||||
 | 
			
		||||
    if constexpr (HAS_DEVICE_MEMORY_INFO) {
 | 
			
		||||
        const auto device_memory = runtime.GetDeviceLocalMemory();
 | 
			
		||||
        const u64 possible_expected_memory = (device_memory * 3) / 10;
 | 
			
		||||
        const u64 possible_critical_memory = (device_memory * 6) / 10;
 | 
			
		||||
        expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
 | 
			
		||||
        critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
 | 
			
		||||
        minimum_memory = 0;
 | 
			
		||||
    } else {
 | 
			
		||||
        // On OGL we can be more conservative, as the driver takes care.
 | 
			
		||||
        expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB;
 | 
			
		||||
        critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB;
 | 
			
		||||
        minimum_memory = expected_memory;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void TextureCache<P>::RunGarbageCollector() {
 | 
			
		||||
    const bool high_priority_mode = total_used_memory >= expected_memory;
 | 
			
		||||
    const bool aggressive_mode = total_used_memory >= critical_memory;
 | 
			
		||||
    const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
 | 
			
		||||
    int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
 | 
			
		||||
    for (; num_iterations > 0; --num_iterations) {
 | 
			
		||||
        if (deletion_iterator == slot_images.end()) {
 | 
			
		||||
            deletion_iterator = slot_images.begin();
 | 
			
		||||
            if (deletion_iterator == slot_images.end()) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        auto [image_id, image_tmp] = *deletion_iterator;
 | 
			
		||||
        Image* image = image_tmp; // fix clang error.
 | 
			
		||||
        const bool is_alias = True(image->flags & ImageFlagBits::Alias);
 | 
			
		||||
        const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
 | 
			
		||||
        const bool must_download = image->IsSafeDownload();
 | 
			
		||||
        bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
 | 
			
		||||
        const u64 ticks_needed =
 | 
			
		||||
            is_bad_overlap
 | 
			
		||||
                ? ticks_to_destroy >> 4
 | 
			
		||||
                : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
 | 
			
		||||
        should_care |= aggressive_mode;
 | 
			
		||||
        if (should_care && image->frame_tick + ticks_needed < frame_tick) {
 | 
			
		||||
            if (is_bad_overlap) {
 | 
			
		||||
                const bool overlap_check = std::ranges::all_of(
 | 
			
		||||
                    image->overlapping_images, [&, image](const ImageId& overlap_id) {
 | 
			
		||||
                        auto& overlap = slot_images[overlap_id];
 | 
			
		||||
                        return overlap.frame_tick >= image->frame_tick;
 | 
			
		||||
                    });
 | 
			
		||||
                if (!overlap_check) {
 | 
			
		||||
                    ++deletion_iterator;
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            if (!is_bad_overlap && must_download) {
 | 
			
		||||
                const bool alias_check = std::ranges::none_of(
 | 
			
		||||
                    image->aliased_images, [&, image](const AliasedImage& alias) {
 | 
			
		||||
                        auto& alias_image = slot_images[alias.id];
 | 
			
		||||
                        return (alias_image.frame_tick < image->frame_tick) ||
 | 
			
		||||
                               (alias_image.modification_tick < image->modification_tick);
 | 
			
		||||
                    });
 | 
			
		||||
 | 
			
		||||
                if (alias_check) {
 | 
			
		||||
                    auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
 | 
			
		||||
                    const auto copies = FullDownloadCopies(image->info);
 | 
			
		||||
                    image->DownloadMemory(map, copies);
 | 
			
		||||
                    runtime.Finish();
 | 
			
		||||
                    SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            if (True(image->flags & ImageFlagBits::Tracked)) {
 | 
			
		||||
                UntrackImage(*image);
 | 
			
		||||
            }
 | 
			
		||||
            UnregisterImage(image_id);
 | 
			
		||||
            DeleteImage(image_id);
 | 
			
		||||
            if (is_bad_overlap) {
 | 
			
		||||
                ++num_iterations;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        ++deletion_iterator;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void TextureCache<P>::TickFrame() {
 | 
			
		||||
    // Tick sentenced resources in this order to ensure they are destroyed in the right order
 | 
			
		||||
    if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
 | 
			
		||||
        RunGarbageCollector();
 | 
			
		||||
    }
 | 
			
		||||
    sentenced_images.Tick();
 | 
			
		||||
    sentenced_framebuffers.Tick();
 | 
			
		||||
    sentenced_image_view.Tick();
 | 
			
		||||
@ -568,17 +666,7 @@ template <class P>
 | 
			
		||||
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
 | 
			
		||||
    std::vector<ImageId> images;
 | 
			
		||||
    ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
 | 
			
		||||
        // Skip images that were not modified from the GPU
 | 
			
		||||
        if (False(image.flags & ImageFlagBits::GpuModified)) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        // Skip images that .are. modified from the CPU
 | 
			
		||||
        // We don't want to write sensitive data from the guest
 | 
			
		||||
        if (True(image.flags & ImageFlagBits::CpuModified)) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        if (image.info.num_samples > 1) {
 | 
			
		||||
            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
 | 
			
		||||
        if (!image.IsSafeDownload()) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        image.flags &= ~ImageFlagBits::GpuModified;
 | 
			
		||||
@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
 | 
			
		||||
    std::vector<ImageId> overlap_ids;
 | 
			
		||||
    std::vector<ImageId> left_aliased_ids;
 | 
			
		||||
    std::vector<ImageId> right_aliased_ids;
 | 
			
		||||
    std::vector<ImageId> bad_overlap_ids;
 | 
			
		||||
    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
 | 
			
		||||
        if (info.type != overlap.info.type) {
 | 
			
		||||
            return;
 | 
			
		||||
@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
 | 
			
		||||
        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
 | 
			
		||||
        if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
 | 
			
		||||
            left_aliased_ids.push_back(overlap_id);
 | 
			
		||||
            overlap.flags |= ImageFlagBits::Alias;
 | 
			
		||||
        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
 | 
			
		||||
                                 broken_views, native_bgr)) {
 | 
			
		||||
            right_aliased_ids.push_back(overlap_id);
 | 
			
		||||
            overlap.flags |= ImageFlagBits::Alias;
 | 
			
		||||
        } else {
 | 
			
		||||
            bad_overlap_ids.push_back(overlap_id);
 | 
			
		||||
            overlap.flags |= ImageFlagBits::BadOverlap;
 | 
			
		||||
        }
 | 
			
		||||
    });
 | 
			
		||||
    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
 | 
			
		||||
@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
 | 
			
		||||
    for (const ImageId aliased_id : right_aliased_ids) {
 | 
			
		||||
        ImageBase& aliased = slot_images[aliased_id];
 | 
			
		||||
        AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
 | 
			
		||||
        new_image.flags |= ImageFlagBits::Alias;
 | 
			
		||||
    }
 | 
			
		||||
    for (const ImageId aliased_id : left_aliased_ids) {
 | 
			
		||||
        ImageBase& aliased = slot_images[aliased_id];
 | 
			
		||||
        AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
 | 
			
		||||
        new_image.flags |= ImageFlagBits::Alias;
 | 
			
		||||
    }
 | 
			
		||||
    for (const ImageId aliased_id : bad_overlap_ids) {
 | 
			
		||||
        ImageBase& aliased = slot_images[aliased_id];
 | 
			
		||||
        aliased.overlapping_images.push_back(new_image_id);
 | 
			
		||||
        new_image.overlapping_images.push_back(aliased_id);
 | 
			
		||||
        new_image.flags |= ImageFlagBits::BadOverlap;
 | 
			
		||||
    }
 | 
			
		||||
    RegisterImage(new_image_id);
 | 
			
		||||
    return new_image_id;
 | 
			
		||||
@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
 | 
			
		||||
    image.flags |= ImageFlagBits::Registered;
 | 
			
		||||
    ForEachPage(image.cpu_addr, image.guest_size_bytes,
 | 
			
		||||
                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 | 
			
		||||
    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
 | 
			
		||||
    if ((IsPixelFormatASTC(image.info.format) &&
 | 
			
		||||
         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
 | 
			
		||||
        True(image.flags & ImageFlagBits::Converted)) {
 | 
			
		||||
        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
 | 
			
		||||
    }
 | 
			
		||||
    total_used_memory += Common::AlignUp(tentative_size, 1024);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
 | 
			
		||||
    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
 | 
			
		||||
               "Trying to unregister an already registered image");
 | 
			
		||||
    image.flags &= ~ImageFlagBits::Registered;
 | 
			
		||||
    image.flags &= ~ImageFlagBits::BadOverlap;
 | 
			
		||||
    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
 | 
			
		||||
    if ((IsPixelFormatASTC(image.info.format) &&
 | 
			
		||||
         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
 | 
			
		||||
        True(image.flags & ImageFlagBits::Converted)) {
 | 
			
		||||
        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
 | 
			
		||||
    }
 | 
			
		||||
    total_used_memory -= Common::AlignUp(tentative_size, 1024);
 | 
			
		||||
    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
 | 
			
		||||
        const auto page_it = page_table.find(page);
 | 
			
		||||
        if (page_it == page_table.end()) {
 | 
			
		||||
@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
 | 
			
		||||
            std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
 | 
			
		||||
                return other_alias.id == image_id;
 | 
			
		||||
            });
 | 
			
		||||
        other_image.CheckAliasState();
 | 
			
		||||
        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
 | 
			
		||||
                   num_removed_aliases);
 | 
			
		||||
    }
 | 
			
		||||
    for (const ImageId overlap_id : image.overlapping_images) {
 | 
			
		||||
        ImageBase& other_image = slot_images[overlap_id];
 | 
			
		||||
        [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
 | 
			
		||||
            other_image.overlapping_images,
 | 
			
		||||
            [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
 | 
			
		||||
        other_image.CheckBadOverlapState();
 | 
			
		||||
        ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
 | 
			
		||||
                   num_removed_overlaps);
 | 
			
		||||
    }
 | 
			
		||||
    for (const ImageViewId image_view_id : image_view_ids) {
 | 
			
		||||
        sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
 | 
			
		||||
        slot_image_views.erase(image_view_id);
 | 
			
		||||
 | 
			
		||||
@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
 | 
			
		||||
 | 
			
		||||
    for (s32 layer = 0; layer < info.resources.layers; ++layer) {
 | 
			
		||||
        const std::span<const u8> src = input.subspan(host_offset);
 | 
			
		||||
        gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
 | 
			
		||||
 | 
			
		||||
        SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
 | 
			
		||||
                       num_tiles.depth, block.height, block.depth);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
 | 
			
		||||
    }
 | 
			
		||||
    logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
 | 
			
		||||
 | 
			
		||||
    CollectPhysicalMemoryInfo();
 | 
			
		||||
    CollectTelemetryParameters();
 | 
			
		||||
    CollectToolingInfo();
 | 
			
		||||
 | 
			
		||||
@ -818,6 +819,17 @@ void Device::CollectTelemetryParameters() {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Device::CollectPhysicalMemoryInfo() {
 | 
			
		||||
    const auto mem_properties = physical.GetMemoryProperties();
 | 
			
		||||
    const std::size_t num_properties = mem_properties.memoryHeapCount;
 | 
			
		||||
    device_access_memory = 0;
 | 
			
		||||
    for (std::size_t element = 0; element < num_properties; element++) {
 | 
			
		||||
        if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
 | 
			
		||||
            device_access_memory += mem_properties.memoryHeaps[element].size;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Device::CollectToolingInfo() {
 | 
			
		||||
    if (!ext_tooling_info) {
 | 
			
		||||
        return;
 | 
			
		||||
 | 
			
		||||
@ -225,6 +225,10 @@ public:
 | 
			
		||||
        return use_asynchronous_shaders;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the total size in bytes of the device-local memory heaps, as collected by
    /// CollectPhysicalMemoryInfo.
    u64 GetDeviceLocalMemory() const {
        return device_access_memory;
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Checks if the physical device is suitable.
 | 
			
		||||
    void CheckSuitability(bool requires_swapchain) const;
 | 
			
		||||
@ -244,6 +248,9 @@ private:
 | 
			
		||||
    /// Collects information about attached tools.
 | 
			
		||||
    void CollectToolingInfo();
 | 
			
		||||
 | 
			
		||||
    /// Collects information about the device's local memory.
 | 
			
		||||
    void CollectPhysicalMemoryInfo();
 | 
			
		||||
 | 
			
		||||
    /// Returns a list of queue initialization descriptors.
 | 
			
		||||
    std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
 | 
			
		||||
 | 
			
		||||
@ -302,6 +309,8 @@ private:
 | 
			
		||||
 | 
			
		||||
    /// Nsight Aftermath GPU crash tracker
 | 
			
		||||
    std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
 | 
			
		||||
 | 
			
		||||
    u64 device_access_memory;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Vulkan
 | 
			
		||||
 | 
			
		||||
@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
 | 
			
		||||
 | 
			
		||||
class MemoryAllocation {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
 | 
			
		||||
                              u64 allocation_size_, u32 type)
 | 
			
		||||
        : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
 | 
			
		||||
          shifted_memory_type{1U << type} {}
 | 
			
		||||
    explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
 | 
			
		||||
                              VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
 | 
			
		||||
        : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
 | 
			
		||||
          property_flags{properties}, shifted_memory_type{1U << type} {}
 | 
			
		||||
 | 
			
		||||
#if defined(_WIN32) || defined(__unix__)
 | 
			
		||||
    ~MemoryAllocation() {
 | 
			
		||||
@ -106,6 +106,10 @@ public:
 | 
			
		||||
        const auto it = std::ranges::find(commits, begin, &Range::begin);
 | 
			
		||||
        ASSERT_MSG(it != commits.end(), "Invalid commit");
 | 
			
		||||
        commits.erase(it);
 | 
			
		||||
        if (commits.empty()) {
 | 
			
		||||
            // Do not call any code involving 'this' after this call, the object will be destroyed
 | 
			
		||||
            allocator->ReleaseMemory(this);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    [[nodiscard]] std::span<u8> Map() {
 | 
			
		||||
@ -171,6 +175,7 @@ private:
 | 
			
		||||
        return candidate;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    MemoryAllocator* const allocator;           ///< Parent memory allocation.
 | 
			
		||||
    const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
 | 
			
		||||
    const u64 allocation_size;                  ///< Size of this allocation.
 | 
			
		||||
    const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
 | 
			
		||||
@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
 | 
			
		||||
            return false;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
 | 
			
		||||
    allocations.push_back(
 | 
			
		||||
        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Removes the given allocation from the allocator's list, destroying it.
/// @param alloc Allocation to release; it is expected to be owned by this allocator.
void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
    // Compare through a lambda instead of projecting with &std::unique_ptr<...>::get: forming a
    // pointer to a standard library member function has unspecified behaviour.
    const auto it = std::ranges::find_if(
        allocations, [alloc](const auto& allocation) { return allocation.get() == alloc; });
    ASSERT(it != allocations.end());
    allocations.erase(it);
}
 | 
			
		||||
 | 
			
		||||
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
 | 
			
		||||
                                                       VkMemoryPropertyFlags flags) {
 | 
			
		||||
    for (auto& allocation : allocations) {
 | 
			
		||||
 | 
			
		||||
@ -69,6 +69,8 @@ private:
 | 
			
		||||
/// Memory allocator container.
 | 
			
		||||
/// Allocates and releases memory allocations on demand.
 | 
			
		||||
class MemoryAllocator {
 | 
			
		||||
    friend MemoryAllocation;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    /**
 | 
			
		||||
     * Construct memory allocator
 | 
			
		||||
@ -104,6 +106,9 @@ private:
 | 
			
		||||
    /// Tries to allocate a chunk of memory.
 | 
			
		||||
    bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
 | 
			
		||||
 | 
			
		||||
    /// Releases a chunk of memory.
 | 
			
		||||
    void ReleaseMemory(MemoryAllocation* alloc);
 | 
			
		||||
 | 
			
		||||
    /// Tries to allocate a memory commit.
 | 
			
		||||
    std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
 | 
			
		||||
                                          VkMemoryPropertyFlags flags);
 | 
			
		||||
 | 
			
		||||
@ -822,6 +822,7 @@ void Config::ReadRendererValues() {
 | 
			
		||||
                      QStringLiteral("use_asynchronous_shaders"), false);
 | 
			
		||||
    ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
 | 
			
		||||
                      true);
 | 
			
		||||
    ReadSettingGlobal(Settings::values.use_caches_gc, QStringLiteral("use_caches_gc"), false);
 | 
			
		||||
    ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0);
 | 
			
		||||
    ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0);
 | 
			
		||||
    ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0);
 | 
			
		||||
@ -1410,6 +1411,7 @@ void Config::SaveRendererValues() {
 | 
			
		||||
                       Settings::values.use_asynchronous_shaders, false);
 | 
			
		||||
    WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
 | 
			
		||||
                       true);
 | 
			
		||||
    WriteSettingGlobal(QStringLiteral("use_caches_gc"), Settings::values.use_caches_gc, false);
 | 
			
		||||
    // Cast to double because Qt's written float values are not human-readable
 | 
			
		||||
    WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0);
 | 
			
		||||
    WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0);
 | 
			
		||||
 | 
			
		||||
@ -31,6 +31,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
 | 
			
		||||
    ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue());
 | 
			
		||||
    ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
 | 
			
		||||
    ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
 | 
			
		||||
    ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
 | 
			
		||||
    ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
 | 
			
		||||
 | 
			
		||||
    if (Settings::IsConfiguringGlobal()) {
 | 
			
		||||
@ -65,6 +66,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
 | 
			
		||||
    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
 | 
			
		||||
                                             ui->use_asynchronous_shaders,
 | 
			
		||||
                                             use_asynchronous_shaders);
 | 
			
		||||
    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
 | 
			
		||||
                                             use_caches_gc);
 | 
			
		||||
    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
 | 
			
		||||
                                             ui->use_fast_gpu_time, use_fast_gpu_time);
 | 
			
		||||
 | 
			
		||||
@ -105,6 +108,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
 | 
			
		||||
        ui->use_asynchronous_shaders->setEnabled(
 | 
			
		||||
            Settings::values.use_asynchronous_shaders.UsingGlobal());
 | 
			
		||||
        ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
 | 
			
		||||
        ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
 | 
			
		||||
        ui->anisotropic_filtering_combobox->setEnabled(
 | 
			
		||||
            Settings::values.max_anisotropy.UsingGlobal());
 | 
			
		||||
 | 
			
		||||
@ -121,6 +125,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
 | 
			
		||||
                                            use_asynchronous_shaders);
 | 
			
		||||
    ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
 | 
			
		||||
                                            Settings::values.use_fast_gpu_time, use_fast_gpu_time);
 | 
			
		||||
    ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
 | 
			
		||||
                                            use_caches_gc);
 | 
			
		||||
    ConfigurationShared::SetColoredComboBox(
 | 
			
		||||
        ui->gpu_accuracy, ui->label_gpu_accuracy,
 | 
			
		||||
        static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
 | 
			
		||||
 | 
			
		||||
@ -39,4 +39,5 @@ private:
 | 
			
		||||
    ConfigurationShared::CheckState use_assembly_shaders;
 | 
			
		||||
    ConfigurationShared::CheckState use_asynchronous_shaders;
 | 
			
		||||
    ConfigurationShared::CheckState use_fast_gpu_time;
 | 
			
		||||
    ConfigurationShared::CheckState use_caches_gc;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -121,6 +121,16 @@
 | 
			
		||||
          </property>
 | 
			
		||||
         </widget>
 | 
			
		||||
        </item>
 | 
			
		||||
        <item>
 | 
			
		||||
         <widget class="QCheckBox" name="use_caches_gc">
 | 
			
		||||
          <property name="toolTip">
 | 
			
		||||
           <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
 | 
			
		||||
          </property>
 | 
			
		||||
          <property name="text">
 | 
			
		||||
           <string>Enable GPU cache garbage collection (experimental)</string>
 | 
			
		||||
          </property>
 | 
			
		||||
         </widget>
 | 
			
		||||
        </item>
 | 
			
		||||
        <item>
 | 
			
		||||
         <widget class="QWidget" name="af_layout" native="true">
 | 
			
		||||
          <layout class="QHBoxLayout" name="horizontalLayout_1">
 | 
			
		||||
 | 
			
		||||
@ -227,6 +227,10 @@ use_asynchronous_gpu_emulation =
 | 
			
		||||
# 0: Off, 1 (default): On
 | 
			
		||||
use_vsync =
 | 
			
		||||
 | 
			
		||||
# Whether to use garbage collection or not for GPU caches.
 | 
			
		||||
# 0 (default): Off, 1: On
 | 
			
		||||
use_caches_gc =
 | 
			
		||||
 | 
			
		||||
# The clear color for the renderer. What shows up on the sides of the bottom screen.
 | 
			
		||||
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
 | 
			
		||||
bg_red =
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user