mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-25 11:56:42 +08:00 
			
		
		
		
	Merge pull request #9889 from Morph1984/time-is-ticking
core_timing: Reduce CPU usage on Windows
This commit is contained in:
		
						commit
						a7792e5ff8
					
				| @ -477,8 +477,8 @@ if (APPLE) | |||||||
|     find_library(COCOA_LIBRARY Cocoa) |     find_library(COCOA_LIBRARY Cocoa) | ||||||
|     set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) |     set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) | ||||||
| elseif (WIN32) | elseif (WIN32) | ||||||
|     # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista) |     # Target Windows 10 | ||||||
|     add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600) |     add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) | ||||||
|     set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi) |     set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi) | ||||||
|     if (MINGW) |     if (MINGW) | ||||||
|         # PSAPI is the Process Status API |         # PSAPI is the Process Status API | ||||||
|  | |||||||
							
								
								
									
										6
									
								
								dist/yuzu.manifest
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								dist/yuzu.manifest
									
									
									
									
										vendored
									
									
								
							| @ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later | |||||||
|     <application> |     <application> | ||||||
|       <!-- Windows 10 --> |       <!-- Windows 10 --> | ||||||
|       <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> |       <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> | ||||||
|       <!-- Windows 8.1 --> |  | ||||||
|       <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> |  | ||||||
|       <!-- Windows 8 --> |  | ||||||
|       <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> |  | ||||||
|       <!-- Windows 7 --> |  | ||||||
|       <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> |  | ||||||
|     </application> |     </application> | ||||||
|   </compatibility> |   </compatibility> | ||||||
|   <trustInfo |   <trustInfo | ||||||
|  | |||||||
| @ -113,6 +113,8 @@ add_library(common STATIC | |||||||
|     socket_types.h |     socket_types.h | ||||||
|     spin_lock.cpp |     spin_lock.cpp | ||||||
|     spin_lock.h |     spin_lock.h | ||||||
|  |     steady_clock.cpp | ||||||
|  |     steady_clock.h | ||||||
|     stream.cpp |     stream.cpp | ||||||
|     stream.h |     stream.h | ||||||
|     string_util.cpp |     string_util.cpp | ||||||
| @ -142,6 +144,14 @@ add_library(common STATIC | |||||||
|     zstd_compression.h |     zstd_compression.h | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | if (WIN32) | ||||||
|  |   target_sources(common PRIVATE | ||||||
|  |     windows/timer_resolution.cpp | ||||||
|  |     windows/timer_resolution.h | ||||||
|  |   ) | ||||||
|  |   target_link_libraries(common PRIVATE ntdll) | ||||||
|  | endif() | ||||||
|  | 
 | ||||||
| if(ARCHITECTURE_x86_64) | if(ARCHITECTURE_x86_64) | ||||||
|     target_sources(common |     target_sources(common | ||||||
|         PRIVATE |         PRIVATE | ||||||
|  | |||||||
							
								
								
									
										56
									
								
								src/common/steady_clock.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								src/common/steady_clock.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,56 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #if defined(_WIN32) | ||||||
|  | #include <windows.h> | ||||||
|  | #else | ||||||
|  | #include <time.h> | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #include "common/steady_clock.h" | ||||||
|  | 
 | ||||||
|  | namespace Common { | ||||||
|  | 
 | ||||||
|  | #ifdef _WIN32 | ||||||
|  | static s64 WindowsQueryPerformanceFrequency() { | ||||||
|  |     LARGE_INTEGER frequency; | ||||||
|  |     QueryPerformanceFrequency(&frequency); | ||||||
|  |     return frequency.QuadPart; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static s64 WindowsQueryPerformanceCounter() { | ||||||
|  |     LARGE_INTEGER counter; | ||||||
|  |     QueryPerformanceCounter(&counter); | ||||||
|  |     return counter.QuadPart; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | SteadyClock::time_point SteadyClock::Now() noexcept { | ||||||
|  | #if defined(_WIN32) | ||||||
|  |     static const auto freq = WindowsQueryPerformanceFrequency(); | ||||||
|  |     const auto counter = WindowsQueryPerformanceCounter(); | ||||||
|  | 
 | ||||||
|  |     // 10 MHz is a very common QPC frequency on modern PCs.
 | ||||||
|  |     // Optimizing for this specific frequency can double the performance of
 | ||||||
|  |     // this function by avoiding the expensive frequency conversion path.
 | ||||||
|  |     static constexpr s64 TenMHz = 10'000'000; | ||||||
|  | 
 | ||||||
|  |     if (freq == TenMHz) [[likely]] { | ||||||
|  |         static_assert(period::den % TenMHz == 0); | ||||||
|  |         static constexpr s64 Multiplier = period::den / TenMHz; | ||||||
|  |         return time_point{duration{counter * Multiplier}}; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const auto whole = (counter / freq) * period::den; | ||||||
|  |     const auto part = (counter % freq) * period::den / freq; | ||||||
|  |     return time_point{duration{whole + part}}; | ||||||
|  | #elif defined(__APPLE__) | ||||||
|  |     return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}}; | ||||||
|  | #else | ||||||
|  |     timespec ts; | ||||||
|  |     clock_gettime(CLOCK_MONOTONIC, &ts); | ||||||
|  |     return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}}; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }; // namespace Common
 | ||||||
							
								
								
									
										23
									
								
								src/common/steady_clock.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								src/common/steady_clock.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <chrono> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace Common { | ||||||
|  | 
 | ||||||
|  | struct SteadyClock { | ||||||
|  |     using rep = s64; | ||||||
|  |     using period = std::nano; | ||||||
|  |     using duration = std::chrono::nanoseconds; | ||||||
|  |     using time_point = std::chrono::time_point<SteadyClock>; | ||||||
|  | 
 | ||||||
|  |     static constexpr bool is_steady = true; | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] static time_point Now() noexcept; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace Common
 | ||||||
| @ -1,6 +1,7 @@ | |||||||
| // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
| 
 | 
 | ||||||
|  | #include "common/steady_clock.h" | ||||||
| #include "common/uint128.h" | #include "common/uint128.h" | ||||||
| #include "common/wall_clock.h" | #include "common/wall_clock.h" | ||||||
| 
 | 
 | ||||||
| @ -11,45 +12,32 @@ | |||||||
| 
 | 
 | ||||||
| namespace Common { | namespace Common { | ||||||
| 
 | 
 | ||||||
| using base_timer = std::chrono::steady_clock; |  | ||||||
| using base_time_point = std::chrono::time_point<base_timer>; |  | ||||||
| 
 |  | ||||||
| class StandardWallClock final : public WallClock { | class StandardWallClock final : public WallClock { | ||||||
| public: | public: | ||||||
|     explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) |     explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) | ||||||
|         : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { |         : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, | ||||||
|         start_time = base_timer::now(); |           start_time{SteadyClock::Now()} {} | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     std::chrono::nanoseconds GetTimeNS() override { |     std::chrono::nanoseconds GetTimeNS() override { | ||||||
|         base_time_point current = base_timer::now(); |         return SteadyClock::Now() - start_time; | ||||||
|         auto elapsed = current - start_time; |  | ||||||
|         return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::chrono::microseconds GetTimeUS() override { |     std::chrono::microseconds GetTimeUS() override { | ||||||
|         base_time_point current = base_timer::now(); |         return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); | ||||||
|         auto elapsed = current - start_time; |  | ||||||
|         return std::chrono::duration_cast<std::chrono::microseconds>(elapsed); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::chrono::milliseconds GetTimeMS() override { |     std::chrono::milliseconds GetTimeMS() override { | ||||||
|         base_time_point current = base_timer::now(); |         return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); | ||||||
|         auto elapsed = current - start_time; |  | ||||||
|         return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u64 GetClockCycles() override { |     u64 GetClockCycles() override { | ||||||
|         std::chrono::nanoseconds time_now = GetTimeNS(); |         const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); | ||||||
|         const u128 temporary = |         return Common::Divide128On32(temp, NS_RATIO).first; | ||||||
|             Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); |  | ||||||
|         return Common::Divide128On32(temporary, 1000000000).first; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u64 GetCPUCycles() override { |     u64 GetCPUCycles() override { | ||||||
|         std::chrono::nanoseconds time_now = GetTimeNS(); |         const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); | ||||||
|         const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); |         return Common::Divide128On32(temp, NS_RATIO).first; | ||||||
|         return Common::Divide128On32(temporary, 1000000000).first; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void Pause([[maybe_unused]] bool is_paused) override { |     void Pause([[maybe_unused]] bool is_paused) override { | ||||||
| @ -57,7 +45,7 @@ public: | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     base_time_point start_time; |     SteadyClock::time_point start_time; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
| @ -93,4 +81,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | |||||||
| 
 | 
 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | ||||||
|  |                                                    u64 emulated_clock_frequency) { | ||||||
|  |     return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Common
 | } // namespace Common
 | ||||||
|  | |||||||
| @ -55,4 +55,7 @@ private: | |||||||
| [[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | [[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||||||
|                                                                  u64 emulated_clock_frequency); |                                                                  u64 emulated_clock_frequency); | ||||||
| 
 | 
 | ||||||
|  | [[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | ||||||
|  |                                                                  u64 emulated_clock_frequency); | ||||||
|  | 
 | ||||||
| } // namespace Common
 | } // namespace Common
 | ||||||
|  | |||||||
							
								
								
									
										109
									
								
								src/common/windows/timer_resolution.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								src/common/windows/timer_resolution.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,109 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #include <windows.h> | ||||||
|  | 
 | ||||||
|  | #include "common/windows/timer_resolution.h" | ||||||
|  | 
 | ||||||
|  | extern "C" { | ||||||
|  | // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html
 | ||||||
|  | NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution, | ||||||
|  |                                            PULONG CurrentResolution); | ||||||
|  | 
 | ||||||
|  | // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html
 | ||||||
|  | NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution, | ||||||
|  |                                          PULONG CurrentResolution); | ||||||
|  | 
 | ||||||
|  | // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html
 | ||||||
|  | NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Defines for compatibility with older Windows 10 SDKs.
 | ||||||
|  | 
 | ||||||
|  | #ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED | ||||||
|  | #define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1 | ||||||
|  | #endif | ||||||
|  | #ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION | ||||||
|  | #define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | namespace Common::Windows { | ||||||
|  | 
 | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | using namespace std::chrono; | ||||||
|  | 
 | ||||||
|  | constexpr nanoseconds ToNS(ULONG hundred_ns) { | ||||||
|  |     return nanoseconds{hundred_ns * 100}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | constexpr ULONG ToHundredNS(nanoseconds ns) { | ||||||
|  |     return static_cast<ULONG>(ns.count()) / 100; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct TimerResolution { | ||||||
|  |     std::chrono::nanoseconds minimum; | ||||||
|  |     std::chrono::nanoseconds maximum; | ||||||
|  |     std::chrono::nanoseconds current; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | TimerResolution GetTimerResolution() { | ||||||
|  |     ULONG MinimumTimerResolution; | ||||||
|  |     ULONG MaximumTimerResolution; | ||||||
|  |     ULONG CurrentTimerResolution; | ||||||
|  |     NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution, | ||||||
|  |                            &CurrentTimerResolution); | ||||||
|  |     return { | ||||||
|  |         .minimum{ToNS(MinimumTimerResolution)}, | ||||||
|  |         .maximum{ToNS(MaximumTimerResolution)}, | ||||||
|  |         .current{ToNS(CurrentTimerResolution)}, | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SetHighQoS() { | ||||||
|  |     // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
 | ||||||
|  |     PROCESS_POWER_THROTTLING_STATE PowerThrottling{ | ||||||
|  |         .Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION}, | ||||||
|  |         .ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED | | ||||||
|  |                      PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION}, | ||||||
|  |         .StateMask{}, | ||||||
|  |     }; | ||||||
|  |     SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling, | ||||||
|  |                           sizeof(PROCESS_POWER_THROTTLING_STATE)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | nanoseconds GetMinimumTimerResolution() { | ||||||
|  |     return GetTimerResolution().minimum; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | nanoseconds GetMaximumTimerResolution() { | ||||||
|  |     return GetTimerResolution().maximum; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | nanoseconds GetCurrentTimerResolution() { | ||||||
|  |     return GetTimerResolution().current; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { | ||||||
|  |     // Set the timer resolution, and return the current timer resolution.
 | ||||||
|  |     const auto DesiredTimerResolution = ToHundredNS(timer_resolution); | ||||||
|  |     ULONG CurrentTimerResolution; | ||||||
|  |     NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution); | ||||||
|  |     return ToNS(CurrentTimerResolution); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | nanoseconds SetCurrentTimerResolutionToMaximum() { | ||||||
|  |     SetHighQoS(); | ||||||
|  |     return SetCurrentTimerResolution(GetMaximumTimerResolution()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SleepForOneTick() { | ||||||
|  |     LARGE_INTEGER DelayInterval{ | ||||||
|  |         .QuadPart{-1}, | ||||||
|  |     }; | ||||||
|  |     NtDelayExecution(FALSE, &DelayInterval); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Common::Windows
 | ||||||
							
								
								
									
										38
									
								
								src/common/windows/timer_resolution.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								src/common/windows/timer_resolution.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,38 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <chrono> | ||||||
|  | 
 | ||||||
|  | namespace Common::Windows { | ||||||
|  | 
 | ||||||
|  | /// Returns the minimum (least precise) supported timer resolution in nanoseconds.
 | ||||||
|  | std::chrono::nanoseconds GetMinimumTimerResolution(); | ||||||
|  | 
 | ||||||
|  | /// Returns the maximum (most precise) supported timer resolution in nanoseconds.
 | ||||||
|  | std::chrono::nanoseconds GetMaximumTimerResolution(); | ||||||
|  | 
 | ||||||
|  | /// Returns the current timer resolution in nanoseconds.
 | ||||||
|  | std::chrono::nanoseconds GetCurrentTimerResolution(); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Sets the current timer resolution. | ||||||
|  |  * | ||||||
|  |  * @param timer_resolution Timer resolution in nanoseconds. | ||||||
|  |  * | ||||||
|  |  * @returns The current timer resolution. | ||||||
|  |  */ | ||||||
|  | std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Sets the current timer resolution to the maximum supported timer resolution. | ||||||
|  |  * | ||||||
|  |  * @returns The current timer resolution. | ||||||
|  |  */ | ||||||
|  | std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum(); | ||||||
|  | 
 | ||||||
|  | /// Sleep for one tick of the current timer resolution.
 | ||||||
|  | void SleepForOneTick(); | ||||||
|  | 
 | ||||||
|  | } // namespace Common::Windows
 | ||||||
| @ -6,6 +6,7 @@ | |||||||
| #include <thread> | #include <thread> | ||||||
| 
 | 
 | ||||||
| #include "common/atomic_ops.h" | #include "common/atomic_ops.h" | ||||||
|  | #include "common/steady_clock.h" | ||||||
| #include "common/uint128.h" | #include "common/uint128.h" | ||||||
| #include "common/x64/native_clock.h" | #include "common/x64/native_clock.h" | ||||||
| 
 | 
 | ||||||
| @ -39,6 +40,12 @@ static u64 FencedRDTSC() { | |||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | template <u64 Nearest> | ||||||
|  | static u64 RoundToNearest(u64 value) { | ||||||
|  |     const auto mod = value % Nearest; | ||||||
|  |     return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| u64 EstimateRDTSCFrequency() { | u64 EstimateRDTSCFrequency() { | ||||||
|     // Discard the first result measuring the rdtsc.
 |     // Discard the first result measuring the rdtsc.
 | ||||||
|     FencedRDTSC(); |     FencedRDTSC(); | ||||||
| @ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() { | |||||||
|     FencedRDTSC(); |     FencedRDTSC(); | ||||||
| 
 | 
 | ||||||
|     // Get the current time.
 |     // Get the current time.
 | ||||||
|     const auto start_time = std::chrono::steady_clock::now(); |     const auto start_time = Common::SteadyClock::Now(); | ||||||
|     const u64 tsc_start = FencedRDTSC(); |     const u64 tsc_start = FencedRDTSC(); | ||||||
|     // Wait for 200 milliseconds.
 |     // Wait for 250 milliseconds.
 | ||||||
|     std::this_thread::sleep_for(std::chrono::milliseconds{200}); |     std::this_thread::sleep_for(std::chrono::milliseconds{250}); | ||||||
|     const auto end_time = std::chrono::steady_clock::now(); |     const auto end_time = Common::SteadyClock::Now(); | ||||||
|     const u64 tsc_end = FencedRDTSC(); |     const u64 tsc_end = FencedRDTSC(); | ||||||
|     // Calculate differences.
 |     // Calculate differences.
 | ||||||
|     const u64 timer_diff = static_cast<u64>( |     const u64 timer_diff = static_cast<u64>( | ||||||
|         std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); |         std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||||||
|     const u64 tsc_diff = tsc_end - tsc_start; |     const u64 tsc_diff = tsc_end - tsc_start; | ||||||
|     const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); |     const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||||||
|     return tsc_freq; |     return RoundToNearest<1000>(tsc_freq); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace X64 { | namespace X64 { | ||||||
|  | |||||||
| @ -6,6 +6,10 @@ | |||||||
| #include <string> | #include <string> | ||||||
| #include <tuple> | #include <tuple> | ||||||
| 
 | 
 | ||||||
|  | #ifdef _WIN32 | ||||||
|  | #include "common/windows/timer_resolution.h" | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "core/core_timing.h" | #include "core/core_timing.h" | ||||||
| #include "core/core_timing_util.h" | #include "core/core_timing_util.h" | ||||||
| @ -38,7 +42,8 @@ struct CoreTiming::Event { | |||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| CoreTiming::CoreTiming() | CoreTiming::CoreTiming() | ||||||
|     : clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} |     : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, | ||||||
|  |       event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} | ||||||
| 
 | 
 | ||||||
| CoreTiming::~CoreTiming() { | CoreTiming::~CoreTiming() { | ||||||
|     Reset(); |     Reset(); | ||||||
| @ -185,15 +190,15 @@ void CoreTiming::ResetTicks() { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u64 CoreTiming::GetCPUTicks() const { | u64 CoreTiming::GetCPUTicks() const { | ||||||
|     if (is_multicore) { |     if (is_multicore) [[likely]] { | ||||||
|         return clock->GetCPUCycles(); |         return cpu_clock->GetCPUCycles(); | ||||||
|     } |     } | ||||||
|     return ticks; |     return ticks; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u64 CoreTiming::GetClockTicks() const { | u64 CoreTiming::GetClockTicks() const { | ||||||
|     if (is_multicore) { |     if (is_multicore) [[likely]] { | ||||||
|         return clock->GetClockCycles(); |         return cpu_clock->GetClockCycles(); | ||||||
|     } |     } | ||||||
|     return CpuCyclesToClockCycles(ticks); |     return CpuCyclesToClockCycles(ticks); | ||||||
| } | } | ||||||
| @ -252,22 +257,21 @@ void CoreTiming::ThreadLoop() { | |||||||
|             const auto next_time = Advance(); |             const auto next_time = Advance(); | ||||||
|             if (next_time) { |             if (next_time) { | ||||||
|                 // There are more events left in the queue, wait until the next event.
 |                 // There are more events left in the queue, wait until the next event.
 | ||||||
|                 const auto wait_time = *next_time - GetGlobalTimeNs().count(); |                 auto wait_time = *next_time - GetGlobalTimeNs().count(); | ||||||
|                 if (wait_time > 0) { |                 if (wait_time > 0) { | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
|                     // Assume a timer resolution of 1ms.
 |                     const auto timer_resolution_ns = | ||||||
|                     static constexpr s64 TimerResolutionNS = 1000000; |                         Common::Windows::GetCurrentTimerResolution().count(); | ||||||
| 
 | 
 | ||||||
|                     // Sleep in discrete intervals of the timer resolution, and spin the rest.
 |                     while (!paused && !event.IsSet() && wait_time > 0) { | ||||||
|                     const auto sleep_time = wait_time - (wait_time % TimerResolutionNS); |                         wait_time = *next_time - GetGlobalTimeNs().count(); | ||||||
|                     if (sleep_time > 0) { |  | ||||||
|                         event.WaitFor(std::chrono::nanoseconds(sleep_time)); |  | ||||||
|                     } |  | ||||||
| 
 | 
 | ||||||
|                     while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) { |                         if (wait_time >= timer_resolution_ns) { | ||||||
|                         // Yield to reduce thread starvation.
 |                             Common::Windows::SleepForOneTick(); | ||||||
|  |                         } else { | ||||||
|                             std::this_thread::yield(); |                             std::this_thread::yield(); | ||||||
|                         } |                         } | ||||||
|  |                     } | ||||||
| 
 | 
 | ||||||
|                     if (event.IsSet()) { |                     if (event.IsSet()) { | ||||||
|                         event.Reset(); |                         event.Reset(); | ||||||
| @ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() { | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         paused_set = true; |         paused_set = true; | ||||||
|         clock->Pause(true); |         event_clock->Pause(true); | ||||||
|         pause_event.Wait(); |         pause_event.Wait(); | ||||||
|         clock->Pause(false); |         event_clock->Pause(false); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -303,16 +307,23 @@ void CoreTiming::Reset() { | |||||||
|     has_started = false; |     has_started = false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { | ||||||
|  |     if (is_multicore) [[likely]] { | ||||||
|  |         return cpu_clock->GetTimeNS(); | ||||||
|  |     } | ||||||
|  |     return CyclesToNs(ticks); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { | std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { | ||||||
|     if (is_multicore) { |     if (is_multicore) [[likely]] { | ||||||
|         return clock->GetTimeNS(); |         return event_clock->GetTimeNS(); | ||||||
|     } |     } | ||||||
|     return CyclesToNs(ticks); |     return CyclesToNs(ticks); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { | std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { | ||||||
|     if (is_multicore) { |     if (is_multicore) [[likely]] { | ||||||
|         return clock->GetTimeUS(); |         return event_clock->GetTimeUS(); | ||||||
|     } |     } | ||||||
|     return CyclesToUs(ticks); |     return CyclesToUs(ticks); | ||||||
| } | } | ||||||
|  | |||||||
| @ -122,6 +122,9 @@ public: | |||||||
|     /// Returns current time in emulated in Clock cycles
 |     /// Returns current time in emulated in Clock cycles
 | ||||||
|     u64 GetClockTicks() const; |     u64 GetClockTicks() const; | ||||||
| 
 | 
 | ||||||
|  |     /// Returns current time in nanoseconds.
 | ||||||
|  |     std::chrono::nanoseconds GetCPUTimeNs() const; | ||||||
|  | 
 | ||||||
|     /// Returns current time in microseconds.
 |     /// Returns current time in microseconds.
 | ||||||
|     std::chrono::microseconds GetGlobalTimeUs() const; |     std::chrono::microseconds GetGlobalTimeUs() const; | ||||||
| 
 | 
 | ||||||
| @ -139,7 +142,8 @@ private: | |||||||
| 
 | 
 | ||||||
|     void Reset(); |     void Reset(); | ||||||
| 
 | 
 | ||||||
|     std::unique_ptr<Common::WallClock> clock; |     std::unique_ptr<Common::WallClock> cpu_clock; | ||||||
|  |     std::unique_ptr<Common::WallClock> event_clock; | ||||||
| 
 | 
 | ||||||
|     s64 global_timer = 0; |     s64 global_timer = 0; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -13,10 +13,8 @@ namespace Core { | |||||||
| 
 | 
 | ||||||
| namespace Hardware { | namespace Hardware { | ||||||
| 
 | 
 | ||||||
| // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 | constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
 | ||||||
| // The exact value used is of course unverified.
 | constexpr u64 CNTFREQ = 19'200'000;            // CNTPCT_EL0 Frequency = 19.2 MHz
 | ||||||
| constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
 |  | ||||||
| constexpr u64 CNTFREQ = 19200000;           // Switch's hardware clock speed
 |  | ||||||
| constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores
 | constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores
 | ||||||
| 
 | 
 | ||||||
| // Virtual to Physical core map.
 | // Virtual to Physical core map.
 | ||||||
|  | |||||||
| @ -197,7 +197,7 @@ struct GPU::Impl { | |||||||
|         constexpr u64 gpu_ticks_num = 384; |         constexpr u64 gpu_ticks_num = 384; | ||||||
|         constexpr u64 gpu_ticks_den = 625; |         constexpr u64 gpu_ticks_den = 625; | ||||||
| 
 | 
 | ||||||
|         u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); |         u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); | ||||||
|         if (Settings::values.use_fast_gpu_time.GetValue()) { |         if (Settings::values.use_fast_gpu_time.GetValue()) { | ||||||
|             nanoseconds /= 256; |             nanoseconds /= 256; | ||||||
|         } |         } | ||||||
|  | |||||||
| @ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | |||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/scm_rev.h" | #include "common/scm_rev.h" | ||||||
| #include "common/scope_exit.h" | #include "common/scope_exit.h" | ||||||
|  | #ifdef _WIN32 | ||||||
|  | #include "common/windows/timer_resolution.h" | ||||||
|  | #endif | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
| #include "common/x64/cpu_detect.h" | #include "common/x64/cpu_detect.h" | ||||||
| #endif | #endif | ||||||
| @ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan | |||||||
|     LOG_INFO(Frontend, "Host RAM: {:.2f} GiB", |     LOG_INFO(Frontend, "Host RAM: {:.2f} GiB", | ||||||
|              Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB}); |              Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB}); | ||||||
|     LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB}); |     LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB}); | ||||||
|  | #ifdef _WIN32 | ||||||
|  |     LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms", | ||||||
|  |              std::chrono::duration_cast<std::chrono::duration<f64, std::milli>>( | ||||||
|  |                  Common::Windows::SetCurrentTimerResolutionToMaximum()) | ||||||
|  |                  .count()); | ||||||
|  | #endif | ||||||
|     UpdateWindowTitle(); |     UpdateWindowTitle(); | ||||||
| 
 | 
 | ||||||
|     show(); |     show(); | ||||||
|  | |||||||
| @ -42,6 +42,8 @@ | |||||||
| #include <windows.h> | #include <windows.h> | ||||||
| 
 | 
 | ||||||
| #include <shellapi.h> | #include <shellapi.h> | ||||||
|  | 
 | ||||||
|  | #include "common/windows/timer_resolution.h" | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #undef _UNICODE | #undef _UNICODE | ||||||
| @ -314,6 +316,8 @@ int main(int argc, char** argv) { | |||||||
| 
 | 
 | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
|     LocalFree(argv_w); |     LocalFree(argv_w); | ||||||
|  | 
 | ||||||
|  |     Common::Windows::SetCurrentTimerResolutionToMaximum(); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|     MicroProfileOnThreadCreate("EmuThread"); |     MicroProfileOnThreadCreate("EmuThread"); | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user